* config/arm/arm.c (arm_expand_builtin): Remove redundant declaration.
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "real.h"
35 #include "insn-config.h"
36 #include "conditions.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "reload.h"
41 #include "function.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "ggc.h"
48 #include "except.h"
49 #include "c-pragma.h"
50 #include "integrate.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
63
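/* Hook that a language front end may set to output language-specific
   object attributes.  */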
64 void (*arm_lang_output_object_attributes_hook)(void);
65
66 /* Forward function declarations. */
67 static int arm_compute_static_chain_stack_bytes (void);
68 static arm_stack_offsets *arm_get_frame_offsets (void);
69 static void arm_add_gc_roots (void);
70 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
71 HOST_WIDE_INT, rtx, rtx, int, int);
72 static unsigned bit_count (unsigned long);
73 static int arm_address_register_rtx_p (rtx, int);
74 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
75 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
76 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
77 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
78 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
79 inline static int thumb1_index_register_rtx_p (rtx, int);
80 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
81 static int thumb_far_jump_used_p (void);
82 static bool thumb_force_lr_save (void);
83 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
84 static rtx emit_sfm (int, int);
85 static unsigned arm_size_return_regs (void);
86 static bool arm_assemble_integer (rtx, unsigned int, int);
87 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
88 static arm_cc get_arm_condition_code (rtx);
89 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
90 static rtx is_jump_table (rtx);
91 static const char *output_multi_immediate (rtx *, const char *, const char *,
92 int, HOST_WIDE_INT);
93 static const char *shift_op (rtx, HOST_WIDE_INT *);
94 static struct machine_function *arm_init_machine_status (void);
95 static void thumb_exit (FILE *, int);
96 static rtx is_jump_table (rtx);
97 static HOST_WIDE_INT get_jump_table_size (rtx);
98 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
99 static Mnode *add_minipool_forward_ref (Mfix *);
100 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
101 static Mnode *add_minipool_backward_ref (Mfix *);
102 static void assign_minipool_offsets (Mfix *);
103 static void arm_print_value (FILE *, rtx);
104 static void dump_minipool (rtx);
105 static int arm_barrier_cost (rtx);
106 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
107 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
108 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
109 rtx);
110 static void arm_reorg (void);
111 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
112 static unsigned long arm_compute_save_reg0_reg12_mask (void);
113 static unsigned long arm_compute_save_reg_mask (void);
114 static unsigned long arm_isr_value (tree);
115 static unsigned long arm_compute_func_type (void);
116 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
117 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
118 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
119 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
120 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
121 #endif
122 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
123 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
124 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
125 static int arm_comp_type_attributes (const_tree, const_tree);
126 static void arm_set_default_type_attributes (tree);
127 static int arm_adjust_cost (rtx, rtx, rtx, int);
128 static int count_insns_for_constant (HOST_WIDE_INT, int);
129 static int arm_get_strip_length (int);
130 static bool arm_function_ok_for_sibcall (tree, tree);
131 static enum machine_mode arm_promote_function_mode (const_tree,
132 enum machine_mode, int *,
133 const_tree, int);
134 static bool arm_return_in_memory (const_tree, const_tree);
135 static rtx arm_function_value (const_tree, const_tree, bool);
136 static rtx arm_libcall_value (enum machine_mode, const_rtx);
137
138 static void arm_internal_label (FILE *, const char *, unsigned long);
139 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
140 tree);
141 static bool arm_have_conditional_execution (void);
142 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
143 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
144 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
145 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
146 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
147 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
148 static bool arm_rtx_costs (rtx, int, int, int *, bool);
149 static int arm_address_cost (rtx, bool);
150 static bool arm_memory_load_p (rtx);
151 static bool arm_cirrus_insn_p (rtx);
152 static void cirrus_reorg (rtx);
153 static void arm_init_builtins (void);
154 static void arm_init_iwmmxt_builtins (void);
155 static rtx safe_vector_operand (rtx, enum machine_mode);
156 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
157 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
158 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
159 static void emit_constant_insn (rtx cond, rtx pattern);
160 static rtx emit_set_insn (rtx, rtx);
161 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
162 tree, bool);
163 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
164 const_tree);
165 static int aapcs_select_return_coproc (const_tree, const_tree);
166
167 #ifdef OBJECT_FORMAT_ELF
168 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
169 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
170 #endif
171 #ifndef ARM_PE
172 static void arm_encode_section_info (tree, rtx, int);
173 #endif
174
175 static void arm_file_end (void);
176 static void arm_file_start (void);
177
178 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
179 tree, int *, int);
180 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
181 enum machine_mode, const_tree, bool);
182 static bool arm_promote_prototypes (const_tree);
183 static bool arm_default_short_enums (void);
184 static bool arm_align_anon_bitfield (void);
185 static bool arm_return_in_msb (const_tree);
186 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
187 static bool arm_return_in_memory (const_tree, const_tree);
188 #ifdef TARGET_UNWIND_INFO
189 static void arm_unwind_emit (FILE *, rtx);
190 static bool arm_output_ttype (rtx);
191 #endif
192 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
193 static rtx arm_dwarf_register_span (rtx);
194
195 static tree arm_cxx_guard_type (void);
196 static bool arm_cxx_guard_mask_bit (void);
197 static tree arm_get_cookie_size (tree);
198 static bool arm_cookie_has_size (void);
199 static bool arm_cxx_cdtor_returns_this (void);
200 static bool arm_cxx_key_method_may_be_inline (void);
201 static void arm_cxx_determine_class_data_visibility (tree);
202 static bool arm_cxx_class_data_always_comdat (void);
203 static bool arm_cxx_use_aeabi_atexit (void);
204 static void arm_init_libfuncs (void);
205 static tree arm_build_builtin_va_list (void);
206 static void arm_expand_builtin_va_start (tree, rtx);
207 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
208 static bool arm_handle_option (size_t, const char *, int);
209 static void arm_target_help (void);
210 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
211 static bool arm_cannot_copy_insn_p (rtx);
212 static bool arm_tls_symbol_p (rtx x);
213 static int arm_issue_rate (void);
214 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
215 static bool arm_allocate_stack_slots_for_args (void);
216 static const char *arm_invalid_parameter_type (const_tree t);
217 static const char *arm_invalid_return_type (const_tree t);
218 static tree arm_promoted_type (const_tree t);
219 static tree arm_convert_to_type (tree type, tree expr);
220 static bool arm_scalar_mode_supported_p (enum machine_mode);
221 static bool arm_frame_pointer_required (void);
222 static bool arm_can_eliminate (const int, const int);
223 static void arm_asm_trampoline_template (FILE *);
224 static void arm_trampoline_init (rtx, tree, rtx);
225 static rtx arm_trampoline_adjust_address (rtx);
226 static rtx arm_pic_static_addr (rtx orig, rtx reg);
227
228 \f
229 /* Table of machine attributes. */
230 static const struct attribute_spec arm_attribute_table[] =
231 {
232 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
233 /* Function calls made to this symbol must be done indirectly, because
234 it may lie outside of the 26 bit addressing range of a normal function
235 call. */
236 { "long_call", 0, 0, false, true, true, NULL },
237 /* Whereas these functions are always known to reside within the 26 bit
238 addressing range. */
239 { "short_call", 0, 0, false, true, true, NULL },
240 /* Specify the procedure call conventions for a function. */
241 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
242 /* Interrupt Service Routines have special prologue and epilogue requirements. */
243 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
244 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
245 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
246 #ifdef ARM_PE
247 /* ARM/PE has three new attributes:
248 interfacearm - ?
249 dllexport - for exporting a function/variable that will live in a dll
250 dllimport - for importing a function/variable from a dll
251
252 Microsoft allows multiple declspecs in one __declspec, separating
253 them with spaces. We do NOT support this. Instead, use __declspec
254 multiple times.
255 */
256 { "dllimport", 0, 0, true, false, false, NULL },
257 { "dllexport", 0, 0, true, false, false, NULL },
258 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
259 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
260 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
261 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
262 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
263 #endif
264 { NULL, 0, 0, false, false, false, NULL }
265 };
266 \f
267 /* Initialize the GCC target structure. */
268 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
269 #undef TARGET_MERGE_DECL_ATTRIBUTES
270 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
271 #endif
272
273 #undef TARGET_LEGITIMIZE_ADDRESS
274 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
275
276 #undef TARGET_ATTRIBUTE_TABLE
277 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
278
279 #undef TARGET_ASM_FILE_START
280 #define TARGET_ASM_FILE_START arm_file_start
281 #undef TARGET_ASM_FILE_END
282 #define TARGET_ASM_FILE_END arm_file_end
283
284 #undef TARGET_ASM_ALIGNED_SI_OP
285 #define TARGET_ASM_ALIGNED_SI_OP NULL
286 #undef TARGET_ASM_INTEGER
287 #define TARGET_ASM_INTEGER arm_assemble_integer
288
289 #undef TARGET_ASM_FUNCTION_PROLOGUE
290 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
291
292 #undef TARGET_ASM_FUNCTION_EPILOGUE
293 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
294
295 #undef TARGET_DEFAULT_TARGET_FLAGS
296 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
297 #undef TARGET_HANDLE_OPTION
298 #define TARGET_HANDLE_OPTION arm_handle_option
299 #undef TARGET_HELP
300 #define TARGET_HELP arm_target_help
301
302 #undef TARGET_COMP_TYPE_ATTRIBUTES
303 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
304
305 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
306 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
307
308 #undef TARGET_SCHED_ADJUST_COST
309 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
310
311 #undef TARGET_ENCODE_SECTION_INFO
312 #ifdef ARM_PE
313 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
314 #else
315 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
316 #endif
317
318 #undef TARGET_STRIP_NAME_ENCODING
319 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
320
321 #undef TARGET_ASM_INTERNAL_LABEL
322 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
323
324 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
325 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
326
327 #undef TARGET_FUNCTION_VALUE
328 #define TARGET_FUNCTION_VALUE arm_function_value
329
330 #undef TARGET_LIBCALL_VALUE
331 #define TARGET_LIBCALL_VALUE arm_libcall_value
332
333 #undef TARGET_ASM_OUTPUT_MI_THUNK
334 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
335 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
336 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
337
338 #undef TARGET_RTX_COSTS
339 #define TARGET_RTX_COSTS arm_rtx_costs
340 #undef TARGET_ADDRESS_COST
341 #define TARGET_ADDRESS_COST arm_address_cost
342
343 #undef TARGET_SHIFT_TRUNCATION_MASK
344 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
345 #undef TARGET_VECTOR_MODE_SUPPORTED_P
346 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
347
348 #undef TARGET_MACHINE_DEPENDENT_REORG
349 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
350
351 #undef TARGET_INIT_BUILTINS
352 #define TARGET_INIT_BUILTINS arm_init_builtins
353 #undef TARGET_EXPAND_BUILTIN
354 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
355
356 #undef TARGET_INIT_LIBFUNCS
357 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
358
359 #undef TARGET_PROMOTE_FUNCTION_MODE
360 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
361 #undef TARGET_PROMOTE_PROTOTYPES
362 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
363 #undef TARGET_PASS_BY_REFERENCE
364 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
365 #undef TARGET_ARG_PARTIAL_BYTES
366 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
367
368 #undef TARGET_SETUP_INCOMING_VARARGS
369 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
370
371 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
372 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
373
374 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
375 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
376 #undef TARGET_TRAMPOLINE_INIT
377 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
378 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
379 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
380
381 #undef TARGET_DEFAULT_SHORT_ENUMS
382 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
383
384 #undef TARGET_ALIGN_ANON_BITFIELD
385 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
386
387 #undef TARGET_NARROW_VOLATILE_BITFIELD
388 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
389
390 #undef TARGET_CXX_GUARD_TYPE
391 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
392
393 #undef TARGET_CXX_GUARD_MASK_BIT
394 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
395
396 #undef TARGET_CXX_GET_COOKIE_SIZE
397 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
398
399 #undef TARGET_CXX_COOKIE_HAS_SIZE
400 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
401
402 #undef TARGET_CXX_CDTOR_RETURNS_THIS
403 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
404
405 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
406 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
407
408 #undef TARGET_CXX_USE_AEABI_ATEXIT
409 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
410
411 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
412 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
413 arm_cxx_determine_class_data_visibility
414
415 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
416 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
417
418 #undef TARGET_RETURN_IN_MSB
419 #define TARGET_RETURN_IN_MSB arm_return_in_msb
420
421 #undef TARGET_RETURN_IN_MEMORY
422 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
423
424 #undef TARGET_MUST_PASS_IN_STACK
425 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
426
427 #ifdef TARGET_UNWIND_INFO
428 #undef TARGET_UNWIND_EMIT
429 #define TARGET_UNWIND_EMIT arm_unwind_emit
430
431 /* EABI unwinding tables use a different format for the typeinfo tables. */
432 #undef TARGET_ASM_TTYPE
433 #define TARGET_ASM_TTYPE arm_output_ttype
434
435 #undef TARGET_ARM_EABI_UNWINDER
436 #define TARGET_ARM_EABI_UNWINDER true
437 #endif /* TARGET_UNWIND_INFO */
438
439 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
440 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
441
442 #undef TARGET_DWARF_REGISTER_SPAN
443 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
444
445 #undef TARGET_CANNOT_COPY_INSN_P
446 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
447
448 #ifdef HAVE_AS_TLS
449 #undef TARGET_HAVE_TLS
450 #define TARGET_HAVE_TLS true
451 #endif
452
453 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
454 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
455
456 #undef TARGET_CANNOT_FORCE_CONST_MEM
457 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
458
459 #undef TARGET_MAX_ANCHOR_OFFSET
460 #define TARGET_MAX_ANCHOR_OFFSET 4095
461
462 /* The minimum is set such that the total size of the block
 463 for a particular anchor is 4088 + 1 + 4095 bytes, which is
464 divisible by eight, ensuring natural spacing of anchors. */
465 #undef TARGET_MIN_ANCHOR_OFFSET
466 #define TARGET_MIN_ANCHOR_OFFSET -4088
467
468 #undef TARGET_SCHED_ISSUE_RATE
469 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
470
471 #undef TARGET_MANGLE_TYPE
472 #define TARGET_MANGLE_TYPE arm_mangle_type
473
474 #undef TARGET_BUILD_BUILTIN_VA_LIST
475 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
476 #undef TARGET_EXPAND_BUILTIN_VA_START
477 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
478 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
479 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
480
481 #ifdef HAVE_AS_TLS
482 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
483 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
484 #endif
485
486 #undef TARGET_LEGITIMATE_ADDRESS_P
487 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
488
489 #undef TARGET_INVALID_PARAMETER_TYPE
490 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
491
492 #undef TARGET_INVALID_RETURN_TYPE
493 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
494
495 #undef TARGET_PROMOTED_TYPE
496 #define TARGET_PROMOTED_TYPE arm_promoted_type
497
498 #undef TARGET_CONVERT_TO_TYPE
499 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
500
501 #undef TARGET_SCALAR_MODE_SUPPORTED_P
502 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
503
504 #undef TARGET_FRAME_POINTER_REQUIRED
505 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
506
507 #undef TARGET_CAN_ELIMINATE
508 #define TARGET_CAN_ELIMINATE arm_can_eliminate
509
510 struct gcc_target targetm = TARGET_INITIALIZER;
511 \f
512 /* Obstack for minipool constant handling. */
513 static struct obstack minipool_obstack;
514 static char * minipool_startobj;
515
516 /* The maximum number of insns skipped which
517 will be conditionalised if possible. */
518 static int max_insns_skipped = 5;
519
520 extern FILE * asm_out_file;
521
522 /* True if we are currently building a constant table. */
523 int making_const_table;
524
525 /* The processor for which instructions should be scheduled. */
526 enum processor_type arm_tune = arm_none;
527
528 /* The current tuning set. */
529 const struct tune_params *current_tune;
530
531 /* The default processor used if not overridden by commandline. */
532 static enum processor_type arm_default_cpu = arm_none;
533
534 /* Which floating point hardware to schedule for. */
535 int arm_fpu_attr;
536
 537 /* Which floating point hardware to use. */
538 const struct arm_fpu_desc *arm_fpu_desc;
539
540 /* Whether to use floating point hardware. */
541 enum float_abi_type arm_float_abi;
542
543 /* Which __fp16 format to use. */
544 enum arm_fp16_format_type arm_fp16_format;
545
546 /* Which ABI to use. */
547 enum arm_abi_type arm_abi;
548
549 /* Which thread pointer model to use. */
550 enum arm_tp_type target_thread_pointer = TP_AUTO;
551
552 /* Used to parse -mstructure_size_boundary command line option. */
553 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
554
555 /* Used for Thumb call_via trampolines. */
556 rtx thumb_call_via_label[14];
557 static int thumb_call_reg_needed;
558
559 /* Bit values used to identify processor capabilities. */
560 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
561 #define FL_ARCH3M (1 << 1) /* Extended multiply */
562 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
563 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
564 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
565 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
566 #define FL_THUMB (1 << 6) /* Thumb aware */
567 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
568 #define FL_STRONG (1 << 8) /* StrongARM */
569 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
570 #define FL_XSCALE (1 << 10) /* XScale */
571 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
572 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
573 media instructions. */
574 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
575 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
576 Note: ARM6 & 7 derivatives only. */
577 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
578 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
579 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
580 profile. */
581 #define FL_DIV (1 << 18) /* Hardware divide. */
582 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
583 #define FL_NEON (1 << 20) /* Neon instructions. */
584 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
585 architecture. */
586
587 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
588
589 #define FL_FOR_ARCH2 FL_NOTM
590 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
591 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
592 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
593 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
594 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
595 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
596 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
597 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
598 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
599 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
600 #define FL_FOR_ARCH6J FL_FOR_ARCH6
601 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
602 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
603 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
604 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
605 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
606 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
607 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
608 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
609 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
610 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
611
612 /* The bits in this mask specify which
613 instructions we are allowed to generate. */
614 static unsigned long insn_flags = 0;
615
616 /* The bits in this mask specify which instruction scheduling options should
617 be used. */
618 static unsigned long tune_flags = 0;
619
620 /* The following are used in the arm.md file as equivalents to bits
621 in the above two flag variables. */
622
623 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
624 int arm_arch3m = 0;
625
626 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
627 int arm_arch4 = 0;
628
629 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
630 int arm_arch4t = 0;
631
632 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
633 int arm_arch5 = 0;
634
635 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
636 int arm_arch5e = 0;
637
638 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
639 int arm_arch6 = 0;
640
641 /* Nonzero if this chip supports the ARM 6K extensions. */
642 int arm_arch6k = 0;
643
644 /* Nonzero if instructions not present in the 'M' profile can be used. */
645 int arm_arch_notm = 0;
646
647 /* Nonzero if instructions present in ARMv7E-M can be used. */
648 int arm_arch7em = 0;
649
650 /* Nonzero if this chip can benefit from load scheduling. */
651 int arm_ld_sched = 0;
652
653 /* Nonzero if this chip is a StrongARM. */
654 int arm_tune_strongarm = 0;
655
656 /* Nonzero if this chip is a Cirrus variant. */
657 int arm_arch_cirrus = 0;
658
659 /* Nonzero if this chip supports Intel Wireless MMX technology. */
660 int arm_arch_iwmmxt = 0;
661
662 /* Nonzero if this chip is an XScale. */
663 int arm_arch_xscale = 0;
664
 665 /* Nonzero if tuning for XScale. */
666 int arm_tune_xscale = 0;
667
668 /* Nonzero if we want to tune for stores that access the write-buffer.
669 This typically means an ARM6 or ARM7 with MMU or MPU. */
670 int arm_tune_wbuf = 0;
671
672 /* Nonzero if tuning for Cortex-A9. */
673 int arm_tune_cortex_a9 = 0;
674
675 /* Nonzero if generating Thumb instructions. */
676 int thumb_code = 0;
677
678 /* Nonzero if we should define __THUMB_INTERWORK__ in the
679 preprocessor.
680 XXX This is a bit of a hack, it's intended to help work around
681 problems in GLD which doesn't understand that armv5t code is
682 interworking clean. */
683 int arm_cpp_interwork = 0;
684
685 /* Nonzero if chip supports Thumb 2. */
686 int arm_arch_thumb2;
687
688 /* Nonzero if chip supports integer division instruction. */
689 int arm_arch_hwdiv;
690
691 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
692 must report the mode of the memory reference from PRINT_OPERAND to
693 PRINT_OPERAND_ADDRESS. */
694 enum machine_mode output_memory_reference_mode;
695
696 /* The register number to be used for the PIC offset register. */
697 unsigned arm_pic_register = INVALID_REGNUM;
698
699 /* Set to 1 after arm_reorg has started. Reset to start at the start of
700 the next function. */
701 static int after_arm_reorg = 0;
702
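/* The procedure-calling-standard (PCS) variant used by default for the
   current compilation.  */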
703 static enum arm_pcs arm_pcs_default;
704
705 /* For an explanation of these variables, see final_prescan_insn below. */
706 int arm_ccfsm_state;
707 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
708 enum arm_cond_code arm_current_cc;
709 rtx arm_target_insn;
710 int arm_target_label;
711 /* The number of conditionally executed insns, including the current insn. */
712 int arm_condexec_count = 0;
713 /* A bitmask specifying the patterns for the IT block.
714 Zero means do not output an IT block before this insn. */
715 int arm_condexec_mask = 0;
716 /* The number of bits used in arm_condexec_mask. */
717 int arm_condexec_masklen = 0;
718
719 /* The condition codes of the ARM, and the inverse function. */
720 static const char * const arm_condition_codes[] =
721 {
722 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
723 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
724 };
725
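/* Assembler mnemonic for a left shift: "lsl" in unified syntax, "asl" in the
   old divided syntax.  */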
726 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
727 #define streq(string1, string2) (strcmp (string1, string2) == 0)
728
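/* Mask of the low registers (r0-r7) available as work registers in Thumb-2,
   excluding the frame pointer, stack pointer, PC and the PIC register.  */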
729 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
730 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
731 | (1 << PIC_OFFSET_TABLE_REGNUM)))
732 \f
733 /* Initialization code. */
734
735 struct processors
736 {
737 const char *const name;
738 enum processor_type core;
739 const char *arch;
740 const unsigned long flags;
741 const struct tune_params *const tune;
742 };
743
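/* Per-core tuning tables: the RTX cost function to use and the constant
   limit returned by arm_constant_limit.  */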
744 const struct tune_params arm_slowmul_tune =
745 {
746 arm_slowmul_rtx_costs,
747 3
748 };
749
750 const struct tune_params arm_fastmul_tune =
751 {
752 arm_fastmul_rtx_costs,
753 1
754 };
755
756 const struct tune_params arm_xscale_tune =
757 {
758 arm_xscale_rtx_costs,
759 2
760 };
761
762 const struct tune_params arm_9e_tune =
763 {
764 arm_9e_rtx_costs,
765 1
766 };
767
768 /* Not all of these give usefully different compilation alternatives,
769 but there is no simple way of generalizing them. */
770 static const struct processors all_cores[] =
771 {
772 /* ARM Cores */
773 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
774 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
775 #include "arm-cores.def"
776 #undef ARM_CORE
777 {NULL, arm_none, NULL, 0, NULL}
778 };
779
780 static const struct processors all_architectures[] =
781 {
782 /* ARM Architectures */
783 /* We don't specify tuning costs here as it will be figured out
784 from the core. */
785
786 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
787 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
788 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
789 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
790 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
791 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
792 implementations that support it, so we will leave it out for now. */
793 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
794 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
795 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
796 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
797 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
798 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
799 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
800 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
801 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
802 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
803 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
804 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
805 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
806 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
807 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
808 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
809 {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
810 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
811 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
812 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
813 {NULL, arm_none, NULL, 0 , NULL}
814 };
815
816 struct arm_cpu_select
817 {
818 const char * string;
819 const char * name;
820 const struct processors * processors;
821 };
822
823 /* This is a magic structure. The 'string' field is magically filled in
824 with a pointer to the value specified by the user on the command line
825 assuming that the user has specified such a value. */
826
827 static struct arm_cpu_select arm_select[] =
828 {
829 /* string name processors */
830 { NULL, "-mcpu=", all_cores },
831 { NULL, "-march=", all_architectures },
832 { NULL, "-mtune=", all_cores }
833 };
834
835 /* Defines representing the indexes into the above table. */
836 #define ARM_OPT_SET_CPU 0
837 #define ARM_OPT_SET_ARCH 1
838 #define ARM_OPT_SET_TUNE 2
839
840 /* The name of the preprocessor macro to define for this architecture. */
841
842 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
843
844 /* Available values for -mfpu=. */
845
846 static const struct arm_fpu_desc all_fpus[] =
847 {
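/* name, FP model, revision, VFP register bank, NEON, FP16.  */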
848 {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
849 {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
850 {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
851 {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
852 {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
853 {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
854 {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
855 {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
856 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
857 {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
858 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
859 {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
860 {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
861 {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
862 {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
863 {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
864 {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
865 /* Compatibility aliases. */
866 {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
867 };
868
869
870 struct float_abi
871 {
872 const char * name;
873 enum float_abi_type abi_type;
874 };
875
876
877 /* Available values for -mfloat-abi=. */
878
879 static const struct float_abi all_float_abis[] =
880 {
881 {"soft", ARM_FLOAT_ABI_SOFT},
882 {"softfp", ARM_FLOAT_ABI_SOFTFP},
883 {"hard", ARM_FLOAT_ABI_HARD}
884 };
885
886
887 struct fp16_format
888 {
889 const char *name;
890 enum arm_fp16_format_type fp16_format_type;
891 };
892
893
894 /* Available values for -mfp16-format=. */
895
896 static const struct fp16_format all_fp16_formats[] =
897 {
898 {"none", ARM_FP16_FORMAT_NONE},
899 {"ieee", ARM_FP16_FORMAT_IEEE},
900 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
901 };
902
903
904 struct abi_name
905 {
906 const char *name;
907 enum arm_abi_type abi_type;
908 };
909
910
911 /* Available values for -mabi=. */
912
913 static const struct abi_name arm_all_abis[] =
914 {
915 {"apcs-gnu", ARM_ABI_APCS},
916 {"atpcs", ARM_ABI_ATPCS},
917 {"aapcs", ARM_ABI_AAPCS},
918 {"iwmmxt", ARM_ABI_IWMMXT},
919 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
920 };
921
922 /* Supported TLS relocations. */
923
924 enum tls_reloc {
 925 TLS_GD32, /* General dynamic.  */
 926 TLS_LDM32, /* Local dynamic: load module base.  */
 927 TLS_LDO32, /* Local dynamic: offset from module base.  */
 928 TLS_IE32, /* Initial exec.  */
 929 TLS_LE32 /* Local exec.  */
930 };
931
932 /* The maximum number of insns to be used when loading a constant. */
933 inline static int
934 arm_constant_limit (bool size_p)
935 {
936 return size_p ? 1 : current_tune->constant_limit;
937 }
938
939 /* Emit an insn that's a simple single-set. Both the operands must be known
940 to be valid. */
941 inline static rtx
942 emit_set_insn (rtx x, rtx y)
943 {
944 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
945 }
946
947 /* Return the number of bits set in VALUE. */
948 static unsigned
949 bit_count (unsigned long value)
950 {
951 unsigned long count = 0;
952
953 while (value)
954 {
955 count++;
956 value &= value - 1; /* Clear the least-significant set bit. */
957 }
958
959 return count;
960 }
961
962 /* Set up library functions unique to ARM. */
963
964 static void
965 arm_init_libfuncs (void)
966 {
967 /* There are no special library functions unless we are using the
968 ARM BPABI. */
969 if (!TARGET_BPABI)
970 return;
971
972 /* The functions below are described in Section 4 of the "Run-Time
973 ABI for the ARM architecture", Version 1.0. */
974
975 /* Double-precision floating-point arithmetic. Table 2. */
976 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
977 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
978 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
979 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
980 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
981
982 /* Double-precision comparisons. Table 3. */
983 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
984 set_optab_libfunc (ne_optab, DFmode, NULL);
985 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
986 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
987 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
988 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
989 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
990
991 /* Single-precision floating-point arithmetic. Table 4. */
992 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
993 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
994 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
995 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
996 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
997
998 /* Single-precision comparisons. Table 5. */
999 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1000 set_optab_libfunc (ne_optab, SFmode, NULL);
1001 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1002 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1003 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1004 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1005 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1006
1007 /* Floating-point to integer conversions. Table 6. */
1008 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1009 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1010 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1011 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1012 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1013 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1014 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1015 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1016
1017 /* Conversions between floating types. Table 7. */
1018 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1019 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1020
1021 /* Integer to floating-point conversions. Table 8. */
1022 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1023 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1024 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1025 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1026 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1027 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1028 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1029 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1030
1031 /* Long long. Table 9. */
1032 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1033 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1034 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1035 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1036 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1037 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1038 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1039 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1040
1041 /* Integer (32/32->32) division. \S 4.3.1. */
1042 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1043 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1044
1045 /* The divmod functions are designed so that they can be used for
1046 plain division, even though they return both the quotient and the
1047 remainder. The quotient is returned in the usual location (i.e.,
1048 r0 for SImode, {r0, r1} for DImode), just as would be expected
1049 for an ordinary division routine. Because the AAPCS calling
1050 conventions specify that all of { r0, r1, r2, r3 } are
 1051 call-clobbered registers, there is no need to tell the compiler
1052 explicitly that those registers are clobbered by these
1053 routines. */
1054 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1055 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1056
1057 /* For SImode division the ABI provides div-without-mod routines,
1058 which are faster. */
1059 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1060 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1061
1062 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1063 divmod libcalls instead. */
1064 set_optab_libfunc (smod_optab, DImode, NULL);
1065 set_optab_libfunc (umod_optab, DImode, NULL);
1066 set_optab_libfunc (smod_optab, SImode, NULL);
1067 set_optab_libfunc (umod_optab, SImode, NULL);
1068
1069 /* Half-precision float operations. The compiler handles all operations
 1070 with NULL libfuncs by converting to SFmode. */
1071 switch (arm_fp16_format)
1072 {
1073 case ARM_FP16_FORMAT_IEEE:
1074 case ARM_FP16_FORMAT_ALTERNATIVE:
1075
1076 /* Conversions. */
1077 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1078 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1079 ? "__gnu_f2h_ieee"
1080 : "__gnu_f2h_alternative"));
1081 set_conv_libfunc (sext_optab, SFmode, HFmode,
1082 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1083 ? "__gnu_h2f_ieee"
1084 : "__gnu_h2f_alternative"));
1085
1086 /* Arithmetic. */
1087 set_optab_libfunc (add_optab, HFmode, NULL);
1088 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1089 set_optab_libfunc (smul_optab, HFmode, NULL);
1090 set_optab_libfunc (neg_optab, HFmode, NULL);
1091 set_optab_libfunc (sub_optab, HFmode, NULL);
1092
1093 /* Comparisons. */
1094 set_optab_libfunc (eq_optab, HFmode, NULL);
1095 set_optab_libfunc (ne_optab, HFmode, NULL);
1096 set_optab_libfunc (lt_optab, HFmode, NULL);
1097 set_optab_libfunc (le_optab, HFmode, NULL);
1098 set_optab_libfunc (ge_optab, HFmode, NULL);
1099 set_optab_libfunc (gt_optab, HFmode, NULL);
1100 set_optab_libfunc (unord_optab, HFmode, NULL);
1101 break;
1102
1103 default:
1104 break;
1105 }
1106
1107 if (TARGET_AAPCS_BASED)
1108 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1109 }
1110
1111 /* On AAPCS systems, this is the "struct __va_list". */
1112 static GTY(()) tree va_list_type;
1113
1114 /* Return the type to use as __builtin_va_list. */
1115 static tree
1116 arm_build_builtin_va_list (void)
1117 {
1118 tree va_list_name;
1119 tree ap_field;
1120
1121 if (!TARGET_AAPCS_BASED)
1122 return std_build_builtin_va_list ();
1123
1124 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1125 defined as:
1126
1127 struct __va_list
1128 {
1129 void *__ap;
1130 };
1131
1132 The C Library ABI further reinforces this definition in \S
1133 4.1.
1134
1135 We must follow this definition exactly. The structure tag
1136 name is visible in C++ mangled names, and thus forms a part
1137 of the ABI. The field name may be used by people who
1138 #include <stdarg.h>. */
1139 /* Create the type. */
1140 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1141 /* Give it the required name. */
1142 va_list_name = build_decl (BUILTINS_LOCATION,
1143 TYPE_DECL,
1144 get_identifier ("__va_list"),
1145 va_list_type);
1146 DECL_ARTIFICIAL (va_list_name) = 1;
1147 TYPE_NAME (va_list_type) = va_list_name;
1148 /* Create the __ap field. */
1149 ap_field = build_decl (BUILTINS_LOCATION,
1150 FIELD_DECL,
1151 get_identifier ("__ap"),
1152 ptr_type_node);
1153 DECL_ARTIFICIAL (ap_field) = 1;
1154 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1155 TYPE_FIELDS (va_list_type) = ap_field;
1156 /* Compute its layout. */
1157 layout_type (va_list_type);
1158
1159 return va_list_type;
1160 }
1161
1162 /* Return an expression of type "void *" pointing to the next
1163 available argument in a variable-argument list. VALIST is the
1164 user-level va_list object, of type __builtin_va_list. */
1165 static tree
1166 arm_extract_valist_ptr (tree valist)
1167 {
1168 if (TREE_TYPE (valist) == error_mark_node)
1169 return error_mark_node;
1170
1171 /* On an AAPCS target, the pointer is stored within "struct
1172 va_list". */
1173 if (TARGET_AAPCS_BASED)
1174 {
1175 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1176 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1177 valist, ap_field, NULL_TREE);
1178 }
1179
1180 return valist;
1181 }
1182
1183 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1184 static void
1185 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1186 {
1187 valist = arm_extract_valist_ptr (valist);
1188 std_expand_builtin_va_start (valist, nextarg);
1189 }
1190
1191 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1192 static tree
1193 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1194 gimple_seq *post_p)
1195 {
1196 valist = arm_extract_valist_ptr (valist);
1197 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1198 }
1199
1200 /* Implement TARGET_HANDLE_OPTION. */
1201
1202 static bool
1203 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1204 {
1205 switch (code)
1206 {
1207 case OPT_march_:
1208 arm_select[1].string = arg;
1209 return true;
1210
1211 case OPT_mcpu_:
1212 arm_select[0].string = arg;
1213 return true;
1214
1215 case OPT_mhard_float:
1216 target_float_abi_name = "hard";
1217 return true;
1218
1219 case OPT_msoft_float:
1220 target_float_abi_name = "soft";
1221 return true;
1222
1223 case OPT_mtune_:
1224 arm_select[2].string = arg;
1225 return true;
1226
1227 default:
1228 return true;
1229 }
1230 }
1231
1232 static void
1233 arm_target_help (void)
1234 {
1235 int i;
1236 static int columns = 0;
1237 int remaining;
1238
1239 /* If we have not done so already, obtain the desired maximum width of
1240 the output. Note - this is a duplication of the code at the start of
1241 gcc/opts.c:print_specific_help() - the two copies should probably be
1242 replaced by a single function. */
1243 if (columns == 0)
1244 {
1245 const char *p;
1246
1247 GET_ENVIRONMENT (p, "COLUMNS");
1248 if (p != NULL)
1249 {
1250 int value = atoi (p);
1251
1252 if (value > 0)
1253 columns = value;
1254 }
1255
1256 if (columns == 0)
1257 /* Use a reasonable default. */
1258 columns = 80;
1259 }
1260
1261 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1262
1263 /* The - 2 is because we know that the last entry in the array is NULL. */
1264 i = ARRAY_SIZE (all_cores) - 2;
1265 gcc_assert (i > 0);
1266 printf (" %s", all_cores[i].name);
1267 remaining = columns - (strlen (all_cores[i].name) + 4);
1268 gcc_assert (remaining >= 0);
1269
1270 while (i--)
1271 {
1272 int len = strlen (all_cores[i].name);
1273
1274 if (remaining > len + 2)
1275 {
1276 printf (", %s", all_cores[i].name);
1277 remaining -= len + 2;
1278 }
1279 else
1280 {
1281 if (remaining > 0)
1282 printf (",");
1283 printf ("\n %s", all_cores[i].name);
1284 remaining = columns - (len + 4);
1285 }
1286 }
1287
1288 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1289
1290 i = ARRAY_SIZE (all_architectures) - 2;
1291 gcc_assert (i > 0);
1292
1293 printf (" %s", all_architectures[i].name);
1294 remaining = columns - (strlen (all_architectures[i].name) + 4);
1295 gcc_assert (remaining >= 0);
1296
1297 while (i--)
1298 {
1299 int len = strlen (all_architectures[i].name);
1300
1301 if (remaining > len + 2)
1302 {
1303 printf (", %s", all_architectures[i].name);
1304 remaining -= len + 2;
1305 }
1306 else
1307 {
1308 if (remaining > 0)
1309 printf (",");
1310 printf ("\n %s", all_architectures[i].name);
1311 remaining = columns - (len + 4);
1312 }
1313 }
1314 printf ("\n");
1315
1316 }
1317
1318 /* Fix up any incompatible options that the user has specified.
1319 This has now turned into a maze. */
1320 void
1321 arm_override_options (void)
1322 {
1323 unsigned i;
1324 enum processor_type target_arch_cpu = arm_none;
1325 enum processor_type selected_cpu = arm_none;
1326
1327 /* Set up the flags based on the cpu/architecture selected by the user. */
1328 for (i = ARRAY_SIZE (arm_select); i--;)
1329 {
1330 struct arm_cpu_select * ptr = arm_select + i;
1331
1332 if (ptr->string != NULL && ptr->string[0] != '\0')
1333 {
1334 const struct processors * sel;
1335
1336 for (sel = ptr->processors; sel->name != NULL; sel++)
1337 if (streq (ptr->string, sel->name))
1338 {
1339 /* Set the architecture define. */
1340 if (i != ARM_OPT_SET_TUNE)
1341 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1342
1343 /* Determine the processor core for which we should
1344 tune code-generation. */
1345 if (/* -mcpu= is a sensible default. */
1346 i == ARM_OPT_SET_CPU
1347 /* -mtune= overrides -mcpu= and -march=. */
1348 || i == ARM_OPT_SET_TUNE)
1349 arm_tune = (enum processor_type) (sel - ptr->processors);
1350
1351 /* Remember the CPU associated with this architecture.
1352 If no other option is used to set the CPU type,
1353 we'll use this to guess the most suitable tuning
1354 options. */
1355 if (i == ARM_OPT_SET_ARCH)
1356 target_arch_cpu = sel->core;
1357
1358 if (i == ARM_OPT_SET_CPU)
1359 selected_cpu = (enum processor_type) (sel - ptr->processors);
1360
1361 if (i != ARM_OPT_SET_TUNE)
1362 {
1363 /* If we have been given an architecture and a processor
1364 make sure that they are compatible. We only generate
1365 a warning though, and we prefer the CPU over the
1366 architecture. */
1367 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1368 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1369 ptr->string);
1370
1371 insn_flags = sel->flags;
1372 }
1373
1374 break;
1375 }
1376
1377 if (sel->name == NULL)
1378 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1379 }
1380 }
1381
1382 /* Guess the tuning options from the architecture if necessary. */
1383 if (arm_tune == arm_none)
1384 arm_tune = target_arch_cpu;
1385
1386 /* If the user did not specify a processor, choose one for them. */
1387 if (insn_flags == 0)
1388 {
1389 const struct processors * sel;
1390 unsigned int sought;
1391
1392 selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
1393 if (selected_cpu == arm_none)
1394 {
1395 #ifdef SUBTARGET_CPU_DEFAULT
1396 /* Use the subtarget default CPU if none was specified by
1397 configure. */
1398 selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
1399 #endif
1400 /* Default to ARM6. */
1401 if (selected_cpu == arm_none)
1402 selected_cpu = arm6;
1403 }
1404 sel = &all_cores[selected_cpu];
1405
1406 insn_flags = sel->flags;
1407
1408 /* Now check to see if the user has specified some command line
 1409 switches that require certain abilities from the cpu. */
1410 sought = 0;
1411
1412 if (TARGET_INTERWORK || TARGET_THUMB)
1413 {
1414 sought |= (FL_THUMB | FL_MODE32);
1415
1416 /* There are no ARM processors that support both APCS-26 and
1417 interworking. Therefore we force FL_MODE26 to be removed
1418 from insn_flags here (if it was set), so that the search
1419 below will always be able to find a compatible processor. */
1420 insn_flags &= ~FL_MODE26;
1421 }
1422
1423 if (sought != 0 && ((sought & insn_flags) != sought))
1424 {
1425 /* Try to locate a CPU type that supports all of the abilities
1426 of the default CPU, plus the extra abilities requested by
1427 the user. */
1428 for (sel = all_cores; sel->name != NULL; sel++)
1429 if ((sel->flags & sought) == (sought | insn_flags))
1430 break;
1431
1432 if (sel->name == NULL)
1433 {
1434 unsigned current_bit_count = 0;
1435 const struct processors * best_fit = NULL;
1436
1437 /* Ideally we would like to issue an error message here
1438 saying that it was not possible to find a CPU compatible
1439 with the default CPU, but which also supports the command
1440 line options specified by the programmer, and so they
1441 ought to use the -mcpu=<name> command line option to
1442 override the default CPU type.
1443
1444 If we cannot find a cpu that has both the
1445 characteristics of the default cpu and the given
1446 command line options we scan the array again looking
1447 for a best match. */
1448 for (sel = all_cores; sel->name != NULL; sel++)
1449 if ((sel->flags & sought) == sought)
1450 {
1451 unsigned count;
1452
1453 count = bit_count (sel->flags & insn_flags);
1454
1455 if (count >= current_bit_count)
1456 {
1457 best_fit = sel;
1458 current_bit_count = count;
1459 }
1460 }
1461
1462 gcc_assert (best_fit);
1463 sel = best_fit;
1464 }
1465
1466 insn_flags = sel->flags;
1467 }
1468 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1469 arm_default_cpu = (enum processor_type) (sel - all_cores);
1470 if (arm_tune == arm_none)
1471 arm_tune = arm_default_cpu;
1472 }
1473
1474 /* The processor for which we should tune should now have been
1475 chosen. */
1476 gcc_assert (arm_tune != arm_none);
1477
1478 tune_flags = all_cores[(int)arm_tune].flags;
1479 current_tune = all_cores[(int)arm_tune].tune;
1480
1481 if (target_fp16_format_name)
1482 {
1483 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1484 {
1485 if (streq (all_fp16_formats[i].name, target_fp16_format_name))
1486 {
1487 arm_fp16_format = all_fp16_formats[i].fp16_format_type;
1488 break;
1489 }
1490 }
1491 if (i == ARRAY_SIZE (all_fp16_formats))
1492 error ("invalid __fp16 format option: -mfp16-format=%s",
1493 target_fp16_format_name);
1494 }
1495 else
1496 arm_fp16_format = ARM_FP16_FORMAT_NONE;
1497
1498 if (target_abi_name)
1499 {
1500 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1501 {
1502 if (streq (arm_all_abis[i].name, target_abi_name))
1503 {
1504 arm_abi = arm_all_abis[i].abi_type;
1505 break;
1506 }
1507 }
1508 if (i == ARRAY_SIZE (arm_all_abis))
1509 error ("invalid ABI option: -mabi=%s", target_abi_name);
1510 }
1511 else
1512 arm_abi = ARM_DEFAULT_ABI;
1513
1514 /* Make sure that the processor choice does not conflict with any of the
1515 other command line choices. */
1516 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1517 error ("target CPU does not support ARM mode");
1518
1519 /* BPABI targets use linker tricks to allow interworking on cores
1520 without thumb support. */
1521 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1522 {
1523 warning (0, "target CPU does not support interworking" );
1524 target_flags &= ~MASK_INTERWORK;
1525 }
1526
1527 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1528 {
1529 warning (0, "target CPU does not support THUMB instructions");
1530 target_flags &= ~MASK_THUMB;
1531 }
1532
1533 if (TARGET_APCS_FRAME && TARGET_THUMB)
1534 {
1535 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1536 target_flags &= ~MASK_APCS_FRAME;
1537 }
1538
1539 /* Callee super interworking implies thumb interworking. Adding
1540 this to the flags here simplifies the logic elsewhere. */
1541 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1542 target_flags |= MASK_INTERWORK;
1543
1544 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1545 from here where no function is being compiled currently. */
1546 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1547 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1548
1549 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1550 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1551
1552 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1553 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1554
1555 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1556 {
1557 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1558 target_flags |= MASK_APCS_FRAME;
1559 }
1560
1561 if (TARGET_POKE_FUNCTION_NAME)
1562 target_flags |= MASK_APCS_FRAME;
1563
1564 if (TARGET_APCS_REENT && flag_pic)
1565 error ("-fpic and -mapcs-reent are incompatible");
1566
1567 if (TARGET_APCS_REENT)
1568 warning (0, "APCS reentrant code not supported. Ignored");
1569
1570 /* If this target is normally configured to use APCS frames, warn if they
1571 are turned off and debugging is turned on. */
1572 if (TARGET_ARM
1573 && write_symbols != NO_DEBUG
1574 && !TARGET_APCS_FRAME
1575 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1576 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1577
1578 if (TARGET_APCS_FLOAT)
1579 warning (0, "passing floating point arguments in fp regs not yet supported");
1580
1581 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1582 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1583 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1584 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1585 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1586 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1587 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1588 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1589 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1590 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1591 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1592 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1593 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1594
1595 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1596 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1597 thumb_code = (TARGET_ARM == 0);
1598 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1599 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1600 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1601 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1602 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1603
1604 /* If we are not using the default (ARM mode) section anchor offset
1605 ranges, then set the correct ranges now. */
1606 if (TARGET_THUMB1)
1607 {
1608 /* Thumb-1 LDR instructions cannot have negative offsets.
1609 Permissible positive offset ranges are 5-bit (for byte loads),
1610 6-bit (for halfword loads), or 7-bit (for word loads).
1611 Empirical results suggest a 7-bit anchor range gives the best
1612 overall code size. */
1613 targetm.min_anchor_offset = 0;
1614 targetm.max_anchor_offset = 127;
1615 }
1616 else if (TARGET_THUMB2)
1617 {
1618 /* The minimum is set such that the total size of the block
1619 for a particular anchor is 248 + 1 + 4095 bytes, which is
1620 divisible by eight, ensuring natural spacing of anchors. */
1621 targetm.min_anchor_offset = -248;
1622 targetm.max_anchor_offset = 4095;
1623 }
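
/* Worked example of the arithmetic above (illustrative only): the Thumb-2
   anchor block spans offsets -248 through 4095, i.e.
   248 + 1 + 4095 = 4344 bytes in total, and 4344 = 8 * 543, so the block
   size is indeed a multiple of eight.  */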
1624
1625 /* V5 code we generate is completely interworking capable, so we turn off
1626 TARGET_INTERWORK here to avoid many tests later on. */
1627
1628 /* XXX However, we must pass the right pre-processor defines to CPP
1629 or GLD can get confused. This is a hack. */
1630 if (TARGET_INTERWORK)
1631 arm_cpp_interwork = 1;
1632
1633 if (arm_arch5)
1634 target_flags &= ~MASK_INTERWORK;
1635
1636 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1637 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1638
1639 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1640 error ("iwmmxt abi requires an iwmmxt capable cpu");
1641
1642 if (target_fpu_name == NULL && target_fpe_name != NULL)
1643 {
1644 if (streq (target_fpe_name, "2"))
1645 target_fpu_name = "fpe2";
1646 else if (streq (target_fpe_name, "3"))
1647 target_fpu_name = "fpe3";
1648 else
1649 error ("invalid floating point emulation option: -mfpe=%s",
1650 target_fpe_name);
1651 }
1652
1653 if (target_fpu_name == NULL)
1654 {
1655 #ifdef FPUTYPE_DEFAULT
1656 target_fpu_name = FPUTYPE_DEFAULT;
1657 #else
1658 if (arm_arch_cirrus)
1659 target_fpu_name = "maverick";
1660 else
1661 target_fpu_name = "fpe2";
1662 #endif
1663 }
1664
1665 arm_fpu_desc = NULL;
1666 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1667 {
1668 if (streq (all_fpus[i].name, target_fpu_name))
1669 {
1670 arm_fpu_desc = &all_fpus[i];
1671 break;
1672 }
1673 }
1674
1675 if (!arm_fpu_desc)
1676 {
1677 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1678 return;
1679 }
1680
1681 switch (arm_fpu_desc->model)
1682 {
1683 case ARM_FP_MODEL_FPA:
1684 if (arm_fpu_desc->rev == 2)
1685 arm_fpu_attr = FPU_FPE2;
1686 else if (arm_fpu_desc->rev == 3)
1687 arm_fpu_attr = FPU_FPE3;
1688 else
1689 arm_fpu_attr = FPU_FPA;
1690 break;
1691
1692 case ARM_FP_MODEL_MAVERICK:
1693 arm_fpu_attr = FPU_MAVERICK;
1694 break;
1695
1696 case ARM_FP_MODEL_VFP:
1697 arm_fpu_attr = FPU_VFP;
1698 break;
1699
1700 default:
1701 gcc_unreachable();
1702 }
1703
1704 if (target_float_abi_name != NULL)
1705 {
1706 /* The user specified a FP ABI. */
1707 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1708 {
1709 if (streq (all_float_abis[i].name, target_float_abi_name))
1710 {
1711 arm_float_abi = all_float_abis[i].abi_type;
1712 break;
1713 }
1714 }
1715 if (i == ARRAY_SIZE (all_float_abis))
1716 error ("invalid floating point abi: -mfloat-abi=%s",
1717 target_float_abi_name);
1718 }
1719 else
1720 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1721
1722 if (TARGET_AAPCS_BASED
1723 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1724 error ("FPA is unsupported in the AAPCS");
1725
1726 if (TARGET_AAPCS_BASED)
1727 {
1728 if (TARGET_CALLER_INTERWORKING)
1729 error ("AAPCS does not support -mcaller-super-interworking");
1730 else
1731 if (TARGET_CALLEE_INTERWORKING)
1732 error ("AAPCS does not support -mcallee-super-interworking");
1733 }
1734
1735 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1736 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1737 will ever exist. GCC makes no attempt to support this combination. */
1738 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1739 sorry ("iWMMXt and hardware floating point");
1740
1741 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1742 if (TARGET_THUMB2 && TARGET_IWMMXT)
1743 sorry ("Thumb-2 iWMMXt");
1744
1745 /* __fp16 support currently assumes the core has ldrh. */
1746 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1747 sorry ("__fp16 and no ldrh");
1748
1749 /* If soft-float is specified then don't use FPU. */
1750 if (TARGET_SOFT_FLOAT)
1751 arm_fpu_attr = FPU_NONE;
1752
1753 if (TARGET_AAPCS_BASED)
1754 {
1755 if (arm_abi == ARM_ABI_IWMMXT)
1756 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1757 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1758 && TARGET_HARD_FLOAT
1759 && TARGET_VFP)
1760 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1761 else
1762 arm_pcs_default = ARM_PCS_AAPCS;
1763 }
1764 else
1765 {
1766 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1767 sorry ("-mfloat-abi=hard and VFP");
1768
1769 if (arm_abi == ARM_ABI_APCS)
1770 arm_pcs_default = ARM_PCS_APCS;
1771 else
1772 arm_pcs_default = ARM_PCS_ATPCS;
1773 }
1774
1775 /* For arm2/3 there is no need to do any scheduling if there is only
1776 a floating point emulator, or we are doing software floating-point. */
1777 if ((TARGET_SOFT_FLOAT
1778 || (TARGET_FPA && arm_fpu_desc->rev))
1779 && (tune_flags & FL_MODE32) == 0)
1780 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1781
1782 if (target_thread_switch)
1783 {
1784 if (strcmp (target_thread_switch, "soft") == 0)
1785 target_thread_pointer = TP_SOFT;
1786 else if (strcmp (target_thread_switch, "auto") == 0)
1787 target_thread_pointer = TP_AUTO;
1788 else if (strcmp (target_thread_switch, "cp15") == 0)
1789 target_thread_pointer = TP_CP15;
1790 else
1791 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1792 }
1793
1794 /* Use the cp15 method if it is available. */
1795 if (target_thread_pointer == TP_AUTO)
1796 {
1797 if (arm_arch6k && !TARGET_THUMB1)
1798 target_thread_pointer = TP_CP15;
1799 else
1800 target_thread_pointer = TP_SOFT;
1801 }
1802
1803 if (TARGET_HARD_TP && TARGET_THUMB1)
1804 error ("cannot use -mtp=cp15 with 16-bit Thumb");
1805
1806 /* Override the default structure alignment for AAPCS ABI. */
1807 if (TARGET_AAPCS_BASED)
1808 arm_structure_size_boundary = 8;
1809
1810 if (structure_size_string != NULL)
1811 {
1812 int size = strtol (structure_size_string, NULL, 0);
1813
1814 if (size == 8 || size == 32
1815 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1816 arm_structure_size_boundary = size;
1817 else
1818 warning (0, "structure size boundary can only be set to %s",
1819 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1820 }
1821
1822 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1823 {
1824 error ("RTP PIC is incompatible with Thumb");
1825 flag_pic = 0;
1826 }
1827
1828 /* If stack checking is disabled, we can use r10 as the PIC register,
1829 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1830 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1831 {
1832 if (TARGET_VXWORKS_RTP)
1833 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1834 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1835 }
1836
1837 if (flag_pic && TARGET_VXWORKS_RTP)
1838 arm_pic_register = 9;
1839
1840 if (arm_pic_register_string != NULL)
1841 {
1842 int pic_register = decode_reg_name (arm_pic_register_string);
1843
1844 if (!flag_pic)
1845 warning (0, "-mpic-register= is useless without -fpic");
1846
1847 /* Prevent the user from choosing an obviously stupid PIC register. */
1848 else if (pic_register < 0 || call_used_regs[pic_register]
1849 || pic_register == HARD_FRAME_POINTER_REGNUM
1850 || pic_register == STACK_POINTER_REGNUM
1851 || pic_register >= PC_REGNUM
1852 || (TARGET_VXWORKS_RTP
1853 && (unsigned int) pic_register != arm_pic_register))
1854 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1855 else
1856 arm_pic_register = pic_register;
1857 }
1858
1859 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1860 if (fix_cm3_ldrd == 2)
1861 {
1862 if (selected_cpu == cortexm3)
1863 fix_cm3_ldrd = 1;
1864 else
1865 fix_cm3_ldrd = 0;
1866 }
1867
1868 if (TARGET_THUMB1 && flag_schedule_insns)
1869 {
1870 /* Don't warn since it's on by default in -O2. */
1871 flag_schedule_insns = 0;
1872 }
1873
1874 if (optimize_size)
1875 {
1876 /* If optimizing for size, bump the number of instructions that we
1877 are prepared to conditionally execute (even on a StrongARM). */
1878 max_insns_skipped = 6;
1879 }
1880 else
1881 {
1882 /* StrongARM has early execution of branches, so a sequence
1883 that is worth skipping is shorter. */
1884 if (arm_tune_strongarm)
1885 max_insns_skipped = 3;
1886 }
1887
1888 /* Hot/Cold partitioning is not currently supported, since we can't
1889 handle literal pool placement in that case. */
1890 if (flag_reorder_blocks_and_partition)
1891 {
1892 inform (input_location,
1893 "-freorder-blocks-and-partition not supported on this architecture");
1894 flag_reorder_blocks_and_partition = 0;
1895 flag_reorder_blocks = 1;
1896 }
1897
1898 /* Register global variables with the garbage collector. */
1899 arm_add_gc_roots ();
1900 }
1901
1902 static void
1903 arm_add_gc_roots (void)
1904 {
1905 gcc_obstack_init(&minipool_obstack);
1906 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1907 }
1908 \f
1909 /* A table of known ARM exception types.
1910 For use with the interrupt function attribute. */
1911
1912 typedef struct
1913 {
1914 const char *const arg;
1915 const unsigned long return_value;
1916 }
1917 isr_attribute_arg;
1918
1919 static const isr_attribute_arg isr_attribute_args [] =
1920 {
1921 { "IRQ", ARM_FT_ISR },
1922 { "irq", ARM_FT_ISR },
1923 { "FIQ", ARM_FT_FIQ },
1924 { "fiq", ARM_FT_FIQ },
1925 { "ABORT", ARM_FT_ISR },
1926 { "abort", ARM_FT_ISR },
1929 { "UNDEF", ARM_FT_EXCEPTION },
1930 { "undef", ARM_FT_EXCEPTION },
1931 { "SWI", ARM_FT_EXCEPTION },
1932 { "swi", ARM_FT_EXCEPTION },
1933 { NULL, ARM_FT_NORMAL }
1934 };
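
/* Illustrative usage of the table above (a sketch, not part of the compiler
   itself): the strings are matched against the argument of the "interrupt"
   (or "isr") function attribute.  */
#if 0
void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
void irq_handler (void) __attribute__ ((isr));  /* No argument: defaults to IRQ.  */
#endif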
1935
1936 /* Returns the (interrupt) function type of the current
1937 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1938
1939 static unsigned long
1940 arm_isr_value (tree argument)
1941 {
1942 const isr_attribute_arg * ptr;
1943 const char * arg;
1944
1945 if (!arm_arch_notm)
1946 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1947
1948 /* No argument - default to IRQ. */
1949 if (argument == NULL_TREE)
1950 return ARM_FT_ISR;
1951
1952 /* Get the value of the argument. */
1953 if (TREE_VALUE (argument) == NULL_TREE
1954 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1955 return ARM_FT_UNKNOWN;
1956
1957 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1958
1959 /* Check it against the list of known arguments. */
1960 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1961 if (streq (arg, ptr->arg))
1962 return ptr->return_value;
1963
1964 /* An unrecognized interrupt type. */
1965 return ARM_FT_UNKNOWN;
1966 }
1967
1968 /* Computes the type of the current function. */
1969
1970 static unsigned long
1971 arm_compute_func_type (void)
1972 {
1973 unsigned long type = ARM_FT_UNKNOWN;
1974 tree a;
1975 tree attr;
1976
1977 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1978
1979 /* Decide if the current function is volatile. Such functions
1980 never return, and many memory cycles can be saved by not storing
1981 register values that will never be needed again. This optimization
1982 was added to speed up context switching in a kernel application. */
1983 if (optimize > 0
1984 && (TREE_NOTHROW (current_function_decl)
1985 || !(flag_unwind_tables
1986 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1987 && TREE_THIS_VOLATILE (current_function_decl))
1988 type |= ARM_FT_VOLATILE;
1989
1990 if (cfun->static_chain_decl != NULL)
1991 type |= ARM_FT_NESTED;
1992
1993 attr = DECL_ATTRIBUTES (current_function_decl);
1994
1995 a = lookup_attribute ("naked", attr);
1996 if (a != NULL_TREE)
1997 type |= ARM_FT_NAKED;
1998
1999 a = lookup_attribute ("isr", attr);
2000 if (a == NULL_TREE)
2001 a = lookup_attribute ("interrupt", attr);
2002
2003 if (a == NULL_TREE)
2004 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2005 else
2006 type |= arm_isr_value (TREE_VALUE (a));
2007
2008 return type;
2009 }
2010
2011 /* Returns the type of the current function. */
2012
2013 unsigned long
2014 arm_current_func_type (void)
2015 {
2016 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2017 cfun->machine->func_type = arm_compute_func_type ();
2018
2019 return cfun->machine->func_type;
2020 }
2021
2022 bool
2023 arm_allocate_stack_slots_for_args (void)
2024 {
2025 /* Naked functions should not allocate stack slots for arguments. */
2026 return !IS_NAKED (arm_current_func_type ());
2027 }
2028
2029 \f
2030 /* Output assembler code for a block containing the constant parts
2031 of a trampoline, leaving space for the variable parts.
2032
2033 On the ARM, (if r8 is the static chain regnum, and remembering that
2034 referencing pc adds an offset of 8) the trampoline looks like:
2035 ldr r8, [pc, #0]
2036 ldr pc, [pc]
2037 .word static chain value
2038 .word function's address
2039 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2040
2041 static void
2042 arm_asm_trampoline_template (FILE *f)
2043 {
2044 if (TARGET_ARM)
2045 {
2046 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2047 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2048 }
2049 else if (TARGET_THUMB2)
2050 {
2051 /* The Thumb-2 trampoline is similar to the ARM implementation.
2052 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2053 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2054 STATIC_CHAIN_REGNUM, PC_REGNUM);
2055 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2056 }
2057 else
2058 {
2059 ASM_OUTPUT_ALIGN (f, 2);
2060 fprintf (f, "\t.code\t16\n");
2061 fprintf (f, ".Ltrampoline_start:\n");
2062 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2063 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2064 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2065 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2066 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2067 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2068 }
2069 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2070 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2071 }
2072
2073 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2074
2075 static void
2076 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2077 {
2078 rtx fnaddr, mem, a_tramp;
2079
2080 emit_block_move (m_tramp, assemble_trampoline_template (),
2081 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2082
2083 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2084 emit_move_insn (mem, chain_value);
2085
2086 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2087 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2088 emit_move_insn (mem, fnaddr);
2089
2090 a_tramp = XEXP (m_tramp, 0);
2091 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2092 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2093 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2094 }
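
/* A note on the offsets used above (illustrative): on 32-bit (ARM and
   Thumb-2) targets the template is two 4-byte instructions, so the static
   chain word lands at offset 8 and the function address at offset 12.  The
   Thumb-1 stub is six 2-byte instructions (12 bytes), which pushes the two
   literal words out to offsets 12 and 16.  */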
2095
2096 /* Thumb trampolines should be entered in thumb mode, so set
2097 the bottom bit of the address. */
2098
2099 static rtx
2100 arm_trampoline_adjust_address (rtx addr)
2101 {
2102 if (TARGET_THUMB)
2103 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2104 NULL, 0, OPTAB_LIB_WIDEN);
2105 return addr;
2106 }
2107 \f
2108 /* Return 1 if it is possible to return using a single instruction.
2109 If SIBLING is non-null, this is a test for a return before a sibling
2110 call. SIBLING is the call insn, so we can examine its register usage. */
2111
2112 int
2113 use_return_insn (int iscond, rtx sibling)
2114 {
2115 int regno;
2116 unsigned int func_type;
2117 unsigned long saved_int_regs;
2118 unsigned HOST_WIDE_INT stack_adjust;
2119 arm_stack_offsets *offsets;
2120
2121 /* Never use a return instruction before reload has run. */
2122 if (!reload_completed)
2123 return 0;
2124
2125 func_type = arm_current_func_type ();
2126
2127 /* Naked, volatile and stack alignment functions need special
2128 consideration. */
2129 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2130 return 0;
2131
2132 /* So do interrupt functions that use the frame pointer and Thumb
2133 interrupt functions. */
2134 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2135 return 0;
2136
2137 offsets = arm_get_frame_offsets ();
2138 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2139
2140 /* As do variadic functions. */
2141 if (crtl->args.pretend_args_size
2142 || cfun->machine->uses_anonymous_args
2143 /* Or if the function calls __builtin_eh_return () */
2144 || crtl->calls_eh_return
2145 /* Or if the function calls alloca */
2146 || cfun->calls_alloca
2147 /* Or if there is a stack adjustment. However, if the stack pointer
2148 is saved on the stack, we can use a pre-incrementing stack load. */
2149 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2150 && stack_adjust == 4)))
2151 return 0;
2152
2153 saved_int_regs = offsets->saved_regs_mask;
2154
2155 /* Unfortunately, the insn
2156
2157 ldmib sp, {..., sp, ...}
2158
2159 triggers a bug on most SA-110 based devices, such that the stack
2160 pointer won't be correctly restored if the instruction takes a
2161 page fault. We work around this problem by popping r3 along with
2162 the other registers, since that is never slower than executing
2163 another instruction.
2164
2165 We test for !arm_arch5 here, because code for any architecture
2166 less than this could potentially be run on one of the buggy
2167 chips. */
2168 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2169 {
2170 /* Validate that r3 is a call-clobbered register (always true in
2171 the default ABI) ... */
2172 if (!call_used_regs[3])
2173 return 0;
2174
2175 /* ... that it isn't being used for a return value ... */
2176 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2177 return 0;
2178
2179 /* ... or for a tail-call argument ... */
2180 if (sibling)
2181 {
2182 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2183
2184 if (find_regno_fusage (sibling, USE, 3))
2185 return 0;
2186 }
2187
2188 /* ... and that there are no call-saved registers in r0-r2
2189 (always true in the default ABI). */
2190 if (saved_int_regs & 0x7)
2191 return 0;
2192 }
2193
2194 /* Can't be done if interworking with Thumb, and any registers have been
2195 stacked. */
2196 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2197 return 0;
2198
2199 /* On StrongARM, conditional returns are expensive if they aren't
2200 taken and multiple registers have been stacked. */
2201 if (iscond && arm_tune_strongarm)
2202 {
2203 /* Conditional return when just the LR is stored is a simple
2204 conditional-load instruction; that's not expensive. */
2205 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2206 return 0;
2207
2208 if (flag_pic
2209 && arm_pic_register != INVALID_REGNUM
2210 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2211 return 0;
2212 }
2213
2214 /* If there are saved registers but the LR isn't saved, then we need
2215 two instructions for the return. */
2216 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2217 return 0;
2218
2219 /* Can't be done if any of the FPA regs are pushed,
2220 since this also requires an insn. */
2221 if (TARGET_HARD_FLOAT && TARGET_FPA)
2222 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2223 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2224 return 0;
2225
2226 /* Likewise VFP regs. */
2227 if (TARGET_HARD_FLOAT && TARGET_VFP)
2228 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2229 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2230 return 0;
2231
2232 if (TARGET_REALLY_IWMMXT)
2233 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2234 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2235 return 0;
2236
2237 return 1;
2238 }
2239
2240 /* Return TRUE if int I is a valid immediate ARM constant. */
2241
2242 int
2243 const_ok_for_arm (HOST_WIDE_INT i)
2244 {
2245 int lowbit;
2246
2247 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2248 be all zero, or all one. */
2249 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2250 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2251 != ((~(unsigned HOST_WIDE_INT) 0)
2252 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2253 return FALSE;
2254
2255 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2256
2257 /* Fast return for 0 and small values. We must do this for zero, since
2258 the code below can't handle that one case. */
2259 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2260 return TRUE;
2261
2262 /* Get the number of trailing zeros. */
2263 lowbit = ffs((int) i) - 1;
2264
2265 /* Only even shifts are allowed in ARM mode so round down to the
2266 nearest even number. */
2267 if (TARGET_ARM)
2268 lowbit &= ~1;
2269
2270 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2271 return TRUE;
2272
2273 if (TARGET_ARM)
2274 {
2275 /* Allow rotated constants in ARM mode. */
2276 if (lowbit <= 4
2277 && ((i & ~0xc000003f) == 0
2278 || (i & ~0xf000000f) == 0
2279 || (i & ~0xfc000003) == 0))
2280 return TRUE;
2281 }
2282 else
2283 {
2284 HOST_WIDE_INT v;
2285
2286 /* Allow repeated pattern. */
2287 v = i & 0xff;
2288 v |= v << 16;
2289 if (i == v || i == (v | (v << 8)))
2290 return TRUE;
2291 }
2292
2293 return FALSE;
2294 }
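
/* Worked examples for const_ok_for_arm (illustrative only):
     0x000000ff   valid: fits in eight bits with no rotation.
     0x0003fc00   valid: 0xff shifted left by 10 bits (an even rotation).
     0xff000000   valid: 0xff rotated into the top byte.
     0x00ff00ff   invalid for ARM mode, but accepted for Thumb-2 via the
                  replicated-byte pattern check above.
     0x00000101   invalid for both: not an eight-bit value at an allowed
                  position, nor a replicated pattern.  */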
2295
2296 /* Return true if I is a valid constant for the operation CODE. */
2297 static int
2298 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2299 {
2300 if (const_ok_for_arm (i))
2301 return 1;
2302
2303 switch (code)
2304 {
2305 case PLUS:
2306 case COMPARE:
2307 case EQ:
2308 case NE:
2309 case GT:
2310 case LE:
2311 case LT:
2312 case GE:
2313 case GEU:
2314 case LTU:
2315 case GTU:
2316 case LEU:
2317 case UNORDERED:
2318 case ORDERED:
2319 case UNEQ:
2320 case UNGE:
2321 case UNLT:
2322 case UNGT:
2323 case UNLE:
2324 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2325
2326 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2327 case XOR:
2328 return 0;
2329
2330 case IOR:
2331 if (TARGET_THUMB2)
2332 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2333 return 0;
2334
2335 case AND:
2336 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2337
2338 default:
2339 gcc_unreachable ();
2340 }
2341 }
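
/* Illustrative consequences of the table above: for PLUS (and the
   comparisons) a constant whose negation is a valid immediate is also
   acceptable, since e.g. "x + (-256)" can be emitted as
   "sub rD, rN, #256"; similarly, for AND an inverted constant can use BIC,
   so "x & 0xffffff00" becomes "bic rD, rN, #0xff".  */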
2342
2343 /* Emit a sequence of insns to handle a large constant.
2344 CODE is the code of the operation required, it can be any of SET, PLUS,
2345 IOR, AND, XOR, MINUS;
2346 MODE is the mode in which the operation is being performed;
2347 VAL is the integer to operate on;
2348 SOURCE is the other operand (a register, or a null-pointer for SET);
2349 SUBTARGETS means it is safe to create scratch registers if that will
2350 either produce a simpler sequence, or we will want to cse the values.
2351 Return value is the number of insns emitted. */
2352
2353 /* ??? Tweak this for thumb2. */
2354 int
2355 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2356 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2357 {
2358 rtx cond;
2359
2360 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2361 cond = COND_EXEC_TEST (PATTERN (insn));
2362 else
2363 cond = NULL_RTX;
2364
2365 if (subtargets || code == SET
2366 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2367 && REGNO (target) != REGNO (source)))
2368 {
2369 /* After arm_reorg has been called, we can't fix up expensive
2370 constants by pushing them into memory so we must synthesize
2371 them in-line, regardless of the cost. This is only likely to
2372 be more costly on chips that have load delay slots and we are
2373 compiling without running the scheduler (so no splitting
2374 occurred before the final instruction emission).
2375
2376 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2377 */
2378 if (!after_arm_reorg
2379 && !cond
2380 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2381 1, 0)
2382 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2383 + (code != SET))))
2384 {
2385 if (code == SET)
2386 {
2387 /* Currently SET is the only monadic value for CODE; all
2388 the rest are dyadic. */
2389 if (TARGET_USE_MOVT)
2390 arm_emit_movpair (target, GEN_INT (val));
2391 else
2392 emit_set_insn (target, GEN_INT (val));
2393
2394 return 1;
2395 }
2396 else
2397 {
2398 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2399
2400 if (TARGET_USE_MOVT)
2401 arm_emit_movpair (temp, GEN_INT (val));
2402 else
2403 emit_set_insn (temp, GEN_INT (val));
2404
2405 /* For MINUS, the constant is the value being subtracted from, since
2406 we never have subtraction of a constant. */
2407 if (code == MINUS)
2408 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2409 else
2410 emit_set_insn (target,
2411 gen_rtx_fmt_ee (code, mode, source, temp));
2412 return 2;
2413 }
2414 }
2415 }
2416
2417 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2418 1);
2419 }
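
/* Worked example (a sketch of typical output, not a guaranteed sequence):
   for a SET of 0x0000ffff, which is not a valid immediate, the code above
   and arm_gen_constant below synthesize the value in two instructions,
   roughly

       mov     rD, #0x0000ff00
       add     rD, rD, #0x000000ff

   (or a single movw on architectures that have it).  */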
2420
2421 /* Return the number of instructions required to synthesize the given
2422 constant, if we start emitting them from bit-position I. */
2423 static int
2424 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2425 {
2426 HOST_WIDE_INT temp1;
2427 int step_size = TARGET_ARM ? 2 : 1;
2428 int num_insns = 0;
2429
2430 gcc_assert (TARGET_ARM || i == 0);
2431
2432 do
2433 {
2434 int end;
2435
2436 if (i <= 0)
2437 i += 32;
2438 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2439 {
2440 end = i - 8;
2441 if (end < 0)
2442 end += 32;
2443 temp1 = remainder & ((0x0ff << end)
2444 | ((i < end) ? (0xff >> (32 - end)) : 0));
2445 remainder &= ~temp1;
2446 num_insns++;
2447 i -= 8 - step_size;
2448 }
2449 i -= step_size;
2450 } while (remainder);
2451 return num_insns;
2452 }
2453
2454 static int
2455 find_best_start (unsigned HOST_WIDE_INT remainder)
2456 {
2457 int best_consecutive_zeros = 0;
2458 int i;
2459 int best_start = 0;
2460
2461 /* If we aren't targeting ARM, the best place to start is always at
2462 the bottom. */
2463 if (! TARGET_ARM)
2464 return 0;
2465
2466 for (i = 0; i < 32; i += 2)
2467 {
2468 int consecutive_zeros = 0;
2469
2470 if (!(remainder & (3 << i)))
2471 {
2472 while ((i < 32) && !(remainder & (3 << i)))
2473 {
2474 consecutive_zeros += 2;
2475 i += 2;
2476 }
2477 if (consecutive_zeros > best_consecutive_zeros)
2478 {
2479 best_consecutive_zeros = consecutive_zeros;
2480 best_start = i - consecutive_zeros;
2481 }
2482 i -= 2;
2483 }
2484 }
2485
2486 /* So long as it won't require any more insns to do so, it's
2487 desirable to emit a small constant (in bits 0...9) in the last
2488 insn. This way there is more chance that it can be combined with
2489 a later addressing insn to form a pre-indexed load or store
2490 operation. Consider:
2491
2492 *((volatile int *)0xe0000100) = 1;
2493 *((volatile int *)0xe0000110) = 2;
2494
2495 We want this to wind up as:
2496
2497 mov rA, #0xe0000000
2498 mov rB, #1
2499 str rB, [rA, #0x100]
2500 mov rB, #2
2501 str rB, [rA, #0x110]
2502
2503 rather than having to synthesize both large constants from scratch.
2504
2505 Therefore, we calculate how many insns would be required to emit
2506 the constant starting from `best_start', and also starting from
2507 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2508 yield a shorter sequence, we may as well use zero. */
2509 if (best_start != 0
2510 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2511 && (count_insns_for_constant (remainder, 0) <=
2512 count_insns_for_constant (remainder, best_start)))
2513 best_start = 0;
2514
2515 return best_start;
2516 }
2517
2518 /* Emit an instruction with the indicated PATTERN. If COND is
2519 non-NULL, conditionalize the execution of the instruction on COND
2520 being true. */
2521
2522 static void
2523 emit_constant_insn (rtx cond, rtx pattern)
2524 {
2525 if (cond)
2526 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2527 emit_insn (pattern);
2528 }
2529
2530 /* As above, but extra parameter GENERATE which, if clear, suppresses
2531 RTL generation. */
2532 /* ??? This needs more work for thumb2. */
2533
2534 static int
2535 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2536 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2537 int generate)
2538 {
2539 int can_invert = 0;
2540 int can_negate = 0;
2541 int final_invert = 0;
2542 int can_negate_initial = 0;
2543 int i;
2544 int num_bits_set = 0;
2545 int set_sign_bit_copies = 0;
2546 int clear_sign_bit_copies = 0;
2547 int clear_zero_bit_copies = 0;
2548 int set_zero_bit_copies = 0;
2549 int insns = 0;
2550 unsigned HOST_WIDE_INT temp1, temp2;
2551 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2552 int step_size = TARGET_ARM ? 2 : 1;
2553
2554 /* Find out which operations are safe for a given CODE. Also do a quick
2555 check for degenerate cases; these can occur when DImode operations
2556 are split. */
2557 switch (code)
2558 {
2559 case SET:
2560 can_invert = 1;
2561 can_negate = 1;
2562 break;
2563
2564 case PLUS:
2565 can_negate = 1;
2566 can_negate_initial = 1;
2567 break;
2568
2569 case IOR:
2570 if (remainder == 0xffffffff)
2571 {
2572 if (generate)
2573 emit_constant_insn (cond,
2574 gen_rtx_SET (VOIDmode, target,
2575 GEN_INT (ARM_SIGN_EXTEND (val))));
2576 return 1;
2577 }
2578
2579 if (remainder == 0)
2580 {
2581 if (reload_completed && rtx_equal_p (target, source))
2582 return 0;
2583
2584 if (generate)
2585 emit_constant_insn (cond,
2586 gen_rtx_SET (VOIDmode, target, source));
2587 return 1;
2588 }
2589
2590 if (TARGET_THUMB2)
2591 can_invert = 1;
2592 break;
2593
2594 case AND:
2595 if (remainder == 0)
2596 {
2597 if (generate)
2598 emit_constant_insn (cond,
2599 gen_rtx_SET (VOIDmode, target, const0_rtx));
2600 return 1;
2601 }
2602 if (remainder == 0xffffffff)
2603 {
2604 if (reload_completed && rtx_equal_p (target, source))
2605 return 0;
2606 if (generate)
2607 emit_constant_insn (cond,
2608 gen_rtx_SET (VOIDmode, target, source));
2609 return 1;
2610 }
2611 can_invert = 1;
2612 break;
2613
2614 case XOR:
2615 if (remainder == 0)
2616 {
2617 if (reload_completed && rtx_equal_p (target, source))
2618 return 0;
2619 if (generate)
2620 emit_constant_insn (cond,
2621 gen_rtx_SET (VOIDmode, target, source));
2622 return 1;
2623 }
2624
2625 if (remainder == 0xffffffff)
2626 {
2627 if (generate)
2628 emit_constant_insn (cond,
2629 gen_rtx_SET (VOIDmode, target,
2630 gen_rtx_NOT (mode, source)));
2631 return 1;
2632 }
2633 break;
2634
2635 case MINUS:
2636 /* We treat MINUS as (val - source), since (source - val) is always
2637 passed as (source + (-val)). */
2638 if (remainder == 0)
2639 {
2640 if (generate)
2641 emit_constant_insn (cond,
2642 gen_rtx_SET (VOIDmode, target,
2643 gen_rtx_NEG (mode, source)));
2644 return 1;
2645 }
2646 if (const_ok_for_arm (val))
2647 {
2648 if (generate)
2649 emit_constant_insn (cond,
2650 gen_rtx_SET (VOIDmode, target,
2651 gen_rtx_MINUS (mode, GEN_INT (val),
2652 source)));
2653 return 1;
2654 }
2655 can_negate = 1;
2656
2657 break;
2658
2659 default:
2660 gcc_unreachable ();
2661 }
2662
2663 /* If we can do it in one insn get out quickly. */
2664 if (const_ok_for_arm (val)
2665 || (can_negate_initial && const_ok_for_arm (-val))
2666 || (can_invert && const_ok_for_arm (~val)))
2667 {
2668 if (generate)
2669 emit_constant_insn (cond,
2670 gen_rtx_SET (VOIDmode, target,
2671 (source
2672 ? gen_rtx_fmt_ee (code, mode, source,
2673 GEN_INT (val))
2674 : GEN_INT (val))));
2675 return 1;
2676 }
2677
2678 /* Calculate a few attributes that may be useful for specific
2679 optimizations. */
2680 /* Count number of leading zeros. */
2681 for (i = 31; i >= 0; i--)
2682 {
2683 if ((remainder & (1 << i)) == 0)
2684 clear_sign_bit_copies++;
2685 else
2686 break;
2687 }
2688
2689 /* Count number of leading 1's. */
2690 for (i = 31; i >= 0; i--)
2691 {
2692 if ((remainder & (1 << i)) != 0)
2693 set_sign_bit_copies++;
2694 else
2695 break;
2696 }
2697
2698 /* Count the number of trailing zeros. */
2699 for (i = 0; i <= 31; i++)
2700 {
2701 if ((remainder & (1 << i)) == 0)
2702 clear_zero_bit_copies++;
2703 else
2704 break;
2705 }
2706
2707 /* Count number of trailing 1's. */
2708 for (i = 0; i <= 31; i++)
2709 {
2710 if ((remainder & (1 << i)) != 0)
2711 set_zero_bit_copies++;
2712 else
2713 break;
2714 }
2715
2716 switch (code)
2717 {
2718 case SET:
2719 /* See if we can use movw. */
2720 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2721 {
2722 if (generate)
2723 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2724 GEN_INT (val)));
2725 return 1;
2726 }
2727
2728 /* See if we can do this by sign-extending a constant that is known
2729 to be negative. This is a good way of doing it, since the shift
2730 may well merge into a subsequent insn. */
2731 if (set_sign_bit_copies > 1)
2732 {
2733 if (const_ok_for_arm
2734 (temp1 = ARM_SIGN_EXTEND (remainder
2735 << (set_sign_bit_copies - 1))))
2736 {
2737 if (generate)
2738 {
2739 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2740 emit_constant_insn (cond,
2741 gen_rtx_SET (VOIDmode, new_src,
2742 GEN_INT (temp1)));
2743 emit_constant_insn (cond,
2744 gen_ashrsi3 (target, new_src,
2745 GEN_INT (set_sign_bit_copies - 1)));
2746 }
2747 return 2;
2748 }
2749 /* For an inverted constant, we will need to set the low bits,
2750 these will be shifted out of harm's way. */
2751 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2752 if (const_ok_for_arm (~temp1))
2753 {
2754 if (generate)
2755 {
2756 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2757 emit_constant_insn (cond,
2758 gen_rtx_SET (VOIDmode, new_src,
2759 GEN_INT (temp1)));
2760 emit_constant_insn (cond,
2761 gen_ashrsi3 (target, new_src,
2762 GEN_INT (set_sign_bit_copies - 1)));
2763 }
2764 return 2;
2765 }
2766 }
2767
2768 /* See if we can calculate the value as the difference between two
2769 valid immediates. */
2770 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2771 {
2772 int topshift = clear_sign_bit_copies & ~1;
2773
2774 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2775 & (0xff000000 >> topshift));
2776
2777 /* If temp1 is zero, then that means the 9 most significant
2778 bits of remainder were 1 and we've caused it to overflow.
2779 When topshift is 0 we don't need to do anything since we
2780 can borrow from 'bit 32'. */
2781 if (temp1 == 0 && topshift != 0)
2782 temp1 = 0x80000000 >> (topshift - 1);
2783
2784 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2785
2786 if (const_ok_for_arm (temp2))
2787 {
2788 if (generate)
2789 {
2790 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2791 emit_constant_insn (cond,
2792 gen_rtx_SET (VOIDmode, new_src,
2793 GEN_INT (temp1)));
2794 emit_constant_insn (cond,
2795 gen_addsi3 (target, new_src,
2796 GEN_INT (-temp2)));
2797 }
2798
2799 return 2;
2800 }
2801 }
2802
2803 /* See if we can generate this by setting the bottom (or the top)
2804 16 bits, and then shifting these into the other half of the
2805 word. We only look for the simplest cases; doing more would cost
2806 too much. Be careful, however, not to generate this when the
2807 alternative would take fewer insns. */
2808 if (val & 0xffff0000)
2809 {
2810 temp1 = remainder & 0xffff0000;
2811 temp2 = remainder & 0x0000ffff;
2812
2813 /* Overlaps outside this range are best done using other methods. */
2814 for (i = 9; i < 24; i++)
2815 {
2816 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2817 && !const_ok_for_arm (temp2))
2818 {
2819 rtx new_src = (subtargets
2820 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2821 : target);
2822 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2823 source, subtargets, generate);
2824 source = new_src;
2825 if (generate)
2826 emit_constant_insn
2827 (cond,
2828 gen_rtx_SET
2829 (VOIDmode, target,
2830 gen_rtx_IOR (mode,
2831 gen_rtx_ASHIFT (mode, source,
2832 GEN_INT (i)),
2833 source)));
2834 return insns + 1;
2835 }
2836 }
2837
2838 /* Don't duplicate cases already considered. */
2839 for (i = 17; i < 24; i++)
2840 {
2841 if (((temp1 | (temp1 >> i)) == remainder)
2842 && !const_ok_for_arm (temp1))
2843 {
2844 rtx new_src = (subtargets
2845 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2846 : target);
2847 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2848 source, subtargets, generate);
2849 source = new_src;
2850 if (generate)
2851 emit_constant_insn
2852 (cond,
2853 gen_rtx_SET (VOIDmode, target,
2854 gen_rtx_IOR
2855 (mode,
2856 gen_rtx_LSHIFTRT (mode, source,
2857 GEN_INT (i)),
2858 source)));
2859 return insns + 1;
2860 }
2861 }
2862 }
2863 break;
2864
2865 case IOR:
2866 case XOR:
2867 /* If we have IOR or XOR, and the constant can be loaded in a
2868 single instruction, and we can find a temporary to put it in,
2869 then this can be done in two instructions instead of 3-4. */
2870 if (subtargets
2871 /* TARGET can't be NULL if SUBTARGETS is 0 */
2872 || (reload_completed && !reg_mentioned_p (target, source)))
2873 {
2874 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2875 {
2876 if (generate)
2877 {
2878 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2879
2880 emit_constant_insn (cond,
2881 gen_rtx_SET (VOIDmode, sub,
2882 GEN_INT (val)));
2883 emit_constant_insn (cond,
2884 gen_rtx_SET (VOIDmode, target,
2885 gen_rtx_fmt_ee (code, mode,
2886 source, sub)));
2887 }
2888 return 2;
2889 }
2890 }
2891
2892 if (code == XOR)
2893 break;
2894
2895 /* Convert
2896 x = y | constant (which is composed of set_sign_bit_copies of leading 1s
2897 and the remainder 0s for e.g. 0xfff00000)
2898 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2899
2900 This can be done in 2 instructions by using shifts with mov or mvn.
2901 e.g. for
2902 x = x | 0xfff00000;
2903 we generate:
2904 mvn r0, r0, asl #12
2905 mvn r0, r0, lsr #12 */
2906 if (set_sign_bit_copies > 8
2907 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2908 {
2909 if (generate)
2910 {
2911 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2912 rtx shift = GEN_INT (set_sign_bit_copies);
2913
2914 emit_constant_insn
2915 (cond,
2916 gen_rtx_SET (VOIDmode, sub,
2917 gen_rtx_NOT (mode,
2918 gen_rtx_ASHIFT (mode,
2919 source,
2920 shift))));
2921 emit_constant_insn
2922 (cond,
2923 gen_rtx_SET (VOIDmode, target,
2924 gen_rtx_NOT (mode,
2925 gen_rtx_LSHIFTRT (mode, sub,
2926 shift))));
2927 }
2928 return 2;
2929 }
2930
2931 /* Convert
2932 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2933 to
2934 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2935
2936 E.g. for r0 = r0 | 0xfff we generate:
2937 mvn r0, r0, lsr #12
2938 mvn r0, r0, asl #12
2939
2940 */
2941 if (set_zero_bit_copies > 8
2942 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2943 {
2944 if (generate)
2945 {
2946 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2947 rtx shift = GEN_INT (set_zero_bit_copies);
2948
2949 emit_constant_insn
2950 (cond,
2951 gen_rtx_SET (VOIDmode, sub,
2952 gen_rtx_NOT (mode,
2953 gen_rtx_LSHIFTRT (mode,
2954 source,
2955 shift))));
2956 emit_constant_insn
2957 (cond,
2958 gen_rtx_SET (VOIDmode, target,
2959 gen_rtx_NOT (mode,
2960 gen_rtx_ASHIFT (mode, sub,
2961 shift))));
2962 }
2963 return 2;
2964 }
2965
2966 /* This will never be reached for Thumb-2 because orn is a valid
2967 instruction. This is for Thumb-1 and the 32-bit ARM cases.
2968
2969 x = y | constant (such that ~constant is a valid constant)
2970 Transform this to
2971 x = ~(~y & ~constant).
2972 */
2973 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2974 {
2975 if (generate)
2976 {
2977 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2978 emit_constant_insn (cond,
2979 gen_rtx_SET (VOIDmode, sub,
2980 gen_rtx_NOT (mode, source)));
2981 source = sub;
2982 if (subtargets)
2983 sub = gen_reg_rtx (mode);
2984 emit_constant_insn (cond,
2985 gen_rtx_SET (VOIDmode, sub,
2986 gen_rtx_AND (mode, source,
2987 GEN_INT (temp1))));
2988 emit_constant_insn (cond,
2989 gen_rtx_SET (VOIDmode, target,
2990 gen_rtx_NOT (mode, sub)));
2991 }
2992 return 3;
2993 }
2994 break;
2995
2996 case AND:
2997 /* See if two shifts will do two or more insns' worth of work. */
2998 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2999 {
3000 HOST_WIDE_INT shift_mask = ((0xffffffff
3001 << (32 - clear_sign_bit_copies))
3002 & 0xffffffff);
3003
3004 if ((remainder | shift_mask) != 0xffffffff)
3005 {
3006 if (generate)
3007 {
3008 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3009 insns = arm_gen_constant (AND, mode, cond,
3010 remainder | shift_mask,
3011 new_src, source, subtargets, 1);
3012 source = new_src;
3013 }
3014 else
3015 {
3016 rtx targ = subtargets ? NULL_RTX : target;
3017 insns = arm_gen_constant (AND, mode, cond,
3018 remainder | shift_mask,
3019 targ, source, subtargets, 0);
3020 }
3021 }
3022
3023 if (generate)
3024 {
3025 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3026 rtx shift = GEN_INT (clear_sign_bit_copies);
3027
3028 emit_insn (gen_ashlsi3 (new_src, source, shift));
3029 emit_insn (gen_lshrsi3 (target, new_src, shift));
3030 }
3031
3032 return insns + 2;
3033 }
3034
3035 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3036 {
3037 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3038
3039 if ((remainder | shift_mask) != 0xffffffff)
3040 {
3041 if (generate)
3042 {
3043 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3044
3045 insns = arm_gen_constant (AND, mode, cond,
3046 remainder | shift_mask,
3047 new_src, source, subtargets, 1);
3048 source = new_src;
3049 }
3050 else
3051 {
3052 rtx targ = subtargets ? NULL_RTX : target;
3053
3054 insns = arm_gen_constant (AND, mode, cond,
3055 remainder | shift_mask,
3056 targ, source, subtargets, 0);
3057 }
3058 }
3059
3060 if (generate)
3061 {
3062 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3063 rtx shift = GEN_INT (clear_zero_bit_copies);
3064
3065 emit_insn (gen_lshrsi3 (new_src, source, shift));
3066 emit_insn (gen_ashlsi3 (target, new_src, shift));
3067 }
3068
3069 return insns + 2;
3070 }
3071
3072 break;
3073
3074 default:
3075 break;
3076 }
3077
3078 for (i = 0; i < 32; i++)
3079 if (remainder & (1 << i))
3080 num_bits_set++;
3081
3082 if ((code == AND)
3083 || (code != IOR && can_invert && num_bits_set > 16))
3084 remainder ^= 0xffffffff;
3085 else if (code == PLUS && num_bits_set > 16)
3086 remainder = (-remainder) & 0xffffffff;
3087
3088 /* For XOR, if more than half the bits are set and there's a sequence
3089 of more than 8 consecutive ones in the pattern then we can XOR by the
3090 inverted constant and then invert the final result; this may save an
3091 instruction and might also lead to the final mvn being merged with
3092 some other operation. */
3093 else if (code == XOR && num_bits_set > 16
3094 && (count_insns_for_constant (remainder ^ 0xffffffff,
3095 find_best_start
3096 (remainder ^ 0xffffffff))
3097 < count_insns_for_constant (remainder,
3098 find_best_start (remainder))))
3099 {
3100 remainder ^= 0xffffffff;
3101 final_invert = 1;
3102 }
3103 else
3104 {
3105 can_invert = 0;
3106 can_negate = 0;
3107 }
3108
3109 /* Now try and find a way of doing the job in either two or three
3110 instructions.
3111 We start by looking for the largest block of zeros that are aligned on
3112 a 2-bit boundary, we then fill up the temps, wrapping around to the
3113 top of the word when we drop off the bottom.
3114 In the worst case this code should produce no more than four insns.
3115 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3116 best place to start. */
3117
3118 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3119 the same. */
3120 {
3121 /* Now start emitting the insns. */
3122 i = find_best_start (remainder);
3123 do
3124 {
3125 int end;
3126
3127 if (i <= 0)
3128 i += 32;
3129 if (remainder & (3 << (i - 2)))
3130 {
3131 end = i - 8;
3132 if (end < 0)
3133 end += 32;
3134 temp1 = remainder & ((0x0ff << end)
3135 | ((i < end) ? (0xff >> (32 - end)) : 0));
3136 remainder &= ~temp1;
3137
3138 if (generate)
3139 {
3140 rtx new_src, temp1_rtx;
3141
3142 if (code == SET || code == MINUS)
3143 {
3144 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3145 if (can_invert && code != MINUS)
3146 temp1 = ~temp1;
3147 }
3148 else
3149 {
3150 if ((final_invert || remainder) && subtargets)
3151 new_src = gen_reg_rtx (mode);
3152 else
3153 new_src = target;
3154 if (can_invert)
3155 temp1 = ~temp1;
3156 else if (can_negate)
3157 temp1 = -temp1;
3158 }
3159
3160 temp1 = trunc_int_for_mode (temp1, mode);
3161 temp1_rtx = GEN_INT (temp1);
3162
3163 if (code == SET)
3164 ;
3165 else if (code == MINUS)
3166 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3167 else
3168 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3169
3170 emit_constant_insn (cond,
3171 gen_rtx_SET (VOIDmode, new_src,
3172 temp1_rtx));
3173 source = new_src;
3174 }
3175
3176 if (code == SET)
3177 {
3178 can_invert = 0;
3179 code = PLUS;
3180 }
3181 else if (code == MINUS)
3182 code = PLUS;
3183
3184 insns++;
3185 i -= 8 - step_size;
3186 }
3187 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3188 shifts. */
3189 i -= step_size;
3190 }
3191 while (remainder);
3192 }
3193
3194 if (final_invert)
3195 {
3196 if (generate)
3197 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3198 gen_rtx_NOT (mode, source)));
3199 insns++;
3200 }
3201
3202 return insns;
3203 }
3204
3205 /* Canonicalize a comparison so that we are more likely to recognize it.
3206 This can be done for a few constant compares, where we can make the
3207 immediate value easier to load. */
3208
3209 enum rtx_code
3210 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
3211 rtx * op1)
3212 {
3213 unsigned HOST_WIDE_INT i = INTVAL (*op1);
3214 unsigned HOST_WIDE_INT maxval;
3215 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3216
3217 switch (code)
3218 {
3219 case EQ:
3220 case NE:
3221 return code;
3222
3223 case GT:
3224 case LE:
3225 if (i != maxval
3226 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3227 {
3228 *op1 = GEN_INT (i + 1);
3229 return code == GT ? GE : LT;
3230 }
3231 break;
3232
3233 case GE:
3234 case LT:
3235 if (i != ~maxval
3236 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3237 {
3238 *op1 = GEN_INT (i - 1);
3239 return code == GE ? GT : LE;
3240 }
3241 break;
3242
3243 case GTU:
3244 case LEU:
3245 if (i != ~((unsigned HOST_WIDE_INT) 0)
3246 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3247 {
3248 *op1 = GEN_INT (i + 1);
3249 return code == GTU ? GEU : LTU;
3250 }
3251 break;
3252
3253 case GEU:
3254 case LTU:
3255 if (i != 0
3256 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3257 {
3258 *op1 = GEN_INT (i - 1);
3259 return code == GEU ? GTU : LEU;
3260 }
3261 break;
3262
3263 default:
3264 gcc_unreachable ();
3265 }
3266
3267 return code;
3268 }
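
/* Worked example (illustrative): for "x <= 0x1fff" the constant 0x1fff is
   not a valid immediate, but 0x2000 is, so the comparison is rewritten as
   "x < 0x2000" (LE becomes LT with op1 incremented), avoiding a constant
   synthesis before the compare.  */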
3269
3270
3271 /* Define how to find the value returned by a function. */
3272
3273 static rtx
3274 arm_function_value(const_tree type, const_tree func,
3275 bool outgoing ATTRIBUTE_UNUSED)
3276 {
3277 enum machine_mode mode;
3278 int unsignedp ATTRIBUTE_UNUSED;
3279 rtx r ATTRIBUTE_UNUSED;
3280
3281 mode = TYPE_MODE (type);
3282
3283 if (TARGET_AAPCS_BASED)
3284 return aapcs_allocate_return_reg (mode, type, func);
3285
3286 /* Promote integer types. */
3287 if (INTEGRAL_TYPE_P (type))
3288 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3289
3290 /* Promote small structs returned in a register to full-word size
3291 for big-endian AAPCS. */
3292 if (arm_return_in_msb (type))
3293 {
3294 HOST_WIDE_INT size = int_size_in_bytes (type);
3295 if (size % UNITS_PER_WORD != 0)
3296 {
3297 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3298 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3299 }
3300 }
3301
3302 return LIBCALL_VALUE (mode);
3303 }
3304
3305 static int
3306 libcall_eq (const void *p1, const void *p2)
3307 {
3308 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3309 }
3310
3311 static hashval_t
3312 libcall_hash (const void *p1)
3313 {
3314 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3315 }
3316
3317 static void
3318 add_libcall (htab_t htab, rtx libcall)
3319 {
3320 *htab_find_slot (htab, libcall, INSERT) = libcall;
3321 }
3322
3323 static bool
3324 arm_libcall_uses_aapcs_base (const_rtx libcall)
3325 {
3326 static bool init_done = false;
3327 static htab_t libcall_htab;
3328
3329 if (!init_done)
3330 {
3331 init_done = true;
3332
3333 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3334 NULL);
3335 add_libcall (libcall_htab,
3336 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3337 add_libcall (libcall_htab,
3338 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3339 add_libcall (libcall_htab,
3340 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3341 add_libcall (libcall_htab,
3342 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3343
3344 add_libcall (libcall_htab,
3345 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3346 add_libcall (libcall_htab,
3347 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3348 add_libcall (libcall_htab,
3349 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3350 add_libcall (libcall_htab,
3351 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3352
3353 add_libcall (libcall_htab,
3354 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3355 add_libcall (libcall_htab,
3356 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3357 add_libcall (libcall_htab,
3358 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3359 add_libcall (libcall_htab,
3360 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3361 add_libcall (libcall_htab,
3362 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3363 add_libcall (libcall_htab,
3364 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3365 }
3366
3367 return libcall && htab_find (libcall_htab, libcall) != NULL;
3368 }
3369
3370 rtx
3371 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3372 {
3373 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3374 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3375 {
3376 /* The following libcalls return their result in integer registers,
3377 even though they return a floating point value. */
3378 if (arm_libcall_uses_aapcs_base (libcall))
3379 return gen_rtx_REG (mode, ARG_REGISTER(1));
3380
3381 }
3382
3383 return LIBCALL_VALUE (mode);
3384 }
3385
3386 /* Determine the amount of memory needed to store the possible return
3387 registers of an untyped call. */
3388 int
3389 arm_apply_result_size (void)
3390 {
3391 int size = 16;
3392
3393 if (TARGET_32BIT)
3394 {
3395 if (TARGET_HARD_FLOAT_ABI)
3396 {
3397 if (TARGET_VFP)
3398 size += 32;
3399 if (TARGET_FPA)
3400 size += 12;
3401 if (TARGET_MAVERICK)
3402 size += 8;
3403 }
3404 if (TARGET_IWMMXT_ABI)
3405 size += 8;
3406 }
3407
3408 return size;
3409 }
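
/* Illustrative arithmetic only: a hard-float FPA configuration reserves
   16 + 12 = 28 bytes, while a soft-float iWMMXt-ABI configuration reserves
   16 + 8 = 24 bytes for the possible return registers.  */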
3410
3411 /* Decide whether TYPE should be returned in memory (true)
3412 or in a register (false). FNTYPE is the type of the function making
3413 the call. */
3414 static bool
3415 arm_return_in_memory (const_tree type, const_tree fntype)
3416 {
3417 HOST_WIDE_INT size;
3418
3419 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3420
3421 if (TARGET_AAPCS_BASED)
3422 {
3423 /* Simple, non-aggregate types (i.e. not including vectors and
3424 complex) are always returned in a register (or registers).
3425 We don't care about which register here, so we can short-cut
3426 some of the detail. */
3427 if (!AGGREGATE_TYPE_P (type)
3428 && TREE_CODE (type) != VECTOR_TYPE
3429 && TREE_CODE (type) != COMPLEX_TYPE)
3430 return false;
3431
3432 /* Any return value that is no larger than one word can be
3433 returned in r0. */
3434 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3435 return false;
3436
3437 /* Check any available co-processors to see if they accept the
3438 type as a register candidate (VFP, for example, can return
3439 some aggregates in consecutive registers). These aren't
3440 available if the call is variadic. */
3441 if (aapcs_select_return_coproc (type, fntype) >= 0)
3442 return false;
3443
3444 /* Vector values should be returned using ARM registers, not
3445 memory (unless they're over 16 bytes, which will break since
3446 we only have four call-clobbered registers to play with). */
3447 if (TREE_CODE (type) == VECTOR_TYPE)
3448 return (size < 0 || size > (4 * UNITS_PER_WORD));
3449
3450 /* The rest go in memory. */
3451 return true;
3452 }
3453
3454 if (TREE_CODE (type) == VECTOR_TYPE)
3455 return (size < 0 || size > (4 * UNITS_PER_WORD));
3456
3457 if (!AGGREGATE_TYPE_P (type) &&
3458 (TREE_CODE (type) != VECTOR_TYPE))
3459 /* All simple types are returned in registers. */
3460 return false;
3461
3462 if (arm_abi != ARM_ABI_APCS)
3463 {
3464 /* ATPCS and later return aggregate types in memory only if they are
3465 larger than a word (or are variable size). */
3466 return (size < 0 || size > UNITS_PER_WORD);
3467 }
3468
3469 /* For the arm-wince targets we choose to be compatible with Microsoft's
3470 ARM and Thumb compilers, which always return aggregates in memory. */
3471 #ifndef ARM_WINCE
3472 /* All structures/unions bigger than one word are returned in memory.
3473 Also catch the case where int_size_in_bytes returns -1. In this case
3474 the aggregate is either huge or of variable size, and in either case
3475 we will want to return it via memory and not in a register. */
3476 if (size < 0 || size > UNITS_PER_WORD)
3477 return true;
3478
3479 if (TREE_CODE (type) == RECORD_TYPE)
3480 {
3481 tree field;
3482
3483 /* For a struct the APCS says that we only return in a register
3484 if the type is 'integer like' and every addressable element
3485 has an offset of zero. For practical purposes this means
3486 that the structure can have at most one non bit-field element
3487 and that this element must be the first one in the structure. */
3488
3489 /* Find the first field, ignoring non FIELD_DECL things which will
3490 have been created by C++. */
3491 for (field = TYPE_FIELDS (type);
3492 field && TREE_CODE (field) != FIELD_DECL;
3493 field = TREE_CHAIN (field))
3494 continue;
3495
3496 if (field == NULL)
3497 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3498
3499 /* Check that the first field is valid for returning in a register. */
3500
3501 /* ... Floats are not allowed */
3502 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3503 return true;
3504
3505 /* ... Aggregates that are not themselves valid for returning in
3506 a register are not allowed. */
3507 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3508 return true;
3509
3510 /* Now check the remaining fields, if any. Only bitfields are allowed,
3511 since they are not addressable. */
3512 for (field = TREE_CHAIN (field);
3513 field;
3514 field = TREE_CHAIN (field))
3515 {
3516 if (TREE_CODE (field) != FIELD_DECL)
3517 continue;
3518
3519 if (!DECL_BIT_FIELD_TYPE (field))
3520 return true;
3521 }
3522
3523 return false;
3524 }
3525
3526 if (TREE_CODE (type) == UNION_TYPE)
3527 {
3528 tree field;
3529
3530 /* Unions can be returned in registers if every element is
3531 integral, or can be returned in an integer register. */
3532 for (field = TYPE_FIELDS (type);
3533 field;
3534 field = TREE_CHAIN (field))
3535 {
3536 if (TREE_CODE (field) != FIELD_DECL)
3537 continue;
3538
3539 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3540 return true;
3541
3542 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3543 return true;
3544 }
3545
3546 return false;
3547 }
3548 #endif /* not ARM_WINCE */
3549
3550 /* Return all other types in memory. */
3551 return true;
3552 }
3553
3554 /* Indicate whether or not words of a double are in big-endian order. */
3555
3556 int
3557 arm_float_words_big_endian (void)
3558 {
3559 if (TARGET_MAVERICK)
3560 return 0;
3561
3562   /* For FPA, float words are always big-endian.  For VFP, float words
3563 follow the memory system mode. */
3564
3565 if (TARGET_FPA)
3566 {
3567 return 1;
3568 }
3569
3570 if (TARGET_VFP)
3571 return (TARGET_BIG_END ? 1 : 0);
3572
3573 return 1;
3574 }
3575
3576 const struct pcs_attribute_arg
3577 {
3578 const char *arg;
3579 enum arm_pcs value;
3580 } pcs_attribute_args[] =
3581 {
3582 {"aapcs", ARM_PCS_AAPCS},
3583 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3584 #if 0
3585 /* We could recognize these, but changes would be needed elsewhere
3586 * to implement them. */
3587 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3588 {"atpcs", ARM_PCS_ATPCS},
3589 {"apcs", ARM_PCS_APCS},
3590 #endif
3591 {NULL, ARM_PCS_UNKNOWN}
3592 };
3593
3594 static enum arm_pcs
3595 arm_pcs_from_attribute (tree attr)
3596 {
3597 const struct pcs_attribute_arg *ptr;
3598 const char *arg;
3599
3600 /* Get the value of the argument. */
3601 if (TREE_VALUE (attr) == NULL_TREE
3602 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3603 return ARM_PCS_UNKNOWN;
3604
3605 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3606
3607 /* Check it against the list of known arguments. */
3608 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3609 if (streq (arg, ptr->arg))
3610 return ptr->value;
3611
3612   /* An unrecognized PCS variant.  */
3613 return ARM_PCS_UNKNOWN;
3614 }
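/* For illustration only (not compiler code): a user selects one of the
   variants above with the "pcs" type attribute, e.g.

     double dot (const double *, const double *, int)
       __attribute__ ((pcs ("aapcs-vfp")));

   The string argument must be one of the entries recognized in
   pcs_attribute_args.  */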
3615
3616 /* Get the PCS variant to use for this call. TYPE is the function's type
3617    specification, DECL is the specific declaration.  DECL may be null if
3618 the call could be indirect or if this is a library call. */
3619 static enum arm_pcs
3620 arm_get_pcs_model (const_tree type, const_tree decl)
3621 {
3622 bool user_convention = false;
3623 enum arm_pcs user_pcs = arm_pcs_default;
3624 tree attr;
3625
3626 gcc_assert (type);
3627
3628 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3629 if (attr)
3630 {
3631 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3632 user_convention = true;
3633 }
3634
3635 if (TARGET_AAPCS_BASED)
3636 {
3637 /* Detect varargs functions. These always use the base rules
3638 (no argument is ever a candidate for a co-processor
3639 register). */
3640 bool base_rules = (TYPE_ARG_TYPES (type) != 0
3641 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3642 != void_type_node));
3643
3644 if (user_convention)
3645 {
3646 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3647 sorry ("Non-AAPCS derived PCS variant");
3648 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3649 error ("Variadic functions must use the base AAPCS variant");
3650 }
3651
3652 if (base_rules)
3653 return ARM_PCS_AAPCS;
3654 else if (user_convention)
3655 return user_pcs;
3656 else if (decl && flag_unit_at_a_time)
3657 {
3658 /* Local functions never leak outside this compilation unit,
3659 so we are free to use whatever conventions are
3660 appropriate. */
3661 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3662 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3663 if (i && i->local)
3664 return ARM_PCS_AAPCS_LOCAL;
3665 }
3666 }
3667 else if (user_convention && user_pcs != arm_pcs_default)
3668 sorry ("PCS variant");
3669
3670 /* For everything else we use the target's default. */
3671 return arm_pcs_default;
3672 }
3673
3674
3675 static void
3676 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3677 const_tree fntype ATTRIBUTE_UNUSED,
3678 rtx libcall ATTRIBUTE_UNUSED,
3679 const_tree fndecl ATTRIBUTE_UNUSED)
3680 {
3681 /* Record the unallocated VFP registers. */
3682 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3683 pcum->aapcs_vfp_reg_alloc = 0;
3684 }
3685
3686 /* Walk down the type tree of TYPE counting consecutive base elements.
3687 If *MODEP is VOIDmode, then set it to the first valid floating point
3688 type. If a non-floating point type is found, or if a floating point
3689    type that doesn't match a non-VOIDmode *MODEP is found, then return -1;
3690 otherwise return the count in the sub-tree. */
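/* For example (illustrative only): for "struct { float x, y, z; }" this
   returns 3 with *MODEP set to SFmode; for "double[2]" it returns 2 with
   *MODEP set to DFmode; and for "struct { float f; int i; }" it returns
   -1, because the int element is not a valid base element.  */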
3691 static int
3692 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3693 {
3694 enum machine_mode mode;
3695 HOST_WIDE_INT size;
3696
3697 switch (TREE_CODE (type))
3698 {
3699 case REAL_TYPE:
3700 mode = TYPE_MODE (type);
3701 if (mode != DFmode && mode != SFmode)
3702 return -1;
3703
3704 if (*modep == VOIDmode)
3705 *modep = mode;
3706
3707 if (*modep == mode)
3708 return 1;
3709
3710 break;
3711
3712 case COMPLEX_TYPE:
3713 mode = TYPE_MODE (TREE_TYPE (type));
3714 if (mode != DFmode && mode != SFmode)
3715 return -1;
3716
3717 if (*modep == VOIDmode)
3718 *modep = mode;
3719
3720 if (*modep == mode)
3721 return 2;
3722
3723 break;
3724
3725 case VECTOR_TYPE:
3726 /* Use V2SImode and V4SImode as representatives of all 64-bit
3727 and 128-bit vector types, whether or not those modes are
3728 supported with the present options. */
3729 size = int_size_in_bytes (type);
3730 switch (size)
3731 {
3732 case 8:
3733 mode = V2SImode;
3734 break;
3735 case 16:
3736 mode = V4SImode;
3737 break;
3738 default:
3739 return -1;
3740 }
3741
3742 if (*modep == VOIDmode)
3743 *modep = mode;
3744
3745 /* Vector modes are considered to be opaque: two vectors are
3746 equivalent for the purposes of being homogeneous aggregates
3747 if they are the same size. */
3748 if (*modep == mode)
3749 return 1;
3750
3751 break;
3752
3753 case ARRAY_TYPE:
3754 {
3755 int count;
3756 tree index = TYPE_DOMAIN (type);
3757
3758 /* Can't handle incomplete types. */
3759 if (!COMPLETE_TYPE_P(type))
3760 return -1;
3761
3762 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3763 if (count == -1
3764 || !index
3765 || !TYPE_MAX_VALUE (index)
3766 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3767 || !TYPE_MIN_VALUE (index)
3768 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3769 || count < 0)
3770 return -1;
3771
3772 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3773 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3774
3775 /* There must be no padding. */
3776 if (!host_integerp (TYPE_SIZE (type), 1)
3777 || (tree_low_cst (TYPE_SIZE (type), 1)
3778 != count * GET_MODE_BITSIZE (*modep)))
3779 return -1;
3780
3781 return count;
3782 }
3783
3784 case RECORD_TYPE:
3785 {
3786 int count = 0;
3787 int sub_count;
3788 tree field;
3789
3790 /* Can't handle incomplete types. */
3791 if (!COMPLETE_TYPE_P(type))
3792 return -1;
3793
3794 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3795 {
3796 if (TREE_CODE (field) != FIELD_DECL)
3797 continue;
3798
3799 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3800 if (sub_count < 0)
3801 return -1;
3802 count += sub_count;
3803 }
3804
3805 /* There must be no padding. */
3806 if (!host_integerp (TYPE_SIZE (type), 1)
3807 || (tree_low_cst (TYPE_SIZE (type), 1)
3808 != count * GET_MODE_BITSIZE (*modep)))
3809 return -1;
3810
3811 return count;
3812 }
3813
3814 case UNION_TYPE:
3815 case QUAL_UNION_TYPE:
3816 {
3817 /* These aren't very interesting except in a degenerate case. */
3818 int count = 0;
3819 int sub_count;
3820 tree field;
3821
3822 /* Can't handle incomplete types. */
3823 if (!COMPLETE_TYPE_P(type))
3824 return -1;
3825
3826 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3827 {
3828 if (TREE_CODE (field) != FIELD_DECL)
3829 continue;
3830
3831 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3832 if (sub_count < 0)
3833 return -1;
3834 count = count > sub_count ? count : sub_count;
3835 }
3836
3837 /* There must be no padding. */
3838 if (!host_integerp (TYPE_SIZE (type), 1)
3839 || (tree_low_cst (TYPE_SIZE (type), 1)
3840 != count * GET_MODE_BITSIZE (*modep)))
3841 return -1;
3842
3843 return count;
3844 }
3845
3846 default:
3847 break;
3848 }
3849
3850 return -1;
3851 }
3852
3853 /* Return true if PCS_VARIANT should use VFP registers. */
3854 static bool
3855 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3856 {
3857 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3858 return true;
3859
3860 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3861 return false;
3862
3863   return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
3864 	  && (TARGET_VFP_DOUBLE || !is_double));
3865 }
3866
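/* Return true if an argument of mode MODE (and type TYPE, when MODE is
   BLKmode) can use VFP registers under PCS_VARIANT.  On success set
   *BASE_MODE to the mode of a single element and *COUNT to the number of
   such elements.  */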
3867 static bool
3868 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3869 enum machine_mode mode, const_tree type,
3870 enum machine_mode *base_mode, int *count)
3871 {
3872 enum machine_mode new_mode = VOIDmode;
3873
3874 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3875 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3876 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3877 {
3878 *count = 1;
3879 new_mode = mode;
3880 }
3881 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3882 {
3883 *count = 2;
3884 new_mode = (mode == DCmode ? DFmode : SFmode);
3885 }
3886 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3887 {
3888 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3889
3890 if (ag_count > 0 && ag_count <= 4)
3891 *count = ag_count;
3892 else
3893 return false;
3894 }
3895 else
3896 return false;
3897
3898
3899 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3900 return false;
3901
3902 *base_mode = new_mode;
3903 return true;
3904 }
3905
3906 static bool
3907 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3908 enum machine_mode mode, const_tree type)
3909 {
3910 int count ATTRIBUTE_UNUSED;
3911 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3912
3913 if (!use_vfp_abi (pcs_variant, false))
3914 return false;
3915 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3916 &ag_mode, &count);
3917 }
3918
3919 static bool
3920 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3921 const_tree type)
3922 {
3923 if (!use_vfp_abi (pcum->pcs_variant, false))
3924 return false;
3925
3926 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3927 &pcum->aapcs_vfp_rmode,
3928 &pcum->aapcs_vfp_rcount);
3929 }
3930
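/* Allocate VFP registers for the argument described by PCUM, if possible.
   As an illustration: for a homogeneous aggregate of two doubles the
   element mode is DFmode, so SHIFT below is 2 (a double occupies two S
   registers) and MASK is 0xf; the loop then looks for four consecutive
   free S registers starting at an even register number.  */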
3931 static bool
3932 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3933 const_tree type ATTRIBUTE_UNUSED)
3934 {
3935 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3936 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3937 int regno;
3938
3939 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3940 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3941 {
3942 pcum->aapcs_vfp_reg_alloc = mask << regno;
3943 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3944 {
3945 int i;
3946 int rcount = pcum->aapcs_vfp_rcount;
3947 int rshift = shift;
3948 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3949 rtx par;
3950 if (!TARGET_NEON)
3951 {
3952 /* Avoid using unsupported vector modes. */
3953 if (rmode == V2SImode)
3954 rmode = DImode;
3955 else if (rmode == V4SImode)
3956 {
3957 rmode = DImode;
3958 rcount *= 2;
3959 rshift /= 2;
3960 }
3961 }
3962 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3963 for (i = 0; i < rcount; i++)
3964 {
3965 rtx tmp = gen_rtx_REG (rmode,
3966 FIRST_VFP_REGNUM + regno + i * rshift);
3967 tmp = gen_rtx_EXPR_LIST
3968 (VOIDmode, tmp,
3969 GEN_INT (i * GET_MODE_SIZE (rmode)));
3970 XVECEXP (par, 0, i) = tmp;
3971 }
3972
3973 pcum->aapcs_reg = par;
3974 }
3975 else
3976 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3977 return true;
3978 }
3979 return false;
3980 }
3981
3982 static rtx
3983 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3984 enum machine_mode mode,
3985 const_tree type ATTRIBUTE_UNUSED)
3986 {
3987 if (!use_vfp_abi (pcs_variant, false))
3988     return NULL_RTX;
3989
3990 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3991 {
3992 int count;
3993 enum machine_mode ag_mode;
3994 int i;
3995 rtx par;
3996 int shift;
3997
3998 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3999 &ag_mode, &count);
4000
4001 if (!TARGET_NEON)
4002 {
4003 if (ag_mode == V2SImode)
4004 ag_mode = DImode;
4005 else if (ag_mode == V4SImode)
4006 {
4007 ag_mode = DImode;
4008 count *= 2;
4009 }
4010 }
4011 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4012 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4013 for (i = 0; i < count; i++)
4014 {
4015 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4016 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4017 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4018 XVECEXP (par, 0, i) = tmp;
4019 }
4020
4021 return par;
4022 }
4023
4024 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4025 }
4026
4027 static void
4028 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4029 enum machine_mode mode ATTRIBUTE_UNUSED,
4030 const_tree type ATTRIBUTE_UNUSED)
4031 {
4032 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4033 pcum->aapcs_vfp_reg_alloc = 0;
4034 return;
4035 }
4036
4037 #define AAPCS_CP(X) \
4038 { \
4039 aapcs_ ## X ## _cum_init, \
4040 aapcs_ ## X ## _is_call_candidate, \
4041 aapcs_ ## X ## _allocate, \
4042 aapcs_ ## X ## _is_return_candidate, \
4043 aapcs_ ## X ## _allocate_return_reg, \
4044 aapcs_ ## X ## _advance \
4045 }
4046
4047 /* Table of co-processors that can be used to pass arguments in
4048    registers.  Ideally no argument should be a candidate for more than
4049 one co-processor table entry, but the table is processed in order
4050 and stops after the first match. If that entry then fails to put
4051 the argument into a co-processor register, the argument will go on
4052 the stack. */
4053 static struct
4054 {
4055 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4056 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4057
4058 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4059 BLKmode) is a candidate for this co-processor's registers; this
4060 function should ignore any position-dependent state in
4061 CUMULATIVE_ARGS and only use call-type dependent information. */
4062 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4063
4064 /* Return true if the argument does get a co-processor register; it
4065 should set aapcs_reg to an RTX of the register allocated as is
4066 required for a return from FUNCTION_ARG. */
4067 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4068
4069 /* Return true if a result of mode MODE (or type TYPE if MODE is
4070      BLKmode) can be returned in this co-processor's registers.  */
4071 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4072
4073 /* Allocate and return an RTX element to hold the return type of a
4074 call, this routine must not fail and will only be called if
4075 is_return_candidate returned true with the same parameters. */
4076 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4077
4078 /* Finish processing this argument and prepare to start processing
4079 the next one. */
4080 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4081 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4082 {
4083 AAPCS_CP(vfp)
4084 };
4085
4086 #undef AAPCS_CP
4087
4088 static int
4089 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4090 tree type)
4091 {
4092 int i;
4093
4094 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4095 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4096 return i;
4097
4098 return -1;
4099 }
4100
4101 static int
4102 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4103 {
4104 /* We aren't passed a decl, so we can't check that a call is local.
4105 However, it isn't clear that that would be a win anyway, since it
4106 might limit some tail-calling opportunities. */
4107 enum arm_pcs pcs_variant;
4108
4109 if (fntype)
4110 {
4111 const_tree fndecl = NULL_TREE;
4112
4113 if (TREE_CODE (fntype) == FUNCTION_DECL)
4114 {
4115 fndecl = fntype;
4116 fntype = TREE_TYPE (fntype);
4117 }
4118
4119 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4120 }
4121 else
4122 pcs_variant = arm_pcs_default;
4123
4124 if (pcs_variant != ARM_PCS_AAPCS)
4125 {
4126 int i;
4127
4128 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4129 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4130 TYPE_MODE (type),
4131 type))
4132 return i;
4133 }
4134 return -1;
4135 }
4136
4137 static rtx
4138 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4139 const_tree fntype)
4140 {
4141 /* We aren't passed a decl, so we can't check that a call is local.
4142 However, it isn't clear that that would be a win anyway, since it
4143 might limit some tail-calling opportunities. */
4144 enum arm_pcs pcs_variant;
4145 int unsignedp ATTRIBUTE_UNUSED;
4146
4147 if (fntype)
4148 {
4149 const_tree fndecl = NULL_TREE;
4150
4151 if (TREE_CODE (fntype) == FUNCTION_DECL)
4152 {
4153 fndecl = fntype;
4154 fntype = TREE_TYPE (fntype);
4155 }
4156
4157 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4158 }
4159 else
4160 pcs_variant = arm_pcs_default;
4161
4162 /* Promote integer types. */
4163 if (type && INTEGRAL_TYPE_P (type))
4164 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4165
4166 if (pcs_variant != ARM_PCS_AAPCS)
4167 {
4168 int i;
4169
4170 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4171 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4172 type))
4173 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4174 mode, type);
4175 }
4176
4177 /* Promotes small structs returned in a register to full-word size
4178 for big-endian AAPCS. */
4179 if (type && arm_return_in_msb (type))
4180 {
4181 HOST_WIDE_INT size = int_size_in_bytes (type);
4182 if (size % UNITS_PER_WORD != 0)
4183 {
4184 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4185 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4186 }
4187 }
4188
4189 return gen_rtx_REG (mode, R0_REGNUM);
4190 }
4191
4192 rtx
4193 aapcs_libcall_value (enum machine_mode mode)
4194 {
4195 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4196 }
4197
4198 /* Lay out a function argument using the AAPCS rules. The rule
4199 numbers referred to here are those in the AAPCS. */
4200 static void
4201 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4202 tree type, int named)
4203 {
4204 int nregs, nregs2;
4205 int ncrn;
4206
4207 /* We only need to do this once per argument. */
4208 if (pcum->aapcs_arg_processed)
4209 return;
4210
4211 pcum->aapcs_arg_processed = true;
4212
4213 /* Special case: if named is false then we are handling an incoming
4214 anonymous argument which is on the stack. */
4215 if (!named)
4216 return;
4217
4218 /* Is this a potential co-processor register candidate? */
4219 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4220 {
4221 int slot = aapcs_select_call_coproc (pcum, mode, type);
4222 pcum->aapcs_cprc_slot = slot;
4223
4224 /* We don't have to apply any of the rules from part B of the
4225 preparation phase, these are handled elsewhere in the
4226 compiler. */
4227
4228 if (slot >= 0)
4229 {
4230 /* A Co-processor register candidate goes either in its own
4231 class of registers or on the stack. */
4232 if (!pcum->aapcs_cprc_failed[slot])
4233 {
4234 /* C1.cp - Try to allocate the argument to co-processor
4235 registers. */
4236 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4237 return;
4238
4239 /* C2.cp - Put the argument on the stack and note that we
4240 can't assign any more candidates in this slot. We also
4241 need to note that we have allocated stack space, so that
4242 we won't later try to split a non-cprc candidate between
4243 core registers and the stack. */
4244 pcum->aapcs_cprc_failed[slot] = true;
4245 pcum->can_split = false;
4246 }
4247
4248 /* We didn't get a register, so this argument goes on the
4249 stack. */
4250 gcc_assert (pcum->can_split == false);
4251 return;
4252 }
4253 }
4254
4255 /* C3 - For double-word aligned arguments, round the NCRN up to the
4256 next even number. */
4257 ncrn = pcum->aapcs_ncrn;
4258 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4259 ncrn++;
4260
4261 nregs = ARM_NUM_REGS2(mode, type);
4262
4263 /* Sigh, this test should really assert that nregs > 0, but a GCC
4264 extension allows empty structs and then gives them empty size; it
4265 then allows such a structure to be passed by value. For some of
4266 the code below we have to pretend that such an argument has
4267 non-zero size so that we 'locate' it correctly either in
4268 registers or on the stack. */
4269 gcc_assert (nregs >= 0);
4270
4271 nregs2 = nregs ? nregs : 1;
4272
4273 /* C4 - Argument fits entirely in core registers. */
4274 if (ncrn + nregs2 <= NUM_ARG_REGS)
4275 {
4276 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4277 pcum->aapcs_next_ncrn = ncrn + nregs;
4278 return;
4279 }
4280
4281 /* C5 - Some core registers left and there are no arguments already
4282 on the stack: split this argument between the remaining core
4283 registers and the stack. */
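  /* For example (illustrative): with three words already allocated to
     r0-r2, a 16-byte structure gets its first four bytes in r3
     (aapcs_partial = 4) and its remaining twelve bytes on the stack.  */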
4284 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4285 {
4286 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4287 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4288 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4289 return;
4290 }
4291
4292 /* C6 - NCRN is set to 4. */
4293 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4294
4295   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
4296 return;
4297 }
4298
4299 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4300 for a call to a function whose data type is FNTYPE.
4301 For a library call, FNTYPE is NULL. */
4302 void
4303 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4304 rtx libname,
4305 tree fndecl ATTRIBUTE_UNUSED)
4306 {
4307   /* Determine the calling convention (PCS variant) to use for this call.  */
4308 if (fntype)
4309 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4310 else
4311 pcum->pcs_variant = arm_pcs_default;
4312
4313 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4314 {
4315 if (arm_libcall_uses_aapcs_base (libname))
4316 pcum->pcs_variant = ARM_PCS_AAPCS;
4317
4318 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4319 pcum->aapcs_reg = NULL_RTX;
4320 pcum->aapcs_partial = 0;
4321 pcum->aapcs_arg_processed = false;
4322 pcum->aapcs_cprc_slot = -1;
4323 pcum->can_split = true;
4324
4325 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4326 {
4327 int i;
4328
4329 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4330 {
4331 pcum->aapcs_cprc_failed[i] = false;
4332 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4333 }
4334 }
4335 return;
4336 }
4337
4338 /* Legacy ABIs */
4339
4340 /* On the ARM, the offset starts at 0. */
4341 pcum->nregs = 0;
4342 pcum->iwmmxt_nregs = 0;
4343 pcum->can_split = true;
4344
4345 /* Varargs vectors are treated the same as long long.
4346 named_count avoids having to change the way arm handles 'named' */
4347 pcum->named_count = 0;
4348 pcum->nargs = 0;
4349
4350 if (TARGET_REALLY_IWMMXT && fntype)
4351 {
4352 tree fn_arg;
4353
4354 for (fn_arg = TYPE_ARG_TYPES (fntype);
4355 fn_arg;
4356 fn_arg = TREE_CHAIN (fn_arg))
4357 pcum->named_count += 1;
4358
4359 if (! pcum->named_count)
4360 pcum->named_count = INT_MAX;
4361 }
4362 }
4363
4364
4365 /* Return true if mode/type need doubleword alignment. */
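/* For example, on AAPCS targets DImode and DFmode arguments (and any type
   whose declared alignment exceeds 32 bits) report true here; that is
   what makes rule C3 in aapcs_layout_arg start such arguments in an even
   register.  */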
4366 bool
4367 arm_needs_doubleword_align (enum machine_mode mode, tree type)
4368 {
4369 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4370 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4371 }
4372
4373
4374 /* Determine where to put an argument to a function.
4375 Value is zero to push the argument on the stack,
4376 or a hard register in which to store the argument.
4377
4378 MODE is the argument's machine mode.
4379 TYPE is the data type of the argument (as a tree).
4380 This is null for libcalls where that information may
4381 not be available.
4382 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4383 the preceding args and about the function being called.
4384 NAMED is nonzero if this argument is a named parameter
4385 (otherwise it is an extra parameter matching an ellipsis). */
4386
4387 rtx
4388 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4389 tree type, int named)
4390 {
4391 int nregs;
4392
4393 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4394 a call insn (op3 of a call_value insn). */
4395 if (mode == VOIDmode)
4396 return const0_rtx;
4397
4398 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4399 {
4400 aapcs_layout_arg (pcum, mode, type, named);
4401 return pcum->aapcs_reg;
4402 }
4403
4404 /* Varargs vectors are treated the same as long long.
4405 named_count avoids having to change the way arm handles 'named' */
4406 if (TARGET_IWMMXT_ABI
4407 && arm_vector_mode_supported_p (mode)
4408 && pcum->named_count > pcum->nargs + 1)
4409 {
4410 if (pcum->iwmmxt_nregs <= 9)
4411 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4412 else
4413 {
4414 pcum->can_split = false;
4415 return NULL_RTX;
4416 }
4417 }
4418
4419 /* Put doubleword aligned quantities in even register pairs. */
4420 if (pcum->nregs & 1
4421 && ARM_DOUBLEWORD_ALIGN
4422 && arm_needs_doubleword_align (mode, type))
4423 pcum->nregs++;
4424
4429 /* Only allow splitting an arg between regs and memory if all preceding
4430 args were allocated to regs. For args passed by reference we only count
4431 the reference pointer. */
4432 if (pcum->can_split)
4433 nregs = 1;
4434 else
4435 nregs = ARM_NUM_REGS2 (mode, type);
4436
4437 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4438 return NULL_RTX;
4439
4440 return gen_rtx_REG (mode, pcum->nregs);
4441 }
4442
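/* Return the number of bytes of an argument (of MODE and TYPE) that are
   passed in registers when the argument is split between registers and
   the stack, or zero if it is passed entirely in one or the other.  */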
4443 static int
4444 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4445 tree type, bool named)
4446 {
4447 int nregs = pcum->nregs;
4448
4449 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4450 {
4451 aapcs_layout_arg (pcum, mode, type, named);
4452 return pcum->aapcs_partial;
4453 }
4454
4455 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4456 return 0;
4457
4458 if (NUM_ARG_REGS > nregs
4459 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4460 && pcum->can_split)
4461 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4462
4463 return 0;
4464 }
4465
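/* Update PCUM to advance past an argument of mode MODE and type TYPE that
   has just been laid out (NAMED is false for an anonymous argument to a
   variadic function).  */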
4466 void
4467 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4468 tree type, bool named)
4469 {
4470 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4471 {
4472 aapcs_layout_arg (pcum, mode, type, named);
4473
4474 if (pcum->aapcs_cprc_slot >= 0)
4475 {
4476 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4477 type);
4478 pcum->aapcs_cprc_slot = -1;
4479 }
4480
4481 /* Generic stuff. */
4482 pcum->aapcs_arg_processed = false;
4483 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4484 pcum->aapcs_reg = NULL_RTX;
4485 pcum->aapcs_partial = 0;
4486 }
4487 else
4488 {
4489 pcum->nargs += 1;
4490 if (arm_vector_mode_supported_p (mode)
4491 && pcum->named_count > pcum->nargs
4492 && TARGET_IWMMXT_ABI)
4493 pcum->iwmmxt_nregs += 1;
4494 else
4495 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4496 }
4497 }
4498
4499 /* Variable sized types are passed by reference. This is a GCC
4500 extension to the ARM ABI. */
4501
4502 static bool
4503 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4504 enum machine_mode mode ATTRIBUTE_UNUSED,
4505 const_tree type, bool named ATTRIBUTE_UNUSED)
4506 {
4507 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4508 }
4509 \f
4510 /* Encode the current state of the #pragma [no_]long_calls. */
4511 typedef enum
4512 {
4513 OFF, /* No #pragma [no_]long_calls is in effect. */
4514 LONG, /* #pragma long_calls is in effect. */
4515 SHORT /* #pragma no_long_calls is in effect. */
4516 } arm_pragma_enum;
4517
4518 static arm_pragma_enum arm_pragma_long_calls = OFF;
4519
4520 void
4521 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4522 {
4523 arm_pragma_long_calls = LONG;
4524 }
4525
4526 void
4527 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4528 {
4529 arm_pragma_long_calls = SHORT;
4530 }
4531
4532 void
4533 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4534 {
4535 arm_pragma_long_calls = OFF;
4536 }
4537 \f
4538 /* Handle an attribute requiring a FUNCTION_DECL;
4539 arguments as in struct attribute_spec.handler. */
4540 static tree
4541 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4542 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4543 {
4544 if (TREE_CODE (*node) != FUNCTION_DECL)
4545 {
4546 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4547 name);
4548 *no_add_attrs = true;
4549 }
4550
4551 return NULL_TREE;
4552 }
4553
4554 /* Handle an "interrupt" or "isr" attribute;
4555 arguments as in struct attribute_spec.handler. */
4556 static tree
4557 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4558 bool *no_add_attrs)
4559 {
4560 if (DECL_P (*node))
4561 {
4562 if (TREE_CODE (*node) != FUNCTION_DECL)
4563 {
4564 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4565 name);
4566 *no_add_attrs = true;
4567 }
4568 /* FIXME: the argument if any is checked for type attributes;
4569 should it be checked for decl ones? */
4570 }
4571 else
4572 {
4573 if (TREE_CODE (*node) == FUNCTION_TYPE
4574 || TREE_CODE (*node) == METHOD_TYPE)
4575 {
4576 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4577 {
4578 warning (OPT_Wattributes, "%qE attribute ignored",
4579 name);
4580 *no_add_attrs = true;
4581 }
4582 }
4583 else if (TREE_CODE (*node) == POINTER_TYPE
4584 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4585 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4586 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4587 {
4588 *node = build_variant_type_copy (*node);
4589 TREE_TYPE (*node) = build_type_attribute_variant
4590 (TREE_TYPE (*node),
4591 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4592 *no_add_attrs = true;
4593 }
4594 else
4595 {
4596 /* Possibly pass this attribute on from the type to a decl. */
4597 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4598 | (int) ATTR_FLAG_FUNCTION_NEXT
4599 | (int) ATTR_FLAG_ARRAY_NEXT))
4600 {
4601 *no_add_attrs = true;
4602 return tree_cons (name, args, NULL_TREE);
4603 }
4604 else
4605 {
4606 warning (OPT_Wattributes, "%qE attribute ignored",
4607 name);
4608 }
4609 }
4610 }
4611
4612 return NULL_TREE;
4613 }
4614
4615 /* Handle a "pcs" attribute; arguments as in struct
4616 attribute_spec.handler. */
4617 static tree
4618 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4619 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4620 {
4621 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4622 {
4623 warning (OPT_Wattributes, "%qE attribute ignored", name);
4624 *no_add_attrs = true;
4625 }
4626 return NULL_TREE;
4627 }
4628
4629 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4630 /* Handle the "notshared" attribute. This attribute is another way of
4631 requesting hidden visibility. ARM's compiler supports
4632 "__declspec(notshared)"; we support the same thing via an
4633 attribute. */
4634
4635 static tree
4636 arm_handle_notshared_attribute (tree *node,
4637 tree name ATTRIBUTE_UNUSED,
4638 tree args ATTRIBUTE_UNUSED,
4639 int flags ATTRIBUTE_UNUSED,
4640 bool *no_add_attrs)
4641 {
4642 tree decl = TYPE_NAME (*node);
4643
4644 if (decl)
4645 {
4646 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4647 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4648 *no_add_attrs = false;
4649 }
4650 return NULL_TREE;
4651 }
4652 #endif
4653
4654 /* Return 0 if the attributes for two types are incompatible, 1 if they
4655 are compatible, and 2 if they are nearly compatible (which causes a
4656 warning to be generated). */
4657 static int
4658 arm_comp_type_attributes (const_tree type1, const_tree type2)
4659 {
4660 int l1, l2, s1, s2;
4661
4662 /* Check for mismatch of non-default calling convention. */
4663 if (TREE_CODE (type1) != FUNCTION_TYPE)
4664 return 1;
4665
4666 /* Check for mismatched call attributes. */
4667 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4668 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4669 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4670 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4671
4672 /* Only bother to check if an attribute is defined. */
4673 if (l1 | l2 | s1 | s2)
4674 {
4675 /* If one type has an attribute, the other must have the same attribute. */
4676 if ((l1 != l2) || (s1 != s2))
4677 return 0;
4678
4679 /* Disallow mixed attributes. */
4680 if ((l1 & s2) || (l2 & s1))
4681 return 0;
4682 }
4683
4684 /* Check for mismatched ISR attribute. */
4685 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4686 if (! l1)
4687 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4688 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4689 if (! l2)
4690     l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4691 if (l1 != l2)
4692 return 0;
4693
4694 return 1;
4695 }
4696
4697 /* Assigns default attributes to newly defined type. This is used to
4698 set short_call/long_call attributes for function types of
4699 functions defined inside corresponding #pragma scopes. */
4700 static void
4701 arm_set_default_type_attributes (tree type)
4702 {
4703 /* Add __attribute__ ((long_call)) to all functions, when
4704 inside #pragma long_calls or __attribute__ ((short_call)),
4705 when inside #pragma no_long_calls. */
4706 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4707 {
4708 tree type_attr_list, attr_name;
4709 type_attr_list = TYPE_ATTRIBUTES (type);
4710
4711 if (arm_pragma_long_calls == LONG)
4712 attr_name = get_identifier ("long_call");
4713 else if (arm_pragma_long_calls == SHORT)
4714 attr_name = get_identifier ("short_call");
4715 else
4716 return;
4717
4718 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4719 TYPE_ATTRIBUTES (type) = type_attr_list;
4720 }
4721 }
4722 \f
4723 /* Return true if DECL is known to be linked into section SECTION. */
4724
4725 static bool
4726 arm_function_in_section_p (tree decl, section *section)
4727 {
4728 /* We can only be certain about functions defined in the same
4729 compilation unit. */
4730 if (!TREE_STATIC (decl))
4731 return false;
4732
4733 /* Make sure that SYMBOL always binds to the definition in this
4734 compilation unit. */
4735 if (!targetm.binds_local_p (decl))
4736 return false;
4737
4738 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4739 if (!DECL_SECTION_NAME (decl))
4740 {
4741 /* Make sure that we will not create a unique section for DECL. */
4742 if (flag_function_sections || DECL_ONE_ONLY (decl))
4743 return false;
4744 }
4745
4746 return function_section (decl) == section;
4747 }
4748
4749 /* Return nonzero if a 32-bit "long_call" should be generated for
4750 a call from the current function to DECL. We generate a long_call
4751 if the function:
4752
4753         a.  has an __attribute__ ((long_call))
4754 or b. is within the scope of a #pragma long_calls
4755 or c. the -mlong-calls command line switch has been specified
4756
4757 However we do not generate a long call if the function:
4758
4759 d. has an __attribute__ ((short_call))
4760 or e. is inside the scope of a #pragma no_long_calls
4761 or f. is defined in the same section as the current function. */
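/* For illustration, case a. is typically requested with

     extern void far_handler (void) __attribute__ ((long_call));

   and case c. by compiling with -mlong-calls; a "short_call" attribute on
   the same declaration overrides both.  */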
4762
4763 bool
4764 arm_is_long_call_p (tree decl)
4765 {
4766 tree attrs;
4767
4768 if (!decl)
4769 return TARGET_LONG_CALLS;
4770
4771 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4772 if (lookup_attribute ("short_call", attrs))
4773 return false;
4774
4775 /* For "f", be conservative, and only cater for cases in which the
4776 whole of the current function is placed in the same section. */
4777 if (!flag_reorder_blocks_and_partition
4778 && TREE_CODE (decl) == FUNCTION_DECL
4779 && arm_function_in_section_p (decl, current_function_section ()))
4780 return false;
4781
4782 if (lookup_attribute ("long_call", attrs))
4783 return true;
4784
4785 return TARGET_LONG_CALLS;
4786 }
4787
4788 /* Return nonzero if it is ok to make a tail-call to DECL. */
4789 static bool
4790 arm_function_ok_for_sibcall (tree decl, tree exp)
4791 {
4792 unsigned long func_type;
4793
4794 if (cfun->machine->sibcall_blocked)
4795 return false;
4796
4797 /* Never tailcall something for which we have no decl, or if we
4798 are in Thumb mode. */
4799 if (decl == NULL || TARGET_THUMB)
4800 return false;
4801
4802 /* The PIC register is live on entry to VxWorks PLT entries, so we
4803 must make the call before restoring the PIC register. */
4804 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4805 return false;
4806
4807 /* Cannot tail-call to long calls, since these are out of range of
4808 a branch instruction. */
4809 if (arm_is_long_call_p (decl))
4810 return false;
4811
4812 /* If we are interworking and the function is not declared static
4813 then we can't tail-call it unless we know that it exists in this
4814 compilation unit (since it might be a Thumb routine). */
4815 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4816 return false;
4817
4818 func_type = arm_current_func_type ();
4819 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4820 if (IS_INTERRUPT (func_type))
4821 return false;
4822
4823 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4824 {
4825 /* Check that the return value locations are the same. For
4826 example that we aren't returning a value from the sibling in
4827 a VFP register but then need to transfer it to a core
4828 register. */
4829 rtx a, b;
4830
4831 a = arm_function_value (TREE_TYPE (exp), decl, false);
4832 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4833 cfun->decl, false);
4834 if (!rtx_equal_p (a, b))
4835 return false;
4836 }
4837
4838 /* Never tailcall if function may be called with a misaligned SP. */
4839 if (IS_STACKALIGN (func_type))
4840 return false;
4841
4842 /* Everything else is ok. */
4843 return true;
4844 }
4845
4846 \f
4847 /* Addressing mode support functions. */
4848
4849 /* Return nonzero if X is a legitimate immediate operand when compiling
4850 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4851 int
4852 legitimate_pic_operand_p (rtx x)
4853 {
4854 if (GET_CODE (x) == SYMBOL_REF
4855 || (GET_CODE (x) == CONST
4856 && GET_CODE (XEXP (x, 0)) == PLUS
4857 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4858 return 0;
4859
4860 return 1;
4861 }
4862
4863 /* Record that the current function needs a PIC register. Initialize
4864 cfun->machine->pic_reg if we have not already done so. */
4865
4866 static void
4867 require_pic_register (void)
4868 {
4869 /* A lot of the logic here is made obscure by the fact that this
4870 routine gets called as part of the rtx cost estimation process.
4871 We don't want those calls to affect any assumptions about the real
4872 function; and further, we can't call entry_of_function() until we
4873 start the real expansion process. */
4874 if (!crtl->uses_pic_offset_table)
4875 {
4876 gcc_assert (can_create_pseudo_p ());
4877 if (arm_pic_register != INVALID_REGNUM)
4878 {
4879 if (!cfun->machine->pic_reg)
4880 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4881
4882 /* Play games to avoid marking the function as needing pic
4883 if we are being called as part of the cost-estimation
4884 process. */
4885 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4886 crtl->uses_pic_offset_table = 1;
4887 }
4888 else
4889 {
4890 rtx seq;
4891
4892 if (!cfun->machine->pic_reg)
4893 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4894
4895 /* Play games to avoid marking the function as needing pic
4896 if we are being called as part of the cost-estimation
4897 process. */
4898 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4899 {
4900 crtl->uses_pic_offset_table = 1;
4901 start_sequence ();
4902
4903 arm_load_pic_register (0UL);
4904
4905 seq = get_insns ();
4906 end_sequence ();
4907 /* We can be called during expansion of PHI nodes, where
4908 we can't yet emit instructions directly in the final
4909 insn stream. Queue the insns on the entry edge, they will
4910 be committed after everything else is expanded. */
4911 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4912 }
4913 }
4914 }
4915 }
4916
4917 rtx
4918 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4919 {
4920 if (GET_CODE (orig) == SYMBOL_REF
4921 || GET_CODE (orig) == LABEL_REF)
4922 {
4923 rtx pic_ref, address;
4924 rtx insn;
4925
4926 if (reg == 0)
4927 {
4928 gcc_assert (can_create_pseudo_p ());
4929 reg = gen_reg_rtx (Pmode);
4930 address = gen_reg_rtx (Pmode);
4931 }
4932 else
4933 address = reg;
4934
4935 /* VxWorks does not impose a fixed gap between segments; the run-time
4936 gap can be different from the object-file gap. We therefore can't
4937 use GOTOFF unless we are absolutely sure that the symbol is in the
4938 same segment as the GOT. Unfortunately, the flexibility of linker
4939 scripts means that we can't be sure of that in general, so assume
4940 that GOTOFF is never valid on VxWorks. */
4941 if ((GET_CODE (orig) == LABEL_REF
4942 || (GET_CODE (orig) == SYMBOL_REF &&
4943 SYMBOL_REF_LOCAL_P (orig)))
4944 && NEED_GOT_RELOC
4945 && !TARGET_VXWORKS_RTP)
4946 insn = arm_pic_static_addr (orig, reg);
4947 else
4948 {
4949 /* If this function doesn't have a pic register, create one now. */
4950 require_pic_register ();
4951
4952 if (TARGET_32BIT)
4953 emit_insn (gen_pic_load_addr_32bit (address, orig));
4954 else /* TARGET_THUMB1 */
4955 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4956
4957 pic_ref = gen_const_mem (Pmode,
4958 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
4959 address));
4960 insn = emit_move_insn (reg, pic_ref);
4961 }
4962
4963 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4964 by loop. */
4965 set_unique_reg_note (insn, REG_EQUAL, orig);
4966
4967 return reg;
4968 }
4969 else if (GET_CODE (orig) == CONST)
4970 {
4971 rtx base, offset;
4972
4973 if (GET_CODE (XEXP (orig, 0)) == PLUS
4974 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4975 return orig;
4976
4977 /* Handle the case where we have: const (UNSPEC_TLS). */
4978 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4979 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4980 return orig;
4981
4982 /* Handle the case where we have:
4983 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4984 CONST_INT. */
4985 if (GET_CODE (XEXP (orig, 0)) == PLUS
4986 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4987 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4988 {
4989 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4990 return orig;
4991 }
4992
4993 if (reg == 0)
4994 {
4995 gcc_assert (can_create_pseudo_p ());
4996 reg = gen_reg_rtx (Pmode);
4997 }
4998
4999 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5000
5001 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5002 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5003 base == reg ? 0 : reg);
5004
5005 if (GET_CODE (offset) == CONST_INT)
5006 {
5007 /* The base register doesn't really matter, we only want to
5008 test the index for the appropriate mode. */
5009 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5010 {
5011 gcc_assert (can_create_pseudo_p ());
5012 offset = force_reg (Pmode, offset);
5013 }
5014
5015 if (GET_CODE (offset) == CONST_INT)
5016 return plus_constant (base, INTVAL (offset));
5017 }
5018
5019 if (GET_MODE_SIZE (mode) > 4
5020 && (GET_MODE_CLASS (mode) == MODE_INT
5021 || TARGET_SOFT_FLOAT))
5022 {
5023 emit_insn (gen_addsi3 (reg, base, offset));
5024 return reg;
5025 }
5026
5027 return gen_rtx_PLUS (Pmode, base, offset);
5028 }
5029
5030 return orig;
5031 }
5032
5033
5034 /* Find a spare register to use during the prolog of a function. */
5035
5036 static int
5037 thumb_find_work_register (unsigned long pushed_regs_mask)
5038 {
5039 int reg;
5040
5041 /* Check the argument registers first as these are call-used. The
5042 register allocation order means that sometimes r3 might be used
5043 but earlier argument registers might not, so check them all. */
5044 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5045 if (!df_regs_ever_live_p (reg))
5046 return reg;
5047
5048 /* Before going on to check the call-saved registers we can try a couple
5049 more ways of deducing that r3 is available. The first is when we are
5050 pushing anonymous arguments onto the stack and we have less than 4
5051      registers' worth of fixed arguments (*).  In this case r3 will be part of
5052 the variable argument list and so we can be sure that it will be
5053 pushed right at the start of the function. Hence it will be available
5054 for the rest of the prologue.
5055      (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
5056 if (cfun->machine->uses_anonymous_args
5057 && crtl->args.pretend_args_size > 0)
5058 return LAST_ARG_REGNUM;
5059
5060 /* The other case is when we have fixed arguments but less than 4 registers
5061 worth. In this case r3 might be used in the body of the function, but
5062 it is not being used to convey an argument into the function. In theory
5063 we could just check crtl->args.size to see how many bytes are
5064 being passed in argument registers, but it seems that it is unreliable.
5065 Sometimes it will have the value 0 when in fact arguments are being
5066 passed. (See testcase execute/20021111-1.c for an example). So we also
5067 check the args_info.nregs field as well. The problem with this field is
5068 that it makes no allowances for arguments that are passed to the
5069 function but which are not used. Hence we could miss an opportunity
5070 when a function has an unused argument in r3. But it is better to be
5071 safe than to be sorry. */
5072 if (! cfun->machine->uses_anonymous_args
5073 && crtl->args.size >= 0
5074 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5075 && crtl->args.info.nregs < 4)
5076 return LAST_ARG_REGNUM;
5077
5078 /* Otherwise look for a call-saved register that is going to be pushed. */
5079 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5080 if (pushed_regs_mask & (1 << reg))
5081 return reg;
5082
5083 if (TARGET_THUMB2)
5084 {
5085 /* Thumb-2 can use high regs. */
5086 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5087 if (pushed_regs_mask & (1 << reg))
5088 return reg;
5089 }
5090 /* Something went wrong - thumb_compute_save_reg_mask()
5091 should have arranged for a suitable register to be pushed. */
5092 gcc_unreachable ();
5093 }
5094
5095 static GTY(()) int pic_labelno;
5096
5097 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5098 low register. */
5099
5100 void
5101 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5102 {
5103 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5104
5105 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5106 return;
5107
5108 gcc_assert (flag_pic);
5109
5110 pic_reg = cfun->machine->pic_reg;
5111 if (TARGET_VXWORKS_RTP)
5112 {
5113 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5114 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5115 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5116
5117 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5118
5119 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5120 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5121 }
5122 else
5123 {
5124 /* We use an UNSPEC rather than a LABEL_REF because this label
5125 never appears in the code stream. */
5126
5127 labelno = GEN_INT (pic_labelno++);
5128 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5129 l1 = gen_rtx_CONST (VOIDmode, l1);
5130
5131 /* On the ARM the PC register contains 'dot + 8' at the time of the
5132          addition; on the Thumb it is 'dot + 4'.  */
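      /* Schematically (illustrative only; label names are made up), the
	 32-bit ARM sequence generated below is

	     ldr	rPIC, .LCn
	   .LPICn:
	     add	rPIC, pc, rPIC
	     ...
	   .LCn:
	     .word	_GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)

	 so that rPIC ends up holding the address of the GOT.  */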
5133 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5134 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5135 UNSPEC_GOTSYM_OFF);
5136 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5137
5138 if (TARGET_32BIT)
5139 {
5140 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5141 if (TARGET_ARM)
5142 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5143 else
5144 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5145 }
5146 else /* TARGET_THUMB1 */
5147 {
5148 if (arm_pic_register != INVALID_REGNUM
5149 && REGNO (pic_reg) > LAST_LO_REGNUM)
5150 {
5151 /* We will have pushed the pic register, so we should always be
5152 able to find a work register. */
5153 pic_tmp = gen_rtx_REG (SImode,
5154 thumb_find_work_register (saved_regs));
5155 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5156 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5157 }
5158 else
5159 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5160 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5161 }
5162 }
5163
5164 /* Need to emit this whether or not we obey regdecls,
5165 since setjmp/longjmp can cause life info to screw up. */
5166 emit_use (pic_reg);
5167 }
5168
5169 /* Generate code to load the address of a static var when flag_pic is set. */
5170 static rtx
5171 arm_pic_static_addr (rtx orig, rtx reg)
5172 {
5173 rtx l1, labelno, offset_rtx, insn;
5174
5175 gcc_assert (flag_pic);
5176
5177 /* We use an UNSPEC rather than a LABEL_REF because this label
5178 never appears in the code stream. */
5179 labelno = GEN_INT (pic_labelno++);
5180 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5181 l1 = gen_rtx_CONST (VOIDmode, l1);
5182
5183 /* On the ARM the PC register contains 'dot + 8' at the time of the
5184      addition; on the Thumb it is 'dot + 4'.  */
5185 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5186 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5187 UNSPEC_SYMBOL_OFFSET);
5188 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5189
5190 if (TARGET_32BIT)
5191 {
5192 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5193 if (TARGET_ARM)
5194 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5195 else
5196 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5197 }
5198 else /* TARGET_THUMB1 */
5199 {
5200 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5201 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5202 }
5203
5204 return insn;
5205 }
5206
5207 /* Return nonzero if X is valid as an ARM state addressing register. */
5208 static int
5209 arm_address_register_rtx_p (rtx x, int strict_p)
5210 {
5211 int regno;
5212
5213 if (GET_CODE (x) != REG)
5214 return 0;
5215
5216 regno = REGNO (x);
5217
5218 if (strict_p)
5219 return ARM_REGNO_OK_FOR_BASE_P (regno);
5220
5221 return (regno <= LAST_ARM_REGNUM
5222 || regno >= FIRST_PSEUDO_REGISTER
5223 || regno == FRAME_POINTER_REGNUM
5224 || regno == ARG_POINTER_REGNUM);
5225 }
5226
5227 /* Return TRUE if this rtx is the difference of a symbol and a label,
5228 and will reduce to a PC-relative relocation in the object file.
5229 Expressions like this can be left alone when generating PIC, rather
5230 than forced through the GOT. */
5231 static int
5232 pcrel_constant_p (rtx x)
5233 {
5234 if (GET_CODE (x) == MINUS)
5235 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5236
5237 return FALSE;
5238 }
5239
5240 /* Return nonzero if X is a valid ARM state address operand. */
5241 int
5242 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5243 int strict_p)
5244 {
5245 bool use_ldrd;
5246 enum rtx_code code = GET_CODE (x);
5247
5248 if (arm_address_register_rtx_p (x, strict_p))
5249 return 1;
5250
5251 use_ldrd = (TARGET_LDRD
5252 && (mode == DImode
5253 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5254
5255 if (code == POST_INC || code == PRE_DEC
5256 || ((code == PRE_INC || code == POST_DEC)
5257 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5258 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5259
5260 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5261 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5262 && GET_CODE (XEXP (x, 1)) == PLUS
5263 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5264 {
5265 rtx addend = XEXP (XEXP (x, 1), 1);
5266
5267 /* Don't allow ldrd post increment by register because it's hard
5268 to fixup invalid register choices. */
5269 if (use_ldrd
5270 && GET_CODE (x) == POST_MODIFY
5271 && GET_CODE (addend) == REG)
5272 return 0;
5273
5274 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5275 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5276 }
5277
5278 /* After reload constants split into minipools will have addresses
5279 from a LABEL_REF. */
5280 else if (reload_completed
5281 && (code == LABEL_REF
5282 || (code == CONST
5283 && GET_CODE (XEXP (x, 0)) == PLUS
5284 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5285 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5286 return 1;
5287
5288 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5289 return 0;
5290
5291 else if (code == PLUS)
5292 {
5293 rtx xop0 = XEXP (x, 0);
5294 rtx xop1 = XEXP (x, 1);
5295
5296 return ((arm_address_register_rtx_p (xop0, strict_p)
5297 && GET_CODE(xop1) == CONST_INT
5298 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5299 || (arm_address_register_rtx_p (xop1, strict_p)
5300 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5301 }
5302
5303 #if 0
5304 /* Reload currently can't handle MINUS, so disable this for now */
5305 else if (GET_CODE (x) == MINUS)
5306 {
5307 rtx xop0 = XEXP (x, 0);
5308 rtx xop1 = XEXP (x, 1);
5309
5310 return (arm_address_register_rtx_p (xop0, strict_p)
5311 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5312 }
5313 #endif
5314
5315 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5316 && code == SYMBOL_REF
5317 && CONSTANT_POOL_ADDRESS_P (x)
5318 && ! (flag_pic
5319 && symbol_mentioned_p (get_pool_constant (x))
5320 && ! pcrel_constant_p (get_pool_constant (x))))
5321 return 1;
5322
5323 return 0;
5324 }
5325
5326 /* Return nonzero if X is a valid Thumb-2 address operand. */
5327 static int
5328 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5329 {
5330 bool use_ldrd;
5331 enum rtx_code code = GET_CODE (x);
5332
5333 if (arm_address_register_rtx_p (x, strict_p))
5334 return 1;
5335
5336 use_ldrd = (TARGET_LDRD
5337 && (mode == DImode
5338 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5339
5340 if (code == POST_INC || code == PRE_DEC
5341 || ((code == PRE_INC || code == POST_DEC)
5342 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5343 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5344
5345 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5346 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5347 && GET_CODE (XEXP (x, 1)) == PLUS
5348 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5349 {
5350 /* Thumb-2 only has autoincrement by constant. */
5351 rtx addend = XEXP (XEXP (x, 1), 1);
5352 HOST_WIDE_INT offset;
5353
5354 if (GET_CODE (addend) != CONST_INT)
5355 return 0;
5356
5357 offset = INTVAL(addend);
5358 if (GET_MODE_SIZE (mode) <= 4)
5359 return (offset > -256 && offset < 256);
5360
5361 return (use_ldrd && offset > -1024 && offset < 1024
5362 && (offset & 3) == 0);
5363 }
5364
5365 /* After reload, constants split into minipools will have addresses
5366 from a LABEL_REF. */
5367 else if (reload_completed
5368 && (code == LABEL_REF
5369 || (code == CONST
5370 && GET_CODE (XEXP (x, 0)) == PLUS
5371 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5372 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5373 return 1;
5374
5375 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5376 return 0;
5377
5378 else if (code == PLUS)
5379 {
5380 rtx xop0 = XEXP (x, 0);
5381 rtx xop1 = XEXP (x, 1);
5382
5383 return ((arm_address_register_rtx_p (xop0, strict_p)
5384 && thumb2_legitimate_index_p (mode, xop1, strict_p))
5385 || (arm_address_register_rtx_p (xop1, strict_p)
5386 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5387 }
5388
5389 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5390 && code == SYMBOL_REF
5391 && CONSTANT_POOL_ADDRESS_P (x)
5392 && ! (flag_pic
5393 && symbol_mentioned_p (get_pool_constant (x))
5394 && ! pcrel_constant_p (get_pool_constant (x))))
5395 return 1;
5396
5397 return 0;
5398 }
5399
5400 /* Return nonzero if INDEX is valid for an address index operand in
5401 ARM state. */
5402 static int
5403 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5404 int strict_p)
5405 {
5406 HOST_WIDE_INT range;
5407 enum rtx_code code = GET_CODE (index);
5408
5409 /* Standard coprocessor addressing modes. */
5410 if (TARGET_HARD_FLOAT
5411 && (TARGET_FPA || TARGET_MAVERICK)
5412 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5413 || (TARGET_MAVERICK && mode == DImode)))
5414 return (code == CONST_INT && INTVAL (index) < 1024
5415 && INTVAL (index) > -1024
5416 && (INTVAL (index) & 3) == 0);
5417
5418 if (TARGET_NEON
5419 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5420 return (code == CONST_INT
5421 && INTVAL (index) < 1016
5422 && INTVAL (index) > -1024
5423 && (INTVAL (index) & 3) == 0);
5424
5425 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5426 return (code == CONST_INT
5427 && INTVAL (index) < 1024
5428 && INTVAL (index) > -1024
5429 && (INTVAL (index) & 3) == 0);
5430
5431 if (arm_address_register_rtx_p (index, strict_p)
5432 && (GET_MODE_SIZE (mode) <= 4))
5433 return 1;
5434
5435 if (mode == DImode || mode == DFmode)
5436 {
5437 if (code == CONST_INT)
5438 {
5439 HOST_WIDE_INT val = INTVAL (index);
5440
5441 if (TARGET_LDRD)
5442 return val > -256 && val < 256;
5443 else
5444 return val > -4096 && val < 4092;
5445 }
5446
5447 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5448 }
5449
5450 if (GET_MODE_SIZE (mode) <= 4
5451 && ! (arm_arch4
5452 && (mode == HImode
5453 || mode == HFmode
5454 || (mode == QImode && outer == SIGN_EXTEND))))
5455 {
5456 if (code == MULT)
5457 {
5458 rtx xiop0 = XEXP (index, 0);
5459 rtx xiop1 = XEXP (index, 1);
5460
5461 return ((arm_address_register_rtx_p (xiop0, strict_p)
5462 && power_of_two_operand (xiop1, SImode))
5463 || (arm_address_register_rtx_p (xiop1, strict_p)
5464 && power_of_two_operand (xiop0, SImode)));
5465 }
5466 else if (code == LSHIFTRT || code == ASHIFTRT
5467 || code == ASHIFT || code == ROTATERT)
5468 {
5469 rtx op = XEXP (index, 1);
5470
5471 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5472 && GET_CODE (op) == CONST_INT
5473 && INTVAL (op) > 0
5474 && INTVAL (op) <= 31);
5475 }
5476 }
5477
5478 /* For ARM v4 we may be doing a sign-extend operation during the
5479 load. */
5480 if (arm_arch4)
5481 {
5482 if (mode == HImode
5483 || mode == HFmode
5484 || (outer == SIGN_EXTEND && mode == QImode))
5485 range = 256;
5486 else
5487 range = 4096;
5488 }
5489 else
5490 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5491
5492 return (code == CONST_INT
5493 && INTVAL (index) < range
5494 && INTVAL (index) > -range);
5495 }
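
/* For arm_legitimate_index_p above, this roughly means: on ARMv4 and later,
   HImode, HFmode and sign-extended QImode accesses accept a constant index
   in the range -255..255, while other small modes accept -4095..4095,
   matching the 8-bit and 12-bit offset fields of the corresponding
   load/store encodings. */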
5496
5497 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
5498 index operand, i.e. 1, 2, 4 or 8. */
5499 static bool
5500 thumb2_index_mul_operand (rtx op)
5501 {
5502 HOST_WIDE_INT val;
5503
5504 if (GET_CODE(op) != CONST_INT)
5505 return false;
5506
5507 val = INTVAL(op);
5508 return (val == 1 || val == 2 || val == 4 || val == 8);
5509 }
5510
5511 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5512 static int
5513 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5514 {
5515 enum rtx_code code = GET_CODE (index);
5516
5517 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5518 /* Standard coprocessor addressing modes. */
5519 if (TARGET_HARD_FLOAT
5520 && (TARGET_FPA || TARGET_MAVERICK)
5521 && (GET_MODE_CLASS (mode) == MODE_FLOAT
5522 || (TARGET_MAVERICK && mode == DImode)))
5523 return (code == CONST_INT && INTVAL (index) < 1024
5524 && INTVAL (index) > -1024
5525 && (INTVAL (index) & 3) == 0);
5526
5527 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5528 {
5529 /* For DImode assume values will usually live in core regs
5530 and only allow LDRD addressing modes. */
5531 if (!TARGET_LDRD || mode != DImode)
5532 return (code == CONST_INT
5533 && INTVAL (index) < 1024
5534 && INTVAL (index) > -1024
5535 && (INTVAL (index) & 3) == 0);
5536 }
5537
5538 if (TARGET_NEON
5539 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
5540 return (code == CONST_INT
5541 && INTVAL (index) < 1016
5542 && INTVAL (index) > -1024
5543 && (INTVAL (index) & 3) == 0);
5544
5545 if (arm_address_register_rtx_p (index, strict_p)
5546 && (GET_MODE_SIZE (mode) <= 4))
5547 return 1;
5548
5549 if (mode == DImode || mode == DFmode)
5550 {
5551 if (code == CONST_INT)
5552 {
5553 HOST_WIDE_INT val = INTVAL (index);
5554 /* ??? Can we assume ldrd for thumb2? */
5555 /* Thumb-2 ldrd only has reg+const addressing modes. */
5556 /* ldrd supports offsets of +-1020.
5557 However the ldr fallback does not. */
5558 return val > -256 && val < 256 && (val & 3) == 0;
5559 }
5560 else
5561 return 0;
5562 }
5563
5564 if (code == MULT)
5565 {
5566 rtx xiop0 = XEXP (index, 0);
5567 rtx xiop1 = XEXP (index, 1);
5568
5569 return ((arm_address_register_rtx_p (xiop0, strict_p)
5570 && thumb2_index_mul_operand (xiop1))
5571 || (arm_address_register_rtx_p (xiop1, strict_p)
5572 && thumb2_index_mul_operand (xiop0)));
5573 }
5574 else if (code == ASHIFT)
5575 {
5576 rtx op = XEXP (index, 1);
5577
5578 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5579 && GET_CODE (op) == CONST_INT
5580 && INTVAL (op) > 0
5581 && INTVAL (op) <= 3);
5582 }
5583
5584 return (code == CONST_INT
5585 && INTVAL (index) < 4096
5586 && INTVAL (index) > -256);
5587 }
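
/* Note: the final CONST_INT test in thumb2_legitimate_index_p above accepts
   offsets in the range -255..4095, which corresponds to the 12-bit positive
   and 8-bit negative immediate offset forms of the Thumb-2 load/store
   instructions. */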
5588
5589 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5590 static int
5591 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5592 {
5593 int regno;
5594
5595 if (GET_CODE (x) != REG)
5596 return 0;
5597
5598 regno = REGNO (x);
5599
5600 if (strict_p)
5601 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5602
5603 return (regno <= LAST_LO_REGNUM
5604 || regno > LAST_VIRTUAL_REGISTER
5605 || regno == FRAME_POINTER_REGNUM
5606 || (GET_MODE_SIZE (mode) >= 4
5607 && (regno == STACK_POINTER_REGNUM
5608 || regno >= FIRST_PSEUDO_REGISTER
5609 || x == hard_frame_pointer_rtx
5610 || x == arg_pointer_rtx)));
5611 }
5612
5613 /* Return nonzero if X is a legitimate index register. This is the case
5614 for any base register that can access a QImode object. */
5615 inline static int
5616 thumb1_index_register_rtx_p (rtx x, int strict_p)
5617 {
5618 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5619 }
5620
5621 /* Return nonzero if X is a legitimate 16-bit Thumb-state address.
5622
5623 The AP may be eliminated to either the SP or the FP, so we use the
5624 least common denominator, e.g. SImode, and offsets from 0 to 64.
5625
5626 ??? Verify whether the above is the right approach.
5627
5628 ??? Also, the FP may be eliminated to the SP, so perhaps that
5629 needs special handling also.
5630
5631 ??? Look at how the mips16 port solves this problem. It probably uses
5632 better ways to solve some of these problems.
5633
5634 Although it is not incorrect, we don't accept QImode and HImode
5635 addresses based on the frame pointer or arg pointer until the
5636 reload pass starts. This is so that eliminating such addresses
5637 into stack based ones won't produce impossible code. */
5638 static int
5639 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5640 {
5641 /* ??? Not clear if this is right. Experiment. */
5642 if (GET_MODE_SIZE (mode) < 4
5643 && !(reload_in_progress || reload_completed)
5644 && (reg_mentioned_p (frame_pointer_rtx, x)
5645 || reg_mentioned_p (arg_pointer_rtx, x)
5646 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5647 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5648 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5649 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5650 return 0;
5651
5652 /* Accept any base register. SP only in SImode or larger. */
5653 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5654 return 1;
5655
5656 /* This is PC relative data before arm_reorg runs. */
5657 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5658 && GET_CODE (x) == SYMBOL_REF
5659 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5660 return 1;
5661
5662 /* This is PC relative data after arm_reorg runs. */
5663 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5664 && reload_completed
5665 && (GET_CODE (x) == LABEL_REF
5666 || (GET_CODE (x) == CONST
5667 && GET_CODE (XEXP (x, 0)) == PLUS
5668 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5669 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5670 return 1;
5671
5672 /* Post-inc indexing only supported for SImode and larger. */
5673 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5674 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5675 return 1;
5676
5677 else if (GET_CODE (x) == PLUS)
5678 {
5679 /* REG+REG address can be any two index registers. */
5680 /* We disallow FRAME+REG addressing since we know that FRAME
5681 will be replaced with STACK, and SP relative addressing only
5682 permits SP+OFFSET. */
5683 if (GET_MODE_SIZE (mode) <= 4
5684 && XEXP (x, 0) != frame_pointer_rtx
5685 && XEXP (x, 1) != frame_pointer_rtx
5686 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5687 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
5688 return 1;
5689
5690 /* REG+const has 5-7 bit offset for non-SP registers. */
5691 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5692 || XEXP (x, 0) == arg_pointer_rtx)
5693 && GET_CODE (XEXP (x, 1)) == CONST_INT
5694 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5695 return 1;
5696
5697 /* REG+const has a 10-bit offset for SP, but only SImode and
5698 larger are supported. */
5699 /* ??? Should probably check for DI/DFmode overflow here
5700 just like GO_IF_LEGITIMATE_OFFSET does. */
5701 else if (GET_CODE (XEXP (x, 0)) == REG
5702 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5703 && GET_MODE_SIZE (mode) >= 4
5704 && GET_CODE (XEXP (x, 1)) == CONST_INT
5705 && INTVAL (XEXP (x, 1)) >= 0
5706 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5707 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5708 return 1;
5709
5710 else if (GET_CODE (XEXP (x, 0)) == REG
5711 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5712 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5713 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5714 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
5715 && GET_MODE_SIZE (mode) >= 4
5716 && GET_CODE (XEXP (x, 1)) == CONST_INT
5717 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5718 return 1;
5719 }
5720
5721 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5722 && GET_MODE_SIZE (mode) == 4
5723 && GET_CODE (x) == SYMBOL_REF
5724 && CONSTANT_POOL_ADDRESS_P (x)
5725 && ! (flag_pic
5726 && symbol_mentioned_p (get_pool_constant (x))
5727 && ! pcrel_constant_p (get_pool_constant (x))))
5728 return 1;
5729
5730 return 0;
5731 }
5732
5733 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5734 instruction of mode MODE. */
5735 int
5736 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5737 {
5738 switch (GET_MODE_SIZE (mode))
5739 {
5740 case 1:
5741 return val >= 0 && val < 32;
5742
5743 case 2:
5744 return val >= 0 && val < 64 && (val & 1) == 0;
5745
5746 default:
5747 return (val >= 0
5748 && (val + GET_MODE_SIZE (mode)) <= 128
5749 && (val & 3) == 0);
5750 }
5751 }
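
/* As a concrete reading of the ranges above: QImode accepts offsets 0..31,
   HImode accepts even offsets 0..62, SImode accepts word-aligned offsets
   0..124, and DImode accepts word-aligned offsets 0..120 (since VAL plus
   the mode size must not exceed 128). */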
5752
5753 bool
5754 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5755 {
5756 if (TARGET_ARM)
5757 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5758 else if (TARGET_THUMB2)
5759 return thumb2_legitimate_address_p (mode, x, strict_p);
5760 else /* if (TARGET_THUMB1) */
5761 return thumb1_legitimate_address_p (mode, x, strict_p);
5762 }
5763
5764 /* Build the SYMBOL_REF for __tls_get_addr. */
5765
5766 static GTY(()) rtx tls_get_addr_libfunc;
5767
5768 static rtx
5769 get_tls_get_addr (void)
5770 {
5771 if (!tls_get_addr_libfunc)
5772 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5773 return tls_get_addr_libfunc;
5774 }
5775
5776 static rtx
5777 arm_load_tp (rtx target)
5778 {
5779 if (!target)
5780 target = gen_reg_rtx (SImode);
5781
5782 if (TARGET_HARD_TP)
5783 {
5784 /* Can return in any reg. */
5785 emit_insn (gen_load_tp_hard (target));
5786 }
5787 else
5788 {
5789 /* Always returned in r0. Immediately copy the result into a pseudo,
5790 otherwise other uses of r0 (e.g. setting up function arguments) may
5791 clobber the value. */
5792
5793 rtx tmp;
5794
5795 emit_insn (gen_load_tp_soft ());
5796
5797 tmp = gen_rtx_REG (SImode, 0);
5798 emit_move_insn (target, tmp);
5799 }
5800 return target;
5801 }
5802
5803 static rtx
5804 load_tls_operand (rtx x, rtx reg)
5805 {
5806 rtx tmp;
5807
5808 if (reg == NULL_RTX)
5809 reg = gen_reg_rtx (SImode);
5810
5811 tmp = gen_rtx_CONST (SImode, x);
5812
5813 emit_move_insn (reg, tmp);
5814
5815 return reg;
5816 }
5817
5818 static rtx
5819 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5820 {
5821 rtx insns, label, labelno, sum;
5822
5823 start_sequence ();
5824
5825 labelno = GEN_INT (pic_labelno++);
5826 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5827 label = gen_rtx_CONST (VOIDmode, label);
5828
5829 sum = gen_rtx_UNSPEC (Pmode,
5830 gen_rtvec (4, x, GEN_INT (reloc), label,
5831 GEN_INT (TARGET_ARM ? 8 : 4)),
5832 UNSPEC_TLS);
5833 reg = load_tls_operand (sum, reg);
5834
5835 if (TARGET_ARM)
5836 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5837 else if (TARGET_THUMB2)
5838 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5839 else /* TARGET_THUMB1 */
5840 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5841
5842 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5843 Pmode, 1, reg, Pmode);
5844
5845 insns = get_insns ();
5846 end_sequence ();
5847
5848 return insns;
5849 }
5850
5851 rtx
5852 legitimize_tls_address (rtx x, rtx reg)
5853 {
5854 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5855 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5856
5857 switch (model)
5858 {
5859 case TLS_MODEL_GLOBAL_DYNAMIC:
5860 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5861 dest = gen_reg_rtx (Pmode);
5862 emit_libcall_block (insns, dest, ret, x);
5863 return dest;
5864
5865 case TLS_MODEL_LOCAL_DYNAMIC:
5866 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5867
5868 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5869 share the LDM result with other LD model accesses. */
5870 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5871 UNSPEC_TLS);
5872 dest = gen_reg_rtx (Pmode);
5873 emit_libcall_block (insns, dest, ret, eqv);
5874
5875 /* Load the addend. */
5876 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5877 UNSPEC_TLS);
5878 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5879 return gen_rtx_PLUS (Pmode, dest, addend);
5880
5881 case TLS_MODEL_INITIAL_EXEC:
5882 labelno = GEN_INT (pic_labelno++);
5883 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5884 label = gen_rtx_CONST (VOIDmode, label);
5885 sum = gen_rtx_UNSPEC (Pmode,
5886 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5887 GEN_INT (TARGET_ARM ? 8 : 4)),
5888 UNSPEC_TLS);
5889 reg = load_tls_operand (sum, reg);
5890
5891 if (TARGET_ARM)
5892 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5893 else if (TARGET_THUMB2)
5894 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5895 else
5896 {
5897 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5898 emit_move_insn (reg, gen_const_mem (SImode, reg));
5899 }
5900
5901 tp = arm_load_tp (NULL_RTX);
5902
5903 return gen_rtx_PLUS (Pmode, tp, reg);
5904
5905 case TLS_MODEL_LOCAL_EXEC:
5906 tp = arm_load_tp (NULL_RTX);
5907
5908 reg = gen_rtx_UNSPEC (Pmode,
5909 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5910 UNSPEC_TLS);
5911 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5912
5913 return gen_rtx_PLUS (Pmode, tp, reg);
5914
5915 default:
5916 abort ();
5917 }
5918 }
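
/* Broadly: the global-dynamic and local-dynamic models above go through a
   call to __tls_get_addr, initial-exec loads the thread-pointer offset from
   the GOT with a PC-relative load and adds it to the thread pointer, and
   local-exec adds a link-time constant offset directly to the thread
   pointer. */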
5919
5920 /* Try machine-dependent ways of modifying an illegitimate address
5921 to be legitimate. If we find one, return the new, valid address. */
5922 rtx
5923 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5924 {
5925 if (!TARGET_ARM)
5926 {
5927 /* TODO: legitimize_address for Thumb2. */
5928 if (TARGET_THUMB2)
5929 return x;
5930 return thumb_legitimize_address (x, orig_x, mode);
5931 }
5932
5933 if (arm_tls_symbol_p (x))
5934 return legitimize_tls_address (x, NULL_RTX);
5935
5936 if (GET_CODE (x) == PLUS)
5937 {
5938 rtx xop0 = XEXP (x, 0);
5939 rtx xop1 = XEXP (x, 1);
5940
5941 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5942 xop0 = force_reg (SImode, xop0);
5943
5944 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5945 xop1 = force_reg (SImode, xop1);
5946
5947 if (ARM_BASE_REGISTER_RTX_P (xop0)
5948 && GET_CODE (xop1) == CONST_INT)
5949 {
5950 HOST_WIDE_INT n, low_n;
5951 rtx base_reg, val;
5952 n = INTVAL (xop1);
5953
5954 /* VFP addressing modes actually allow greater offsets, but for
5955 now we just stick with the lowest common denominator. */
5956 if (mode == DImode
5957 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5958 {
5959 low_n = n & 0x0f;
5960 n &= ~0x0f;
5961 if (low_n > 4)
5962 {
5963 n += 16;
5964 low_n -= 16;
5965 }
5966 }
5967 else
5968 {
5969 low_n = ((mode) == TImode ? 0
5970 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5971 n -= low_n;
5972 }
5973
5974 base_reg = gen_reg_rtx (SImode);
5975 val = force_operand (plus_constant (xop0, n), NULL_RTX);
5976 emit_move_insn (base_reg, val);
5977 x = plus_constant (base_reg, low_n);
5978 }
5979 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5980 x = gen_rtx_PLUS (SImode, xop0, xop1);
5981 }
5982
5983 /* XXX We don't allow MINUS any more -- see comment in
5984 arm_legitimate_address_outer_p (). */
5985 else if (GET_CODE (x) == MINUS)
5986 {
5987 rtx xop0 = XEXP (x, 0);
5988 rtx xop1 = XEXP (x, 1);
5989
5990 if (CONSTANT_P (xop0))
5991 xop0 = force_reg (SImode, xop0);
5992
5993 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
5994 xop1 = force_reg (SImode, xop1);
5995
5996 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5997 x = gen_rtx_MINUS (SImode, xop0, xop1);
5998 }
5999
6000 /* Make sure to take full advantage of the pre-indexed addressing mode
6001 with absolute addresses, which often allows the base register to be
6002 shared between multiple adjacent memory references, and might even
6003 allow the minipool to be avoided entirely. */
6004 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6005 {
6006 unsigned int bits;
6007 HOST_WIDE_INT mask, base, index;
6008 rtx base_reg;
6009
6010 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6011 use an 8-bit index. So let's use a 12-bit index for SImode only and
6012 hope that arm_gen_constant will enable ldrb to use more bits. */
6013 bits = (mode == SImode) ? 12 : 8;
6014 mask = (1 << bits) - 1;
6015 base = INTVAL (x) & ~mask;
6016 index = INTVAL (x) & mask;
6017 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6018 {
6019 /* It'll most probably be more efficient to generate the base
6020 with more bits set and use a negative index instead. */
6021 base |= mask;
6022 index -= mask;
6023 }
6024 base_reg = force_reg (SImode, GEN_INT (base));
6025 x = plus_constant (base_reg, index);
6026 }
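
/* As an illustration of the split above, assuming an SImode access: for
   the absolute address 0x12345, BITS is 12, so BASE becomes 0x12000 and
   INDEX 0x345; BASE is a valid immediate, so it can be loaded into
   BASE_REG with a single instruction, and the memory reference becomes
   BASE_REG plus an offset of 0x345, which fits the 12-bit field. */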
6027
6028 if (flag_pic)
6029 {
6030 /* We need to find and carefully transform any SYMBOL and LABEL
6031 references, so go back to the original address expression. */
6032 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6033
6034 if (new_x != orig_x)
6035 x = new_x;
6036 }
6037
6038 return x;
6039 }
6040
6041
6042 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6043 to be legitimate. If we find one, return the new, valid address. */
6044 rtx
6045 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6046 {
6047 if (arm_tls_symbol_p (x))
6048 return legitimize_tls_address (x, NULL_RTX);
6049
6050 if (GET_CODE (x) == PLUS
6051 && GET_CODE (XEXP (x, 1)) == CONST_INT
6052 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6053 || INTVAL (XEXP (x, 1)) < 0))
6054 {
6055 rtx xop0 = XEXP (x, 0);
6056 rtx xop1 = XEXP (x, 1);
6057 HOST_WIDE_INT offset = INTVAL (xop1);
6058
6059 /* Try and fold the offset into a biasing of the base register and
6060 then offsetting that. Only do this when optimizing for space, since
6061 it can otherwise cause too many CSEs. */
6062 if (optimize_size && offset >= 0
6063 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6064 {
6065 HOST_WIDE_INT delta;
6066
6067 if (offset >= 256)
6068 delta = offset - (256 - GET_MODE_SIZE (mode));
6069 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6070 delta = 31 * GET_MODE_SIZE (mode);
6071 else
6072 delta = offset & (~31 * GET_MODE_SIZE (mode));
6073
6074 xop0 = force_operand (plus_constant (xop0, offset - delta),
6075 NULL_RTX);
6076 x = plus_constant (xop0, delta);
6077 }
6078 else if (offset < 0 && offset > -256)
6079 /* Small negative offsets are best done with a subtract before the
6080 dereference, since forcing these into a register normally takes
6081 two instructions. */
6082 x = force_operand (x, NULL_RTX);
6083 else
6084 {
6085 /* For the remaining cases, force the constant into a register. */
6086 xop1 = force_reg (SImode, xop1);
6087 x = gen_rtx_PLUS (SImode, xop0, xop1);
6088 }
6089 }
6090 else if (GET_CODE (x) == PLUS
6091 && s_register_operand (XEXP (x, 1), SImode)
6092 && !s_register_operand (XEXP (x, 0), SImode))
6093 {
6094 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6095
6096 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6097 }
6098
6099 if (flag_pic)
6100 {
6101 /* We need to find and carefully transform any SYMBOL and LABEL
6102 references, so go back to the original address expression. */
6103 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6104
6105 if (new_x != orig_x)
6106 x = new_x;
6107 }
6108
6109 return x;
6110 }
6111
6112 rtx
6113 thumb_legitimize_reload_address (rtx *x_p,
6114 enum machine_mode mode,
6115 int opnum, int type,
6116 int ind_levels ATTRIBUTE_UNUSED)
6117 {
6118 rtx x = *x_p;
6119
6120 if (GET_CODE (x) == PLUS
6121 && GET_MODE_SIZE (mode) < 4
6122 && REG_P (XEXP (x, 0))
6123 && XEXP (x, 0) == stack_pointer_rtx
6124 && GET_CODE (XEXP (x, 1)) == CONST_INT
6125 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6126 {
6127 rtx orig_x = x;
6128
6129 x = copy_rtx (x);
6130 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6131 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6132 return x;
6133 }
6134
6135 /* If both registers are hi-regs, then it's better to reload the
6136 entire expression rather than each register individually. That
6137 only requires one reload register rather than two. */
6138 if (GET_CODE (x) == PLUS
6139 && REG_P (XEXP (x, 0))
6140 && REG_P (XEXP (x, 1))
6141 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6142 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6143 {
6144 rtx orig_x = x;
6145
6146 x = copy_rtx (x);
6147 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6148 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6149 return x;
6150 }
6151
6152 return NULL;
6153 }
6154
6155 /* Test for various thread-local symbols. */
6156
6157 /* Return TRUE if X is a thread-local symbol. */
6158
6159 static bool
6160 arm_tls_symbol_p (rtx x)
6161 {
6162 if (! TARGET_HAVE_TLS)
6163 return false;
6164
6165 if (GET_CODE (x) != SYMBOL_REF)
6166 return false;
6167
6168 return SYMBOL_REF_TLS_MODEL (x) != 0;
6169 }
6170
6171 /* Helper for arm_tls_referenced_p. */
6172
6173 static int
6174 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6175 {
6176 if (GET_CODE (*x) == SYMBOL_REF)
6177 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6178
6179 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6180 TLS offsets, not real symbol references. */
6181 if (GET_CODE (*x) == UNSPEC
6182 && XINT (*x, 1) == UNSPEC_TLS)
6183 return -1;
6184
6185 return 0;
6186 }
6187
6188 /* Return TRUE if X contains any TLS symbol references. */
6189
6190 bool
6191 arm_tls_referenced_p (rtx x)
6192 {
6193 if (! TARGET_HAVE_TLS)
6194 return false;
6195
6196 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6197 }
6198
6199 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6200
6201 bool
6202 arm_cannot_force_const_mem (rtx x)
6203 {
6204 rtx base, offset;
6205
6206 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6207 {
6208 split_const (x, &base, &offset);
6209 if (GET_CODE (base) == SYMBOL_REF
6210 && !offset_within_block_p (base, INTVAL (offset)))
6211 return true;
6212 }
6213 return arm_tls_referenced_p (x);
6214 }
6215 \f
6216 #define REG_OR_SUBREG_REG(X) \
6217 (GET_CODE (X) == REG \
6218 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6219
6220 #define REG_OR_SUBREG_RTX(X) \
6221 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6222
6223 #ifndef COSTS_N_INSNS
6224 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6225 #endif
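/* With the fallback definition above, COSTS_N_INSNS (1) == 2,
   COSTS_N_INSNS (2) == 6 and COSTS_N_INSNS (3) == 10; it only takes
   effect when the macro has not already been defined elsewhere. */
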
6226 static inline int
6227 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6228 {
6229 enum machine_mode mode = GET_MODE (x);
6230
6231 switch (code)
6232 {
6233 case ASHIFT:
6234 case ASHIFTRT:
6235 case LSHIFTRT:
6236 case ROTATERT:
6237 case PLUS:
6238 case MINUS:
6239 case COMPARE:
6240 case NEG:
6241 case NOT:
6242 return COSTS_N_INSNS (1);
6243
6244 case MULT:
6245 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6246 {
6247 int cycles = 0;
6248 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6249
6250 while (i)
6251 {
6252 i >>= 2;
6253 cycles++;
6254 }
6255 return COSTS_N_INSNS (2) + cycles;
6256 }
6257 return COSTS_N_INSNS (1) + 16;
6258
6259 case SET:
6260 return (COSTS_N_INSNS (1)
6261 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6262 + GET_CODE (SET_DEST (x)) == MEM));
6263
6264 case CONST_INT:
6265 if (outer == SET)
6266 {
6267 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6268 return 0;
6269 if (thumb_shiftable_const (INTVAL (x)))
6270 return COSTS_N_INSNS (2);
6271 return COSTS_N_INSNS (3);
6272 }
6273 else if ((outer == PLUS || outer == COMPARE)
6274 && INTVAL (x) < 256 && INTVAL (x) > -256)
6275 return 0;
6276 else if ((outer == IOR || outer == XOR || outer == AND)
6277 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6278 return COSTS_N_INSNS (1);
6279 else if (outer == AND)
6280 {
6281 int i;
6282 /* This duplicates the tests in the andsi3 expander. */
6283 for (i = 9; i <= 31; i++)
6284 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6285 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6286 return COSTS_N_INSNS (2);
6287 }
6288 else if (outer == ASHIFT || outer == ASHIFTRT
6289 || outer == LSHIFTRT)
6290 return 0;
6291 return COSTS_N_INSNS (2);
6292
6293 case CONST:
6294 case CONST_DOUBLE:
6295 case LABEL_REF:
6296 case SYMBOL_REF:
6297 return COSTS_N_INSNS (3);
6298
6299 case UDIV:
6300 case UMOD:
6301 case DIV:
6302 case MOD:
6303 return 100;
6304
6305 case TRUNCATE:
6306 return 99;
6307
6308 case AND:
6309 case XOR:
6310 case IOR:
6311 /* XXX guess. */
6312 return 8;
6313
6314 case MEM:
6315 /* XXX another guess. */
6316 /* Memory costs quite a lot for the first word, but subsequent words
6317 load at the equivalent of a single insn each. */
6318 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6319 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6320 ? 4 : 0));
6321
6322 case IF_THEN_ELSE:
6323 /* XXX a guess. */
6324 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6325 return 14;
6326 return 2;
6327
6328 case ZERO_EXTEND:
6329 /* XXX still guessing. */
6330 switch (GET_MODE (XEXP (x, 0)))
6331 {
6332 case QImode:
6333 return (1 + (mode == DImode ? 4 : 0)
6334 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6335
6336 case HImode:
6337 return (4 + (mode == DImode ? 4 : 0)
6338 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6339
6340 case SImode:
6341 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6342
6343 default:
6344 return 99;
6345 }
6346
6347 default:
6348 return 99;
6349 }
6350 }
6351
6352 static inline bool
6353 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6354 {
6355 enum machine_mode mode = GET_MODE (x);
6356 enum rtx_code subcode;
6357 rtx operand;
6358 enum rtx_code code = GET_CODE (x);
6359 *total = 0;
6360
6361 switch (code)
6362 {
6363 case MEM:
6364 /* Memory costs quite a lot for the first word, but subsequent words
6365 load at the equivalent of a single insn each. */
6366 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6367 return true;
6368
6369 case DIV:
6370 case MOD:
6371 case UDIV:
6372 case UMOD:
6373 if (TARGET_HARD_FLOAT && mode == SFmode)
6374 *total = COSTS_N_INSNS (2);
6375 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6376 *total = COSTS_N_INSNS (4);
6377 else
6378 *total = COSTS_N_INSNS (20);
6379 return false;
6380
6381 case ROTATE:
6382 if (GET_CODE (XEXP (x, 1)) == REG)
6383 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6384 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6385 *total = rtx_cost (XEXP (x, 1), code, speed);
6386
6387 /* Fall through */
6388 case ROTATERT:
6389 if (mode != SImode)
6390 {
6391 *total += COSTS_N_INSNS (4);
6392 return true;
6393 }
6394
6395 /* Fall through */
6396 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6397 *total += rtx_cost (XEXP (x, 0), code, speed);
6398 if (mode == DImode)
6399 {
6400 *total += COSTS_N_INSNS (3);
6401 return true;
6402 }
6403
6404 *total += COSTS_N_INSNS (1);
6405 /* Increase the cost of complex shifts because they aren't any faster
6406 and they reduce dual-issue opportunities. */
6407 if (arm_tune_cortex_a9
6408 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6409 ++*total;
6410
6411 return true;
6412
6413 case MINUS:
6414 if (TARGET_THUMB2)
6415 {
6416 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6417 {
6418 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6419 *total = COSTS_N_INSNS (1);
6420 else
6421 *total = COSTS_N_INSNS (20);
6422 }
6423 else
6424 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6425 /* Thumb2 does not have RSB, so all arguments must be
6426 registers (subtracting a constant is canonicalized as
6427 addition of the negated constant). */
6428 return false;
6429 }
6430
6431 if (mode == DImode)
6432 {
6433 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6434 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6435 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6436 {
6437 *total += rtx_cost (XEXP (x, 1), code, speed);
6438 return true;
6439 }
6440
6441 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6442 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6443 {
6444 *total += rtx_cost (XEXP (x, 0), code, speed);
6445 return true;
6446 }
6447
6448 return false;
6449 }
6450
6451 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6452 {
6453 if (TARGET_HARD_FLOAT
6454 && (mode == SFmode
6455 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6456 {
6457 *total = COSTS_N_INSNS (1);
6458 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6459 && arm_const_double_rtx (XEXP (x, 0)))
6460 {
6461 *total += rtx_cost (XEXP (x, 1), code, speed);
6462 return true;
6463 }
6464
6465 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6466 && arm_const_double_rtx (XEXP (x, 1)))
6467 {
6468 *total += rtx_cost (XEXP (x, 0), code, speed);
6469 return true;
6470 }
6471
6472 return false;
6473 }
6474 *total = COSTS_N_INSNS (20);
6475 return false;
6476 }
6477
6478 *total = COSTS_N_INSNS (1);
6479 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6480 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6481 {
6482 *total += rtx_cost (XEXP (x, 1), code, speed);
6483 return true;
6484 }
6485
6486 subcode = GET_CODE (XEXP (x, 1));
6487 if (subcode == ASHIFT || subcode == ASHIFTRT
6488 || subcode == LSHIFTRT
6489 || subcode == ROTATE || subcode == ROTATERT)
6490 {
6491 *total += rtx_cost (XEXP (x, 0), code, speed);
6492 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6493 return true;
6494 }
6495
6496 /* A shift as a part of RSB costs no more than RSB itself. */
6497 if (GET_CODE (XEXP (x, 0)) == MULT
6498 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6499 {
6500 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6501 *total += rtx_cost (XEXP (x, 1), code, speed);
6502 return true;
6503 }
6504
6505 if (subcode == MULT
6506 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6507 {
6508 *total += rtx_cost (XEXP (x, 0), code, speed);
6509 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6510 return true;
6511 }
6512
6513 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6514 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6515 {
6516 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6517 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6518 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6519 *total += COSTS_N_INSNS (1);
6520
6521 return true;
6522 }
6523
6524 /* Fall through */
6525
6526 case PLUS:
6527 if (code == PLUS && arm_arch6 && mode == SImode
6528 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6529 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6530 {
6531 *total = COSTS_N_INSNS (1);
6532 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6533 speed);
6534 *total += rtx_cost (XEXP (x, 1), code, speed);
6535 return true;
6536 }
6537
6538 /* MLA: All arguments must be registers. We filter out
6539 multiplication by a power of two, so that we fall through to
6540 the code below. */
6541 if (GET_CODE (XEXP (x, 0)) == MULT
6542 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6543 {
6544 /* The cost comes from the cost of the multiply. */
6545 return false;
6546 }
6547
6548 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6549 {
6550 if (TARGET_HARD_FLOAT
6551 && (mode == SFmode
6552 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6553 {
6554 *total = COSTS_N_INSNS (1);
6555 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6556 && arm_const_double_rtx (XEXP (x, 1)))
6557 {
6558 *total += rtx_cost (XEXP (x, 0), code, speed);
6559 return true;
6560 }
6561
6562 return false;
6563 }
6564
6565 *total = COSTS_N_INSNS (20);
6566 return false;
6567 }
6568
6569 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6570 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6571 {
6572 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6573 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6574 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6575 *total += COSTS_N_INSNS (1);
6576 return true;
6577 }
6578
6579 /* Fall through */
6580
6581 case AND: case XOR: case IOR:
6582
6583 /* Normally the frame registers will be spilt into reg+const during
6584 reload, so it is a bad idea to combine them with other instructions,
6585 since then they might not be moved outside of loops. As a compromise
6586 we allow integration with ops that have a constant as their second
6587 operand. */
6588 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
6589 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6590 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6591 || (REG_OR_SUBREG_REG (XEXP (x, 0))
6592 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6593 *total = 4;
6594
6595 if (mode == DImode)
6596 {
6597 *total += COSTS_N_INSNS (2);
6598 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6599 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6600 {
6601 *total += rtx_cost (XEXP (x, 0), code, speed);
6602 return true;
6603 }
6604
6605 return false;
6606 }
6607
6608 *total += COSTS_N_INSNS (1);
6609 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6610 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6611 {
6612 *total += rtx_cost (XEXP (x, 0), code, speed);
6613 return true;
6614 }
6615 subcode = GET_CODE (XEXP (x, 0));
6616 if (subcode == ASHIFT || subcode == ASHIFTRT
6617 || subcode == LSHIFTRT
6618 || subcode == ROTATE || subcode == ROTATERT)
6619 {
6620 *total += rtx_cost (XEXP (x, 1), code, speed);
6621 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6622 return true;
6623 }
6624
6625 if (subcode == MULT
6626 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6627 {
6628 *total += rtx_cost (XEXP (x, 1), code, speed);
6629 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6630 return true;
6631 }
6632
6633 if (subcode == UMIN || subcode == UMAX
6634 || subcode == SMIN || subcode == SMAX)
6635 {
6636 *total = COSTS_N_INSNS (3);
6637 return true;
6638 }
6639
6640 return false;
6641
6642 case MULT:
6643 /* This should have been handled by the CPU specific routines. */
6644 gcc_unreachable ();
6645
6646 case TRUNCATE:
6647 if (arm_arch3m && mode == SImode
6648 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6649 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6650 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6651 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6652 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6653 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6654 {
6655 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6656 return true;
6657 }
6658 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6659 return false;
6660
6661 case NEG:
6662 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6663 {
6664 if (TARGET_HARD_FLOAT
6665 && (mode == SFmode
6666 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6667 {
6668 *total = COSTS_N_INSNS (1);
6669 return false;
6670 }
6671 *total = COSTS_N_INSNS (2);
6672 return false;
6673 }
6674
6675 /* Fall through */
6676 case NOT:
6677 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6678 if (mode == SImode && code == NOT)
6679 {
6680 subcode = GET_CODE (XEXP (x, 0));
6681 if (subcode == ASHIFT || subcode == ASHIFTRT
6682 || subcode == LSHIFTRT
6683 || subcode == ROTATE || subcode == ROTATERT
6684 || (subcode == MULT
6685 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6686 {
6687 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6688 /* Register shifts cost an extra cycle. */
6689 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6690 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6691 subcode, speed);
6692 return true;
6693 }
6694 }
6695
6696 return false;
6697
6698 case IF_THEN_ELSE:
6699 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6700 {
6701 *total = COSTS_N_INSNS (4);
6702 return true;
6703 }
6704
6705 operand = XEXP (x, 0);
6706
6707 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6708 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6709 && GET_CODE (XEXP (operand, 0)) == REG
6710 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6711 *total += COSTS_N_INSNS (1);
6712 *total += (rtx_cost (XEXP (x, 1), code, speed)
6713 + rtx_cost (XEXP (x, 2), code, speed));
6714 return true;
6715
6716 case NE:
6717 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6718 {
6719 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6720 return true;
6721 }
6722 goto scc_insn;
6723
6724 case GE:
6725 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6726 && mode == SImode && XEXP (x, 1) == const0_rtx)
6727 {
6728 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6729 return true;
6730 }
6731 goto scc_insn;
6732
6733 case LT:
6734 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6735 && mode == SImode && XEXP (x, 1) == const0_rtx)
6736 {
6737 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6738 return true;
6739 }
6740 goto scc_insn;
6741
6742 case EQ:
6743 case GT:
6744 case LE:
6745 case GEU:
6746 case LTU:
6747 case GTU:
6748 case LEU:
6749 case UNORDERED:
6750 case ORDERED:
6751 case UNEQ:
6752 case UNGE:
6753 case UNLT:
6754 case UNGT:
6755 case UNLE:
6756 scc_insn:
6757 /* SCC insns. If the comparison has already been performed, they
6758 cost 2 instructions. Otherwise they need an additional
6759 comparison before them. */
6760 *total = COSTS_N_INSNS (2);
6761 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6762 {
6763 return true;
6764 }
6765
6766 /* Fall through */
6767 case COMPARE:
6768 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6769 {
6770 *total = 0;
6771 return true;
6772 }
6773
6774 *total += COSTS_N_INSNS (1);
6775 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6776 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6777 {
6778 *total += rtx_cost (XEXP (x, 0), code, speed);
6779 return true;
6780 }
6781
6782 subcode = GET_CODE (XEXP (x, 0));
6783 if (subcode == ASHIFT || subcode == ASHIFTRT
6784 || subcode == LSHIFTRT
6785 || subcode == ROTATE || subcode == ROTATERT)
6786 {
6787 *total += rtx_cost (XEXP (x, 1), code, speed);
6788 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6789 return true;
6790 }
6791
6792 if (subcode == MULT
6793 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6794 {
6795 *total += rtx_cost (XEXP (x, 1), code, speed);
6796 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6797 return true;
6798 }
6799
6800 return false;
6801
6802 case UMIN:
6803 case UMAX:
6804 case SMIN:
6805 case SMAX:
6806 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6807 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6808 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6809 *total += rtx_cost (XEXP (x, 1), code, speed);
6810 return true;
6811
6812 case ABS:
6813 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6814 {
6815 if (TARGET_HARD_FLOAT
6816 && (mode == SFmode
6817 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6818 {
6819 *total = COSTS_N_INSNS (1);
6820 return false;
6821 }
6822 *total = COSTS_N_INSNS (20);
6823 return false;
6824 }
6825 *total = COSTS_N_INSNS (1);
6826 if (mode == DImode)
6827 *total += COSTS_N_INSNS (3);
6828 return false;
6829
6830 case SIGN_EXTEND:
6831 if (GET_MODE_CLASS (mode) == MODE_INT)
6832 {
6833 *total = 0;
6834 if (mode == DImode)
6835 *total += COSTS_N_INSNS (1);
6836
6837 if (GET_MODE (XEXP (x, 0)) != SImode)
6838 {
6839 if (arm_arch6)
6840 {
6841 if (GET_CODE (XEXP (x, 0)) != MEM)
6842 *total += COSTS_N_INSNS (1);
6843 }
6844 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6845 *total += COSTS_N_INSNS (2);
6846 }
6847
6848 return false;
6849 }
6850
6851 /* Fall through */
6852 case ZERO_EXTEND:
6853 *total = 0;
6854 if (GET_MODE_CLASS (mode) == MODE_INT)
6855 {
6856 if (mode == DImode)
6857 *total += COSTS_N_INSNS (1);
6858
6859 if (GET_MODE (XEXP (x, 0)) != SImode)
6860 {
6861 if (arm_arch6)
6862 {
6863 if (GET_CODE (XEXP (x, 0)) != MEM)
6864 *total += COSTS_N_INSNS (1);
6865 }
6866 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6867 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
6868 1 : 2);
6869 }
6870
6871 return false;
6872 }
6873
6874 switch (GET_MODE (XEXP (x, 0)))
6875 {
6876 case V8QImode:
6877 case V4HImode:
6878 case V2SImode:
6879 case V4QImode:
6880 case V2HImode:
6881 *total = COSTS_N_INSNS (1);
6882 return false;
6883
6884 default:
6885 gcc_unreachable ();
6886 }
6887 gcc_unreachable ();
6888
6889 case ZERO_EXTRACT:
6890 case SIGN_EXTRACT:
6891 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6892 return true;
6893
6894 case CONST_INT:
6895 if (const_ok_for_arm (INTVAL (x))
6896 || const_ok_for_arm (~INTVAL (x)))
6897 *total = COSTS_N_INSNS (1);
6898 else
6899 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
6900 INTVAL (x), NULL_RTX,
6901 NULL_RTX, 0, 0));
6902 return true;
6903
6904 case CONST:
6905 case LABEL_REF:
6906 case SYMBOL_REF:
6907 *total = COSTS_N_INSNS (3);
6908 return true;
6909
6910 case HIGH:
6911 *total = COSTS_N_INSNS (1);
6912 return true;
6913
6914 case LO_SUM:
6915 *total = COSTS_N_INSNS (1);
6916 *total += rtx_cost (XEXP (x, 0), code, speed);
6917 return true;
6918
6919 case CONST_DOUBLE:
6920 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
6921 && (mode == SFmode || !TARGET_VFP_SINGLE))
6922 *total = COSTS_N_INSNS (1);
6923 else
6924 *total = COSTS_N_INSNS (4);
6925 return true;
6926
6927 default:
6928 *total = COSTS_N_INSNS (4);
6929 return false;
6930 }
6931 }
6932
6933 /* Estimates the size cost of thumb1 instructions.
6934 For now most of the code is copied from thumb1_rtx_costs. We need more
6935 fine-grained tuning when we have more related test cases. */
6936 static inline int
6937 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6938 {
6939 enum machine_mode mode = GET_MODE (x);
6940
6941 switch (code)
6942 {
6943 case ASHIFT:
6944 case ASHIFTRT:
6945 case LSHIFTRT:
6946 case ROTATERT:
6947 case PLUS:
6948 case MINUS:
6949 case COMPARE:
6950 case NEG:
6951 case NOT:
6952 return COSTS_N_INSNS (1);
6953
6954 case MULT:
6955 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6956 {
6957 /* The Thumb1 mul instruction can't operate on a constant; we must
6958 load it into a register first. */
6959 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
6960 return COSTS_N_INSNS (1) + const_size;
6961 }
6962 return COSTS_N_INSNS (1);
6963
6964 case SET:
6965 return (COSTS_N_INSNS (1)
6966 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6967 + GET_CODE (SET_DEST (x)) == MEM));
6968
6969 case CONST_INT:
6970 if (outer == SET)
6971 {
6972 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6973 return 0;
6974 if (thumb_shiftable_const (INTVAL (x)))
6975 return COSTS_N_INSNS (2);
6976 return COSTS_N_INSNS (3);
6977 }
6978 else if ((outer == PLUS || outer == COMPARE)
6979 && INTVAL (x) < 256 && INTVAL (x) > -256)
6980 return 0;
6981 else if ((outer == IOR || outer == XOR || outer == AND)
6982 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6983 return COSTS_N_INSNS (1);
6984 else if (outer == AND)
6985 {
6986 int i;
6987 /* This duplicates the tests in the andsi3 expander. */
6988 for (i = 9; i <= 31; i++)
6989 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6990 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6991 return COSTS_N_INSNS (2);
6992 }
6993 else if (outer == ASHIFT || outer == ASHIFTRT
6994 || outer == LSHIFTRT)
6995 return 0;
6996 return COSTS_N_INSNS (2);
6997
6998 case CONST:
6999 case CONST_DOUBLE:
7000 case LABEL_REF:
7001 case SYMBOL_REF:
7002 return COSTS_N_INSNS (3);
7003
7004 case UDIV:
7005 case UMOD:
7006 case DIV:
7007 case MOD:
7008 return 100;
7009
7010 case TRUNCATE:
7011 return 99;
7012
7013 case AND:
7014 case XOR:
7015 case IOR:
7016 /* XXX guess. */
7017 return 8;
7018
7019 case MEM:
7020 /* XXX another guess. */
7021 /* Memory costs quite a lot for the first word, but subsequent words
7022 load at the equivalent of a single insn each. */
7023 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7024 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7025 ? 4 : 0));
7026
7027 case IF_THEN_ELSE:
7028 /* XXX a guess. */
7029 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7030 return 14;
7031 return 2;
7032
7033 case ZERO_EXTEND:
7034 /* XXX still guessing. */
7035 switch (GET_MODE (XEXP (x, 0)))
7036 {
7037 case QImode:
7038 return (1 + (mode == DImode ? 4 : 0)
7039 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7040
7041 case HImode:
7042 return (4 + (mode == DImode ? 4 : 0)
7043 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7044
7045 case SImode:
7046 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7047
7048 default:
7049 return 99;
7050 }
7051
7052 default:
7053 return 99;
7054 }
7055 }
7056
7057 /* RTX costs when optimizing for size. */
7058 static bool
7059 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7060 int *total)
7061 {
7062 enum machine_mode mode = GET_MODE (x);
7063 if (TARGET_THUMB1)
7064 {
7065 *total = thumb1_size_rtx_costs (x, code, outer_code);
7066 return true;
7067 }
7068
7069 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7070 switch (code)
7071 {
7072 case MEM:
7073 /* A memory access costs 1 insn if the mode is small or the address is
7074 a single register; otherwise it costs one insn per word. */
7075 if (REG_P (XEXP (x, 0)))
7076 *total = COSTS_N_INSNS (1);
7077 else
7078 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7079 return true;
7080
7081 case DIV:
7082 case MOD:
7083 case UDIV:
7084 case UMOD:
7085 /* Needs a libcall, so it costs about this. */
7086 *total = COSTS_N_INSNS (2);
7087 return false;
7088
7089 case ROTATE:
7090 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7091 {
7092 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7093 return true;
7094 }
7095 /* Fall through */
7096 case ROTATERT:
7097 case ASHIFT:
7098 case LSHIFTRT:
7099 case ASHIFTRT:
7100 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7101 {
7102 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7103 return true;
7104 }
7105 else if (mode == SImode)
7106 {
7107 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7108 /* Slightly disparage register shifts, but not by much. */
7109 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7110 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7111 return true;
7112 }
7113
7114 /* Needs a libcall. */
7115 *total = COSTS_N_INSNS (2);
7116 return false;
7117
7118 case MINUS:
7119 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7120 && (mode == SFmode || !TARGET_VFP_SINGLE))
7121 {
7122 *total = COSTS_N_INSNS (1);
7123 return false;
7124 }
7125
7126 if (mode == SImode)
7127 {
7128 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7129 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7130
7131 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7132 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7133 || subcode1 == ROTATE || subcode1 == ROTATERT
7134 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7135 || subcode1 == ASHIFTRT)
7136 {
7137 /* It's just the cost of the two operands. */
7138 *total = 0;
7139 return false;
7140 }
7141
7142 *total = COSTS_N_INSNS (1);
7143 return false;
7144 }
7145
7146 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7147 return false;
7148
7149 case PLUS:
7150 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7151 && (mode == SFmode || !TARGET_VFP_SINGLE))
7152 {
7153 *total = COSTS_N_INSNS (1);
7154 return false;
7155 }
7156
7157 /* A shift as a part of ADD costs nothing. */
7158 if (GET_CODE (XEXP (x, 0)) == MULT
7159 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7160 {
7161 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7162 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7163 *total += rtx_cost (XEXP (x, 1), code, false);
7164 return true;
7165 }
7166
7167 /* Fall through */
7168 case AND: case XOR: case IOR:
7169 if (mode == SImode)
7170 {
7171 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7172
7173 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7174 || subcode == LSHIFTRT || subcode == ASHIFTRT
7175 || (code == AND && subcode == NOT))
7176 {
7177 /* It's just the cost of the two operands. */
7178 *total = 0;
7179 return false;
7180 }
7181 }
7182
7183 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7184 return false;
7185
7186 case MULT:
7187 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7188 return false;
7189
7190 case NEG:
7191 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7192 && (mode == SFmode || !TARGET_VFP_SINGLE))
7193 {
7194 *total = COSTS_N_INSNS (1);
7195 return false;
7196 }
7197
7198 /* Fall through */
7199 case NOT:
7200 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7201
7202 return false;
7203
7204 case IF_THEN_ELSE:
7205 *total = 0;
7206 return false;
7207
7208 case COMPARE:
7209 if (cc_register (XEXP (x, 0), VOIDmode))
7210 *total = 0;
7211 else
7212 *total = COSTS_N_INSNS (1);
7213 return false;
7214
7215 case ABS:
7216 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7217 && (mode == SFmode || !TARGET_VFP_SINGLE))
7218 *total = COSTS_N_INSNS (1);
7219 else
7220 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7221 return false;
7222
7223 case SIGN_EXTEND:
7224 *total = 0;
7225 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
7226 {
7227 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7228 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7229 }
7230 if (mode == DImode)
7231 *total += COSTS_N_INSNS (1);
7232 return false;
7233
7234 case ZERO_EXTEND:
7235 *total = 0;
7236 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7237 {
7238 switch (GET_MODE (XEXP (x, 0)))
7239 {
7240 case QImode:
7241 *total += COSTS_N_INSNS (1);
7242 break;
7243
7244 case HImode:
7245 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7246
7247 case SImode:
7248 break;
7249
7250 default:
7251 *total += COSTS_N_INSNS (2);
7252 }
7253 }
7254
7255 if (mode == DImode)
7256 *total += COSTS_N_INSNS (1);
7257
7258 return false;
7259
7260 case CONST_INT:
7261 if (const_ok_for_arm (INTVAL (x)))
7262 /* A multiplication by a constant requires another instruction
7263 to load the constant to a register. */
7264 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7265 ? 1 : 0);
7266 else if (const_ok_for_arm (~INTVAL (x)))
7267 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7268 else if (const_ok_for_arm (-INTVAL (x)))
7269 {
7270 if (outer_code == COMPARE || outer_code == PLUS
7271 || outer_code == MINUS)
7272 *total = 0;
7273 else
7274 *total = COSTS_N_INSNS (1);
7275 }
7276 else
7277 *total = COSTS_N_INSNS (2);
7278 return true;
7279
7280 case CONST:
7281 case LABEL_REF:
7282 case SYMBOL_REF:
7283 *total = COSTS_N_INSNS (2);
7284 return true;
7285
7286 case CONST_DOUBLE:
7287 *total = COSTS_N_INSNS (4);
7288 return true;
7289
7290 case HIGH:
7291 case LO_SUM:
7292 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7293 cost of these slightly. */
7294 *total = COSTS_N_INSNS (1) + 1;
7295 return true;
7296
7297 default:
7298 if (mode != VOIDmode)
7299 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7300 else
7301 *total = COSTS_N_INSNS (4); /* Who knows? */
7302 return false;
7303 }
7304 }
7305
7306 /* RTX costs. Dispatch to the size-based costs or the tuning-specific
speed-based costs. */
7307 static bool
7308 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7309 bool speed)
7310 {
7311 if (!speed)
7312 return arm_size_rtx_costs (x, (enum rtx_code) code,
7313 (enum rtx_code) outer_code, total);
7314 else
7315 return current_tune->rtx_costs (x, (enum rtx_code) code,
7316 (enum rtx_code) outer_code,
7317 total, speed);
7318 }
7319
7320 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7321 supported on any "slowmul" cores, so it can be ignored. */
7322
7323 static bool
7324 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7325 int *total, bool speed)
7326 {
7327 enum machine_mode mode = GET_MODE (x);
7328
7329 if (TARGET_THUMB)
7330 {
7331 *total = thumb1_rtx_costs (x, code, outer_code);
7332 return true;
7333 }
7334
7335 switch (code)
7336 {
7337 case MULT:
7338 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7339 || mode == DImode)
7340 {
7341 *total = COSTS_N_INSNS (20);
7342 return false;
7343 }
7344
7345 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7346 {
7347 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7348 & (unsigned HOST_WIDE_INT) 0xffffffff);
7349 int cost, const_ok = const_ok_for_arm (i);
7350 int j, booth_unit_size;
7351
7352 /* Tune as appropriate. */
7353 cost = const_ok ? 4 : 8;
7354 booth_unit_size = 2;
7355 for (j = 0; i && j < 32; j += booth_unit_size)
7356 {
7357 i >>= booth_unit_size;
7358 cost++;
7359 }
7360
7361 *total = COSTS_N_INSNS (cost);
7362 *total += rtx_cost (XEXP (x, 0), code, speed);
7363 return true;
7364 }
7365
7366 *total = COSTS_N_INSNS (20);
7367 return false;
7368
7369 default:
7370 return arm_rtx_costs_1 (x, outer_code, total, speed);
7371 }
7372 }
7373
7374
7375 /* RTX cost for cores with a fast multiply unit (M variants). */
7376
7377 static bool
7378 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7379 int *total, bool speed)
7380 {
7381 enum machine_mode mode = GET_MODE (x);
7382
7383 if (TARGET_THUMB1)
7384 {
7385 *total = thumb1_rtx_costs (x, code, outer_code);
7386 return true;
7387 }
7388
7389 /* ??? Should Thumb-2 use different costs? */
7390 switch (code)
7391 {
7392 case MULT:
7393 /* There is no point basing this on the tuning, since it is always the
7394 fast variant if it exists at all. */
7395 if (mode == DImode
7396 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7397 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7398 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7399 {
7400 *total = COSTS_N_INSNS (2);
7401 return false;
7402 }
7403
7404
7405 if (mode == DImode)
7406 {
7407 *total = COSTS_N_INSNS (5);
7408 return false;
7409 }
7410
7411 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7412 {
7413 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7414 & (unsigned HOST_WIDE_INT) 0xffffffff);
7415 int cost, const_ok = const_ok_for_arm (i);
7416 int j, booth_unit_size;
7417
7418 /* Tune as appropriate. */
7419 cost = const_ok ? 4 : 8;
7420 booth_unit_size = 8;
7421 for (j = 0; i && j < 32; j += booth_unit_size)
7422 {
7423 i >>= booth_unit_size;
7424 cost++;
7425 }
7426
7427 *total = COSTS_N_INSNS (cost);
7428 return false;
7429 }
7430
7431 if (mode == SImode)
7432 {
7433 *total = COSTS_N_INSNS (4);
7434 return false;
7435 }
7436
7437 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7438 {
7439 if (TARGET_HARD_FLOAT
7440 && (mode == SFmode
7441 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7442 {
7443 *total = COSTS_N_INSNS (1);
7444 return false;
7445 }
7446 }
7447
7448 /* Requires a lib call. */
7449 *total = COSTS_N_INSNS (20);
7450 return false;
7451
7452 default:
7453 return arm_rtx_costs_1 (x, outer_code, total, speed);
7454 }
7455 }
7456
7457
7458 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7459 so it can be ignored. */
7460
7461 static bool
7462 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7463 int *total, bool speed)
7464 {
7465 enum machine_mode mode = GET_MODE (x);
7466
7467 if (TARGET_THUMB)
7468 {
7469 *total = thumb1_rtx_costs (x, code, outer_code);
7470 return true;
7471 }
7472
7473 switch (code)
7474 {
7475 case COMPARE:
7476 if (GET_CODE (XEXP (x, 0)) != MULT)
7477 return arm_rtx_costs_1 (x, outer_code, total, speed);
7478
7479 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7480 will stall until the multiplication is complete. */
7481 *total = COSTS_N_INSNS (3);
7482 return false;
7483
7484 case MULT:
7485 /* There is no point basing this on the tuning, since it is always the
7486 fast variant if it exists at all. */
7487 if (mode == DImode
7488 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7489 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7490 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7491 {
7492 *total = COSTS_N_INSNS (2);
7493 return false;
7494 }
7495
7496
7497 if (mode == DImode)
7498 {
7499 *total = COSTS_N_INSNS (5);
7500 return false;
7501 }
7502
7503 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7504 {
7505 /* If operand 1 is a constant we can more accurately
7506 calculate the cost of the multiply. The multiplier can
7507 retire 15 bits on the first cycle and a further 12 on the
7508 second. We do, of course, have to load the constant into
7509 a register first. */
7510 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7511 /* There's a general overhead of one cycle. */
7512 int cost = 1;
7513 unsigned HOST_WIDE_INT masked_const;
7514
7515 if (i & 0x80000000)
7516 i = ~i;
7517
7518 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7519
7520 masked_const = i & 0xffff8000;
7521 if (masked_const != 0)
7522 {
7523 cost++;
7524 masked_const = i & 0xf8000000;
7525 if (masked_const != 0)
7526 cost++;
7527 }
7528 *total = COSTS_N_INSNS (cost);
7529 return false;
7530 }
7531
7532 if (mode == SImode)
7533 {
7534 *total = COSTS_N_INSNS (3);
7535 return false;
7536 }
7537
7538 /* Requires a lib call. */
7539 *total = COSTS_N_INSNS (20);
7540 return false;
7541
7542 default:
7543 return arm_rtx_costs_1 (x, outer_code, total, speed);
7544 }
7545 }
7546
7547
7548 /* RTX costs for 9e (and later) cores. */
7549
7550 static bool
7551 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7552 int *total, bool speed)
7553 {
7554 enum machine_mode mode = GET_MODE (x);
7555
7556 if (TARGET_THUMB1)
7557 {
7558 switch (code)
7559 {
7560 case MULT:
7561 *total = COSTS_N_INSNS (3);
7562 return true;
7563
7564 default:
7565 *total = thumb1_rtx_costs (x, code, outer_code);
7566 return true;
7567 }
7568 }
7569
7570 switch (code)
7571 {
7572 case MULT:
7573 /* There is no point basing this on the tuning, since it is always the
7574 fast variant if it exists at all. */
7575 if (mode == DImode
7576 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7577 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7578 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7579 {
7580 *total = COSTS_N_INSNS (2);
7581 return false;
7582 }
7583
7584
7585 if (mode == DImode)
7586 {
7587 *total = COSTS_N_INSNS (5);
7588 return false;
7589 }
7590
7591 if (mode == SImode)
7592 {
7593 *total = COSTS_N_INSNS (2);
7594 return false;
7595 }
7596
7597 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7598 {
7599 if (TARGET_HARD_FLOAT
7600 && (mode == SFmode
7601 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7602 {
7603 *total = COSTS_N_INSNS (1);
7604 return false;
7605 }
7606 }
7607
7608 *total = COSTS_N_INSNS (20);
7609 return false;
7610
7611 default:
7612 return arm_rtx_costs_1 (x, outer_code, total, speed);
7613 }
7614 }
7615 /* All address computations that can be done are free, but rtx cost returns
7616 the same for practically all of them. So we weight the different types
7617 of address here in the order (most pref first):
7618 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
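/* For illustration: with the weights below a reg-plus-constant address
   such as [r1, #8] costs 2, a reg-plus-shifted-reg address such as
   [r1, r2, lsl #2] costs 3, a plain register costs 6 and a label or
   symbol (constant pool) address costs 10; lower values are preferred.  */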
7619 static inline int
7620 arm_arm_address_cost (rtx x)
7621 {
7622 enum rtx_code c = GET_CODE (x);
7623
7624 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7625 return 0;
7626 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7627 return 10;
7628
7629 if (c == PLUS)
7630 {
7631 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7632 return 2;
7633
7634 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7635 return 3;
7636
7637 return 4;
7638 }
7639
7640 return 6;
7641 }
7642
7643 static inline int
7644 arm_thumb_address_cost (rtx x)
7645 {
7646 enum rtx_code c = GET_CODE (x);
7647
7648 if (c == REG)
7649 return 1;
7650 if (c == PLUS
7651 && GET_CODE (XEXP (x, 0)) == REG
7652 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7653 return 1;
7654
7655 return 2;
7656 }
7657
7658 static int
7659 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7660 {
7661 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7662 }
7663
7664 static int
7665 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7666 {
7667 rtx i_pat, d_pat;
7668
7669 /* Some true dependencies can have a higher cost depending
7670 on precisely how certain input operands are used. */
7671 if (arm_tune_xscale
7672 && REG_NOTE_KIND (link) == 0
7673 && recog_memoized (insn) >= 0
7674 && recog_memoized (dep) >= 0)
7675 {
7676 int shift_opnum = get_attr_shift (insn);
7677 enum attr_type attr_type = get_attr_type (dep);
7678
7679 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7680 operand for INSN. If we have a shifted input operand and the
7681 instruction we depend on is another ALU instruction, then we may
7682 have to account for an additional stall. */
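      /* Illustrative example: if INSN is "add r0, r1, r2, lsl #4" and DEP
	 is an ALU instruction that writes r2, the loop below detects the
	 overlap and returns the increased cost of 2.  */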
7683 if (shift_opnum != 0
7684 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7685 {
7686 rtx shifted_operand;
7687 int opno;
7688
7689 /* Get the shifted operand. */
7690 extract_insn (insn);
7691 shifted_operand = recog_data.operand[shift_opnum];
7692
7693 /* Iterate over all the operands in DEP. If we write an operand
7694 that overlaps with SHIFTED_OPERAND, then we have to increase the
7695 cost of this dependency. */
7696 extract_insn (dep);
7697 preprocess_constraints ();
7698 for (opno = 0; opno < recog_data.n_operands; opno++)
7699 {
7700 /* We can ignore strict inputs. */
7701 if (recog_data.operand_type[opno] == OP_IN)
7702 continue;
7703
7704 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7705 shifted_operand))
7706 return 2;
7707 }
7708 }
7709 }
7710
7711 /* XXX This is not strictly true for the FPA. */
7712 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7713 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7714 return 0;
7715
7716 /* Call insns don't incur a stall, even if they follow a load. */
7717 if (REG_NOTE_KIND (link) == 0
7718 && GET_CODE (insn) == CALL_INSN)
7719 return 1;
7720
7721 if ((i_pat = single_set (insn)) != NULL
7722 && GET_CODE (SET_SRC (i_pat)) == MEM
7723 && (d_pat = single_set (dep)) != NULL
7724 && GET_CODE (SET_DEST (d_pat)) == MEM)
7725 {
7726 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7727 /* This is a load after a store, there is no conflict if the load reads
7728 from a cached area. Assume that loads from the stack, and from the
7729 constant pool are cached, and that others will miss. This is a
7730 hack. */
7731
7732 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
7733 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7734 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7735 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7736 return 1;
7737 }
7738
7739 return cost;
7740 }
7741
7742 static int fp_consts_inited = 0;
7743
7744 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7745 static const char * const strings_fp[8] =
7746 {
7747 "0", "1", "2", "3",
7748 "4", "5", "0.5", "10"
7749 };
7750
7751 static REAL_VALUE_TYPE values_fp[8];
7752
7753 static void
7754 init_fp_table (void)
7755 {
7756 int i;
7757 REAL_VALUE_TYPE r;
7758
7759 if (TARGET_VFP)
7760 fp_consts_inited = 1;
7761 else
7762 fp_consts_inited = 8;
7763
7764 for (i = 0; i < fp_consts_inited; i++)
7765 {
7766 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
7767 values_fp[i] = r;
7768 }
7769 }
7770
7771 /* Return TRUE if rtx X is a valid immediate FP constant. */
7772 int
7773 arm_const_double_rtx (rtx x)
7774 {
7775 REAL_VALUE_TYPE r;
7776 int i;
7777
7778 if (!fp_consts_inited)
7779 init_fp_table ();
7780
7781 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7782 if (REAL_VALUE_MINUS_ZERO (r))
7783 return 0;
7784
7785 for (i = 0; i < fp_consts_inited; i++)
7786 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7787 return 1;
7788
7789 return 0;
7790 }
7791
7792 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7793 int
7794 neg_const_double_rtx_ok_for_fpa (rtx x)
7795 {
7796 REAL_VALUE_TYPE r;
7797 int i;
7798
7799 if (!fp_consts_inited)
7800 init_fp_table ();
7801
7802 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7803 r = REAL_VALUE_NEGATE (r);
7804 if (REAL_VALUE_MINUS_ZERO (r))
7805 return 0;
7806
7807 for (i = 0; i < 8; i++)
7808 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7809 return 1;
7810
7811 return 0;
7812 }
7813
7814
7815 /* VFPv3 has a fairly wide range of representable immediates, formed from
7816 "quarter-precision" floating-point values. These can be evaluated using this
7817 formula (with ^ for exponentiation):
7818
7819 -1^s * n * 2^-r
7820
7821 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
7822 16 <= n <= 31 and 0 <= r <= 7.
7823
7824 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
7825
7826 - A (most-significant) is the sign bit.
7827 - BCD are the exponent (encoded as r XOR 3).
7828 - EFGH are the mantissa (encoded as n - 16).
7829 */
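/* Worked example (for illustration): 1.0 = -1^0 * 16 * 2^-4, so s = 0,
   n = 16 and r = 4, giving the index 0 111 0000, i.e. 0x70 (112);
   -0.5 = -1^1 * 16 * 2^-5 gives 1 110 0000, i.e. 0xe0 (224).  */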
7830
7831 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7832 fconst[sd] instruction, or -1 if X isn't suitable. */
7833 static int
7834 vfp3_const_double_index (rtx x)
7835 {
7836 REAL_VALUE_TYPE r, m;
7837 int sign, exponent;
7838 unsigned HOST_WIDE_INT mantissa, mant_hi;
7839 unsigned HOST_WIDE_INT mask;
7840 HOST_WIDE_INT m1, m2;
7841 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7842
7843 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
7844 return -1;
7845
7846 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7847
7848 /* We can't represent these things, so detect them first. */
7849 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7850 return -1;
7851
7852 /* Extract sign, exponent and mantissa. */
7853 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7854 r = REAL_VALUE_ABS (r);
7855 exponent = REAL_EXP (&r);
7856 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7857 highest (sign) bit, with a fixed binary point at bit point_pos.
7858 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7859 bits for the mantissa, this may fail (low bits would be lost). */
7860 real_ldexp (&m, &r, point_pos - exponent);
7861 REAL_VALUE_TO_INT (&m1, &m2, m);
7862 mantissa = m1;
7863 mant_hi = m2;
7864
7865 /* If there are bits set in the low part of the mantissa, we can't
7866 represent this value. */
7867 if (mantissa != 0)
7868 return -1;
7869
7870 /* Now make it so that mantissa contains the most-significant bits, and move
7871 the point_pos to indicate that the least-significant bits have been
7872 discarded. */
7873 point_pos -= HOST_BITS_PER_WIDE_INT;
7874 mantissa = mant_hi;
7875
7876 /* We can permit four significant bits of mantissa only, plus a high bit
7877 which is always 1. */
7878 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7879 if ((mantissa & mask) != 0)
7880 return -1;
7881
7882 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7883 mantissa >>= point_pos - 5;
7884
7885 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7886 floating-point immediate zero with Neon using an integer-zero load, but
7887 that case is handled elsewhere.) */
7888 if (mantissa == 0)
7889 return -1;
7890
7891 gcc_assert (mantissa >= 16 && mantissa <= 31);
7892
7893 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7894 normalized significands are in the range [1, 2). (Our mantissa is shifted
7895 left 4 places at this point relative to normalized IEEE754 values). GCC
7896 internally uses [0.5, 1) (see real.c), so the exponent returned from
7897 REAL_EXP must be altered. */
7898 exponent = 5 - exponent;
7899
7900 if (exponent < 0 || exponent > 7)
7901 return -1;
7902
7903 /* Sign, mantissa and exponent are now in the correct form to plug into the
7904 formula described in the comment above. */
7905 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
7906 }
7907
7908 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
7909 int
7910 vfp3_const_double_rtx (rtx x)
7911 {
7912 if (!TARGET_VFP3)
7913 return 0;
7914
7915 return vfp3_const_double_index (x) != -1;
7916 }
7917
7918 /* Recognize immediates which can be used in various Neon instructions. Legal
7919 immediates are described by the following table (for VMVN variants, the
7920 bitwise inverse of the constant shown is recognized. In either case, VMOV
7921 is output and the correct instruction to use for a given constant is chosen
7922 by the assembler). The constant shown is replicated across all elements of
7923 the destination vector.
7924
7925 insn elems variant constant (binary)
7926 ---- ----- ------- -----------------
7927 vmov i32 0 00000000 00000000 00000000 abcdefgh
7928 vmov i32 1 00000000 00000000 abcdefgh 00000000
7929 vmov i32 2 00000000 abcdefgh 00000000 00000000
7930 vmov i32 3 abcdefgh 00000000 00000000 00000000
7931 vmov i16 4 00000000 abcdefgh
7932 vmov i16 5 abcdefgh 00000000
7933 vmvn i32 6 00000000 00000000 00000000 abcdefgh
7934 vmvn i32 7 00000000 00000000 abcdefgh 00000000
7935 vmvn i32 8 00000000 abcdefgh 00000000 00000000
7936 vmvn i32 9 abcdefgh 00000000 00000000 00000000
7937 vmvn i16 10 00000000 abcdefgh
7938 vmvn i16 11 abcdefgh 00000000
7939 vmov i32 12 00000000 00000000 abcdefgh 11111111
7940 vmvn i32 13 00000000 00000000 abcdefgh 11111111
7941 vmov i32 14 00000000 abcdefgh 11111111 11111111
7942 vmvn i32 15 00000000 abcdefgh 11111111 11111111
7943 vmov i8 16 abcdefgh
7944 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
7945 eeeeeeee ffffffff gggggggg hhhhhhhh
7946 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
7947
7948 For case 18, B = !b. Representable values are exactly those accepted by
7949 vfp3_const_double_index, but are output as floating-point numbers rather
7950 than indices.
7951
7952 Variants 0-5 (inclusive) may also be used as immediates for the second
7953 operand of VORR/VBIC instructions.
7954
7955 The INVERSE argument causes the bitwise inverse of the given operand to be
7956 recognized instead (used for recognizing legal immediates for the VAND/VORN
7957 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
7958 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
7959 output, rather than the real insns vbic/vorr).
7960
7961 INVERSE makes no difference to the recognition of float vectors.
7962
7963 The return value is the variant of immediate as shown in the above table, or
7964 -1 if the given value doesn't match any of the listed patterns.
7965 */
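/* For illustration: a V4SI vector of four copies of 0x000000ab matches
   variant 0 above (element width 32, modified constant 0xab), four copies
   of 0x0000ab00 match variant 1, and the bitwise inverse of either is
   recognized as the corresponding VMVN variant.  */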
7966 static int
7967 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
7968 rtx *modconst, int *elementwidth)
7969 {
7970 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
7971 matches = 1; \
7972 for (i = 0; i < idx; i += (STRIDE)) \
7973 if (!(TEST)) \
7974 matches = 0; \
7975 if (matches) \
7976 { \
7977 immtype = (CLASS); \
7978 elsize = (ELSIZE); \
7979 break; \
7980 }
7981
7982 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7983 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7984 unsigned char bytes[16];
7985 int immtype = -1, matches;
7986 unsigned int invmask = inverse ? 0xff : 0;
7987
7988 /* Vectors of float constants. */
7989 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7990 {
7991 rtx el0 = CONST_VECTOR_ELT (op, 0);
7992 REAL_VALUE_TYPE r0;
7993
7994 if (!vfp3_const_double_rtx (el0))
7995 return -1;
7996
7997 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
7998
7999 for (i = 1; i < n_elts; i++)
8000 {
8001 rtx elt = CONST_VECTOR_ELT (op, i);
8002 REAL_VALUE_TYPE re;
8003
8004 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8005
8006 if (!REAL_VALUES_EQUAL (r0, re))
8007 return -1;
8008 }
8009
8010 if (modconst)
8011 *modconst = CONST_VECTOR_ELT (op, 0);
8012
8013 if (elementwidth)
8014 *elementwidth = 0;
8015
8016 return 18;
8017 }
8018
8019 /* Splat vector constant out into a byte vector. */
8020 for (i = 0; i < n_elts; i++)
8021 {
8022 rtx el = CONST_VECTOR_ELT (op, i);
8023 unsigned HOST_WIDE_INT elpart;
8024 unsigned int part, parts;
8025
8026 if (GET_CODE (el) == CONST_INT)
8027 {
8028 elpart = INTVAL (el);
8029 parts = 1;
8030 }
8031 else if (GET_CODE (el) == CONST_DOUBLE)
8032 {
8033 elpart = CONST_DOUBLE_LOW (el);
8034 parts = 2;
8035 }
8036 else
8037 gcc_unreachable ();
8038
8039 for (part = 0; part < parts; part++)
8040 {
8041 unsigned int byte;
8042 for (byte = 0; byte < innersize; byte++)
8043 {
8044 bytes[idx++] = (elpart & 0xff) ^ invmask;
8045 elpart >>= BITS_PER_UNIT;
8046 }
8047 if (GET_CODE (el) == CONST_DOUBLE)
8048 elpart = CONST_DOUBLE_HIGH (el);
8049 }
8050 }
8051
8052 /* Sanity check. */
8053 gcc_assert (idx == GET_MODE_SIZE (mode));
8054
8055 do
8056 {
8057 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8058 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8059
8060 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8061 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8062
8063 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8064 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8065
8066 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8067 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8068
8069 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8070
8071 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8072
8073 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8074 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8075
8076 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8077 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8078
8079 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8080 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8081
8082 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8083 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8084
8085 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8086
8087 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8088
8089 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8090 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8091
8092 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8093 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8094
8095 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8096 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8097
8098 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8099 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8100
8101 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8102
8103 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8104 && bytes[i] == bytes[(i + 8) % idx]);
8105 }
8106 while (0);
8107
8108 if (immtype == -1)
8109 return -1;
8110
8111 if (elementwidth)
8112 *elementwidth = elsize;
8113
8114 if (modconst)
8115 {
8116 unsigned HOST_WIDE_INT imm = 0;
8117
8118 /* Un-invert bytes of recognized vector, if necessary. */
8119 if (invmask != 0)
8120 for (i = 0; i < idx; i++)
8121 bytes[i] ^= invmask;
8122
8123 if (immtype == 17)
8124 {
8125 /* FIXME: Broken on 32-bit H_W_I hosts. */
8126 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8127
8128 for (i = 0; i < 8; i++)
8129 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8130 << (i * BITS_PER_UNIT);
8131
8132 *modconst = GEN_INT (imm);
8133 }
8134 else
8135 {
8136 unsigned HOST_WIDE_INT imm = 0;
8137
8138 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8139 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8140
8141 *modconst = GEN_INT (imm);
8142 }
8143 }
8144
8145 return immtype;
8146 #undef CHECK
8147 }
8148
8149 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8150 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8151 float elements), and a modified constant (whatever should be output for a
8152 VMOV) in *MODCONST. */
8153
8154 int
8155 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8156 rtx *modconst, int *elementwidth)
8157 {
8158 rtx tmpconst;
8159 int tmpwidth;
8160 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8161
8162 if (retval == -1)
8163 return 0;
8164
8165 if (modconst)
8166 *modconst = tmpconst;
8167
8168 if (elementwidth)
8169 *elementwidth = tmpwidth;
8170
8171 return 1;
8172 }
8173
8174 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8175 the immediate is valid, write a constant suitable for using as an operand
8176 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8177 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8178
8179 int
8180 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8181 rtx *modconst, int *elementwidth)
8182 {
8183 rtx tmpconst;
8184 int tmpwidth;
8185 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8186
8187 if (retval < 0 || retval > 5)
8188 return 0;
8189
8190 if (modconst)
8191 *modconst = tmpconst;
8192
8193 if (elementwidth)
8194 *elementwidth = tmpwidth;
8195
8196 return 1;
8197 }
8198
8199 /* Return a string suitable for output of Neon immediate logic operation
8200 MNEM. */
8201
8202 char *
8203 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8204 int inverse, int quad)
8205 {
8206 int width, is_valid;
8207 static char templ[40];
8208
8209 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8210
8211 gcc_assert (is_valid != 0);
8212
8213 if (quad)
8214 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8215 else
8216 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8217
8218 return templ;
8219 }
8220
8221 /* Output a sequence of pairwise operations to implement a reduction.
8222 NOTE: We do "too much work" here, because pairwise operations work on two
8223 registers-worth of operands in one go. Unfortunately we don't think we can
8224 exploit those extra calculations to do the full operation in fewer steps.
8225 Although all vector elements of the result but the first are ignored, we
8226 actually calculate the same result in each of the elements. An alternative
8227 such as initially loading a vector with zero to use as each of the second
8228 operands would use up an additional register and take an extra instruction,
8229 for no particular gain. */
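/* For illustration: for a four-element vector PARTS below is 4, so two
   pairwise operations are emitted (i = 2, then i = 1); intermediate
   results go to fresh registers and only the final step writes OP0.  */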
8230
8231 void
8232 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8233 rtx (*reduc) (rtx, rtx, rtx))
8234 {
8235 enum machine_mode inner = GET_MODE_INNER (mode);
8236 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8237 rtx tmpsum = op1;
8238
8239 for (i = parts / 2; i >= 1; i /= 2)
8240 {
8241 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8242 emit_insn (reduc (dest, tmpsum, tmpsum));
8243 tmpsum = dest;
8244 }
8245 }
8246
8247 /* If VALS is a vector constant that can be loaded into a register
8248 using VDUP, generate instructions to do so and return an RTX to
8249 assign to the register. Otherwise return NULL_RTX. */
8250
8251 static rtx
8252 neon_vdup_constant (rtx vals)
8253 {
8254 enum machine_mode mode = GET_MODE (vals);
8255 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8256 int n_elts = GET_MODE_NUNITS (mode);
8257 bool all_same = true;
8258 rtx x;
8259 int i;
8260
8261 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8262 return NULL_RTX;
8263
8264 for (i = 0; i < n_elts; ++i)
8265 {
8266 x = XVECEXP (vals, 0, i);
8267 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8268 all_same = false;
8269 }
8270
8271 if (!all_same)
8272 /* The elements are not all the same. We could handle repeating
8273 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8274 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8275 vdup.i16). */
8276 return NULL_RTX;
8277
8278 /* We can load this constant by using VDUP and a constant in a
8279 single ARM register. This will be cheaper than a vector
8280 load. */
8281
8282 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8283 return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8284 UNSPEC_VDUP_N);
8285 }
8286
8287 /* Generate code to load VALS, which is a PARALLEL containing only
8288 constants (for vec_init) or CONST_VECTOR, efficiently into a
8289 register. Returns an RTX to copy into the register, or NULL_RTX
8290 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8291
8292 rtx
8293 neon_make_constant (rtx vals)
8294 {
8295 enum machine_mode mode = GET_MODE (vals);
8296 rtx target;
8297 rtx const_vec = NULL_RTX;
8298 int n_elts = GET_MODE_NUNITS (mode);
8299 int n_const = 0;
8300 int i;
8301
8302 if (GET_CODE (vals) == CONST_VECTOR)
8303 const_vec = vals;
8304 else if (GET_CODE (vals) == PARALLEL)
8305 {
8306 /* A CONST_VECTOR must contain only CONST_INTs and
8307 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8308 Only store valid constants in a CONST_VECTOR. */
8309 for (i = 0; i < n_elts; ++i)
8310 {
8311 rtx x = XVECEXP (vals, 0, i);
8312 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8313 n_const++;
8314 }
8315 if (n_const == n_elts)
8316 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8317 }
8318 else
8319 gcc_unreachable ();
8320
8321 if (const_vec != NULL
8322 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8323 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8324 return const_vec;
8325 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8326 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8327 pipeline cycle; creating the constant takes one or two ARM
8328 pipeline cycles. */
8329 return target;
8330 else if (const_vec != NULL_RTX)
8331 /* Load from constant pool. On Cortex-A8 this takes two cycles
8332 (for either double or quad vectors). We can not take advantage
8333 of single-cycle VLD1 because we need a PC-relative addressing
8334 mode. */
8335 return const_vec;
8336 else
8337 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8338 We can not construct an initializer. */
8339 return NULL_RTX;
8340 }
8341
8342 /* Initialize vector TARGET to VALS. */
8343
8344 void
8345 neon_expand_vector_init (rtx target, rtx vals)
8346 {
8347 enum machine_mode mode = GET_MODE (target);
8348 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8349 int n_elts = GET_MODE_NUNITS (mode);
8350 int n_var = 0, one_var = -1;
8351 bool all_same = true;
8352 rtx x, mem;
8353 int i;
8354
8355 for (i = 0; i < n_elts; ++i)
8356 {
8357 x = XVECEXP (vals, 0, i);
8358 if (!CONSTANT_P (x))
8359 ++n_var, one_var = i;
8360
8361 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8362 all_same = false;
8363 }
8364
8365 if (n_var == 0)
8366 {
8367 rtx constant = neon_make_constant (vals);
8368 if (constant != NULL_RTX)
8369 {
8370 emit_move_insn (target, constant);
8371 return;
8372 }
8373 }
8374
8375 /* Splat a single non-constant element if we can. */
8376 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8377 {
8378 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8379 emit_insn (gen_rtx_SET (VOIDmode, target,
8380 gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
8381 UNSPEC_VDUP_N)));
8382 return;
8383 }
8384
8385 /* One field is non-constant. Load constant then overwrite varying
8386 field. This is more efficient than using the stack. */
8387 if (n_var == 1)
8388 {
8389 rtx copy = copy_rtx (vals);
8390 rtvec ops;
8391
8392 /* Load constant part of vector, substitute neighboring value for
8393 varying element. */
8394 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8395 neon_expand_vector_init (target, copy);
8396
8397 /* Insert variable. */
8398 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8399 ops = gen_rtvec (3, x, target, GEN_INT (one_var));
8400 emit_insn (gen_rtx_SET (VOIDmode, target,
8401 gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
8402 return;
8403 }
8404
8405 /* Construct the vector in memory one field at a time
8406 and load the whole vector. */
8407 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8408 for (i = 0; i < n_elts; i++)
8409 emit_move_insn (adjust_address_nv (mem, inner_mode,
8410 i * GET_MODE_SIZE (inner_mode)),
8411 XVECEXP (vals, 0, i));
8412 emit_move_insn (target, mem);
8413 }
8414
8415 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8416 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8417 reported source locations are bogus. */
8418
8419 static void
8420 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8421 const char *err)
8422 {
8423 HOST_WIDE_INT lane;
8424
8425 gcc_assert (GET_CODE (operand) == CONST_INT);
8426
8427 lane = INTVAL (operand);
8428
8429 if (lane < low || lane >= high)
8430 error (err);
8431 }
8432
8433 /* Bounds-check lanes. */
8434
8435 void
8436 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8437 {
8438 bounds_check (operand, low, high, "lane out of range");
8439 }
8440
8441 /* Bounds-check constants. */
8442
8443 void
8444 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8445 {
8446 bounds_check (operand, low, high, "constant out of range");
8447 }
8448
8449 HOST_WIDE_INT
8450 neon_element_bits (enum machine_mode mode)
8451 {
8452 if (mode == DImode)
8453 return GET_MODE_BITSIZE (mode);
8454 else
8455 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8456 }
8457
8458 \f
8459 /* Predicates for `match_operand' and `match_operator'. */
8460
8461 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8462 int
8463 cirrus_memory_offset (rtx op)
8464 {
8465 /* Reject eliminable registers. */
8466 if (! (reload_in_progress || reload_completed)
8467 && ( reg_mentioned_p (frame_pointer_rtx, op)
8468 || reg_mentioned_p (arg_pointer_rtx, op)
8469 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8470 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8471 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8472 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8473 return 0;
8474
8475 if (GET_CODE (op) == MEM)
8476 {
8477 rtx ind;
8478
8479 ind = XEXP (op, 0);
8480
8481 /* Match: (mem (reg)). */
8482 if (GET_CODE (ind) == REG)
8483 return 1;
8484
8485 /* Match:
8486 (mem (plus (reg)
8487 (const))). */
8488 if (GET_CODE (ind) == PLUS
8489 && GET_CODE (XEXP (ind, 0)) == REG
8490 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8491 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8492 return 1;
8493 }
8494
8495 return 0;
8496 }
8497
8498 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8499 WB is true if full writeback address modes are allowed and is false
8500 if limited writeback address modes (POST_INC and PRE_DEC) are
8501 allowed. */
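/* For illustration: besides plain [reg] and the writeback forms described
   above, this accepts [reg, #imm] with imm a multiple of 4 in the range
   -1020..1020, matching the offset field of the coprocessor load/store
   instructions.  */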
8502
8503 int
8504 arm_coproc_mem_operand (rtx op, bool wb)
8505 {
8506 rtx ind;
8507
8508 /* Reject eliminable registers. */
8509 if (! (reload_in_progress || reload_completed)
8510 && ( reg_mentioned_p (frame_pointer_rtx, op)
8511 || reg_mentioned_p (arg_pointer_rtx, op)
8512 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8513 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8514 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8515 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8516 return FALSE;
8517
8518 /* Constants are converted into offsets from labels. */
8519 if (GET_CODE (op) != MEM)
8520 return FALSE;
8521
8522 ind = XEXP (op, 0);
8523
8524 if (reload_completed
8525 && (GET_CODE (ind) == LABEL_REF
8526 || (GET_CODE (ind) == CONST
8527 && GET_CODE (XEXP (ind, 0)) == PLUS
8528 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8529 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8530 return TRUE;
8531
8532 /* Match: (mem (reg)). */
8533 if (GET_CODE (ind) == REG)
8534 return arm_address_register_rtx_p (ind, 0);
8535
8536 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8537 acceptable in any case (subject to verification by
8538 arm_address_register_rtx_p). We need WB to be true to accept
8539 PRE_INC and POST_DEC. */
8540 if (GET_CODE (ind) == POST_INC
8541 || GET_CODE (ind) == PRE_DEC
8542 || (wb
8543 && (GET_CODE (ind) == PRE_INC
8544 || GET_CODE (ind) == POST_DEC)))
8545 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8546
8547 if (wb
8548 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8549 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8550 && GET_CODE (XEXP (ind, 1)) == PLUS
8551 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8552 ind = XEXP (ind, 1);
8553
8554 /* Match:
8555 (plus (reg)
8556 (const)). */
8557 if (GET_CODE (ind) == PLUS
8558 && GET_CODE (XEXP (ind, 0)) == REG
8559 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8560 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8561 && INTVAL (XEXP (ind, 1)) > -1024
8562 && INTVAL (XEXP (ind, 1)) < 1024
8563 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8564 return TRUE;
8565
8566 return FALSE;
8567 }
8568
8569 /* Return TRUE if OP is a memory operand which we can load or store a vector
8570 to/from. TYPE is one of the following values:
8571 0 - Vector load/store (vldr)
8572 1 - Core registers (ldm)
8573 2 - Element/structure loads (vld1)
8574 */
8575 int
8576 neon_vector_mem_operand (rtx op, int type)
8577 {
8578 rtx ind;
8579
8580 /* Reject eliminable registers. */
8581 if (! (reload_in_progress || reload_completed)
8582 && ( reg_mentioned_p (frame_pointer_rtx, op)
8583 || reg_mentioned_p (arg_pointer_rtx, op)
8584 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8585 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8586 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8587 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8588 return FALSE;
8589
8590 /* Constants are converted into offsets from labels. */
8591 if (GET_CODE (op) != MEM)
8592 return FALSE;
8593
8594 ind = XEXP (op, 0);
8595
8596 if (reload_completed
8597 && (GET_CODE (ind) == LABEL_REF
8598 || (GET_CODE (ind) == CONST
8599 && GET_CODE (XEXP (ind, 0)) == PLUS
8600 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8601 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8602 return TRUE;
8603
8604 /* Match: (mem (reg)). */
8605 if (GET_CODE (ind) == REG)
8606 return arm_address_register_rtx_p (ind, 0);
8607
8608 /* Allow post-increment with Neon registers. */
8609 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
8610 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8611
8612 /* FIXME: vld1 allows register post-modify. */
8613
8614 /* Match:
8615 (plus (reg)
8616 (const)). */
8617 if (type == 0
8618 && GET_CODE (ind) == PLUS
8619 && GET_CODE (XEXP (ind, 0)) == REG
8620 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8621 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8622 && INTVAL (XEXP (ind, 1)) > -1024
8623 && INTVAL (XEXP (ind, 1)) < 1016
8624 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8625 return TRUE;
8626
8627 return FALSE;
8628 }
8629
8630 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8631 type. */
8632 int
8633 neon_struct_mem_operand (rtx op)
8634 {
8635 rtx ind;
8636
8637 /* Reject eliminable registers. */
8638 if (! (reload_in_progress || reload_completed)
8639 && ( reg_mentioned_p (frame_pointer_rtx, op)
8640 || reg_mentioned_p (arg_pointer_rtx, op)
8641 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8642 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8643 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8644 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8645 return FALSE;
8646
8647 /* Constants are converted into offsets from labels. */
8648 if (GET_CODE (op) != MEM)
8649 return FALSE;
8650
8651 ind = XEXP (op, 0);
8652
8653 if (reload_completed
8654 && (GET_CODE (ind) == LABEL_REF
8655 || (GET_CODE (ind) == CONST
8656 && GET_CODE (XEXP (ind, 0)) == PLUS
8657 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8658 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8659 return TRUE;
8660
8661 /* Match: (mem (reg)). */
8662 if (GET_CODE (ind) == REG)
8663 return arm_address_register_rtx_p (ind, 0);
8664
8665 return FALSE;
8666 }
8667
8668 /* Return true if X is a register that will be eliminated later on. */
8669 int
8670 arm_eliminable_register (rtx x)
8671 {
8672 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8673 || REGNO (x) == ARG_POINTER_REGNUM
8674 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8675 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8676 }
8677
8678 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
8679 coprocessor registers. Otherwise return NO_REGS. */
8680
8681 enum reg_class
8682 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8683 {
8684 if (mode == HFmode)
8685 {
8686 if (!TARGET_NEON_FP16)
8687 return GENERAL_REGS;
8688 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8689 return NO_REGS;
8690 return GENERAL_REGS;
8691 }
8692
8693 if (TARGET_NEON
8694 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
8695 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8696 && neon_vector_mem_operand (x, 0))
8697 return NO_REGS;
8698
8699 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
8700 return NO_REGS;
8701
8702 return GENERAL_REGS;
8703 }
8704
8705 /* Values which must be returned in the most-significant end of the return
8706 register. */
8707
8708 static bool
8709 arm_return_in_msb (const_tree valtype)
8710 {
8711 return (TARGET_AAPCS_BASED
8712 && BYTES_BIG_ENDIAN
8713 && (AGGREGATE_TYPE_P (valtype)
8714 || TREE_CODE (valtype) == COMPLEX_TYPE));
8715 }
8716
8717 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8718 Used by the Cirrus Maverick code, which has to work around
8719 a hardware bug triggered by such instructions. */
8720 static bool
8721 arm_memory_load_p (rtx insn)
8722 {
8723 rtx body, lhs, rhs;
8724
8725 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
8726 return false;
8727
8728 body = PATTERN (insn);
8729
8730 if (GET_CODE (body) != SET)
8731 return false;
8732
8733 lhs = XEXP (body, 0);
8734 rhs = XEXP (body, 1);
8735
8736 lhs = REG_OR_SUBREG_RTX (lhs);
8737
8738 /* If the destination is not a general purpose
8739 register we do not have to worry. */
8740 if (GET_CODE (lhs) != REG
8741 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
8742 return false;
8743
8744 /* As well as loads from memory we also have to react
8745 to loads of invalid constants which will be turned
8746 into loads from the minipool. */
8747 return (GET_CODE (rhs) == MEM
8748 || GET_CODE (rhs) == SYMBOL_REF
8749 || note_invalid_constants (insn, -1, false));
8750 }
8751
8752 /* Return TRUE if INSN is a Cirrus instruction. */
8753 static bool
8754 arm_cirrus_insn_p (rtx insn)
8755 {
8756 enum attr_cirrus attr;
8757
8758 /* get_attr cannot accept USE or CLOBBER. */
8759 if (!insn
8760 || GET_CODE (insn) != INSN
8761 || GET_CODE (PATTERN (insn)) == USE
8762 || GET_CODE (PATTERN (insn)) == CLOBBER)
8763 return 0;
8764
8765 attr = get_attr_cirrus (insn);
8766
8767 return attr != CIRRUS_NOT;
8768 }
8769
8770 /* Cirrus reorg for invalid instruction combinations. */
8771 static void
8772 cirrus_reorg (rtx first)
8773 {
8774 enum attr_cirrus attr;
8775 rtx body = PATTERN (first);
8776 rtx t;
8777 int nops;
8778
8779 /* Any branch must be followed by 2 non Cirrus instructions. */
8780 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
8781 {
8782 nops = 0;
8783 t = next_nonnote_insn (first);
8784
8785 if (arm_cirrus_insn_p (t))
8786 ++ nops;
8787
8788 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8789 ++ nops;
8790
8791 while (nops --)
8792 emit_insn_after (gen_nop (), first);
8793
8794 return;
8795 }
8796
8797 /* (float (blah)) is in parallel with a clobber. */
8798 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
8799 body = XVECEXP (body, 0, 0);
8800
8801 if (GET_CODE (body) == SET)
8802 {
8803 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
8804
8805 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8806 be followed by a non Cirrus insn. */
8807 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
8808 {
8809 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
8810 emit_insn_after (gen_nop (), first);
8811
8812 return;
8813 }
8814 else if (arm_memory_load_p (first))
8815 {
8816 unsigned int arm_regno;
8817
8818 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
8819 ldr/cfmv64hr combination where the Rd field is the same
8820 in both instructions must be split with a non Cirrus
8821 insn. Example:
8822
8823 ldr r0, blah
8824 nop
8825 cfmvsr mvf0, r0. */
8826
8827 /* Get Arm register number for ldr insn. */
8828 if (GET_CODE (lhs) == REG)
8829 arm_regno = REGNO (lhs);
8830 else
8831 {
8832 gcc_assert (GET_CODE (rhs) == REG);
8833 arm_regno = REGNO (rhs);
8834 }
8835
8836 /* Next insn. */
8837 first = next_nonnote_insn (first);
8838
8839 if (! arm_cirrus_insn_p (first))
8840 return;
8841
8842 body = PATTERN (first);
8843
8844 /* (float (blah)) is in parallel with a clobber. */
8845 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
8846 body = XVECEXP (body, 0, 0);
8847
8848 if (GET_CODE (body) == FLOAT)
8849 body = XEXP (body, 0);
8850
8851 if (get_attr_cirrus (first) == CIRRUS_MOVE
8852 && GET_CODE (XEXP (body, 1)) == REG
8853 && arm_regno == REGNO (XEXP (body, 1)))
8854 emit_insn_after (gen_nop (), first);
8855
8856 return;
8857 }
8858 }
8859
8860 /* get_attr cannot accept USE or CLOBBER. */
8861 if (!first
8862 || GET_CODE (first) != INSN
8863 || GET_CODE (PATTERN (first)) == USE
8864 || GET_CODE (PATTERN (first)) == CLOBBER)
8865 return;
8866
8867 attr = get_attr_cirrus (first);
8868
8869 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8870 must be followed by a non-coprocessor instruction. */
8871 if (attr == CIRRUS_COMPARE)
8872 {
8873 nops = 0;
8874
8875 t = next_nonnote_insn (first);
8876
8877 if (arm_cirrus_insn_p (t))
8878 ++ nops;
8879
8880 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
8881 ++ nops;
8882
8883 while (nops --)
8884 emit_insn_after (gen_nop (), first);
8885
8886 return;
8887 }
8888 }
8889
8890 /* Return TRUE if X references a SYMBOL_REF. */
8891 int
8892 symbol_mentioned_p (rtx x)
8893 {
8894 const char * fmt;
8895 int i;
8896
8897 if (GET_CODE (x) == SYMBOL_REF)
8898 return 1;
8899
8900 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
8901 are constant offsets, not symbols. */
8902 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8903 return 0;
8904
8905 fmt = GET_RTX_FORMAT (GET_CODE (x));
8906
8907 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8908 {
8909 if (fmt[i] == 'E')
8910 {
8911 int j;
8912
8913 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8914 if (symbol_mentioned_p (XVECEXP (x, i, j)))
8915 return 1;
8916 }
8917 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
8918 return 1;
8919 }
8920
8921 return 0;
8922 }
8923
8924 /* Return TRUE if X references a LABEL_REF. */
8925 int
8926 label_mentioned_p (rtx x)
8927 {
8928 const char * fmt;
8929 int i;
8930
8931 if (GET_CODE (x) == LABEL_REF)
8932 return 1;
8933
8934 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
8935 instruction, but they are constant offsets, not symbols. */
8936 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8937 return 0;
8938
8939 fmt = GET_RTX_FORMAT (GET_CODE (x));
8940 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8941 {
8942 if (fmt[i] == 'E')
8943 {
8944 int j;
8945
8946 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8947 if (label_mentioned_p (XVECEXP (x, i, j)))
8948 return 1;
8949 }
8950 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
8951 return 1;
8952 }
8953
8954 return 0;
8955 }
8956
8957 int
8958 tls_mentioned_p (rtx x)
8959 {
8960 switch (GET_CODE (x))
8961 {
8962 case CONST:
8963 return tls_mentioned_p (XEXP (x, 0));
8964
8965 case UNSPEC:
8966 if (XINT (x, 1) == UNSPEC_TLS)
8967 return 1;
8968
8969 default:
8970 return 0;
8971 }
8972 }
8973
8974 /* Must not copy any rtx that uses a pc-relative address. */
8975
8976 static int
8977 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
8978 {
8979 if (GET_CODE (*x) == UNSPEC
8980 && XINT (*x, 1) == UNSPEC_PIC_BASE)
8981 return 1;
8982 return 0;
8983 }
8984
8985 static bool
8986 arm_cannot_copy_insn_p (rtx insn)
8987 {
8988 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
8989 }
8990
8991 enum rtx_code
8992 minmax_code (rtx x)
8993 {
8994 enum rtx_code code = GET_CODE (x);
8995
8996 switch (code)
8997 {
8998 case SMAX:
8999 return GE;
9000 case SMIN:
9001 return LE;
9002 case UMIN:
9003 return LEU;
9004 case UMAX:
9005 return GEU;
9006 default:
9007 gcc_unreachable ();
9008 }
9009 }
9010
9011 /* Return 1 if memory locations are adjacent. */
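/* For illustration: (mem (reg r4)) and (mem (plus (reg r4) (const_int 4)))
   are adjacent, provided r4 is not an eliminable register; on cores with
   load delay slots the stricter conditions below also apply.  */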
9012 int
9013 adjacent_mem_locations (rtx a, rtx b)
9014 {
9015 /* We don't guarantee to preserve the order of these memory refs. */
9016 if (volatile_refs_p (a) || volatile_refs_p (b))
9017 return 0;
9018
9019 if ((GET_CODE (XEXP (a, 0)) == REG
9020 || (GET_CODE (XEXP (a, 0)) == PLUS
9021 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9022 && (GET_CODE (XEXP (b, 0)) == REG
9023 || (GET_CODE (XEXP (b, 0)) == PLUS
9024 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9025 {
9026 HOST_WIDE_INT val0 = 0, val1 = 0;
9027 rtx reg0, reg1;
9028 int val_diff;
9029
9030 if (GET_CODE (XEXP (a, 0)) == PLUS)
9031 {
9032 reg0 = XEXP (XEXP (a, 0), 0);
9033 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9034 }
9035 else
9036 reg0 = XEXP (a, 0);
9037
9038 if (GET_CODE (XEXP (b, 0)) == PLUS)
9039 {
9040 reg1 = XEXP (XEXP (b, 0), 0);
9041 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9042 }
9043 else
9044 reg1 = XEXP (b, 0);
9045
9046 /* Don't accept any offset that will require multiple
9047 instructions to handle, since this would cause the
9048 arith_adjacentmem pattern to output an overlong sequence. */
9049 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9050 return 0;
9051
9052 /* Don't allow an eliminable register: register elimination can make
9053 the offset too large. */
9054 if (arm_eliminable_register (reg0))
9055 return 0;
9056
9057 val_diff = val1 - val0;
9058
9059 if (arm_ld_sched)
9060 {
9061 /* If the target has load delay slots, then there's no benefit
9062 to using an ldm instruction unless the offset is zero and
9063 we are optimizing for size. */
9064 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9065 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9066 && (val_diff == 4 || val_diff == -4));
9067 }
9068
9069 return ((REGNO (reg0) == REGNO (reg1))
9070 && (val_diff == 4 || val_diff == -4));
9071 }
9072
9073 return 0;
9074 }
9075
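/* Check whether the NOPS destination registers in OPERANDS[0..NOPS-1] and
   the NOPS memory references in OPERANDS[NOPS..2*NOPS-1] can be combined
   into a single load-multiple.  Returns 0 if not, 1 for ldmia, 2 for ldmib,
   3 for ldmda, 4 for ldmdb, or 5 if the base register must first be
   adjusted with an add/sub (see emit_ldm_seq).  On success REGS, *BASE and
   *LOAD_OFFSET describe the access.  */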
9076 int
9077 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9078 HOST_WIDE_INT *load_offset)
9079 {
9080 int unsorted_regs[4];
9081 HOST_WIDE_INT unsorted_offsets[4];
9082 int order[4];
9083 int base_reg = -1;
9084 int i;
9085
9086 /* Can only handle 2, 3, or 4 insns at present,
9087 though could be easily extended if required. */
9088 gcc_assert (nops >= 2 && nops <= 4);
9089
9090 memset (order, 0, 4 * sizeof (int));
9091
9092 /* Loop over the operands and check that the memory references are
9093 suitable (i.e. immediate offsets from the same base register). At
9094 the same time, extract the target register, and the memory
9095 offsets. */
9096 for (i = 0; i < nops; i++)
9097 {
9098 rtx reg;
9099 rtx offset;
9100
9101 /* Convert a subreg of a mem into the mem itself. */
9102 if (GET_CODE (operands[nops + i]) == SUBREG)
9103 operands[nops + i] = alter_subreg (operands + (nops + i));
9104
9105 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9106
9107 /* Don't reorder volatile memory references; it doesn't seem worth
9108 looking for the case where the order is ok anyway. */
9109 if (MEM_VOLATILE_P (operands[nops + i]))
9110 return 0;
9111
9112 offset = const0_rtx;
9113
9114 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9115 || (GET_CODE (reg) == SUBREG
9116 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9117 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9118 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9119 == REG)
9120 || (GET_CODE (reg) == SUBREG
9121 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9122 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9123 == CONST_INT)))
9124 {
9125 if (i == 0)
9126 {
9127 base_reg = REGNO (reg);
9128 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
9129 ? REGNO (operands[i])
9130 : REGNO (SUBREG_REG (operands[i])));
9131 order[0] = 0;
9132 }
9133 else
9134 {
9135 if (base_reg != (int) REGNO (reg))
9136 /* Not addressed from the same base register. */
9137 return 0;
9138
9139 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9140 ? REGNO (operands[i])
9141 : REGNO (SUBREG_REG (operands[i])));
9142 if (unsorted_regs[i] < unsorted_regs[order[0]])
9143 order[0] = i;
9144 }
9145
9146 /* If it isn't an integer register, or if it overwrites the
9147 base register but isn't the last insn in the list, then
9148 we can't do this. */
9149 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
9150 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9151 return 0;
9152
9153 unsorted_offsets[i] = INTVAL (offset);
9154 }
9155 else
9156 /* Not a suitable memory address. */
9157 return 0;
9158 }
9159
9160 /* All the useful information has now been extracted from the
9161 operands into unsorted_regs and unsorted_offsets; additionally,
9162 order[0] has been set to the lowest numbered register in the
9163 list. Sort the registers into order, and check that the memory
9164 offsets are ascending and adjacent. */
9165
9166 for (i = 1; i < nops; i++)
9167 {
9168 int j;
9169
9170 order[i] = order[i - 1];
9171 for (j = 0; j < nops; j++)
9172 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9173 && (order[i] == order[i - 1]
9174 || unsorted_regs[j] < unsorted_regs[order[i]]))
9175 order[i] = j;
9176
9177 /* Have we found a suitable register? If not, one must be used more
9178 than once. */
9179 if (order[i] == order[i - 1])
9180 return 0;
9181
9182 /* Is the memory address adjacent and ascending? */
9183 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9184 return 0;
9185 }
9186
9187 if (base)
9188 {
9189 *base = base_reg;
9190
9191 for (i = 0; i < nops; i++)
9192 regs[i] = unsorted_regs[order[i]];
9193
9194 *load_offset = unsorted_offsets[order[0]];
9195 }
9196
9197 if (unsorted_offsets[order[0]] == 0)
9198 return 1; /* ldmia */
9199
9200 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9201 return 2; /* ldmib */
9202
9203 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9204 return 3; /* ldmda */
9205
9206 if (unsorted_offsets[order[nops - 1]] == -4)
9207 return 4; /* ldmdb */
9208
9209 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9210 if the offset isn't small enough. The reason 2 ldrs are faster
9211 is because these ARMs are able to do more than one cache access
9212 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9213 whilst the ARM8 has a double bandwidth cache. This means that
9214 these cores can do both an instruction fetch and a data fetch in
9215 a single cycle, so the trick of calculating the address into a
9216 scratch register (one of the result regs) and then doing a load
9217 multiple actually becomes slower (and no smaller in code size).
9218 That is the transformation
9219
9220 ldr rd1, [rbase + offset]
9221 ldr rd2, [rbase + offset + 4]
9222
9223 to
9224
9225 add rd1, rbase, offset
9226 ldmia rd1, {rd1, rd2}
9227
9228 produces worse code -- '3 cycles + any stalls on rd2' instead of
9229 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9230 access per cycle, the first sequence could never complete in less
9231 than 6 cycles, whereas the ldm sequence would only take 5 and
9232 would make better use of sequential accesses if not hitting the
9233 cache.
9234
9235 We cheat here and test 'arm_ld_sched' which we currently know to
9236 only be true for the ARM8, ARM9 and StrongARM. If this ever
9237 changes, then the test below needs to be reworked. */
9238 if (nops == 2 && arm_ld_sched)
9239 return 0;
9240
9241 /* Can't do it without setting up the offset, only do this if it takes
9242 no more than one insn. */
9243 return (const_ok_for_arm (unsorted_offsets[order[0]])
9244 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
9245 }
9246
9247 const char *
9248 emit_ldm_seq (rtx *operands, int nops)
9249 {
9250 int regs[4];
9251 int base_reg;
9252 HOST_WIDE_INT offset;
9253 char buf[100];
9254 int i;
9255
9256 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9257 {
9258 case 1:
9259 strcpy (buf, "ldm%(ia%)\t");
9260 break;
9261
9262 case 2:
9263 strcpy (buf, "ldm%(ib%)\t");
9264 break;
9265
9266 case 3:
9267 strcpy (buf, "ldm%(da%)\t");
9268 break;
9269
9270 case 4:
9271 strcpy (buf, "ldm%(db%)\t");
9272 break;
9273
9274 case 5:
9275 if (offset >= 0)
9276 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9277 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9278 (long) offset);
9279 else
9280 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9281 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9282 (long) -offset);
9283 output_asm_insn (buf, operands);
9284 base_reg = regs[0];
9285 strcpy (buf, "ldm%(ia%)\t");
9286 break;
9287
9288 default:
9289 gcc_unreachable ();
9290 }
9291
9292 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9293 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9294
9295 for (i = 1; i < nops; i++)
9296 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9297 reg_names[regs[i]]);
9298
9299 strcat (buf, "}\t%@ phole ldm");
9300
9301 output_asm_insn (buf, operands);
9302 return "";
9303 }
9304
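/* As load_multiple_sequence above, but for stores: returns 0 if the
   operands cannot be combined, or 1..4 for stmia/stmib/stmda/stmdb (there
   is no base-adjusting variant here).  */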
9305 int
9306 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9307 HOST_WIDE_INT * load_offset)
9308 {
9309 int unsorted_regs[4];
9310 HOST_WIDE_INT unsorted_offsets[4];
9311 int order[4];
9312 int base_reg = -1;
9313 int i;
9314
9315 /* Can only handle 2, 3, or 4 insns at present, though could be easily
9316 extended if required. */
9317 gcc_assert (nops >= 2 && nops <= 4);
9318
9319 memset (order, 0, 4 * sizeof (int));
9320
9321 /* Loop over the operands and check that the memory references are
9322 suitable (i.e. immediate offsets from the same base register). At
9323 the same time, extract the target register, and the memory
9324 offsets. */
9325 for (i = 0; i < nops; i++)
9326 {
9327 rtx reg;
9328 rtx offset;
9329
9330 /* Convert a subreg of a mem into the mem itself. */
9331 if (GET_CODE (operands[nops + i]) == SUBREG)
9332 operands[nops + i] = alter_subreg (operands + (nops + i));
9333
9334 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9335
9336 /* Don't reorder volatile memory references; it doesn't seem worth
9337 looking for the case where the order is ok anyway. */
9338 if (MEM_VOLATILE_P (operands[nops + i]))
9339 return 0;
9340
9341 offset = const0_rtx;
9342
9343 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9344 || (GET_CODE (reg) == SUBREG
9345 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9346 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9347 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9348 == REG)
9349 || (GET_CODE (reg) == SUBREG
9350 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9351 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9352 == CONST_INT)))
9353 {
9354 if (i == 0)
9355 {
9356 base_reg = REGNO (reg);
9357 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
9358 ? REGNO (operands[i])
9359 : REGNO (SUBREG_REG (operands[i])));
9360 order[0] = 0;
9361 }
9362 else
9363 {
9364 if (base_reg != (int) REGNO (reg))
9365 /* Not addressed from the same base register. */
9366 return 0;
9367
9368 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9369 ? REGNO (operands[i])
9370 : REGNO (SUBREG_REG (operands[i])));
9371 if (unsorted_regs[i] < unsorted_regs[order[0]])
9372 order[0] = i;
9373 }
9374
9375 /* If it isn't an integer register, then we can't do this. */
9376 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9377 return 0;
9378
9379 unsorted_offsets[i] = INTVAL (offset);
9380 }
9381 else
9382 /* Not a suitable memory address. */
9383 return 0;
9384 }
9385
9386 /* All the useful information has now been extracted from the
9387 operands into unsorted_regs and unsorted_offsets; additionally,
9388 order[0] has been set to the lowest numbered register in the
9389 list. Sort the registers into order, and check that the memory
9390 offsets are ascending and adjacent. */
9391
9392 for (i = 1; i < nops; i++)
9393 {
9394 int j;
9395
9396 order[i] = order[i - 1];
9397 for (j = 0; j < nops; j++)
9398 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9399 && (order[i] == order[i - 1]
9400 || unsorted_regs[j] < unsorted_regs[order[i]]))
9401 order[i] = j;
9402
9403 /* Have we found a suitable register? If not, one must be used more
9404 than once. */
9405 if (order[i] == order[i - 1])
9406 return 0;
9407
9408 /* Is the memory address adjacent and ascending? */
9409 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9410 return 0;
9411 }
9412
9413 if (base)
9414 {
9415 *base = base_reg;
9416
9417 for (i = 0; i < nops; i++)
9418 regs[i] = unsorted_regs[order[i]];
9419
9420 *load_offset = unsorted_offsets[order[0]];
9421 }
9422
9423 if (unsorted_offsets[order[0]] == 0)
9424 return 1; /* stmia */
9425
9426 if (unsorted_offsets[order[0]] == 4)
9427 return 2; /* stmib */
9428
9429 if (unsorted_offsets[order[nops - 1]] == 0)
9430 return 3; /* stmda */
9431
9432 if (unsorted_offsets[order[nops - 1]] == -4)
9433 return 4; /* stmdb */
9434
9435 return 0;
9436 }
9437
9438 const char *
9439 emit_stm_seq (rtx *operands, int nops)
9440 {
9441 int regs[4];
9442 int base_reg;
9443 HOST_WIDE_INT offset;
9444 char buf[100];
9445 int i;
9446
9447 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9448 {
9449 case 1:
9450 strcpy (buf, "stm%(ia%)\t");
9451 break;
9452
9453 case 2:
9454 strcpy (buf, "stm%(ib%)\t");
9455 break;
9456
9457 case 3:
9458 strcpy (buf, "stm%(da%)\t");
9459 break;
9460
9461 case 4:
9462 strcpy (buf, "stm%(db%)\t");
9463 break;
9464
9465 default:
9466 gcc_unreachable ();
9467 }
9468
9469 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9470 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9471
9472 for (i = 1; i < nops; i++)
9473 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9474 reg_names[regs[i]]);
9475
9476 strcat (buf, "}\t%@ phole stm");
9477
9478 output_asm_insn (buf, operands);
9479 return "";
9480 }
9481 \f
9482 /* Routines for use in generating RTL. */
9483
9484 rtx
9485 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
9486 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9487 {
9488 HOST_WIDE_INT offset = *offsetp;
9489 int i = 0, j;
9490 rtx result;
9491 int sign = up ? 1 : -1;
9492 rtx mem, addr;
9493
9494 /* XScale has load-store double instructions, but they have stricter
9495 alignment requirements than load-store multiple, so we cannot
9496 use them.
9497
9498 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9499 the pipeline until completion.
9500
9501 NREGS CYCLES
9502 1 3
9503 2 4
9504 3 5
9505 4 6
9506
9507 An ldr instruction takes 1-3 cycles, but does not block the
9508 pipeline.
9509
9510 NREGS CYCLES
9511 1 1-3
9512 2 2-6
9513 3 3-9
9514 4 4-12
9515
9516 Best case ldr will always win. However, the more ldr instructions
9517 we issue, the less likely we are to be able to schedule them well.
9518 Using ldr instructions also increases code size.
9519
9520 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9521 for counts of 3 or 4 regs. */
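/* As an illustration only (register numbers are hypothetical): for a
   two-word copy with XScale tuning when optimizing for speed, the code
   below therefore emits two separate loads,

       ldr     r0, [r4]
       ldr     r1, [r4, #4]

   rather than a single pipeline-blocking "ldmia r4, {r0, r1}".  */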
9522 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9523 {
9524 rtx seq;
9525
9526 start_sequence ();
9527
9528 for (i = 0; i < count; i++)
9529 {
9530 addr = plus_constant (from, i * 4 * sign);
9531 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9532 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
9533 offset += 4 * sign;
9534 }
9535
9536 if (write_back)
9537 {
9538 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9539 *offsetp = offset;
9540 }
9541
9542 seq = get_insns ();
9543 end_sequence ();
9544
9545 return seq;
9546 }
9547
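/* A sketch of the PARALLEL built below for COUNT == 2, UP and WRITE_BACK
   (illustrative only; FROM and BASE_REGNO stand for the actual registers):

     (parallel [(set (reg FROM) (plus (reg FROM) (const_int 8)))
                (set (reg BASE_REGNO)   (mem (reg FROM)))
                (set (reg BASE_REGNO+1) (mem (plus (reg FROM) (const_int 4))))])  */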
9548 result = gen_rtx_PARALLEL (VOIDmode,
9549 rtvec_alloc (count + (write_back ? 1 : 0)));
9550 if (write_back)
9551 {
9552 XVECEXP (result, 0, 0)
9553 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
9554 i = 1;
9555 count++;
9556 }
9557
9558 for (j = 0; i < count; i++, j++)
9559 {
9560 addr = plus_constant (from, j * 4 * sign);
9561 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9562 XVECEXP (result, 0, i)
9563 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
9564 offset += 4 * sign;
9565 }
9566
9567 if (write_back)
9568 *offsetp = offset;
9569
9570 return result;
9571 }
9572
9573 rtx
9574 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
9575 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9576 {
9577 HOST_WIDE_INT offset = *offsetp;
9578 int i = 0, j;
9579 rtx result;
9580 int sign = up ? 1 : -1;
9581 rtx mem, addr;
9582
9583 /* See arm_gen_load_multiple for discussion of
9584 the pros/cons of ldm/stm usage for XScale. */
9585 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9586 {
9587 rtx seq;
9588
9589 start_sequence ();
9590
9591 for (i = 0; i < count; i++)
9592 {
9593 addr = plus_constant (to, i * 4 * sign);
9594 mem = adjust_automodify_address (basemem, SImode, addr, offset);
9595 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
9596 offset += 4 * sign;
9597 }
9598
9599 if (write_back)
9600 {
9601 emit_move_insn (to, plus_constant (to, count * 4 * sign));
9602 *offsetp = offset;
9603 }
9604
9605 seq = get_insns ();
9606 end_sequence ();
9607
9608 return seq;
9609 }
9610
9611 result = gen_rtx_PARALLEL (VOIDmode,
9612 rtvec_alloc (count + (write_back ? 1 : 0)));
9613 if (write_back)
9614 {
9615 XVECEXP (result, 0, 0)
9616 = gen_rtx_SET (VOIDmode, to,
9617 plus_constant (to, count * 4 * sign));
9618 i = 1;
9619 count++;
9620 }
9621
9622 for (j = 0; i < count; i++, j++)
9623 {
9624 addr = plus_constant (to, j * 4 * sign);
9625 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9626 XVECEXP (result, 0, i)
9627 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
9628 offset += 4 * sign;
9629 }
9630
9631 if (write_back)
9632 *offsetp = offset;
9633
9634 return result;
9635 }
9636
9637 int
9638 arm_gen_movmemqi (rtx *operands)
9639 {
9640 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
9641 HOST_WIDE_INT srcoffset, dstoffset;
9642 int i;
9643 rtx src, dst, srcbase, dstbase;
9644 rtx part_bytes_reg = NULL;
9645 rtx mem;
9646
9647 if (GET_CODE (operands[2]) != CONST_INT
9648 || GET_CODE (operands[3]) != CONST_INT
9649 || INTVAL (operands[2]) > 64
9650 || INTVAL (operands[3]) & 3)
9651 return 0;
9652
9653 dstbase = operands[0];
9654 srcbase = operands[1];
9655
9656 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
9657 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
9658
9659 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
9660 out_words_to_go = INTVAL (operands[2]) / 4;
9661 last_bytes = INTVAL (operands[2]) & 3;
9662 dstoffset = srcoffset = 0;
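/* A worked example (hypothetical values, for illustration only): for a
   14-byte copy, in_words_to_go = 4 (the byte count rounded up to words),
   out_words_to_go = 3 whole words are stored, and last_bytes = 2 trailing
   bytes are written by the halfword/byte code at the end of this function.  */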
9663
9664 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
9665 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9666
9667 for (i = 0; in_words_to_go >= 2; i+=4)
9668 {
9669 if (in_words_to_go > 4)
9670 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
9671 srcbase, &srcoffset));
9672 else
9673 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
9674 FALSE, srcbase, &srcoffset));
9675
9676 if (out_words_to_go)
9677 {
9678 if (out_words_to_go > 4)
9679 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
9680 dstbase, &dstoffset));
9681 else if (out_words_to_go != 1)
9682 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
9683 dst, TRUE,
9684 (last_bytes == 0
9685 ? FALSE : TRUE),
9686 dstbase, &dstoffset));
9687 else
9688 {
9689 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9690 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
9691 if (last_bytes != 0)
9692 {
9693 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
9694 dstoffset += 4;
9695 }
9696 }
9697 }
9698
9699 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
9700 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
9701 }
9702
9703 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9704 if (out_words_to_go)
9705 {
9706 rtx sreg;
9707
9708 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9709 sreg = copy_to_reg (mem);
9710
9711 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
9712 emit_move_insn (mem, sreg);
9713 in_words_to_go--;
9714
9715 gcc_assert (!in_words_to_go); /* Sanity check */
9716 }
9717
9718 if (in_words_to_go)
9719 {
9720 gcc_assert (in_words_to_go > 0);
9721
9722 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
9723 part_bytes_reg = copy_to_mode_reg (SImode, mem);
9724 }
9725
9726 gcc_assert (!last_bytes || part_bytes_reg);
9727
9728 if (BYTES_BIG_ENDIAN && last_bytes)
9729 {
9730 rtx tmp = gen_reg_rtx (SImode);
9731
9732 /* The bytes we want are in the top end of the word. */
9733 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
9734 GEN_INT (8 * (4 - last_bytes))));
9735 part_bytes_reg = tmp;
9736
9737 while (last_bytes)
9738 {
9739 mem = adjust_automodify_address (dstbase, QImode,
9740 plus_constant (dst, last_bytes - 1),
9741 dstoffset + last_bytes - 1);
9742 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9743
9744 if (--last_bytes)
9745 {
9746 tmp = gen_reg_rtx (SImode);
9747 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
9748 part_bytes_reg = tmp;
9749 }
9750 }
9751
9752 }
9753 else
9754 {
9755 if (last_bytes > 1)
9756 {
9757 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
9758 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
9759 last_bytes -= 2;
9760 if (last_bytes)
9761 {
9762 rtx tmp = gen_reg_rtx (SImode);
9763 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
9764 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
9765 part_bytes_reg = tmp;
9766 dstoffset += 2;
9767 }
9768 }
9769
9770 if (last_bytes)
9771 {
9772 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
9773 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
9774 }
9775 }
9776
9777 return 1;
9778 }
9779
9780 /* Select a dominance comparison mode if possible for a test of the general
9781 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9782 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9783 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9784 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9785 In all cases OP will be either EQ or NE, but we don't need to know which
9786 here. If we are unable to support a dominance comparison we return
9787 CC mode. This will then fail to match for the RTL expressions that
9788 generate this call. */
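/* For instance (purely illustrative): combining (x == 0) || (y == 0) gives
   COND1 = COND2 = EQ with DOM_CC_X_OR_Y and the code below returns
   CC_DEQmode, while (x < 0) && (y < 0) gives COND1 = COND2 = LT with
   DOM_CC_X_AND_Y and returns CC_DLTmode.  */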
9789 enum machine_mode
9790 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
9791 {
9792 enum rtx_code cond1, cond2;
9793 int swapped = 0;
9794
9795 /* Currently we will probably get the wrong result if the individual
9796 comparisons are not simple. This also ensures that it is safe to
9797 reverse a comparison if necessary. */
9798 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
9799 != CCmode)
9800 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
9801 != CCmode))
9802 return CCmode;
9803
9804 /* The if_then_else variant of this tests the second condition if the
9805 first passes, but is true if the first fails. Reverse the first
9806 condition to get a true "inclusive-or" expression. */
9807 if (cond_or == DOM_CC_NX_OR_Y)
9808 cond1 = reverse_condition (cond1);
9809
9810 /* If the comparisons are not equal, and one doesn't dominate the other,
9811 then we can't do this. */
9812 if (cond1 != cond2
9813 && !comparison_dominates_p (cond1, cond2)
9814 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
9815 return CCmode;
9816
9817 if (swapped)
9818 {
9819 enum rtx_code temp = cond1;
9820 cond1 = cond2;
9821 cond2 = temp;
9822 }
9823
9824 switch (cond1)
9825 {
9826 case EQ:
9827 if (cond_or == DOM_CC_X_AND_Y)
9828 return CC_DEQmode;
9829
9830 switch (cond2)
9831 {
9832 case EQ: return CC_DEQmode;
9833 case LE: return CC_DLEmode;
9834 case LEU: return CC_DLEUmode;
9835 case GE: return CC_DGEmode;
9836 case GEU: return CC_DGEUmode;
9837 default: gcc_unreachable ();
9838 }
9839
9840 case LT:
9841 if (cond_or == DOM_CC_X_AND_Y)
9842 return CC_DLTmode;
9843
9844 switch (cond2)
9845 {
9846 case LT:
9847 return CC_DLTmode;
9848 case LE:
9849 return CC_DLEmode;
9850 case NE:
9851 return CC_DNEmode;
9852 default:
9853 gcc_unreachable ();
9854 }
9855
9856 case GT:
9857 if (cond_or == DOM_CC_X_AND_Y)
9858 return CC_DGTmode;
9859
9860 switch (cond2)
9861 {
9862 case GT:
9863 return CC_DGTmode;
9864 case GE:
9865 return CC_DGEmode;
9866 case NE:
9867 return CC_DNEmode;
9868 default:
9869 gcc_unreachable ();
9870 }
9871
9872 case LTU:
9873 if (cond_or == DOM_CC_X_AND_Y)
9874 return CC_DLTUmode;
9875
9876 switch (cond2)
9877 {
9878 case LTU:
9879 return CC_DLTUmode;
9880 case LEU:
9881 return CC_DLEUmode;
9882 case NE:
9883 return CC_DNEmode;
9884 default:
9885 gcc_unreachable ();
9886 }
9887
9888 case GTU:
9889 if (cond_or == DOM_CC_X_AND_Y)
9890 return CC_DGTUmode;
9891
9892 switch (cond2)
9893 {
9894 case GTU:
9895 return CC_DGTUmode;
9896 case GEU:
9897 return CC_DGEUmode;
9898 case NE:
9899 return CC_DNEmode;
9900 default:
9901 gcc_unreachable ();
9902 }
9903
9904 /* The remaining cases only occur when both comparisons are the
9905 same. */
9906 case NE:
9907 gcc_assert (cond1 == cond2);
9908 return CC_DNEmode;
9909
9910 case LE:
9911 gcc_assert (cond1 == cond2);
9912 return CC_DLEmode;
9913
9914 case GE:
9915 gcc_assert (cond1 == cond2);
9916 return CC_DGEmode;
9917
9918 case LEU:
9919 gcc_assert (cond1 == cond2);
9920 return CC_DLEUmode;
9921
9922 case GEU:
9923 gcc_assert (cond1 == cond2);
9924 return CC_DGEUmode;
9925
9926 default:
9927 gcc_unreachable ();
9928 }
9929 }
9930
9931 enum machine_mode
9932 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
9933 {
9934 /* All floating point compares return CCFP if it is an equality
9935 comparison, and CCFPE otherwise. */
9936 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
9937 {
9938 switch (op)
9939 {
9940 case EQ:
9941 case NE:
9942 case UNORDERED:
9943 case ORDERED:
9944 case UNLT:
9945 case UNLE:
9946 case UNGT:
9947 case UNGE:
9948 case UNEQ:
9949 case LTGT:
9950 return CCFPmode;
9951
9952 case LT:
9953 case LE:
9954 case GT:
9955 case GE:
9956 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
9957 return CCFPmode;
9958 return CCFPEmode;
9959
9960 default:
9961 gcc_unreachable ();
9962 }
9963 }
9964
9965 /* A compare with a shifted operand. Because of canonicalization, the
9966 comparison will have to be swapped when we emit the assembler. */
9967 if (GET_MODE (y) == SImode
9968 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9969 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
9970 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
9971 || GET_CODE (x) == ROTATERT))
9972 return CC_SWPmode;
9973
9974 /* This operation is performed swapped, but since we only rely on the Z
9975 flag we don't need an additional mode. */
9976 if (GET_MODE (y) == SImode
9977 && (REG_P (y) || (GET_CODE (y) == SUBREG))
9978 && GET_CODE (x) == NEG
9979 && (op == EQ || op == NE))
9980 return CC_Zmode;
9981
9982 /* This is a special case that is used by combine to allow a
9983 comparison of a shifted byte load to be split into a zero-extend
9984 followed by a comparison of the shifted integer (only valid for
9985 equalities and unsigned inequalities). */
9986 if (GET_MODE (x) == SImode
9987 && GET_CODE (x) == ASHIFT
9988 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
9989 && GET_CODE (XEXP (x, 0)) == SUBREG
9990 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
9991 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
9992 && (op == EQ || op == NE
9993 || op == GEU || op == GTU || op == LTU || op == LEU)
9994 && GET_CODE (y) == CONST_INT)
9995 return CC_Zmode;
9996
9997 /* A construct for a conditional compare: if the false arm contains
9998 0, then both conditions must be true; otherwise either condition
9999 must be true. Not all conditions are possible, so CCmode is
10000 returned if it can't be done. */
10001 if (GET_CODE (x) == IF_THEN_ELSE
10002 && (XEXP (x, 2) == const0_rtx
10003 || XEXP (x, 2) == const1_rtx)
10004 && COMPARISON_P (XEXP (x, 0))
10005 && COMPARISON_P (XEXP (x, 1)))
10006 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10007 INTVAL (XEXP (x, 2)));
10008
10009 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10010 if (GET_CODE (x) == AND
10011 && COMPARISON_P (XEXP (x, 0))
10012 && COMPARISON_P (XEXP (x, 1)))
10013 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10014 DOM_CC_X_AND_Y);
10015
10016 if (GET_CODE (x) == IOR
10017 && COMPARISON_P (XEXP (x, 0))
10018 && COMPARISON_P (XEXP (x, 1)))
10019 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10020 DOM_CC_X_OR_Y);
10021
10022 /* An operation (on Thumb) where we want to test for a single bit.
10023 This is done by shifting that bit up into the top bit of a
10024 scratch register; we can then branch on the sign bit. */
10025 if (TARGET_THUMB1
10026 && GET_MODE (x) == SImode
10027 && (op == EQ || op == NE)
10028 && GET_CODE (x) == ZERO_EXTRACT
10029 && XEXP (x, 1) == const1_rtx)
10030 return CC_Nmode;
10031
10032 /* For an operation that sets the condition codes as a side-effect, the
10033 V flag is not set correctly, so we can only use comparisons where
10034 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10035 instead.) */
10036 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10037 if (GET_MODE (x) == SImode
10038 && y == const0_rtx
10039 && (op == EQ || op == NE || op == LT || op == GE)
10040 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10041 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10042 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10043 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10044 || GET_CODE (x) == LSHIFTRT
10045 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10046 || GET_CODE (x) == ROTATERT
10047 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10048 return CC_NOOVmode;
10049
10050 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10051 return CC_Zmode;
10052
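/* A note on the CC_Cmode case below (illustrative only): it matches the
   unsigned-overflow idiom "a + b < a", where (plus a b) is compared against
   a with LTU and only the carry flag is needed to decide the result.  */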
10053 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10054 && GET_CODE (x) == PLUS
10055 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10056 return CC_Cmode;
10057
10058 return CCmode;
10059 }
10060
10061 /* X and Y are two things to compare using CODE. Emit the compare insn and
10062 return the rtx for register 0 in the proper mode. FP means this is a
10063 floating point compare: I don't think that it is needed on the arm. */
10064 rtx
10065 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10066 {
10067 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
10068 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10069
10070 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10071
10072 return cc_reg;
10073 }
10074
10075 /* Generate a sequence of insns that will generate the correct return
10076 address mask depending on the physical architecture that the program
10077 is running on. */
10078 rtx
10079 arm_gen_return_addr_mask (void)
10080 {
10081 rtx reg = gen_reg_rtx (Pmode);
10082
10083 emit_insn (gen_return_addr_mask (reg));
10084 return reg;
10085 }
10086
10087 void
10088 arm_reload_in_hi (rtx *operands)
10089 {
10090 rtx ref = operands[1];
10091 rtx base, scratch;
10092 HOST_WIDE_INT offset = 0;
10093
10094 if (GET_CODE (ref) == SUBREG)
10095 {
10096 offset = SUBREG_BYTE (ref);
10097 ref = SUBREG_REG (ref);
10098 }
10099
10100 if (GET_CODE (ref) == REG)
10101 {
10102 /* We have a pseudo which has been spilt onto the stack; there
10103 are two cases here: the first where there is a simple
10104 stack-slot replacement and a second where the stack-slot is
10105 out of range, or is used as a subreg. */
10106 if (reg_equiv_mem[REGNO (ref)])
10107 {
10108 ref = reg_equiv_mem[REGNO (ref)];
10109 base = find_replacement (&XEXP (ref, 0));
10110 }
10111 else
10112 /* The slot is out of range, or was dressed up in a SUBREG. */
10113 base = reg_equiv_address[REGNO (ref)];
10114 }
10115 else
10116 base = find_replacement (&XEXP (ref, 0));
10117
10118 /* Handle the case where the address is too complex to be offset by 1. */
10119 if (GET_CODE (base) == MINUS
10120 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10121 {
10122 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10123
10124 emit_set_insn (base_plus, base);
10125 base = base_plus;
10126 }
10127 else if (GET_CODE (base) == PLUS)
10128 {
10129 /* The addend must be CONST_INT, or we would have dealt with it above. */
10130 HOST_WIDE_INT hi, lo;
10131
10132 offset += INTVAL (XEXP (base, 1));
10133 base = XEXP (base, 0);
10134
10135 /* Rework the address into a legal sequence of insns. */
10136 /* Valid range for lo is -4095 -> 4095 */
10137 lo = (offset >= 0
10138 ? (offset & 0xfff)
10139 : -((-offset) & 0xfff));
10140
10141 /* Corner case: if lo is the max offset, then we would be out of range
10142 once we have added the additional 1 below, so bump the msb into the
10143 pre-loading insn(s). */
10144 if (lo == 4095)
10145 lo &= 0x7ff;
10146
10147 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10148 ^ (HOST_WIDE_INT) 0x80000000)
10149 - (HOST_WIDE_INT) 0x80000000);
10150
10151 gcc_assert (hi + lo == offset);
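/* Worked example (values are illustrative only): offset = 0x1005 splits
   into lo = 0x005 and hi = 0x1000; offset = -0x1005 into lo = -0x005 and
   hi = -0x1000. In the corner case offset = 4095, lo is first trimmed to
   2047 so that the "+ 1" used below stays in range, giving hi = 2048.  */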
10152
10153 if (hi != 0)
10154 {
10155 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10156
10157 /* Get the base address; addsi3 knows how to handle constants
10158 that require more than one insn. */
10159 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10160 base = base_plus;
10161 offset = lo;
10162 }
10163 }
10164
10165 /* Operands[2] may overlap operands[0] (though it won't overlap
10166 operands[1]); that's why we asked for a DImode reg -- so we can
10167 use the half that does not overlap. */
10168 if (REGNO (operands[2]) == REGNO (operands[0]))
10169 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10170 else
10171 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10172
10173 emit_insn (gen_zero_extendqisi2 (scratch,
10174 gen_rtx_MEM (QImode,
10175 plus_constant (base,
10176 offset))));
10177 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10178 gen_rtx_MEM (QImode,
10179 plus_constant (base,
10180 offset + 1))));
10181 if (!BYTES_BIG_ENDIAN)
10182 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10183 gen_rtx_IOR (SImode,
10184 gen_rtx_ASHIFT
10185 (SImode,
10186 gen_rtx_SUBREG (SImode, operands[0], 0),
10187 GEN_INT (8)),
10188 scratch));
10189 else
10190 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10191 gen_rtx_IOR (SImode,
10192 gen_rtx_ASHIFT (SImode, scratch,
10193 GEN_INT (8)),
10194 gen_rtx_SUBREG (SImode, operands[0], 0)));
10195 }
10196
10197 /* Handle storing a half-word to memory during reload by synthesizing as two
10198 byte stores. Take care not to clobber the input values until after we
10199 have moved them somewhere safe. This code assumes that if the DImode
10200 scratch in operands[2] overlaps either the input value or output address
10201 in some way, then that value must die in this insn (we absolutely need
10202 two scratch registers for some corner cases). */
10203 void
10204 arm_reload_out_hi (rtx *operands)
10205 {
10206 rtx ref = operands[0];
10207 rtx outval = operands[1];
10208 rtx base, scratch;
10209 HOST_WIDE_INT offset = 0;
10210
10211 if (GET_CODE (ref) == SUBREG)
10212 {
10213 offset = SUBREG_BYTE (ref);
10214 ref = SUBREG_REG (ref);
10215 }
10216
10217 if (GET_CODE (ref) == REG)
10218 {
10219 /* We have a pseudo which has been spilt onto the stack; there
10220 are two cases here: the first where there is a simple
10221 stack-slot replacement and a second where the stack-slot is
10222 out of range, or is used as a subreg. */
10223 if (reg_equiv_mem[REGNO (ref)])
10224 {
10225 ref = reg_equiv_mem[REGNO (ref)];
10226 base = find_replacement (&XEXP (ref, 0));
10227 }
10228 else
10229 /* The slot is out of range, or was dressed up in a SUBREG. */
10230 base = reg_equiv_address[REGNO (ref)];
10231 }
10232 else
10233 base = find_replacement (&XEXP (ref, 0));
10234
10235 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10236
10237 /* Handle the case where the address is too complex to be offset by 1. */
10238 if (GET_CODE (base) == MINUS
10239 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10240 {
10241 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10242
10243 /* Be careful not to destroy OUTVAL. */
10244 if (reg_overlap_mentioned_p (base_plus, outval))
10245 {
10246 /* Updating base_plus might destroy outval, see if we can
10247 swap the scratch and base_plus. */
10248 if (!reg_overlap_mentioned_p (scratch, outval))
10249 {
10250 rtx tmp = scratch;
10251 scratch = base_plus;
10252 base_plus = tmp;
10253 }
10254 else
10255 {
10256 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10257
10258 /* Be conservative and copy OUTVAL into the scratch now;
10259 this should only be necessary if outval is a subreg
10260 of something larger than a word. */
10261 /* XXX Might this clobber base? I can't see how it can,
10262 since scratch is known to overlap with OUTVAL, and
10263 must be wider than a word. */
10264 emit_insn (gen_movhi (scratch_hi, outval));
10265 outval = scratch_hi;
10266 }
10267 }
10268
10269 emit_set_insn (base_plus, base);
10270 base = base_plus;
10271 }
10272 else if (GET_CODE (base) == PLUS)
10273 {
10274 /* The addend must be CONST_INT, or we would have dealt with it above. */
10275 HOST_WIDE_INT hi, lo;
10276
10277 offset += INTVAL (XEXP (base, 1));
10278 base = XEXP (base, 0);
10279
10280 /* Rework the address into a legal sequence of insns. */
10281 /* Valid range for lo is -4095 -> 4095 */
10282 lo = (offset >= 0
10283 ? (offset & 0xfff)
10284 : -((-offset) & 0xfff));
10285
10286 /* Corner case: if lo is the max offset, then we would be out of range
10287 once we have added the additional 1 below, so bump the msb into the
10288 pre-loading insn(s). */
10289 if (lo == 4095)
10290 lo &= 0x7ff;
10291
10292 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10293 ^ (HOST_WIDE_INT) 0x80000000)
10294 - (HOST_WIDE_INT) 0x80000000);
10295
10296 gcc_assert (hi + lo == offset);
10297
10298 if (hi != 0)
10299 {
10300 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10301
10302 /* Be careful not to destroy OUTVAL. */
10303 if (reg_overlap_mentioned_p (base_plus, outval))
10304 {
10305 /* Updating base_plus might destroy outval, see if we
10306 can swap the scratch and base_plus. */
10307 if (!reg_overlap_mentioned_p (scratch, outval))
10308 {
10309 rtx tmp = scratch;
10310 scratch = base_plus;
10311 base_plus = tmp;
10312 }
10313 else
10314 {
10315 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10316
10317 /* Be conservative and copy outval into scratch now;
10318 this should only be necessary if outval is a
10319 subreg of something larger than a word. */
10320 /* XXX Might this clobber base? I can't see how it
10321 can, since scratch is known to overlap with
10322 outval. */
10323 emit_insn (gen_movhi (scratch_hi, outval));
10324 outval = scratch_hi;
10325 }
10326 }
10327
10328 /* Get the base address; addsi3 knows how to handle constants
10329 that require more than one insn. */
10330 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10331 base = base_plus;
10332 offset = lo;
10333 }
10334 }
10335
10336 if (BYTES_BIG_ENDIAN)
10337 {
10338 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10339 plus_constant (base, offset + 1)),
10340 gen_lowpart (QImode, outval)));
10341 emit_insn (gen_lshrsi3 (scratch,
10342 gen_rtx_SUBREG (SImode, outval, 0),
10343 GEN_INT (8)));
10344 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10345 gen_lowpart (QImode, scratch)));
10346 }
10347 else
10348 {
10349 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10350 gen_lowpart (QImode, outval)));
10351 emit_insn (gen_lshrsi3 (scratch,
10352 gen_rtx_SUBREG (SImode, outval, 0),
10353 GEN_INT (8)));
10354 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10355 plus_constant (base, offset + 1)),
10356 gen_lowpart (QImode, scratch)));
10357 }
10358 }
10359
10360 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10361 (padded to the size of a word) should be passed in a register. */
10362
10363 static bool
10364 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10365 {
10366 if (TARGET_AAPCS_BASED)
10367 return must_pass_in_stack_var_size (mode, type);
10368 else
10369 return must_pass_in_stack_var_size_or_pad (mode, type);
10370 }
10371
10372
10373 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10374 Return true if an argument passed on the stack should be padded upwards,
10375 i.e. if the least-significant byte has useful data.
10376 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10377 aggregate types are placed in the lowest memory address. */
10378
10379 bool
10380 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
10381 {
10382 if (!TARGET_AAPCS_BASED)
10383 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
10384
10385 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
10386 return false;
10387
10388 return true;
10389 }
10390
10391
10392 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10393 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10394 byte of the register has useful data, and return the opposite if the
10395 most significant byte does.
10396 For AAPCS, small aggregates and small complex types are always padded
10397 upwards. */
10398
10399 bool
10400 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
10401 tree type, int first ATTRIBUTE_UNUSED)
10402 {
10403 if (TARGET_AAPCS_BASED
10404 && BYTES_BIG_ENDIAN
10405 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
10406 && int_size_in_bytes (type) <= 4)
10407 return true;
10408
10409 /* Otherwise, use default padding. */
10410 return !BYTES_BIG_ENDIAN;
10411 }
10412
10413 \f
10414 /* Print a symbolic form of X to the debug file, F. */
10415 static void
10416 arm_print_value (FILE *f, rtx x)
10417 {
10418 switch (GET_CODE (x))
10419 {
10420 case CONST_INT:
10421 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
10422 return;
10423
10424 case CONST_DOUBLE:
10425 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
10426 return;
10427
10428 case CONST_VECTOR:
10429 {
10430 int i;
10431
10432 fprintf (f, "<");
10433 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
10434 {
10435 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
10436 if (i < (CONST_VECTOR_NUNITS (x) - 1))
10437 fputc (',', f);
10438 }
10439 fprintf (f, ">");
10440 }
10441 return;
10442
10443 case CONST_STRING:
10444 fprintf (f, "\"%s\"", XSTR (x, 0));
10445 return;
10446
10447 case SYMBOL_REF:
10448 fprintf (f, "`%s'", XSTR (x, 0));
10449 return;
10450
10451 case LABEL_REF:
10452 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
10453 return;
10454
10455 case CONST:
10456 arm_print_value (f, XEXP (x, 0));
10457 return;
10458
10459 case PLUS:
10460 arm_print_value (f, XEXP (x, 0));
10461 fprintf (f, "+");
10462 arm_print_value (f, XEXP (x, 1));
10463 return;
10464
10465 case PC:
10466 fprintf (f, "pc");
10467 return;
10468
10469 default:
10470 fprintf (f, "????");
10471 return;
10472 }
10473 }
10474 \f
10475 /* Routines for manipulation of the constant pool. */
10476
10477 /* Arm instructions cannot load a large constant directly into a
10478 register; they have to come from a pc relative load. The constant
10479 must therefore be placed in the addressable range of the pc
10480 relative load. Depending on the precise pc relative load
10481 instruction the range is somewhere between 256 bytes and 4k. This
10482 means that we often have to dump a constant inside a function, and
10483 generate code to branch around it.
10484
10485 It is important to minimize this, since the branches will slow
10486 things down and make the code larger.
10487
10488 Normally we can hide the table after an existing unconditional
10489 branch so that there is no interruption of the flow, but in the
10490 worst case the code looks like this:
10491
10492 ldr rn, L1
10493 ...
10494 b L2
10495 align
10496 L1: .long value
10497 L2:
10498 ...
10499
10500 ldr rn, L3
10501 ...
10502 b L4
10503 align
10504 L3: .long value
10505 L4:
10506 ...
10507
10508 We fix this by performing a scan after scheduling, which notices
10509 which instructions need to have their operands fetched from the
10510 constant table and builds the table.
10511
10512 The algorithm starts by building a table of all the constants that
10513 need fixing up and all the natural barriers in the function (places
10514 where a constant table can be dropped without breaking the flow).
10515 For each fixup we note how far the pc-relative replacement will be
10516 able to reach and the offset of the instruction into the function.
10517
10518 Having built the table we then group the fixes together to form
10519 tables that are as large as possible (subject to addressing
10520 constraints) and emit each table of constants after the last
10521 barrier that is within range of all the instructions in the group.
10522 If a group does not contain a barrier, then we forcibly create one
10523 by inserting a jump instruction into the flow. Once the table has
10524 been inserted, the insns are then modified to reference the
10525 relevant entry in the pool.
10526
10527 Possible enhancements to the algorithm (not implemented) are:
10528
10529 1) For some processors and object formats, there may be benefit in
10530 aligning the pools to the start of cache lines; this alignment
10531 would need to be taken into account when calculating addressability
10532 of a pool. */
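/* As a rough guide to the ranges involved (approximate figures which depend
   on the exact instruction pattern): an ARM-mode LDR can reach a literal
   about 4k bytes away, halfword and doubleword literal loads only about
   256 bytes, VFP literal loads about 1k, and Thumb-1 PC-relative loads can
   only look forwards, again about 1k.  */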
10533
10534 /* These typedefs are located at the start of this file, so that
10535 they can be used in the prototypes there. This comment is to
10536 remind readers of that fact so that the following structures
10537 can be understood more easily.
10538
10539 typedef struct minipool_node Mnode;
10540 typedef struct minipool_fixup Mfix; */
10541
10542 struct minipool_node
10543 {
10544 /* Doubly linked chain of entries. */
10545 Mnode * next;
10546 Mnode * prev;
10547 /* The maximum offset into the code at which this entry can be placed. While
10548 pushing fixes for forward references, all entries are sorted in order
10549 of increasing max_address. */
10550 HOST_WIDE_INT max_address;
10551 /* Similarly for an entry inserted for a backwards ref. */
10552 HOST_WIDE_INT min_address;
10553 /* The number of fixes referencing this entry. This can become zero
10554 if we "unpush" an entry. In this case we ignore the entry when we
10555 come to emit the code. */
10556 int refcount;
10557 /* The offset from the start of the minipool. */
10558 HOST_WIDE_INT offset;
10559 /* The value in table. */
10560 rtx value;
10561 /* The mode of value. */
10562 enum machine_mode mode;
10563 /* The size of the value. With iWMMXt enabled,
10564 sizes > 4 also imply an alignment of 8 bytes. */
10565 int fix_size;
10566 };
10567
10568 struct minipool_fixup
10569 {
10570 Mfix * next;
10571 rtx insn;
10572 HOST_WIDE_INT address;
10573 rtx * loc;
10574 enum machine_mode mode;
10575 int fix_size;
10576 rtx value;
10577 Mnode * minipool;
10578 HOST_WIDE_INT forwards;
10579 HOST_WIDE_INT backwards;
10580 };
10581
10582 /* Fixes less than a word need padding out to a word boundary. */
10583 #define MINIPOOL_FIX_SIZE(mode) \
10584 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
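/* For example, an HImode fix still occupies 4 bytes in the pool, while a
   DImode or DFmode fix occupies 8.  */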
10585
10586 static Mnode * minipool_vector_head;
10587 static Mnode * minipool_vector_tail;
10588 static rtx minipool_vector_label;
10589 static int minipool_pad;
10590
10591 /* The linked list of all minipool fixes required for this function. */
10592 Mfix * minipool_fix_head;
10593 Mfix * minipool_fix_tail;
10594 /* The fix entry for the current minipool, once it has been placed. */
10595 Mfix * minipool_barrier;
10596
10597 /* Determines if INSN is the start of a jump table. Returns the end
10598 of the TABLE or NULL_RTX. */
10599 static rtx
10600 is_jump_table (rtx insn)
10601 {
10602 rtx table;
10603
10604 if (GET_CODE (insn) == JUMP_INSN
10605 && JUMP_LABEL (insn) != NULL
10606 && ((table = next_real_insn (JUMP_LABEL (insn)))
10607 == next_real_insn (insn))
10608 && table != NULL
10609 && GET_CODE (table) == JUMP_INSN
10610 && (GET_CODE (PATTERN (table)) == ADDR_VEC
10611 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
10612 return table;
10613
10614 return NULL_RTX;
10615 }
10616
10617 #ifndef JUMP_TABLES_IN_TEXT_SECTION
10618 #define JUMP_TABLES_IN_TEXT_SECTION 0
10619 #endif
10620
10621 static HOST_WIDE_INT
10622 get_jump_table_size (rtx insn)
10623 {
10624 /* ADDR_VECs only take room if read-only data goes into the text
10625 section. */
10626 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
10627 {
10628 rtx body = PATTERN (insn);
10629 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
10630 HOST_WIDE_INT size;
10631 HOST_WIDE_INT modesize;
10632
10633 modesize = GET_MODE_SIZE (GET_MODE (body));
10634 size = modesize * XVECLEN (body, elt);
10635 switch (modesize)
10636 {
10637 case 1:
10638 /* Round up size of TBB table to a halfword boundary. */
10639 size = (size + 1) & ~(HOST_WIDE_INT)1;
10640 break;
10641 case 2:
10642 /* No padding necessary for TBH. */
10643 break;
10644 case 4:
10645 /* Add two bytes for alignment on Thumb. */
10646 if (TARGET_THUMB)
10647 size += 2;
10648 break;
10649 default:
10650 gcc_unreachable ();
10651 }
10652 return size;
10653 }
10654
10655 return 0;
10656 }
10657
10658 /* Move a minipool fix MP from its current location to before MAX_MP.
10659 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10660 constraints may need updating. */
10661 static Mnode *
10662 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
10663 HOST_WIDE_INT max_address)
10664 {
10665 /* The code below assumes these are different. */
10666 gcc_assert (mp != max_mp);
10667
10668 if (max_mp == NULL)
10669 {
10670 if (max_address < mp->max_address)
10671 mp->max_address = max_address;
10672 }
10673 else
10674 {
10675 if (max_address > max_mp->max_address - mp->fix_size)
10676 mp->max_address = max_mp->max_address - mp->fix_size;
10677 else
10678 mp->max_address = max_address;
10679
10680 /* Unlink MP from its current position. Since max_mp is non-null,
10681 mp->prev must be non-null. */
10682 mp->prev->next = mp->next;
10683 if (mp->next != NULL)
10684 mp->next->prev = mp->prev;
10685 else
10686 minipool_vector_tail = mp->prev;
10687
10688 /* Re-insert it before MAX_MP. */
10689 mp->next = max_mp;
10690 mp->prev = max_mp->prev;
10691 max_mp->prev = mp;
10692
10693 if (mp->prev != NULL)
10694 mp->prev->next = mp;
10695 else
10696 minipool_vector_head = mp;
10697 }
10698
10699 /* Save the new entry. */
10700 max_mp = mp;
10701
10702 /* Scan over the preceding entries and adjust their addresses as
10703 required. */
10704 while (mp->prev != NULL
10705 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10706 {
10707 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10708 mp = mp->prev;
10709 }
10710
10711 return max_mp;
10712 }
10713
10714 /* Add a constant to the minipool for a forward reference. Returns the
10715 node added or NULL if the constant will not fit in this pool. */
10716 static Mnode *
10717 add_minipool_forward_ref (Mfix *fix)
10718 {
10719 /* If set, max_mp is the first pool_entry that has a lower
10720 constraint than the one we are trying to add. */
10721 Mnode * max_mp = NULL;
10722 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
10723 Mnode * mp;
10724
10725 /* If the minipool starts before the end of FIX->INSN then this FIX
10726 cannot be placed into the current pool. Furthermore, adding the
10727 new constant pool entry may cause the pool to start FIX_SIZE bytes
10728 earlier. */
10729 if (minipool_vector_head &&
10730 (fix->address + get_attr_length (fix->insn)
10731 >= minipool_vector_head->max_address - fix->fix_size))
10732 return NULL;
10733
10734 /* Scan the pool to see if a constant with the same value has
10735 already been added. While we are doing this, also note the
10736 location where we must insert the constant if it doesn't already
10737 exist. */
10738 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10739 {
10740 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10741 && fix->mode == mp->mode
10742 && (GET_CODE (fix->value) != CODE_LABEL
10743 || (CODE_LABEL_NUMBER (fix->value)
10744 == CODE_LABEL_NUMBER (mp->value)))
10745 && rtx_equal_p (fix->value, mp->value))
10746 {
10747 /* More than one fix references this entry. */
10748 mp->refcount++;
10749 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
10750 }
10751
10752 /* Note the insertion point if necessary. */
10753 if (max_mp == NULL
10754 && mp->max_address > max_address)
10755 max_mp = mp;
10756
10757 /* If we are inserting an 8-byte aligned quantity and
10758 we have not already found an insertion point, then
10759 make sure that all such 8-byte aligned quantities are
10760 placed at the start of the pool. */
10761 if (ARM_DOUBLEWORD_ALIGN
10762 && max_mp == NULL
10763 && fix->fix_size >= 8
10764 && mp->fix_size < 8)
10765 {
10766 max_mp = mp;
10767 max_address = mp->max_address;
10768 }
10769 }
10770
10771 /* The value is not currently in the minipool, so we need to create
10772 a new entry for it. If MAX_MP is NULL, the entry will be put on
10773 the end of the list since the placement is less constrained than
10774 any existing entry. Otherwise, we insert the new fix before
10775 MAX_MP and, if necessary, adjust the constraints on the other
10776 entries. */
10777 mp = XNEW (Mnode);
10778 mp->fix_size = fix->fix_size;
10779 mp->mode = fix->mode;
10780 mp->value = fix->value;
10781 mp->refcount = 1;
10782 /* Not yet required for a backwards ref. */
10783 mp->min_address = -65536;
10784
10785 if (max_mp == NULL)
10786 {
10787 mp->max_address = max_address;
10788 mp->next = NULL;
10789 mp->prev = minipool_vector_tail;
10790
10791 if (mp->prev == NULL)
10792 {
10793 minipool_vector_head = mp;
10794 minipool_vector_label = gen_label_rtx ();
10795 }
10796 else
10797 mp->prev->next = mp;
10798
10799 minipool_vector_tail = mp;
10800 }
10801 else
10802 {
10803 if (max_address > max_mp->max_address - mp->fix_size)
10804 mp->max_address = max_mp->max_address - mp->fix_size;
10805 else
10806 mp->max_address = max_address;
10807
10808 mp->next = max_mp;
10809 mp->prev = max_mp->prev;
10810 max_mp->prev = mp;
10811 if (mp->prev != NULL)
10812 mp->prev->next = mp;
10813 else
10814 minipool_vector_head = mp;
10815 }
10816
10817 /* Save the new entry. */
10818 max_mp = mp;
10819
10820 /* Scan over the preceding entries and adjust their addresses as
10821 required. */
10822 while (mp->prev != NULL
10823 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
10824 {
10825 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
10826 mp = mp->prev;
10827 }
10828
10829 return max_mp;
10830 }
10831
10832 static Mnode *
10833 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
10834 HOST_WIDE_INT min_address)
10835 {
10836 HOST_WIDE_INT offset;
10837
10838 /* The code below assumes these are different. */
10839 gcc_assert (mp != min_mp);
10840
10841 if (min_mp == NULL)
10842 {
10843 if (min_address > mp->min_address)
10844 mp->min_address = min_address;
10845 }
10846 else
10847 {
10848 /* We will adjust this below if it is too loose. */
10849 mp->min_address = min_address;
10850
10851 /* Unlink MP from its current position. Since min_mp is non-null,
10852 mp->next must be non-null. */
10853 mp->next->prev = mp->prev;
10854 if (mp->prev != NULL)
10855 mp->prev->next = mp->next;
10856 else
10857 minipool_vector_head = mp->next;
10858
10859 /* Reinsert it after MIN_MP. */
10860 mp->prev = min_mp;
10861 mp->next = min_mp->next;
10862 min_mp->next = mp;
10863 if (mp->next != NULL)
10864 mp->next->prev = mp;
10865 else
10866 minipool_vector_tail = mp;
10867 }
10868
10869 min_mp = mp;
10870
10871 offset = 0;
10872 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
10873 {
10874 mp->offset = offset;
10875 if (mp->refcount > 0)
10876 offset += mp->fix_size;
10877
10878 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
10879 mp->next->min_address = mp->min_address + mp->fix_size;
10880 }
10881
10882 return min_mp;
10883 }
10884
10885 /* Add a constant to the minipool for a backward reference. Returns the
10886 node added or NULL if the constant will not fit in this pool.
10887
10888 Note that the code for inserting a backwards reference can be
10889 somewhat confusing because the calculated offsets for each fix do
10890 not take into account the size of the pool (which is still under
10891 construction). */
10892 static Mnode *
10893 add_minipool_backward_ref (Mfix *fix)
10894 {
10895 /* If set, min_mp is the last pool_entry that has a lower constraint
10896 than the one we are trying to add. */
10897 Mnode *min_mp = NULL;
10898 /* This can be negative, since it is only a constraint. */
10899 HOST_WIDE_INT min_address = fix->address - fix->backwards;
10900 Mnode *mp;
10901
10902 /* If we can't reach the current pool from this insn, or if we can't
10903 insert this entry at the end of the pool without pushing other
10904 fixes out of range, then we don't try. This ensures that we
10905 can't fail later on. */
10906 if (min_address >= minipool_barrier->address
10907 || (minipool_vector_tail->min_address + fix->fix_size
10908 >= minipool_barrier->address))
10909 return NULL;
10910
10911 /* Scan the pool to see if a constant with the same value has
10912 already been added. While we are doing this, also note the
10913 location where we must insert the constant if it doesn't already
10914 exist. */
10915 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
10916 {
10917 if (GET_CODE (fix->value) == GET_CODE (mp->value)
10918 && fix->mode == mp->mode
10919 && (GET_CODE (fix->value) != CODE_LABEL
10920 || (CODE_LABEL_NUMBER (fix->value)
10921 == CODE_LABEL_NUMBER (mp->value)))
10922 && rtx_equal_p (fix->value, mp->value)
10923 /* Check that there is enough slack to move this entry to the
10924 end of the table (this is conservative). */
10925 && (mp->max_address
10926 > (minipool_barrier->address
10927 + minipool_vector_tail->offset
10928 + minipool_vector_tail->fix_size)))
10929 {
10930 mp->refcount++;
10931 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
10932 }
10933
10934 if (min_mp != NULL)
10935 mp->min_address += fix->fix_size;
10936 else
10937 {
10938 /* Note the insertion point if necessary. */
10939 if (mp->min_address < min_address)
10940 {
10941 /* For now, we do not allow the insertion of 8-byte alignment
10942 requiring nodes anywhere but at the start of the pool. */
10943 if (ARM_DOUBLEWORD_ALIGN
10944 && fix->fix_size >= 8 && mp->fix_size < 8)
10945 return NULL;
10946 else
10947 min_mp = mp;
10948 }
10949 else if (mp->max_address
10950 < minipool_barrier->address + mp->offset + fix->fix_size)
10951 {
10952 /* Inserting before this entry would push the fix beyond
10953 its maximum address (which can happen if we have
10954 re-located a forwards fix); force the new fix to come
10955 after it. */
10956 if (ARM_DOUBLEWORD_ALIGN
10957 && fix->fix_size >= 8 && mp->fix_size < 8)
10958 return NULL;
10959 else
10960 {
10961 min_mp = mp;
10962 min_address = mp->min_address + fix->fix_size;
10963 }
10964 }
10965 /* Do not insert a non-8-byte aligned quantity before 8-byte
10966 aligned quantities. */
10967 else if (ARM_DOUBLEWORD_ALIGN
10968 && fix->fix_size < 8
10969 && mp->fix_size >= 8)
10970 {
10971 min_mp = mp;
10972 min_address = mp->min_address + fix->fix_size;
10973 }
10974 }
10975 }
10976
10977 /* We need to create a new entry. */
10978 mp = XNEW (Mnode);
10979 mp->fix_size = fix->fix_size;
10980 mp->mode = fix->mode;
10981 mp->value = fix->value;
10982 mp->refcount = 1;
10983 mp->max_address = minipool_barrier->address + 65536;
10984
10985 mp->min_address = min_address;
10986
10987 if (min_mp == NULL)
10988 {
10989 mp->prev = NULL;
10990 mp->next = minipool_vector_head;
10991
10992 if (mp->next == NULL)
10993 {
10994 minipool_vector_tail = mp;
10995 minipool_vector_label = gen_label_rtx ();
10996 }
10997 else
10998 mp->next->prev = mp;
10999
11000 minipool_vector_head = mp;
11001 }
11002 else
11003 {
11004 mp->next = min_mp->next;
11005 mp->prev = min_mp;
11006 min_mp->next = mp;
11007
11008 if (mp->next != NULL)
11009 mp->next->prev = mp;
11010 else
11011 minipool_vector_tail = mp;
11012 }
11013
11014 /* Save the new entry. */
11015 min_mp = mp;
11016
11017 if (mp->prev)
11018 mp = mp->prev;
11019 else
11020 mp->offset = 0;
11021
11022 /* Scan over the following entries and adjust their offsets. */
11023 while (mp->next != NULL)
11024 {
11025 if (mp->next->min_address < mp->min_address + mp->fix_size)
11026 mp->next->min_address = mp->min_address + mp->fix_size;
11027
11028 if (mp->refcount)
11029 mp->next->offset = mp->offset + mp->fix_size;
11030 else
11031 mp->next->offset = mp->offset;
11032
11033 mp = mp->next;
11034 }
11035
11036 return min_mp;
11037 }
11038
11039 static void
11040 assign_minipool_offsets (Mfix *barrier)
11041 {
11042 HOST_WIDE_INT offset = 0;
11043 Mnode *mp;
11044
11045 minipool_barrier = barrier;
11046
11047 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11048 {
11049 mp->offset = offset;
11050
11051 if (mp->refcount > 0)
11052 offset += mp->fix_size;
11053 }
11054 }
11055
11056 /* Output the literal table. */
11057 static void
11058 dump_minipool (rtx scan)
11059 {
11060 Mnode * mp;
11061 Mnode * nmp;
11062 int align64 = 0;
11063
11064 if (ARM_DOUBLEWORD_ALIGN)
11065 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11066 if (mp->refcount > 0 && mp->fix_size >= 8)
11067 {
11068 align64 = 1;
11069 break;
11070 }
11071
11072 if (dump_file)
11073 fprintf (dump_file,
11074 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11075 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11076
11077 scan = emit_label_after (gen_label_rtx (), scan);
11078 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11079 scan = emit_label_after (minipool_vector_label, scan);
11080
11081 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11082 {
11083 if (mp->refcount > 0)
11084 {
11085 if (dump_file)
11086 {
11087 fprintf (dump_file,
11088 ";; Offset %u, min %ld, max %ld ",
11089 (unsigned) mp->offset, (unsigned long) mp->min_address,
11090 (unsigned long) mp->max_address);
11091 arm_print_value (dump_file, mp->value);
11092 fputc ('\n', dump_file);
11093 }
11094
11095 switch (mp->fix_size)
11096 {
11097 #ifdef HAVE_consttable_1
11098 case 1:
11099 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11100 break;
11101
11102 #endif
11103 #ifdef HAVE_consttable_2
11104 case 2:
11105 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11106 break;
11107
11108 #endif
11109 #ifdef HAVE_consttable_4
11110 case 4:
11111 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11112 break;
11113
11114 #endif
11115 #ifdef HAVE_consttable_8
11116 case 8:
11117 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11118 break;
11119
11120 #endif
11121 #ifdef HAVE_consttable_16
11122 case 16:
11123 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11124 break;
11125
11126 #endif
11127 default:
11128 gcc_unreachable ();
11129 }
11130 }
11131
11132 nmp = mp->next;
11133 free (mp);
11134 }
11135
11136 minipool_vector_head = minipool_vector_tail = NULL;
11137 scan = emit_insn_after (gen_consttable_end (), scan);
11138 scan = emit_barrier_after (scan);
11139 }
11140
11141 /* Return the cost of forcibly inserting a barrier after INSN. */
11142 static int
11143 arm_barrier_cost (rtx insn)
11144 {
11145 /* Basing the location of the pool on the loop depth is preferable,
11146 but at the moment, the basic block information seems to be
11147 corrupt by this stage of the compilation. */
11148 int base_cost = 50;
11149 rtx next = next_nonnote_insn (insn);
11150
11151 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11152 base_cost -= 20;
11153
11154 switch (GET_CODE (insn))
11155 {
11156 case CODE_LABEL:
11157 /* It will always be better to place the table before the label, rather
11158 than after it. */
11159 return 50;
11160
11161 case INSN:
11162 case CALL_INSN:
11163 return base_cost;
11164
11165 case JUMP_INSN:
11166 return base_cost - 10;
11167
11168 default:
11169 return base_cost + 10;
11170 }
11171 }
11172
11173 /* Find the best place in the insn stream in the range
11174 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11175 Create the barrier by inserting a jump and add a new fix entry for
11176 it. */
11177 static Mfix *
11178 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11179 {
11180 HOST_WIDE_INT count = 0;
11181 rtx barrier;
11182 rtx from = fix->insn;
11183 /* The instruction after which we will insert the jump. */
11184 rtx selected = NULL;
11185 int selected_cost;
11186 /* The address at which the jump instruction will be placed. */
11187 HOST_WIDE_INT selected_address;
11188 Mfix * new_fix;
11189 HOST_WIDE_INT max_count = max_address - fix->address;
11190 rtx label = gen_label_rtx ();
11191
11192 selected_cost = arm_barrier_cost (from);
11193 selected_address = fix->address;
11194
11195 while (from && count < max_count)
11196 {
11197 rtx tmp;
11198 int new_cost;
11199
11200 /* This code shouldn't have been called if there was a natural barrier
11201 within range. */
11202 gcc_assert (GET_CODE (from) != BARRIER);
11203
11204 /* Count the length of this insn. */
11205 count += get_attr_length (from);
11206
11207 /* If there is a jump table, add its length. */
11208 tmp = is_jump_table (from);
11209 if (tmp != NULL)
11210 {
11211 count += get_jump_table_size (tmp);
11212
11213 /* Jump tables aren't in a basic block, so base the cost on
11214 the dispatch insn. If we select this location, we will
11215 still put the pool after the table. */
11216 new_cost = arm_barrier_cost (from);
11217
11218 if (count < max_count
11219 && (!selected || new_cost <= selected_cost))
11220 {
11221 selected = tmp;
11222 selected_cost = new_cost;
11223 selected_address = fix->address + count;
11224 }
11225
11226 /* Continue after the dispatch table. */
11227 from = NEXT_INSN (tmp);
11228 continue;
11229 }
11230
11231 new_cost = arm_barrier_cost (from);
11232
11233 if (count < max_count
11234 && (!selected || new_cost <= selected_cost))
11235 {
11236 selected = from;
11237 selected_cost = new_cost;
11238 selected_address = fix->address + count;
11239 }
11240
11241 from = NEXT_INSN (from);
11242 }
11243
11244 /* Make sure that we found a place to insert the jump. */
11245 gcc_assert (selected);
11246
11247 /* Create a new JUMP_INSN that branches around a barrier. */
11248 from = emit_jump_insn_after (gen_jump (label), selected);
11249 JUMP_LABEL (from) = label;
11250 barrier = emit_barrier_after (from);
11251 emit_label_after (label, barrier);
11252
11253 /* Create a minipool barrier entry for the new barrier. */
11254 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11255 new_fix->insn = barrier;
11256 new_fix->address = selected_address;
11257 new_fix->next = fix->next;
11258 fix->next = new_fix;
11259
11260 return new_fix;
11261 }
11262
11263 /* Record that there is a natural barrier in the insn stream at
11264 ADDRESS. */
11265 static void
11266 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11267 {
11268 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11269
11270 fix->insn = insn;
11271 fix->address = address;
11272
11273 fix->next = NULL;
11274 if (minipool_fix_head != NULL)
11275 minipool_fix_tail->next = fix;
11276 else
11277 minipool_fix_head = fix;
11278
11279 minipool_fix_tail = fix;
11280 }
11281
11282 /* Record INSN, which will need fixing up to load a value from the
11283 minipool. ADDRESS is the offset of the insn from the start of the
11284 function; LOC is a pointer to the part of the insn which requires
11285 fixing; VALUE is the constant that must be loaded, which is of type
11286 MODE. */
11287 static void
11288 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11289 enum machine_mode mode, rtx value)
11290 {
11291 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11292
11293 fix->insn = insn;
11294 fix->address = address;
11295 fix->loc = loc;
11296 fix->mode = mode;
11297 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11298 fix->value = value;
11299 fix->forwards = get_attr_pool_range (insn);
11300 fix->backwards = get_attr_neg_pool_range (insn);
11301 fix->minipool = NULL;
11302
11303 /* If an insn doesn't have a range defined for it, then it isn't
11304 expecting to be reworked by this code. Better to stop now than
11305 to generate duff assembly code. */
11306 gcc_assert (fix->forwards || fix->backwards);
11307
11308 /* If an entry requires 8-byte alignment then assume all constant pools
11309 require 4 bytes of padding. Trying to do this later on a per-pool
11310 basis is awkward because existing pool entries have to be modified. */
11311 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11312 minipool_pad = 4;
11313
11314 if (dump_file)
11315 {
11316 fprintf (dump_file,
11317 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11318 GET_MODE_NAME (mode),
11319 INSN_UID (insn), (unsigned long) address,
11320 -1 * (long)fix->backwards, (long)fix->forwards);
11321 arm_print_value (dump_file, fix->value);
11322 fprintf (dump_file, "\n");
11323 }
11324
11325 /* Add it to the chain of fixes. */
11326 fix->next = NULL;
11327
11328 if (minipool_fix_head != NULL)
11329 minipool_fix_tail->next = fix;
11330 else
11331 minipool_fix_head = fix;
11332
11333 minipool_fix_tail = fix;
11334 }
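
/* For illustration (all numbers here are made up): when a dump file is
   active, the fprintf above produces a line such as
       ;; SImode fixup for i42; addr 104, range (0,4092): 0x12345678
   meaning that insn 42, which lies 104 bytes into the function, needs the
   constant 0x12345678 placed in a minipool within the given byte range of
   the insn.  */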
11335
11336 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11337 Returns the number of insns needed, or 99 if we don't know how to
11338 do it. */
11339 int
11340 arm_const_double_inline_cost (rtx val)
11341 {
11342 rtx lowpart, highpart;
11343 enum machine_mode mode;
11344
11345 mode = GET_MODE (val);
11346
11347 if (mode == VOIDmode)
11348 mode = DImode;
11349
11350 gcc_assert (GET_MODE_SIZE (mode) == 8);
11351
11352 lowpart = gen_lowpart (SImode, val);
11353 highpart = gen_highpart_mode (SImode, mode, val);
11354
11355 gcc_assert (GET_CODE (lowpart) == CONST_INT);
11356 gcc_assert (GET_CODE (highpart) == CONST_INT);
11357
11358 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
11359 NULL_RTX, NULL_RTX, 0, 0)
11360 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
11361 NULL_RTX, NULL_RTX, 0, 0));
11362 }
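
/* For illustration: for the DImode value 0x0000010000000001 the low part
   is 1 and the high part is 0x100; both are valid ARM immediates, so each
   arm_gen_constant call above returns 1 and the total cost is 2 insns.  */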
11363
11364 /* Return true if it is worthwhile to split a 64-bit constant into two
11365 32-bit operations. This is the case if optimizing for size, or
11366 if we have load delay slots, or if one 32-bit part can be done with
11367 a single data operation. */
11368 bool
11369 arm_const_double_by_parts (rtx val)
11370 {
11371 enum machine_mode mode = GET_MODE (val);
11372 rtx part;
11373
11374 if (optimize_size || arm_ld_sched)
11375 return true;
11376
11377 if (mode == VOIDmode)
11378 mode = DImode;
11379
11380 part = gen_highpart_mode (SImode, mode, val);
11381
11382 gcc_assert (GET_CODE (part) == CONST_INT);
11383
11384 if (const_ok_for_arm (INTVAL (part))
11385 || const_ok_for_arm (~INTVAL (part)))
11386 return true;
11387
11388 part = gen_lowpart (SImode, val);
11389
11390 gcc_assert (GET_CODE (part) == CONST_INT);
11391
11392 if (const_ok_for_arm (INTVAL (part))
11393 || const_ok_for_arm (~INTVAL (part)))
11394 return true;
11395
11396 return false;
11397 }
11398
11399 /* Scan INSN and note any of its operands that need fixing.
11400 If DO_PUSHES is false we do not actually push any of the fixups
11401 needed. The function returns TRUE if any fixups were needed/pushed.
11402 This is used by arm_memory_load_p() which needs to know about loads
11403 of constants that will be converted into minipool loads. */
11404 static bool
11405 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
11406 {
11407 bool result = false;
11408 int opno;
11409
11410 extract_insn (insn);
11411
11412 if (!constrain_operands (1))
11413 fatal_insn_not_found (insn);
11414
11415 if (recog_data.n_alternatives == 0)
11416 return false;
11417
11418 /* Fill in recog_op_alt with information about the constraints of
11419 this insn. */
11420 preprocess_constraints ();
11421
11422 for (opno = 0; opno < recog_data.n_operands; opno++)
11423 {
11424 /* Things we need to fix can only occur in inputs. */
11425 if (recog_data.operand_type[opno] != OP_IN)
11426 continue;
11427
11428 /* If this alternative is a memory reference, then any mention
11429 of constants in this alternative is really to fool reload
11430 into allowing us to accept one there. We need to fix them up
11431 now so that we output the right code. */
11432 if (recog_op_alt[opno][which_alternative].memory_ok)
11433 {
11434 rtx op = recog_data.operand[opno];
11435
11436 if (CONSTANT_P (op))
11437 {
11438 if (do_pushes)
11439 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
11440 recog_data.operand_mode[opno], op);
11441 result = true;
11442 }
11443 else if (GET_CODE (op) == MEM
11444 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
11445 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
11446 {
11447 if (do_pushes)
11448 {
11449 rtx cop = avoid_constant_pool_reference (op);
11450
11451 /* Casting the address of something to a mode narrower
11452 than a word can cause avoid_constant_pool_reference()
11453 to return the pool reference itself. That's no good to
11454 us here. Let's just hope that we can use the
11455 constant pool value directly. */
11456 if (op == cop)
11457 cop = get_pool_constant (XEXP (op, 0));
11458
11459 push_minipool_fix (insn, address,
11460 recog_data.operand_loc[opno],
11461 recog_data.operand_mode[opno], cop);
11462 }
11463
11464 result = true;
11465 }
11466 }
11467 }
11468
11469 return result;
11470 }
11471
11472 /* GCC puts the pool in the wrong place for ARM, since we can only
11473 load addresses a limited distance around the pc. We do some
11474 special munging to move the constant pool values to the correct
11475 point in the code. */
11476 static void
11477 arm_reorg (void)
11478 {
11479 rtx insn;
11480 HOST_WIDE_INT address = 0;
11481 Mfix * fix;
11482
11483 minipool_fix_head = minipool_fix_tail = NULL;
11484
11485 /* The first insn must always be a note, or the code below won't
11486 scan it properly. */
11487 insn = get_insns ();
11488 gcc_assert (GET_CODE (insn) == NOTE);
11489 minipool_pad = 0;
11490
11491 /* Scan all the insns and record the operands that will need fixing. */
11492 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
11493 {
11494 if (TARGET_CIRRUS_FIX_INVALID_INSNS
11495 && (arm_cirrus_insn_p (insn)
11496 || GET_CODE (insn) == JUMP_INSN
11497 || arm_memory_load_p (insn)))
11498 cirrus_reorg (insn);
11499
11500 if (GET_CODE (insn) == BARRIER)
11501 push_minipool_barrier (insn, address);
11502 else if (INSN_P (insn))
11503 {
11504 rtx table;
11505
11506 note_invalid_constants (insn, address, true);
11507 address += get_attr_length (insn);
11508
11509 /* If the insn is a vector jump, add the size of the table
11510 and skip the table. */
11511 if ((table = is_jump_table (insn)) != NULL)
11512 {
11513 address += get_jump_table_size (table);
11514 insn = table;
11515 }
11516 }
11517 }
11518
11519 fix = minipool_fix_head;
11520
11521 /* Now scan the fixups and perform the required changes. */
11522 while (fix)
11523 {
11524 Mfix * ftmp;
11525 Mfix * fdel;
11526 Mfix * last_added_fix;
11527 Mfix * last_barrier = NULL;
11528 Mfix * this_fix;
11529
11530 /* Skip any further barriers before the next fix. */
11531 while (fix && GET_CODE (fix->insn) == BARRIER)
11532 fix = fix->next;
11533
11534 /* No more fixes. */
11535 if (fix == NULL)
11536 break;
11537
11538 last_added_fix = NULL;
11539
11540 for (ftmp = fix; ftmp; ftmp = ftmp->next)
11541 {
11542 if (GET_CODE (ftmp->insn) == BARRIER)
11543 {
11544 if (ftmp->address >= minipool_vector_head->max_address)
11545 break;
11546
11547 last_barrier = ftmp;
11548 }
11549 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
11550 break;
11551
11552 last_added_fix = ftmp; /* Keep track of the last fix added. */
11553 }
11554
11555 /* If we found a barrier, drop back to that; any fixes that we
11556 could have reached but come after the barrier will now go in
11557 the next mini-pool. */
11558 if (last_barrier != NULL)
11559 {
11560 /* Reduce the refcount for those fixes that won't go into this
11561 pool after all. */
11562 for (fdel = last_barrier->next;
11563 fdel && fdel != ftmp;
11564 fdel = fdel->next)
11565 {
11566 fdel->minipool->refcount--;
11567 fdel->minipool = NULL;
11568 }
11569
11570 ftmp = last_barrier;
11571 }
11572 else
11573 {
11574 /* ftmp is the first fix that we can't fit into this pool and
11575 there are no natural barriers that we could use. Insert a
11576 new barrier in the code somewhere between the previous
11577 fix and this one, and arrange to jump around it. */
11578 HOST_WIDE_INT max_address;
11579
11580 /* The last item on the list of fixes must be a barrier, so
11581 we can never run off the end of the list of fixes without
11582 last_barrier being set. */
11583 gcc_assert (ftmp);
11584
11585 max_address = minipool_vector_head->max_address;
11586 /* Check that there isn't another fix that is in range that
11587 we couldn't fit into this pool because the pool was
11588 already too large: we need to put the pool before such an
11589 instruction. The pool itself may come just after the
11590 fix because create_fix_barrier also allows space for a
11591 jump instruction. */
11592 if (ftmp->address < max_address)
11593 max_address = ftmp->address + 1;
11594
11595 last_barrier = create_fix_barrier (last_added_fix, max_address);
11596 }
11597
11598 assign_minipool_offsets (last_barrier);
11599
11600 while (ftmp)
11601 {
11602 if (GET_CODE (ftmp->insn) != BARRIER
11603 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
11604 == NULL))
11605 break;
11606
11607 ftmp = ftmp->next;
11608 }
11609
11610 /* Scan over the fixes we have identified for this pool, fixing them
11611 up and adding the constants to the pool itself. */
11612 for (this_fix = fix; this_fix && ftmp != this_fix;
11613 this_fix = this_fix->next)
11614 if (GET_CODE (this_fix->insn) != BARRIER)
11615 {
11616 rtx addr
11617 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
11618 minipool_vector_label),
11619 this_fix->minipool->offset);
11620 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
11621 }
11622
11623 dump_minipool (last_barrier->insn);
11624 fix = ftmp;
11625 }
11626
11627 /* From now on we must synthesize any constants that we can't handle
11628 directly. This can happen if the RTL gets split during final
11629 instruction generation. */
11630 after_arm_reorg = 1;
11631
11632 /* Free the minipool memory. */
11633 obstack_free (&minipool_obstack, minipool_startobj);
11634 }
11635 \f
11636 /* Routines to output assembly language. */
11637
11638 /* If the rtx is the correct value then return the string of the number.
11639 In this way we can ensure that valid double constants are generated even
11640 when cross compiling. */
11641 const char *
11642 fp_immediate_constant (rtx x)
11643 {
11644 REAL_VALUE_TYPE r;
11645 int i;
11646
11647 if (!fp_consts_inited)
11648 init_fp_table ();
11649
11650 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11651 for (i = 0; i < 8; i++)
11652 if (REAL_VALUES_EQUAL (r, values_fp[i]))
11653 return strings_fp[i];
11654
11655 gcc_unreachable ();
11656 }
11657
11658 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
11659 static const char *
11660 fp_const_from_val (REAL_VALUE_TYPE *r)
11661 {
11662 int i;
11663
11664 if (!fp_consts_inited)
11665 init_fp_table ();
11666
11667 for (i = 0; i < 8; i++)
11668 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
11669 return strings_fp[i];
11670
11671 gcc_unreachable ();
11672 }
11673
11674 /* Output the operands of a LDM/STM instruction to STREAM.
11675 MASK is the ARM register set mask of which only bits 0-15 are important.
11676 REG is the base register, either the frame pointer or the stack pointer.
11677 INSTR is the possibly suffixed load or store instruction.
11678 RFE is nonzero if the instruction should also copy spsr to cpsr. */
11679
11680 static void
11681 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
11682 unsigned long mask, int rfe)
11683 {
11684 unsigned i;
11685 bool not_first = FALSE;
11686
11687 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
11688 fputc ('\t', stream);
11689 asm_fprintf (stream, instr, reg);
11690 fputc ('{', stream);
11691
11692 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11693 if (mask & (1 << i))
11694 {
11695 if (not_first)
11696 fprintf (stream, ", ");
11697
11698 asm_fprintf (stream, "%r", i);
11699 not_first = TRUE;
11700 }
11701
11702 if (rfe)
11703 fprintf (stream, "}^\n");
11704 else
11705 fprintf (stream, "}\n");
11706 }
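
/* For illustration (register choice and format string are hypothetical):
   calling print_multi_reg (stream, "ldmfd\t%r!, ", SP_REGNUM,
   (1 << 4) | (1 << 5) | (1 << PC_REGNUM), 1) would emit
       ldmfd sp!, {r4, r5, pc}^
   with the trailing '^' added because RFE is nonzero.  */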
11707
11708
11709 /* Output a FLDMD instruction to STREAM.
11710 BASE is the register containing the address.
11711 REG and COUNT specify the register range.
11712 Extra registers may be added to avoid hardware bugs.
11713
11714 We output FLDMD even for ARMv5 VFP implementations. Although
11715 FLDMD is technically not supported until ARMv6, it is believed
11716 that all VFP implementations support its use in this context. */
11717
11718 static void
11719 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
11720 {
11721 int i;
11722
11723 /* Workaround ARM10 VFPr1 bug. */
11724 if (count == 2 && !arm_arch6)
11725 {
11726 if (reg == 15)
11727 reg--;
11728 count++;
11729 }
11730
11731 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
11732 load into multiple parts if we have to handle more than 16 registers. */
11733 if (count > 16)
11734 {
11735 vfp_output_fldmd (stream, base, reg, 16);
11736 vfp_output_fldmd (stream, base, reg + 16, count - 16);
11737 return;
11738 }
11739
11740 fputc ('\t', stream);
11741 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
11742
11743 for (i = reg; i < reg + count; i++)
11744 {
11745 if (i > reg)
11746 fputs (", ", stream);
11747 asm_fprintf (stream, "d%d", i);
11748 }
11749 fputs ("}\n", stream);
11750
11751 }
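
/* For illustration: vfp_output_fldmd (stream, SP_REGNUM, 8, 3) emits
       fldmfdd sp!, {d8, d9, d10}
   and on a pre-ARMv6 core a two-register pop would first be widened to
   three registers by the VFPr1 workaround above.  */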
11752
11753
11754 /* Output the assembly for a store multiple. */
11755
11756 const char *
11757 vfp_output_fstmd (rtx * operands)
11758 {
11759 char pattern[100];
11760 int p;
11761 int base;
11762 int i;
11763
11764 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
11765 p = strlen (pattern);
11766
11767 gcc_assert (GET_CODE (operands[1]) == REG);
11768
11769 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
11770 for (i = 1; i < XVECLEN (operands[2], 0); i++)
11771 {
11772 p += sprintf (&pattern[p], ", d%d", base + i);
11773 }
11774 strcpy (&pattern[p], "}");
11775
11776 output_asm_insn (pattern, operands);
11777 return "";
11778 }
11779
11780
11781 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
11782 number of bytes pushed. */
11783
11784 static int
11785 vfp_emit_fstmd (int base_reg, int count)
11786 {
11787 rtx par;
11788 rtx dwarf;
11789 rtx tmp, reg;
11790 int i;
11791
11792 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
11793 register pairs are stored by a store multiple insn. We avoid this
11794 by pushing an extra pair. */
11795 if (count == 2 && !arm_arch6)
11796 {
11797 if (base_reg == LAST_VFP_REGNUM - 3)
11798 base_reg -= 2;
11799 count++;
11800 }
11801
11802 /* FSTMD may not store more than 16 doubleword registers at once. Split
11803 larger stores into multiple parts (up to a maximum of two, in
11804 practice). */
11805 if (count > 16)
11806 {
11807 int saved;
11808 /* NOTE: base_reg is an internal register number, so each D register
11809 counts as 2. */
11810 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
11811 saved += vfp_emit_fstmd (base_reg, 16);
11812 return saved;
11813 }
11814
11815 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11816 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11817
11818 reg = gen_rtx_REG (DFmode, base_reg);
11819 base_reg += 2;
11820
11821 XVECEXP (par, 0, 0)
11822 = gen_rtx_SET (VOIDmode,
11823 gen_frame_mem
11824 (BLKmode,
11825 gen_rtx_PRE_MODIFY (Pmode,
11826 stack_pointer_rtx,
11827 plus_constant
11828 (stack_pointer_rtx,
11829 - (count * 8)))
11830 ),
11831 gen_rtx_UNSPEC (BLKmode,
11832 gen_rtvec (1, reg),
11833 UNSPEC_PUSH_MULT));
11834
11835 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11836 plus_constant (stack_pointer_rtx, -(count * 8)));
11837 RTX_FRAME_RELATED_P (tmp) = 1;
11838 XVECEXP (dwarf, 0, 0) = tmp;
11839
11840 tmp = gen_rtx_SET (VOIDmode,
11841 gen_frame_mem (DFmode, stack_pointer_rtx),
11842 reg);
11843 RTX_FRAME_RELATED_P (tmp) = 1;
11844 XVECEXP (dwarf, 0, 1) = tmp;
11845
11846 for (i = 1; i < count; i++)
11847 {
11848 reg = gen_rtx_REG (DFmode, base_reg);
11849 base_reg += 2;
11850 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11851
11852 tmp = gen_rtx_SET (VOIDmode,
11853 gen_frame_mem (DFmode,
11854 plus_constant (stack_pointer_rtx,
11855 i * 8)),
11856 reg);
11857 RTX_FRAME_RELATED_P (tmp) = 1;
11858 XVECEXP (dwarf, 0, i + 1) = tmp;
11859 }
11860
11861 par = emit_insn (par);
11862 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
11863 RTX_FRAME_RELATED_P (par) = 1;
11864
11865 return count * 8;
11866 }
11867
11868 /* Emit a call instruction with pattern PAT. ADDR is the address of
11869 the call target. */
11870
11871 void
11872 arm_emit_call_insn (rtx pat, rtx addr)
11873 {
11874 rtx insn;
11875
11876 insn = emit_call_insn (pat);
11877
11878 /* The PIC register is live on entry to VxWorks PIC PLT entries.
11879 If the call might use such an entry, add a use of the PIC register
11880 to the instruction's CALL_INSN_FUNCTION_USAGE. */
11881 if (TARGET_VXWORKS_RTP
11882 && flag_pic
11883 && GET_CODE (addr) == SYMBOL_REF
11884 && (SYMBOL_REF_DECL (addr)
11885 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
11886 : !SYMBOL_REF_LOCAL_P (addr)))
11887 {
11888 require_pic_register ();
11889 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
11890 }
11891 }
11892
11893 /* Output a 'call' insn. */
11894 const char *
11895 output_call (rtx *operands)
11896 {
11897 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
11898
11899 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
11900 if (REGNO (operands[0]) == LR_REGNUM)
11901 {
11902 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
11903 output_asm_insn ("mov%?\t%0, %|lr", operands);
11904 }
11905
11906 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11907
11908 if (TARGET_INTERWORK || arm_arch4t)
11909 output_asm_insn ("bx%?\t%0", operands);
11910 else
11911 output_asm_insn ("mov%?\t%|pc, %0", operands);
11912
11913 return "";
11914 }
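
/* For illustration: for a call through r3 (register number is just an
   example) this emits
       mov lr, pc
       bx r3
   when interworking or ARMv4T is available, and "mov pc, r3" instead of
   the bx otherwise.  A call through lr is first redirected via ip.  */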
11915
11916 /* Output a 'call' insn that is a reference in memory. This is
11917 disabled for ARMv5; we prefer a blx instead because otherwise
11918 there's a significant performance overhead. */
11919 const char *
11920 output_call_mem (rtx *operands)
11921 {
11922 gcc_assert (!arm_arch5);
11923 if (TARGET_INTERWORK)
11924 {
11925 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11926 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11927 output_asm_insn ("bx%?\t%|ip", operands);
11928 }
11929 else if (regno_use_in (LR_REGNUM, operands[0]))
11930 {
11931 /* LR is used in the memory address. We load the address in the
11932 first instruction. It's safe to use IP as the target of the
11933 load since the call will kill it anyway. */
11934 output_asm_insn ("ldr%?\t%|ip, %0", operands);
11935 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11936 if (arm_arch4t)
11937 output_asm_insn ("bx%?\t%|ip", operands);
11938 else
11939 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
11940 }
11941 else
11942 {
11943 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
11944 output_asm_insn ("ldr%?\t%|pc, %0", operands);
11945 }
11946
11947 return "";
11948 }
11949
11950
11951 /* Output a move from arm registers to an fpa register.
11952 OPERANDS[0] is an fpa register.
11953 OPERANDS[1] is the first of three arm registers holding the value. */
11954 const char *
11955 output_mov_long_double_fpa_from_arm (rtx *operands)
11956 {
11957 int arm_reg0 = REGNO (operands[1]);
11958 rtx ops[3];
11959
11960 gcc_assert (arm_reg0 != IP_REGNUM);
11961
11962 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11963 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11964 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11965
11966 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11967 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
11968
11969 return "";
11970 }
11971
11972 /* Output a move from an fpa register to arm registers.
11973 OPERANDS[0] is the first of three arm registers that receive the value.
11974 OPERANDS[1] is an fpa register. */
11975 const char *
11976 output_mov_long_double_arm_from_fpa (rtx *operands)
11977 {
11978 int arm_reg0 = REGNO (operands[0]);
11979 rtx ops[3];
11980
11981 gcc_assert (arm_reg0 != IP_REGNUM);
11982
11983 ops[0] = gen_rtx_REG (SImode, arm_reg0);
11984 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
11985 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
11986
11987 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
11988 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
11989 return "";
11990 }
11991
11992 /* Output a move from arm registers to arm registers of a long double.
11993 OPERANDS[0] is the destination.
11994 OPERANDS[1] is the source. */
11995 const char *
11996 output_mov_long_double_arm_from_arm (rtx *operands)
11997 {
11998 /* We have to be careful here because the two might overlap. */
11999 int dest_start = REGNO (operands[0]);
12000 int src_start = REGNO (operands[1]);
12001 rtx ops[2];
12002 int i;
12003
12004 if (dest_start < src_start)
12005 {
12006 for (i = 0; i < 3; i++)
12007 {
12008 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12009 ops[1] = gen_rtx_REG (SImode, src_start + i);
12010 output_asm_insn ("mov%?\t%0, %1", ops);
12011 }
12012 }
12013 else
12014 {
12015 for (i = 2; i >= 0; i--)
12016 {
12017 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12018 ops[1] = gen_rtx_REG (SImode, src_start + i);
12019 output_asm_insn ("mov%?\t%0, %1", ops);
12020 }
12021 }
12022
12023 return "";
12024 }
12025
12026 void
12027 arm_emit_movpair (rtx dest, rtx src)
12028 {
12029 /* If the src is an immediate, simplify it. */
12030 if (CONST_INT_P (src))
12031 {
12032 HOST_WIDE_INT val = INTVAL (src);
12033 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12034 if ((val >> 16) & 0x0000ffff)
12035 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12036 GEN_INT (16)),
12037 GEN_INT ((val >> 16) & 0x0000ffff));
12038 return;
12039 }
12040 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12041 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
12042 }
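
/* For illustration: arm_emit_movpair (reg, GEN_INT (0x12345678)) emits a
   plain SET of the low half (0x5678, the form matched by movw) followed
   by a SET of a 16-bit ZERO_EXTRACT with 0x1234 (the form matched by
   movt).  When the upper half is zero the second insn is omitted.  */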
12043
12044 /* Output a move from arm registers to an fpa register.
12045 OPERANDS[0] is an fpa register.
12046 OPERANDS[1] is the first register of an arm register pair. */
12047 const char *
12048 output_mov_double_fpa_from_arm (rtx *operands)
12049 {
12050 int arm_reg0 = REGNO (operands[1]);
12051 rtx ops[2];
12052
12053 gcc_assert (arm_reg0 != IP_REGNUM);
12054
12055 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12056 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12057 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12058 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12059 return "";
12060 }
12061
12062 /* Output a move from an fpa register to arm registers.
12063 OPERANDS[0] is the first register of an arm register pair.
12064 OPERANDS[1] is an fpa register. */
12065 const char *
12066 output_mov_double_arm_from_fpa (rtx *operands)
12067 {
12068 int arm_reg0 = REGNO (operands[0]);
12069 rtx ops[2];
12070
12071 gcc_assert (arm_reg0 != IP_REGNUM);
12072
12073 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12074 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12075 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12076 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12077 return "";
12078 }
12079
12080 /* Output a move between double words.
12081 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12082 or MEM<-REG and all MEMs must be offsettable addresses. */
12083 const char *
12084 output_move_double (rtx *operands)
12085 {
12086 enum rtx_code code0 = GET_CODE (operands[0]);
12087 enum rtx_code code1 = GET_CODE (operands[1]);
12088 rtx otherops[3];
12089
12090 if (code0 == REG)
12091 {
12092 unsigned int reg0 = REGNO (operands[0]);
12093
12094 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12095
12096 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12097
12098 switch (GET_CODE (XEXP (operands[1], 0)))
12099 {
12100 case REG:
12101 if (TARGET_LDRD
12102 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
12103 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12104 else
12105 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12106 break;
12107
12108 case PRE_INC:
12109 gcc_assert (TARGET_LDRD);
12110 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12111 break;
12112
12113 case PRE_DEC:
12114 if (TARGET_LDRD)
12115 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12116 else
12117 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12118 break;
12119
12120 case POST_INC:
12121 if (TARGET_LDRD)
12122 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12123 else
12124 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12125 break;
12126
12127 case POST_DEC:
12128 gcc_assert (TARGET_LDRD);
12129 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12130 break;
12131
12132 case PRE_MODIFY:
12133 case POST_MODIFY:
12134 /* Autoincrement addressing modes should never have overlapping
12135 base and destination registers, and overlapping index registers
12136 are already prohibited, so this doesn't need to worry about
12137 fix_cm3_ldrd. */
12138 otherops[0] = operands[0];
12139 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12140 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12141
12142 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12143 {
12144 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12145 {
12146 /* Registers overlap so split out the increment. */
12147 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12148 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12149 }
12150 else
12151 {
12152 /* Use a single insn if we can.
12153 FIXME: IWMMXT allows offsets larger than ldrd can
12154 handle, fix these up with a pair of ldr. */
12155 if (TARGET_THUMB2
12156 || GET_CODE (otherops[2]) != CONST_INT
12157 || (INTVAL (otherops[2]) > -256
12158 && INTVAL (otherops[2]) < 256))
12159 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12160 else
12161 {
12162 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12163 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12164 }
12165 }
12166 }
12167 else
12168 {
12169 /* Use a single insn if we can.
12170 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12171 fix these up with a pair of ldr. */
12172 if (TARGET_THUMB2
12173 || GET_CODE (otherops[2]) != CONST_INT
12174 || (INTVAL (otherops[2]) > -256
12175 && INTVAL (otherops[2]) < 256))
12176 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12177 else
12178 {
12179 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12180 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12181 }
12182 }
12183 break;
12184
12185 case LABEL_REF:
12186 case CONST:
12187 /* We might be able to use ldrd %0, %1 here. However the range is
12188 different to ldr/adr, and it is broken on some ARMv7-M
12189 implementations. */
12190 /* Use the second register of the pair to avoid problematic
12191 overlap. */
12192 otherops[1] = operands[1];
12193 output_asm_insn ("adr%?\t%0, %1", otherops);
12194 operands[1] = otherops[0];
12195 if (TARGET_LDRD)
12196 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12197 else
12198 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12199 break;
12200
12201 /* ??? This needs checking for thumb2. */
12202 default:
12203 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12204 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12205 {
12206 otherops[0] = operands[0];
12207 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12208 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12209
12210 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12211 {
12212 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12213 {
12214 switch ((int) INTVAL (otherops[2]))
12215 {
12216 case -8:
12217 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12218 return "";
12219 case -4:
12220 if (TARGET_THUMB2)
12221 break;
12222 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12223 return "";
12224 case 4:
12225 if (TARGET_THUMB2)
12226 break;
12227 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12228 return "";
12229 }
12230 }
12231 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
12232 operands[1] = otherops[0];
12233 if (TARGET_LDRD
12234 && (GET_CODE (otherops[2]) == REG
12235 || TARGET_THUMB2
12236 || (GET_CODE (otherops[2]) == CONST_INT
12237 && INTVAL (otherops[2]) > -256
12238 && INTVAL (otherops[2]) < 256)))
12239 {
12240 if (reg_overlap_mentioned_p (operands[0],
12241 otherops[2]))
12242 {
12243 rtx tmp;
12244 /* Swap base and index registers over to
12245 avoid a conflict. */
12246 tmp = otherops[1];
12247 otherops[1] = otherops[2];
12248 otherops[2] = tmp;
12249 }
12250 /* If both registers conflict, it will usually
12251 have been fixed by a splitter. */
12252 if (reg_overlap_mentioned_p (operands[0], otherops[2])
12253 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
12254 {
12255 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12256 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12257 }
12258 else
12259 {
12260 otherops[0] = operands[0];
12261 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
12262 }
12263 return "";
12264 }
12265
12266 if (GET_CODE (otherops[2]) == CONST_INT)
12267 {
12268 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
12269 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
12270 else
12271 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12272 }
12273 else
12274 output_asm_insn ("add%?\t%0, %1, %2", otherops);
12275 }
12276 else
12277 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
12278
12279 if (TARGET_LDRD)
12280 return "ldr%(d%)\t%0, [%1]";
12281
12282 return "ldm%(ia%)\t%1, %M0";
12283 }
12284 else
12285 {
12286 otherops[1] = adjust_address (operands[1], SImode, 4);
12287 /* Take care of overlapping base/data reg. */
12288 if (reg_mentioned_p (operands[0], operands[1]))
12289 {
12290 output_asm_insn ("ldr%?\t%0, %1", otherops);
12291 output_asm_insn ("ldr%?\t%0, %1", operands);
12292 }
12293 else
12294 {
12295 output_asm_insn ("ldr%?\t%0, %1", operands);
12296 output_asm_insn ("ldr%?\t%0, %1", otherops);
12297 }
12298 }
12299 }
12300 }
12301 else
12302 {
12303 /* Constraints should ensure this. */
12304 gcc_assert (code0 == MEM && code1 == REG);
12305 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
12306
12307 switch (GET_CODE (XEXP (operands[0], 0)))
12308 {
12309 case REG:
12310 if (TARGET_LDRD)
12311 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
12312 else
12313 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12314 break;
12315
12316 case PRE_INC:
12317 gcc_assert (TARGET_LDRD);
12318 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
12319 break;
12320
12321 case PRE_DEC:
12322 if (TARGET_LDRD)
12323 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
12324 else
12325 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
12326 break;
12327
12328 case POST_INC:
12329 if (TARGET_LDRD)
12330 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
12331 else
12332 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
12333 break;
12334
12335 case POST_DEC:
12336 gcc_assert (TARGET_LDRD);
12337 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
12338 break;
12339
12340 case PRE_MODIFY:
12341 case POST_MODIFY:
12342 otherops[0] = operands[1];
12343 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
12344 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
12345
12346 /* IWMMXT allows offsets larger than strd can handle,
12347 fix these up with a pair of str. */
12348 if (!TARGET_THUMB2
12349 && GET_CODE (otherops[2]) == CONST_INT
12350 && (INTVAL(otherops[2]) <= -256
12351 || INTVAL(otherops[2]) >= 256))
12352 {
12353 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12354 {
12355 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12356 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12357 }
12358 else
12359 {
12360 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12361 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12362 }
12363 }
12364 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12365 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12366 else
12367 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
12368 break;
12369
12370 case PLUS:
12371 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
12372 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12373 {
12374 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
12375 {
12376 case -8:
12377 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
12378 return "";
12379
12380 case -4:
12381 if (TARGET_THUMB2)
12382 break;
12383 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
12384 return "";
12385
12386 case 4:
12387 if (TARGET_THUMB2)
12388 break;
12389 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
12390 return "";
12391 }
12392 }
12393 if (TARGET_LDRD
12394 && (GET_CODE (otherops[2]) == REG
12395 || TARGET_THUMB2
12396 || (GET_CODE (otherops[2]) == CONST_INT
12397 && INTVAL (otherops[2]) > -256
12398 && INTVAL (otherops[2]) < 256)))
12399 {
12400 otherops[0] = operands[1];
12401 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
12402 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
12403 return "";
12404 }
12405 /* Fall through */
12406
12407 default:
12408 otherops[0] = adjust_address (operands[0], SImode, 4);
12409 otherops[1] = operands[1];
12410 output_asm_insn ("str%?\t%1, %0", operands);
12411 output_asm_insn ("str%?\t%H1, %0", otherops);
12412 }
12413 }
12414
12415 return "";
12416 }
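
/* For illustration (register numbers are only examples): a DImode load
   from a plain register address comes out as
       ldrd r4, [r2]
   when TARGET_LDRD is set, or
       ldmia r2, {r4, r5}
   otherwise; the store forms use strd/stmia in the same way.  */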
12417
12418 /* Output a move, load or store for quad-word vectors in ARM registers. Only
12419 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
12420
12421 const char *
12422 output_move_quad (rtx *operands)
12423 {
12424 if (REG_P (operands[0]))
12425 {
12426 /* Load, or reg->reg move. */
12427
12428 if (MEM_P (operands[1]))
12429 {
12430 switch (GET_CODE (XEXP (operands[1], 0)))
12431 {
12432 case REG:
12433 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12434 break;
12435
12436 case LABEL_REF:
12437 case CONST:
12438 output_asm_insn ("adr%?\t%0, %1", operands);
12439 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
12440 break;
12441
12442 default:
12443 gcc_unreachable ();
12444 }
12445 }
12446 else
12447 {
12448 rtx ops[2];
12449 int dest, src, i;
12450
12451 gcc_assert (REG_P (operands[1]));
12452
12453 dest = REGNO (operands[0]);
12454 src = REGNO (operands[1]);
12455
12456 /* This seems pretty dumb, but hopefully GCC won't try to do it
12457 very often. */
12458 if (dest < src)
12459 for (i = 0; i < 4; i++)
12460 {
12461 ops[0] = gen_rtx_REG (SImode, dest + i);
12462 ops[1] = gen_rtx_REG (SImode, src + i);
12463 output_asm_insn ("mov%?\t%0, %1", ops);
12464 }
12465 else
12466 for (i = 3; i >= 0; i--)
12467 {
12468 ops[0] = gen_rtx_REG (SImode, dest + i);
12469 ops[1] = gen_rtx_REG (SImode, src + i);
12470 output_asm_insn ("mov%?\t%0, %1", ops);
12471 }
12472 }
12473 }
12474 else
12475 {
12476 gcc_assert (MEM_P (operands[0]));
12477 gcc_assert (REG_P (operands[1]));
12478 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
12479
12480 switch (GET_CODE (XEXP (operands[0], 0)))
12481 {
12482 case REG:
12483 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
12484 break;
12485
12486 default:
12487 gcc_unreachable ();
12488 }
12489 }
12490
12491 return "";
12492 }
12493
12494 /* Output a VFP load or store instruction. */
12495
12496 const char *
12497 output_move_vfp (rtx *operands)
12498 {
12499 rtx reg, mem, addr, ops[2];
12500 int load = REG_P (operands[0]);
12501 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
12502 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
12503 const char *templ;
12504 char buff[50];
12505 enum machine_mode mode;
12506
12507 reg = operands[!load];
12508 mem = operands[load];
12509
12510 mode = GET_MODE (reg);
12511
12512 gcc_assert (REG_P (reg));
12513 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
12514 gcc_assert (mode == SFmode
12515 || mode == DFmode
12516 || mode == SImode
12517 || mode == DImode
12518 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
12519 gcc_assert (MEM_P (mem));
12520
12521 addr = XEXP (mem, 0);
12522
12523 switch (GET_CODE (addr))
12524 {
12525 case PRE_DEC:
12526 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
12527 ops[0] = XEXP (addr, 0);
12528 ops[1] = reg;
12529 break;
12530
12531 case POST_INC:
12532 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
12533 ops[0] = XEXP (addr, 0);
12534 ops[1] = reg;
12535 break;
12536
12537 default:
12538 templ = "f%s%c%%?\t%%%s0, %%1%s";
12539 ops[0] = reg;
12540 ops[1] = mem;
12541 break;
12542 }
12543
12544 sprintf (buff, templ,
12545 load ? "ld" : "st",
12546 dp ? 'd' : 's',
12547 dp ? "P" : "",
12548 integer_p ? "\t%@ int" : "");
12549 output_asm_insn (buff, ops);
12550
12551 return "";
12552 }
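
/* For illustration (registers and offsets are only examples): a DFmode
   load from [r2, #8] is printed as "fldd d5, [r2, #8]", while an SFmode
   store through a pre-decremented base register becomes
   "fstmdbs r3!, {s14}".  */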
12553
12554 /* Output a Neon quad-word load or store, or a load or store for
12555 larger structure modes.
12556
12557 WARNING: The ordering of elements is weird in big-endian mode,
12558 because we use VSTM, as required by the EABI. GCC RTL defines
12559 element ordering based on in-memory order. This can differ
12560 from the architectural ordering of elements within a NEON register.
12561 The intrinsics defined in arm_neon.h use the NEON register element
12562 ordering, not the GCC RTL element ordering.
12563
12564 For example, the in-memory ordering of a big-endian quadword
12565 vector with 16-bit elements when stored from register pair {d0,d1}
12566 will be (lowest address first, d0[N] is NEON register element N):
12567
12568 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
12569
12570 When necessary, quadword registers (dN, dN+1) are moved to ARM
12571 registers from rN in the order:
12572
12573 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
12574
12575 So that STM/LDM can be used on vectors in ARM registers, and the
12576 same memory layout will result as if VSTM/VLDM were used. */
12577
12578 const char *
12579 output_move_neon (rtx *operands)
12580 {
12581 rtx reg, mem, addr, ops[2];
12582 int regno, load = REG_P (operands[0]);
12583 const char *templ;
12584 char buff[50];
12585 enum machine_mode mode;
12586
12587 reg = operands[!load];
12588 mem = operands[load];
12589
12590 mode = GET_MODE (reg);
12591
12592 gcc_assert (REG_P (reg));
12593 regno = REGNO (reg);
12594 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
12595 || NEON_REGNO_OK_FOR_QUAD (regno));
12596 gcc_assert (VALID_NEON_DREG_MODE (mode)
12597 || VALID_NEON_QREG_MODE (mode)
12598 || VALID_NEON_STRUCT_MODE (mode));
12599 gcc_assert (MEM_P (mem));
12600
12601 addr = XEXP (mem, 0);
12602
12603 /* Strip off const from addresses like (const (plus (...))). */
12604 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
12605 addr = XEXP (addr, 0);
12606
12607 switch (GET_CODE (addr))
12608 {
12609 case POST_INC:
12610 templ = "v%smia%%?\t%%0!, %%h1";
12611 ops[0] = XEXP (addr, 0);
12612 ops[1] = reg;
12613 break;
12614
12615 case PRE_DEC:
12616 /* FIXME: We should be using vld1/vst1 here in BE mode? */
12617 templ = "v%smdb%%?\t%%0!, %%h1";
12618 ops[0] = XEXP (addr, 0);
12619 ops[1] = reg;
12620 break;
12621
12622 case POST_MODIFY:
12623 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
12624 gcc_unreachable ();
12625
12626 case LABEL_REF:
12627 case PLUS:
12628 {
12629 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
12630 int i;
12631 int overlap = -1;
12632 for (i = 0; i < nregs; i++)
12633 {
12634 /* We're only using DImode here because it's a convenient size. */
12635 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
12636 ops[1] = adjust_address (mem, DImode, 8 * i);
12637 if (reg_overlap_mentioned_p (ops[0], mem))
12638 {
12639 gcc_assert (overlap == -1);
12640 overlap = i;
12641 }
12642 else
12643 {
12644 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12645 output_asm_insn (buff, ops);
12646 }
12647 }
12648 if (overlap != -1)
12649 {
12650 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
12651 ops[1] = adjust_address (mem, SImode, 8 * overlap);
12652 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
12653 output_asm_insn (buff, ops);
12654 }
12655
12656 return "";
12657 }
12658
12659 default:
12660 templ = "v%smia%%?\t%%m0, %%h1";
12661 ops[0] = mem;
12662 ops[1] = reg;
12663 }
12664
12665 sprintf (buff, templ, load ? "ld" : "st");
12666 output_asm_insn (buff, ops);
12667
12668 return "";
12669 }
12670
12671 /* Output an ADD r, s, #n where n may be too big for one instruction.
12672 If the constant is zero and the registers are the same, output nothing. */
12673 const char *
12674 output_add_immediate (rtx *operands)
12675 {
12676 HOST_WIDE_INT n = INTVAL (operands[2]);
12677
12678 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
12679 {
12680 if (n < 0)
12681 output_multi_immediate (operands,
12682 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
12683 -n);
12684 else
12685 output_multi_immediate (operands,
12686 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
12687 n);
12688 }
12689
12690 return "";
12691 }
12692
12693 /* Output a multiple immediate operation.
12694 OPERANDS is the vector of operands referred to in the output patterns.
12695 INSTR1 is the output pattern to use for the first constant.
12696 INSTR2 is the output pattern to use for subsequent constants.
12697 IMMED_OP is the index of the constant slot in OPERANDS.
12698 N is the constant value. */
12699 static const char *
12700 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
12701 int immed_op, HOST_WIDE_INT n)
12702 {
12703 #if HOST_BITS_PER_WIDE_INT > 32
12704 n &= 0xffffffff;
12705 #endif
12706
12707 if (n == 0)
12708 {
12709 /* Quick and easy output. */
12710 operands[immed_op] = const0_rtx;
12711 output_asm_insn (instr1, operands);
12712 }
12713 else
12714 {
12715 int i;
12716 const char * instr = instr1;
12717
12718 /* Note that n is never zero here (which would give no output). */
12719 for (i = 0; i < 32; i += 2)
12720 {
12721 if (n & (3 << i))
12722 {
12723 operands[immed_op] = GEN_INT (n & (255 << i));
12724 output_asm_insn (instr, operands);
12725 instr = instr2;
12726 i += 6;
12727 }
12728 }
12729 }
12730
12731 return "";
12732 }
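
/* For illustration: output_add_immediate with operands r0, r1 and #4097
   cannot use a single add, so the chunking loop above emits
       add r0, r1, #1
       add r0, r0, #4096
   (4097 = 0x1001 is split into 0x001 and 0x1000, each of which fits in an
   8-bit field).  */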
12733
12734 /* Return the name of a shifter operation. */
12735 static const char *
12736 arm_shift_nmem(enum rtx_code code)
12737 {
12738 switch (code)
12739 {
12740 case ASHIFT:
12741 return ARM_LSL_NAME;
12742
12743 case ASHIFTRT:
12744 return "asr";
12745
12746 case LSHIFTRT:
12747 return "lsr";
12748
12749 case ROTATERT:
12750 return "ror";
12751
12752 default:
12753 abort();
12754 }
12755 }
12756
12757 /* Return the appropriate ARM instruction for the operation code.
12758 The returned result should not be overwritten. OP is the rtx of the
12759 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
12760 was shifted. */
12761 const char *
12762 arithmetic_instr (rtx op, int shift_first_arg)
12763 {
12764 switch (GET_CODE (op))
12765 {
12766 case PLUS:
12767 return "add";
12768
12769 case MINUS:
12770 return shift_first_arg ? "rsb" : "sub";
12771
12772 case IOR:
12773 return "orr";
12774
12775 case XOR:
12776 return "eor";
12777
12778 case AND:
12779 return "and";
12780
12781 case ASHIFT:
12782 case ASHIFTRT:
12783 case LSHIFTRT:
12784 case ROTATERT:
12785 return arm_shift_nmem(GET_CODE(op));
12786
12787 default:
12788 gcc_unreachable ();
12789 }
12790 }
12791
12792 /* Ensure valid constant shifts and return the appropriate shift mnemonic
12793 for the operation code. The returned result should not be overwritten.
12794 OP is the rtx code of the shift.
12795 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
12796 constant shift amount otherwise. */
12797 static const char *
12798 shift_op (rtx op, HOST_WIDE_INT *amountp)
12799 {
12800 const char * mnem;
12801 enum rtx_code code = GET_CODE (op);
12802
12803 switch (GET_CODE (XEXP (op, 1)))
12804 {
12805 case REG:
12806 case SUBREG:
12807 *amountp = -1;
12808 break;
12809
12810 case CONST_INT:
12811 *amountp = INTVAL (XEXP (op, 1));
12812 break;
12813
12814 default:
12815 gcc_unreachable ();
12816 }
12817
12818 switch (code)
12819 {
12820 case ROTATE:
12821 gcc_assert (*amountp != -1);
12822 *amountp = 32 - *amountp;
12823 code = ROTATERT;
12824
12825 /* Fall through. */
12826
12827 case ASHIFT:
12828 case ASHIFTRT:
12829 case LSHIFTRT:
12830 case ROTATERT:
12831 mnem = arm_shift_nmem(code);
12832 break;
12833
12834 case MULT:
12835 /* We never have to worry about the amount being other than a
12836 power of 2, since this case can never be reloaded from a reg. */
12837 gcc_assert (*amountp != -1);
12838 *amountp = int_log2 (*amountp);
12839 return ARM_LSL_NAME;
12840
12841 default:
12842 gcc_unreachable ();
12843 }
12844
12845 if (*amountp != -1)
12846 {
12847 /* This is not 100% correct, but follows from the desire to merge
12848 multiplication by a power of 2 with the recognizer for a
12849 shift. >=32 is not a valid shift for "lsl", so we must try and
12850 output a shift that produces the correct arithmetical result.
12851 Using lsr #32 is identical except for the fact that the carry bit
12852 is not set correctly if we set the flags; but we never use the
12853 carry bit from such an operation, so we can ignore that. */
12854 if (code == ROTATERT)
12855 /* Rotate is just modulo 32. */
12856 *amountp &= 31;
12857 else if (*amountp != (*amountp & 31))
12858 {
12859 if (code == ASHIFT)
12860 mnem = "lsr";
12861 *amountp = 32;
12862 }
12863
12864 /* Shifts of 0 are no-ops. */
12865 if (*amountp == 0)
12866 return NULL;
12867 }
12868
12869 return mnem;
12870 }
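
/* For illustration: for (mult x 8) the code above sets *AMOUNTP to 3 and
   returns ARM_LSL_NAME, so the multiply is printed as a left shift; for
   (rotate x 24) it converts to a rotate-right of 8 and returns "ror".  */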
12871
12872 /* Obtain the shift count from POWER, which must be a power of two. */
12873
12874 static HOST_WIDE_INT
12875 int_log2 (HOST_WIDE_INT power)
12876 {
12877 HOST_WIDE_INT shift = 0;
12878
12879 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
12880 {
12881 gcc_assert (shift <= 31);
12882 shift++;
12883 }
12884
12885 return shift;
12886 }
12887
12888 /* Output a .ascii pseudo-op, keeping track of lengths. This is
12889 because /bin/as is horribly restrictive. The judgement about
12890 whether or not each character is 'printable' (and can be output as
12891 is) or not (and must be printed with an octal escape) must be made
12892 with reference to the *host* character set -- the situation is
12893 similar to that discussed in the comments above pp_c_char in
12894 c-pretty-print.c. */
12895
12896 #define MAX_ASCII_LEN 51
12897
12898 void
12899 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
12900 {
12901 int i;
12902 int len_so_far = 0;
12903
12904 fputs ("\t.ascii\t\"", stream);
12905
12906 for (i = 0; i < len; i++)
12907 {
12908 int c = p[i];
12909
12910 if (len_so_far >= MAX_ASCII_LEN)
12911 {
12912 fputs ("\"\n\t.ascii\t\"", stream);
12913 len_so_far = 0;
12914 }
12915
12916 if (ISPRINT (c))
12917 {
12918 if (c == '\\' || c == '\"')
12919 {
12920 putc ('\\', stream);
12921 len_so_far++;
12922 }
12923 putc (c, stream);
12924 len_so_far++;
12925 }
12926 else
12927 {
12928 fprintf (stream, "\\%03o", c);
12929 len_so_far += 4;
12930 }
12931 }
12932
12933 fputs ("\"\n", stream);
12934 }
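
/* For illustration: the three input bytes 'a', '"' and '\n' are emitted as
       .ascii "a\"\012"
   i.e. the quote is protected with a backslash and the unprintable newline
   is written as a three-digit octal escape.  */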
12935 \f
12936 /* Compute the register save mask for registers 0 through 12
12937 inclusive. This code is used by arm_compute_save_reg_mask. */
12938
12939 static unsigned long
12940 arm_compute_save_reg0_reg12_mask (void)
12941 {
12942 unsigned long func_type = arm_current_func_type ();
12943 unsigned long save_reg_mask = 0;
12944 unsigned int reg;
12945
12946 if (IS_INTERRUPT (func_type))
12947 {
12948 unsigned int max_reg;
12949 /* Interrupt functions must not corrupt any registers,
12950 even call clobbered ones. If this is a leaf function
12951 we can just examine the registers used by the RTL, but
12952 otherwise we have to assume that whatever function is
12953 called might clobber anything, and so we have to save
12954 all the call-clobbered registers as well. */
12955 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
12956 /* FIQ handlers have registers r8 - r12 banked, so
12957 we only need to check r0 - r7.  Normal ISRs only
12958 bank r14 and r15, so we must check up to r12.
12959 r13 is the stack pointer, which is always preserved,
12960 so we do not need to consider it here. */
12961 max_reg = 7;
12962 else
12963 max_reg = 12;
12964
12965 for (reg = 0; reg <= max_reg; reg++)
12966 if (df_regs_ever_live_p (reg)
12967 || (! current_function_is_leaf && call_used_regs[reg]))
12968 save_reg_mask |= (1 << reg);
12969
12970 /* Also save the pic base register if necessary. */
12971 if (flag_pic
12972 && !TARGET_SINGLE_PIC_BASE
12973 && arm_pic_register != INVALID_REGNUM
12974 && crtl->uses_pic_offset_table)
12975 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
12976 }
12977 else if (IS_VOLATILE(func_type))
12978 {
12979 /* For noreturn functions we historically omitted register saves
12980 altogether. However this really messes up debugging. As a
12981 compromise save just the frame pointers. Combined with the link
12982 register saved elsewhere this should be sufficient to get
12983 a backtrace. */
12984 if (frame_pointer_needed)
12985 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
12986 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
12987 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
12988 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
12989 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
12990 }
12991 else
12992 {
12993 /* In the normal case we only need to save those registers
12994 which are call saved and which are used by this function. */
12995 for (reg = 0; reg <= 11; reg++)
12996 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12997 save_reg_mask |= (1 << reg);
12998
12999 /* Handle the frame pointer as a special case. */
13000 if (frame_pointer_needed)
13001 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13002
13003 /* If we aren't loading the PIC register,
13004 don't stack it even though it may be live. */
13005 if (flag_pic
13006 && !TARGET_SINGLE_PIC_BASE
13007 && arm_pic_register != INVALID_REGNUM
13008 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13009 || crtl->uses_pic_offset_table))
13010 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13011
13012 /* The prologue will copy SP into R0, so save it. */
13013 if (IS_STACKALIGN (func_type))
13014 save_reg_mask |= 1;
13015 }
13016
13017 /* Save registers so the exception handler can modify them. */
13018 if (crtl->calls_eh_return)
13019 {
13020 unsigned int i;
13021
13022 for (i = 0; ; i++)
13023 {
13024 reg = EH_RETURN_DATA_REGNO (i);
13025 if (reg == INVALID_REGNUM)
13026 break;
13027 save_reg_mask |= 1 << reg;
13028 }
13029 }
13030
13031 return save_reg_mask;
13032 }
13033
13034
13035 /* Compute the number of bytes used to store the static chain register on the
13036 stack, above the stack frame. We need to know this accurately to get the
13037 alignment of the rest of the stack frame correct. */
13038
13039 static int arm_compute_static_chain_stack_bytes (void)
13040 {
13041 unsigned long func_type = arm_current_func_type ();
13042 int static_chain_stack_bytes = 0;
13043
13044 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
13045 && IS_NESTED (func_type)
13046 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13047 static_chain_stack_bytes = 4;
13048
13049 return static_chain_stack_bytes;
13050 }
13051
13052
13053 /* Compute a bit mask of which registers need to be
13054 saved on the stack for the current function.
13055 This is used by arm_get_frame_offsets, which may add extra registers. */
13056
13057 static unsigned long
13058 arm_compute_save_reg_mask (void)
13059 {
13060 unsigned int save_reg_mask = 0;
13061 unsigned long func_type = arm_current_func_type ();
13062 unsigned int reg;
13063
13064 if (IS_NAKED (func_type))
13065 /* This should never really happen. */
13066 return 0;
13067
13068 /* If we are creating a stack frame, then we must save the frame pointer,
13069 IP (which will hold the old stack pointer), LR and the PC. */
13070 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13071 save_reg_mask |=
13072 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13073 | (1 << IP_REGNUM)
13074 | (1 << LR_REGNUM)
13075 | (1 << PC_REGNUM);
13076
13077 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13078
13079 /* Decide if we need to save the link register.
13080 Interrupt routines have their own banked link register,
13081 so they never need to save it.
13082 Otherwise if we do not use the link register we do not need to save
13083 it. If we are pushing other registers onto the stack however, we
13084 can save an instruction in the epilogue by pushing the link register
13085 now and then popping it back into the PC. This incurs extra memory
13086 accesses though, so we only do it when optimizing for size, and only
13087 if we know that we will not need a fancy return sequence. */
13088 if (df_regs_ever_live_p (LR_REGNUM)
13089 || (save_reg_mask
13090 && optimize_size
13091 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13092 && !crtl->calls_eh_return))
13093 save_reg_mask |= 1 << LR_REGNUM;
13094
13095 if (cfun->machine->lr_save_eliminated)
13096 save_reg_mask &= ~ (1 << LR_REGNUM);
13097
13098 if (TARGET_REALLY_IWMMXT
13099 && ((bit_count (save_reg_mask)
13100 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13101 arm_compute_static_chain_stack_bytes())
13102 ) % 2) != 0)
13103 {
13104 /* The total number of registers that are going to be pushed
13105 onto the stack is odd. We need to ensure that the stack
13106 is 64-bit aligned before we start to save iWMMXt registers,
13107 and also before we start to create locals. (A local variable
13108 might be a double or long long which we will load/store using
13109 an iWMMXt instruction). Therefore we need to push another
13110 ARM register, so that the stack will be 64-bit aligned. We
13111 try to avoid using the arg registers (r0 - r3) as they might be
13112 used to pass values in a tail call. */
13113 for (reg = 4; reg <= 12; reg++)
13114 if ((save_reg_mask & (1 << reg)) == 0)
13115 break;
13116
13117 if (reg <= 12)
13118 save_reg_mask |= (1 << reg);
13119 else
13120 {
13121 cfun->machine->sibcall_blocked = 1;
13122 save_reg_mask |= (1 << 3);
13123 }
13124 }
13125
13126 /* We may need to push an additional register for use initializing the
13127 PIC base register. */
13128 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13129 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13130 {
13131 reg = thumb_find_work_register (1 << 4);
13132 if (!call_used_regs[reg])
13133 save_reg_mask |= (1 << reg);
13134 }
13135
13136 return save_reg_mask;
13137 }
13138
13139
13140 /* Compute a bit mask of which registers need to be
13141 saved on the stack for the current function. */
13142 static unsigned long
13143 thumb1_compute_save_reg_mask (void)
13144 {
13145 unsigned long mask;
13146 unsigned reg;
13147
13148 mask = 0;
13149 for (reg = 0; reg < 12; reg ++)
13150 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13151 mask |= 1 << reg;
13152
13153 if (flag_pic
13154 && !TARGET_SINGLE_PIC_BASE
13155 && arm_pic_register != INVALID_REGNUM
13156 && crtl->uses_pic_offset_table)
13157 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13158
13159 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13160 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13161 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13162
13163 /* LR will also be pushed if any lo regs are pushed. */
13164 if (mask & 0xff || thumb_force_lr_save ())
13165 mask |= (1 << LR_REGNUM);
13166
13167 /* Make sure we have a low work register if we need one.
13168 We will need one if we are going to push a high register,
13169 but we are not currently intending to push a low register. */
13170 if ((mask & 0xff) == 0
13171 && ((mask & 0x0f00) || TARGET_BACKTRACE))
13172 {
13173 /* Use thumb_find_work_register to choose which register
13174 we will use. If the register is live then we will
13175 have to push it. Use LAST_LO_REGNUM as our fallback
13176 choice for the register to select. */
13177 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
13178 /* Make sure the register returned by thumb_find_work_register is
13179 not part of the return value. */
13180 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
13181 reg = LAST_LO_REGNUM;
13182
13183 if (! call_used_regs[reg])
13184 mask |= 1 << reg;
13185 }
13186
13187 /* The 504 below is 8 bytes less than 512 because there are two possible
13188 alignment words. We can't tell here if they will be present or not so we
13189 have to play it safe and assume that they are. */
13190 if ((CALLER_INTERWORKING_SLOT_SIZE +
13191 ROUND_UP_WORD (get_frame_size ()) +
13192 crtl->outgoing_args_size) >= 504)
13193 {
13194 /* This is the same as the code in thumb1_expand_prologue() which
13195 determines which register to use for stack decrement. */
13196 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
13197 if (mask & (1 << reg))
13198 break;
13199
13200 if (reg > LAST_LO_REGNUM)
13201 {
13202 /* Make sure we have a register available for stack decrement. */
13203 mask |= 1 << LAST_LO_REGNUM;
13204 }
13205 }
13206
13207 return mask;
13208 }
13209
13210
13211 /* Return the number of bytes required to save VFP registers. */
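/* For example (illustrative, assuming a hard-float VFP configuration): if
   d8-d11 are the only call-saved VFP registers live, one block of four D
   registers is counted and 32 is returned; on a pre-ARMv6 core a group of
   exactly two (say d8-d9) is padded to three by the ARM10 VFPr1 workaround
   below, giving 24 rather than 16.  */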
13212 static int
13213 arm_get_vfp_saved_size (void)
13214 {
13215 unsigned int regno;
13216 int count;
13217 int saved;
13218
13219 saved = 0;
13220 /* Space for saved VFP registers. */
13221 if (TARGET_HARD_FLOAT && TARGET_VFP)
13222 {
13223 count = 0;
13224 for (regno = FIRST_VFP_REGNUM;
13225 regno < LAST_VFP_REGNUM;
13226 regno += 2)
13227 {
13228 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
13229 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
13230 {
13231 if (count > 0)
13232 {
13233 /* Workaround ARM10 VFPr1 bug. */
13234 if (count == 2 && !arm_arch6)
13235 count++;
13236 saved += count * 8;
13237 }
13238 count = 0;
13239 }
13240 else
13241 count++;
13242 }
13243 if (count > 0)
13244 {
13245 if (count == 2 && !arm_arch6)
13246 count++;
13247 saved += count * 8;
13248 }
13249 }
13250 return saved;
13251 }
13252
13253
13254 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13255 everything bar the final return instruction. */
13256 const char *
13257 output_return_instruction (rtx operand, int really_return, int reverse)
13258 {
13259 char conditional[10];
13260 char instr[100];
13261 unsigned reg;
13262 unsigned long live_regs_mask;
13263 unsigned long func_type;
13264 arm_stack_offsets *offsets;
13265
13266 func_type = arm_current_func_type ();
13267
13268 if (IS_NAKED (func_type))
13269 return "";
13270
13271 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13272 {
13273 /* If this function was declared non-returning, and we have
13274 found a tail call, then we have to trust that the called
13275 function won't return. */
13276 if (really_return)
13277 {
13278 rtx ops[2];
13279
13280 /* Otherwise, trap an attempted return by aborting. */
13281 ops[0] = operand;
13282 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
13283 : "abort");
13284 assemble_external_libcall (ops[1]);
13285 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
13286 }
13287
13288 return "";
13289 }
13290
13291 gcc_assert (!cfun->calls_alloca || really_return);
13292
13293 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
13294
13295 cfun->machine->return_used_this_function = 1;
13296
13297 offsets = arm_get_frame_offsets ();
13298 live_regs_mask = offsets->saved_regs_mask;
13299
13300 if (live_regs_mask)
13301 {
13302 const char * return_reg;
13303
13304 /* If we do not have any special requirements for function exit
13305 (e.g. interworking) then we can load the return address
13306 directly into the PC. Otherwise we must load it into LR. */
13307 if (really_return
13308 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
13309 return_reg = reg_names[PC_REGNUM];
13310 else
13311 return_reg = reg_names[LR_REGNUM];
13312
13313 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
13314 {
13315 /* There are three possible reasons for the IP register
13316 being saved. 1) a stack frame was created, in which case
13317 IP contains the old stack pointer, or 2) an ISR routine
13318 corrupted it, or 3) it was saved to align the stack on
13319 iWMMXt. In case 1, restore IP into SP, otherwise just
13320 restore IP. */
13321 if (frame_pointer_needed)
13322 {
13323 live_regs_mask &= ~ (1 << IP_REGNUM);
13324 live_regs_mask |= (1 << SP_REGNUM);
13325 }
13326 else
13327 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
13328 }
13329
13330 /* On some ARM architectures it is faster to use LDR rather than
13331 LDM to load a single register. On other architectures, the
13332 cost is the same. In 26 bit mode, or for exception handlers,
13333 we have to use LDM to load the PC so that the CPSR is also
13334 restored. */
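      /* For example (illustrative only): if live_regs_mask is exactly
         (1 << 4), i.e. only r4 was saved, the code below emits the single
         post-increment load "ldr r4, [sp], #4" instead of an ldm of one
         register.  */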
13335 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13336 if (live_regs_mask == (1U << reg))
13337 break;
13338
13339 if (reg <= LAST_ARM_REGNUM
13340 && (reg != LR_REGNUM
13341 || ! really_return
13342 || ! IS_INTERRUPT (func_type)))
13343 {
13344 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
13345 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
13346 }
13347 else
13348 {
13349 char *p;
13350 int first = 1;
13351
13352 /* Generate the load multiple instruction to restore the
13353 registers. Note we can get here, even if
13354 frame_pointer_needed is true, but only if sp already
13355 points to the base of the saved core registers. */
13356 if (live_regs_mask & (1 << SP_REGNUM))
13357 {
13358 unsigned HOST_WIDE_INT stack_adjust;
13359
13360 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
13361 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
13362
13363 if (stack_adjust && arm_arch5 && TARGET_ARM)
13364 if (TARGET_UNIFIED_ASM)
13365 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
13366 else
13367 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
13368 else
13369 {
13370 /* If we can't use ldmib (SA110 bug),
13371 then try to pop r3 instead. */
13372 if (stack_adjust)
13373 live_regs_mask |= 1 << 3;
13374
13375 if (TARGET_UNIFIED_ASM)
13376 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
13377 else
13378 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
13379 }
13380 }
13381 else
13382 if (TARGET_UNIFIED_ASM)
13383 sprintf (instr, "pop%s\t{", conditional);
13384 else
13385 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
13386
13387 p = instr + strlen (instr);
13388
13389 for (reg = 0; reg <= SP_REGNUM; reg++)
13390 if (live_regs_mask & (1 << reg))
13391 {
13392 int l = strlen (reg_names[reg]);
13393
13394 if (first)
13395 first = 0;
13396 else
13397 {
13398 memcpy (p, ", ", 2);
13399 p += 2;
13400 }
13401
13402 memcpy (p, "%|", 2);
13403 memcpy (p + 2, reg_names[reg], l);
13404 p += l + 2;
13405 }
13406
13407 if (live_regs_mask & (1 << LR_REGNUM))
13408 {
13409 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
13410 /* If returning from an interrupt, restore the CPSR. */
13411 if (IS_INTERRUPT (func_type))
13412 strcat (p, "^");
13413 }
13414 else
13415 strcpy (p, "}");
13416 }
13417
13418 output_asm_insn (instr, & operand);
13419
13420 /* See if we need to generate an extra instruction to
13421 perform the actual function return. */
13422 if (really_return
13423 && func_type != ARM_FT_INTERWORKED
13424 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
13425 {
13426 /* The return has already been handled
13427 by loading the LR into the PC. */
13428 really_return = 0;
13429 }
13430 }
13431
13432 if (really_return)
13433 {
13434 switch ((int) ARM_FUNC_TYPE (func_type))
13435 {
13436 case ARM_FT_ISR:
13437 case ARM_FT_FIQ:
13438 /* ??? This is wrong for unified assembly syntax. */
13439 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
13440 break;
13441
13442 case ARM_FT_INTERWORKED:
13443 sprintf (instr, "bx%s\t%%|lr", conditional);
13444 break;
13445
13446 case ARM_FT_EXCEPTION:
13447 /* ??? This is wrong for unified assembly syntax. */
13448 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
13449 break;
13450
13451 default:
13452 /* Use bx if it's available. */
13453 if (arm_arch5 || arm_arch4t)
13454 sprintf (instr, "bx%s\t%%|lr", conditional);
13455 else
13456 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
13457 break;
13458 }
13459
13460 output_asm_insn (instr, & operand);
13461 }
13462
13463 return "";
13464 }
13465
13466 /* Write the function name into the code section, directly preceding
13467 the function prologue.
13468
13469 Code will be output similar to this:
13470 t0
13471 .ascii "arm_poke_function_name", 0
13472 .align
13473 t1
13474 .word 0xff000000 + (t1 - t0)
13475 arm_poke_function_name
13476 mov ip, sp
13477 stmfd sp!, {fp, ip, lr, pc}
13478 sub fp, ip, #4
13479
13480 When performing a stack backtrace, code can inspect the value
13481 of 'pc' stored at 'fp' + 0. If the trace function then looks
13482 at location pc - 12 and the top 8 bits are set, then we know
13483 that there is a function name embedded immediately preceding this
13484 location, and that its length (including alignment padding) is (pc[-3] & ~0xff000000).
13485
13486 We assume that pc is declared as a pointer to an unsigned long.
13487
13488 It is of no benefit to output the function name if we are assembling
13489 a leaf function. These function types will not contain a stack
13490 backtrace structure, therefore it is not possible to determine the
13491 function name. */
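/* As a minimal sketch (illustrative only, not part of the GCC sources),
   a backtrace routine could recover the embedded name roughly like this,
   where PC is the saved program counter word read from the frame:

     unsigned long marker = pc[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
         unsigned long len = marker & ~0xff000000;
         const char *name = (const char *) (pc - 3) - len;
         printf ("%s\n", name);
       }

   The names "marker", "len" and "name" are purely illustrative.  */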
13492 void
13493 arm_poke_function_name (FILE *stream, const char *name)
13494 {
13495 unsigned long alignlength;
13496 unsigned long length;
13497 rtx x;
13498
13499 length = strlen (name) + 1;
13500 alignlength = ROUND_UP_WORD (length);
13501
13502 ASM_OUTPUT_ASCII (stream, name, length);
13503 ASM_OUTPUT_ALIGN (stream, 2);
13504 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
13505 assemble_aligned_integer (UNITS_PER_WORD, x);
13506 }
13507
13508 /* Place some comments into the assembler stream
13509 describing the current function. */
13510 static void
13511 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
13512 {
13513 unsigned long func_type;
13514
13515 if (TARGET_THUMB1)
13516 {
13517 thumb1_output_function_prologue (f, frame_size);
13518 return;
13519 }
13520
13521 /* Sanity check. */
13522 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
13523
13524 func_type = arm_current_func_type ();
13525
13526 switch ((int) ARM_FUNC_TYPE (func_type))
13527 {
13528 default:
13529 case ARM_FT_NORMAL:
13530 break;
13531 case ARM_FT_INTERWORKED:
13532 asm_fprintf (f, "\t%@ Function supports interworking.\n");
13533 break;
13534 case ARM_FT_ISR:
13535 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
13536 break;
13537 case ARM_FT_FIQ:
13538 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
13539 break;
13540 case ARM_FT_EXCEPTION:
13541 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
13542 break;
13543 }
13544
13545 if (IS_NAKED (func_type))
13546 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
13547
13548 if (IS_VOLATILE (func_type))
13549 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
13550
13551 if (IS_NESTED (func_type))
13552 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
13553 if (IS_STACKALIGN (func_type))
13554 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
13555
13556 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
13557 crtl->args.size,
13558 crtl->args.pretend_args_size, frame_size);
13559
13560 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
13561 frame_pointer_needed,
13562 cfun->machine->uses_anonymous_args);
13563
13564 if (cfun->machine->lr_save_eliminated)
13565 asm_fprintf (f, "\t%@ link register save eliminated.\n");
13566
13567 if (crtl->calls_eh_return)
13568 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
13569
13570 }
13571
13572 const char *
13573 arm_output_epilogue (rtx sibling)
13574 {
13575 int reg;
13576 unsigned long saved_regs_mask;
13577 unsigned long func_type;
13578 /* Floats_offset is the offset from the "virtual" frame. In an APCS
13579 frame that is $fp + 4 for a non-variadic function. */
13580 int floats_offset = 0;
13581 rtx operands[3];
13582 FILE * f = asm_out_file;
13583 unsigned int lrm_count = 0;
13584 int really_return = (sibling == NULL);
13585 int start_reg;
13586 arm_stack_offsets *offsets;
13587
13588 /* If we have already generated the return instruction
13589 then it is futile to generate anything else. */
13590 if (use_return_insn (FALSE, sibling) &&
13591 (cfun->machine->return_used_this_function != 0))
13592 return "";
13593
13594 func_type = arm_current_func_type ();
13595
13596 if (IS_NAKED (func_type))
13597 /* Naked functions don't have epilogues. */
13598 return "";
13599
13600 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
13601 {
13602 rtx op;
13603
13604 /* A volatile function should never return. Call abort. */
13605 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
13606 assemble_external_libcall (op);
13607 output_asm_insn ("bl\t%a0", &op);
13608
13609 return "";
13610 }
13611
13612 /* If we are throwing an exception, then we really must be doing a
13613 return, so we can't tail-call. */
13614 gcc_assert (!crtl->calls_eh_return || really_return);
13615
13616 offsets = arm_get_frame_offsets ();
13617 saved_regs_mask = offsets->saved_regs_mask;
13618
13619 if (TARGET_IWMMXT)
13620 lrm_count = bit_count (saved_regs_mask);
13621
13622 floats_offset = offsets->saved_args;
13623 /* Compute how far away the floats will be. */
13624 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
13625 if (saved_regs_mask & (1 << reg))
13626 floats_offset += 4;
13627
13628 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13629 {
13630 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
13631 int vfp_offset = offsets->frame;
13632
13633 if (TARGET_FPA_EMU2)
13634 {
13635 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13636 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13637 {
13638 floats_offset += 12;
13639 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
13640 reg, FP_REGNUM, floats_offset - vfp_offset);
13641 }
13642 }
13643 else
13644 {
13645 start_reg = LAST_FPA_REGNUM;
13646
13647 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
13648 {
13649 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13650 {
13651 floats_offset += 12;
13652
13653 /* We can't unstack more than four registers at once. */
13654 if (start_reg - reg == 3)
13655 {
13656 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
13657 reg, FP_REGNUM, floats_offset - vfp_offset);
13658 start_reg = reg - 1;
13659 }
13660 }
13661 else
13662 {
13663 if (reg != start_reg)
13664 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13665 reg + 1, start_reg - reg,
13666 FP_REGNUM, floats_offset - vfp_offset);
13667 start_reg = reg - 1;
13668 }
13669 }
13670
13671 /* Just in case the last register checked also needs unstacking. */
13672 if (reg != start_reg)
13673 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
13674 reg + 1, start_reg - reg,
13675 FP_REGNUM, floats_offset - vfp_offset);
13676 }
13677
13678 if (TARGET_HARD_FLOAT && TARGET_VFP)
13679 {
13680 int saved_size;
13681
13682 /* The fldmd insns do not have base+offset addressing
13683 modes, so we use IP to hold the address. */
13684 saved_size = arm_get_vfp_saved_size ();
13685
13686 if (saved_size > 0)
13687 {
13688 floats_offset += saved_size;
13689 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
13690 FP_REGNUM, floats_offset - vfp_offset);
13691 }
13692 start_reg = FIRST_VFP_REGNUM;
13693 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
13694 {
13695 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13696 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
13697 {
13698 if (start_reg != reg)
13699 vfp_output_fldmd (f, IP_REGNUM,
13700 (start_reg - FIRST_VFP_REGNUM) / 2,
13701 (reg - start_reg) / 2);
13702 start_reg = reg + 2;
13703 }
13704 }
13705 if (start_reg != reg)
13706 vfp_output_fldmd (f, IP_REGNUM,
13707 (start_reg - FIRST_VFP_REGNUM) / 2,
13708 (reg - start_reg) / 2);
13709 }
13710
13711 if (TARGET_IWMMXT)
13712 {
13713 /* The frame pointer is guaranteed to be non-double-word aligned.
13714 This is because it is set to (old_stack_pointer - 4) and the
13715 old_stack_pointer was double word aligned. Thus the offset to
13716 the iWMMXt registers to be loaded must also be non-double-word
13717 sized, so that the resultant address *is* double-word aligned.
13718 We can ignore floats_offset since that was already included in
13719 the live_regs_mask. */
13720 lrm_count += (lrm_count % 2 ? 2 : 1);
13721
13722 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
13723 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13724 {
13725 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
13726 reg, FP_REGNUM, lrm_count * 4);
13727 lrm_count += 2;
13728 }
13729 }
13730
13731 /* saved_regs_mask should contain the IP, which at the time of stack
13732 frame generation actually contains the old stack pointer. So a
13733 quick way to unwind the stack is just pop the IP register directly
13734 into the stack pointer. */
13735 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
13736 saved_regs_mask &= ~ (1 << IP_REGNUM);
13737 saved_regs_mask |= (1 << SP_REGNUM);
13738
13739 /* There are two registers left in saved_regs_mask - LR and PC. We
13740 only need to restore the LR register (the return address), but to
13741 save time we can load it directly into the PC, unless we need a
13742 special function exit sequence, or we are not really returning. */
13743 if (really_return
13744 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13745 && !crtl->calls_eh_return)
13746 /* Delete the LR from the register mask, so that the LR on
13747 the stack is loaded into the PC in the register mask. */
13748 saved_regs_mask &= ~ (1 << LR_REGNUM);
13749 else
13750 saved_regs_mask &= ~ (1 << PC_REGNUM);
13751
13752 /* We must use SP as the base register, because SP is one of the
13753 registers being restored. If an interrupt or page fault
13754 happens in the ldm instruction, the SP might or might not
13755 have been restored. That would be bad, as then SP will no
13756 longer indicate the safe area of stack, and we can get stack
13757 corruption. Using SP as the base register means that it will
13758 be reset correctly to the original value, should an interrupt
13759 occur. If the stack pointer already points at the right
13760 place, then omit the subtraction. */
13761 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
13762 || cfun->calls_alloca)
13763 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
13764 4 * bit_count (saved_regs_mask));
13765 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
13766
13767 if (IS_INTERRUPT (func_type))
13768 /* Interrupt handlers will have pushed the
13769 IP onto the stack, so restore it now. */
13770 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
13771 }
13772 else
13773 {
13774 /* This branch is executed for ARM mode (non-apcs frames) and
13775 Thumb-2 mode. Frame layout is essentially the same for those
13776 cases, except that in ARM mode frame pointer points to the
13777 first saved register, while in Thumb-2 mode the frame pointer points
13778 to the last saved register.
13779
13780 It is possible to make frame pointer point to last saved
13781 register in both cases, and remove some conditionals below.
13782 That means that fp setup in prologue would be just "mov fp, sp"
13783 and sp restore in epilogue would be just "mov sp, fp", whereas
13784 now we have to use add/sub in those cases. However, the value
13785 of that would be marginal, as both mov and add/sub are 32-bit
13786 in ARM mode, and it would require extra conditionals
13787 in arm_expand_prologue to distinguish the ARM-apcs-frame case
13788 (where the frame pointer is required to point at the first register)
13789 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
13790 until a real need arises. */
13791 unsigned HOST_WIDE_INT amount;
13792 int rfe;
13793 /* Restore stack pointer if necessary. */
13794 if (TARGET_ARM && frame_pointer_needed)
13795 {
13796 operands[0] = stack_pointer_rtx;
13797 operands[1] = hard_frame_pointer_rtx;
13798
13799 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
13800 output_add_immediate (operands);
13801 }
13802 else
13803 {
13804 if (frame_pointer_needed)
13805 {
13806 /* For Thumb-2 restore sp from the frame pointer.
13807 Operand restrictions mean we have to increment FP, then copy
13808 to SP. */
13809 amount = offsets->locals_base - offsets->saved_regs;
13810 operands[0] = hard_frame_pointer_rtx;
13811 }
13812 else
13813 {
13814 unsigned long count;
13815 operands[0] = stack_pointer_rtx;
13816 amount = offsets->outgoing_args - offsets->saved_regs;
13817 /* pop call clobbered registers if it avoids a
13818 separate stack adjustment. */
13819 count = offsets->saved_regs - offsets->saved_args;
13820 if (optimize_size
13821 && count != 0
13822 && !crtl->calls_eh_return
13823 && bit_count(saved_regs_mask) * 4 == count
13824 && !IS_INTERRUPT (func_type)
13825 && !crtl->tail_call_emit)
13826 {
13827 unsigned long mask;
13828 mask = (1 << (arm_size_return_regs() / 4)) - 1;
13829 mask ^= 0xf;
13830 mask &= ~saved_regs_mask;
13831 reg = 0;
13832 while (bit_count (mask) * 4 > amount)
13833 {
13834 while ((mask & (1 << reg)) == 0)
13835 reg++;
13836 mask &= ~(1 << reg);
13837 }
13838 if (bit_count (mask) * 4 == amount) {
13839 amount = 0;
13840 saved_regs_mask |= mask;
13841 }
13842 }
13843 }
13844
13845 if (amount)
13846 {
13847 operands[1] = operands[0];
13848 operands[2] = GEN_INT (amount);
13849 output_add_immediate (operands);
13850 }
13851 if (frame_pointer_needed)
13852 asm_fprintf (f, "\tmov\t%r, %r\n",
13853 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
13854 }
13855
13856 if (TARGET_FPA_EMU2)
13857 {
13858 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13859 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13860 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
13861 reg, SP_REGNUM);
13862 }
13863 else
13864 {
13865 start_reg = FIRST_FPA_REGNUM;
13866
13867 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
13868 {
13869 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13870 {
13871 if (reg - start_reg == 3)
13872 {
13873 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
13874 start_reg, SP_REGNUM);
13875 start_reg = reg + 1;
13876 }
13877 }
13878 else
13879 {
13880 if (reg != start_reg)
13881 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13882 start_reg, reg - start_reg,
13883 SP_REGNUM);
13884
13885 start_reg = reg + 1;
13886 }
13887 }
13888
13889 /* Just in case the last register checked also needs unstacking. */
13890 if (reg != start_reg)
13891 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
13892 start_reg, reg - start_reg, SP_REGNUM);
13893 }
13894
13895 if (TARGET_HARD_FLOAT && TARGET_VFP)
13896 {
13897 int end_reg = LAST_VFP_REGNUM + 1;
13898
13899 /* Scan the registers in reverse order. We need to match
13900 any groupings made in the prologue and generate matching
13901 pop operations. */
13902 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
13903 {
13904 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13905 && (!df_regs_ever_live_p (reg + 1)
13906 || call_used_regs[reg + 1]))
13907 {
13908 if (end_reg > reg + 2)
13909 vfp_output_fldmd (f, SP_REGNUM,
13910 (reg + 2 - FIRST_VFP_REGNUM) / 2,
13911 (end_reg - (reg + 2)) / 2);
13912 end_reg = reg;
13913 }
13914 }
13915 if (end_reg > reg + 2)
13916 vfp_output_fldmd (f, SP_REGNUM, 0,
13917 (end_reg - (reg + 2)) / 2);
13918 }
13919
13920 if (TARGET_IWMMXT)
13921 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
13922 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13923 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
13924
13925 /* If we can, restore the LR into the PC. */
13926 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
13927 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
13928 && !IS_STACKALIGN (func_type)
13929 && really_return
13930 && crtl->args.pretend_args_size == 0
13931 && saved_regs_mask & (1 << LR_REGNUM)
13932 && !crtl->calls_eh_return)
13933 {
13934 saved_regs_mask &= ~ (1 << LR_REGNUM);
13935 saved_regs_mask |= (1 << PC_REGNUM);
13936 rfe = IS_INTERRUPT (func_type);
13937 }
13938 else
13939 rfe = 0;
13940
13941 /* Load the registers off the stack. If we only have one register
13942 to load use the LDR instruction - it is faster. For Thumb-2
13943 always use pop and the assembler will pick the best instruction. */
13944 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
13945 && !IS_INTERRUPT(func_type))
13946 {
13947 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
13948 }
13949 else if (saved_regs_mask)
13950 {
13951 if (saved_regs_mask & (1 << SP_REGNUM))
13952 /* Note - write back to the stack register is not enabled
13953 (i.e. "ldmfd sp!..."). We know that the stack pointer is
13954 in the list of registers and if we add writeback the
13955 instruction becomes UNPREDICTABLE. */
13956 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
13957 rfe);
13958 else if (TARGET_ARM)
13959 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
13960 rfe);
13961 else
13962 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
13963 }
13964
13965 if (crtl->args.pretend_args_size)
13966 {
13967 /* Unwind the pre-pushed regs. */
13968 operands[0] = operands[1] = stack_pointer_rtx;
13969 operands[2] = GEN_INT (crtl->args.pretend_args_size);
13970 output_add_immediate (operands);
13971 }
13972 }
13973
13974 /* We may have already restored PC directly from the stack. */
13975 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
13976 return "";
13977
13978 /* Stack adjustment for exception handler. */
13979 if (crtl->calls_eh_return)
13980 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
13981 ARM_EH_STACKADJ_REGNUM);
13982
13983 /* Generate the return instruction. */
13984 switch ((int) ARM_FUNC_TYPE (func_type))
13985 {
13986 case ARM_FT_ISR:
13987 case ARM_FT_FIQ:
13988 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
13989 break;
13990
13991 case ARM_FT_EXCEPTION:
13992 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
13993 break;
13994
13995 case ARM_FT_INTERWORKED:
13996 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
13997 break;
13998
13999 default:
14000 if (IS_STACKALIGN (func_type))
14001 {
14002 /* See comment in arm_expand_prologue. */
14003 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14004 }
14005 if (arm_arch5 || arm_arch4t)
14006 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14007 else
14008 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14009 break;
14010 }
14011
14012 return "";
14013 }
14014
14015 static void
14016 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14017 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14018 {
14019 arm_stack_offsets *offsets;
14020
14021 if (TARGET_THUMB1)
14022 {
14023 int regno;
14024
14025 /* Emit any call-via-reg trampolines that are needed for v4t support
14026 of call_reg and call_value_reg type insns. */
14027 for (regno = 0; regno < LR_REGNUM; regno++)
14028 {
14029 rtx label = cfun->machine->call_via[regno];
14030
14031 if (label != NULL)
14032 {
14033 switch_to_section (function_section (current_function_decl));
14034 targetm.asm_out.internal_label (asm_out_file, "L",
14035 CODE_LABEL_NUMBER (label));
14036 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14037 }
14038 }
14039
14040 /* ??? Probably not safe to set this here, since it assumes that a
14041 function will be emitted as assembly immediately after we generate
14042 RTL for it. This does not happen for inline functions. */
14043 cfun->machine->return_used_this_function = 0;
14044 }
14045 else /* TARGET_32BIT */
14046 {
14047 /* We need to take into account any stack-frame rounding. */
14048 offsets = arm_get_frame_offsets ();
14049
14050 gcc_assert (!use_return_insn (FALSE, NULL)
14051 || (cfun->machine->return_used_this_function != 0)
14052 || offsets->saved_regs == offsets->outgoing_args
14053 || frame_pointer_needed);
14054
14055 /* Reset the ARM-specific per-function variables. */
14056 after_arm_reorg = 0;
14057 }
14058 }
14059
14060 /* Generate and emit an insn that we will recognize as a push_multi.
14061 Unfortunately, since this insn does not reflect very well the actual
14062 semantics of the operation, we need to annotate the insn for the benefit
14063 of DWARF2 frame unwind information. */
14064 static rtx
14065 emit_multi_reg_push (unsigned long mask)
14066 {
14067 int num_regs = 0;
14068 int num_dwarf_regs;
14069 int i, j;
14070 rtx par;
14071 rtx dwarf;
14072 int dwarf_par_index;
14073 rtx tmp, reg;
14074
14075 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14076 if (mask & (1 << i))
14077 num_regs++;
14078
14079 gcc_assert (num_regs && num_regs <= 16);
14080
14081 /* We don't record the PC in the dwarf frame information. */
14082 num_dwarf_regs = num_regs;
14083 if (mask & (1 << PC_REGNUM))
14084 num_dwarf_regs--;
14085
14086 /* For the body of the insn we are going to generate an UNSPEC in
14087 parallel with several USEs. This allows the insn to be recognized
14088 by the push_multi pattern in the arm.md file.
14089
14090 The body of the insn looks something like this:
14091
14092 (parallel [
14093 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14094 (const_int:SI <num>)))
14095 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14096 (use (reg:SI XX))
14097 (use (reg:SI YY))
14098 ...
14099 ])
14100
14101 For the frame note however, we try to be more explicit and actually
14102 show each register being stored into the stack frame, plus a (single)
14103 decrement of the stack pointer. We do it this way in order to be
14104 friendly to the stack unwinding code, which only wants to see a single
14105 stack decrement per instruction. The RTL we generate for the note looks
14106 something like this:
14107
14108 (sequence [
14109 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14110 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14111 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14112 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14113 ...
14114 ])
14115
14116 FIXME: In an ideal world the PRE_MODIFY would not exist and
14117 instead we'd have a parallel expression detailing all
14118 the stores to the various memory addresses so that debug
14119 information is more up-to-date. Remember however while writing
14120 this to take care of the constraints with the push instruction.
14121
14122 Note also that this has to be taken care of for the VFP registers.
14123
14124 For more see PR43399. */
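   /* Concrete example (illustrative only): for mask 0x4030, i.e. {r4, r5,
      lr}, num_regs is 3; the PARALLEL holds one SET (whose UNSPEC names r4)
      plus USEs of r5 and lr, while the attached note records a 12 byte SP
      decrement and stores of r4, r5 and lr at sp, sp+4 and sp+8.  */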
14125
14126 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14127 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14128 dwarf_par_index = 1;
14129
14130 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14131 {
14132 if (mask & (1 << i))
14133 {
14134 reg = gen_rtx_REG (SImode, i);
14135
14136 XVECEXP (par, 0, 0)
14137 = gen_rtx_SET (VOIDmode,
14138 gen_frame_mem
14139 (BLKmode,
14140 gen_rtx_PRE_MODIFY (Pmode,
14141 stack_pointer_rtx,
14142 plus_constant
14143 (stack_pointer_rtx,
14144 -4 * num_regs))
14145 ),
14146 gen_rtx_UNSPEC (BLKmode,
14147 gen_rtvec (1, reg),
14148 UNSPEC_PUSH_MULT));
14149
14150 if (i != PC_REGNUM)
14151 {
14152 tmp = gen_rtx_SET (VOIDmode,
14153 gen_frame_mem (SImode, stack_pointer_rtx),
14154 reg);
14155 RTX_FRAME_RELATED_P (tmp) = 1;
14156 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14157 dwarf_par_index++;
14158 }
14159
14160 break;
14161 }
14162 }
14163
14164 for (j = 1, i++; j < num_regs; i++)
14165 {
14166 if (mask & (1 << i))
14167 {
14168 reg = gen_rtx_REG (SImode, i);
14169
14170 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
14171
14172 if (i != PC_REGNUM)
14173 {
14174 tmp
14175 = gen_rtx_SET (VOIDmode,
14176 gen_frame_mem
14177 (SImode,
14178 plus_constant (stack_pointer_rtx,
14179 4 * j)),
14180 reg);
14181 RTX_FRAME_RELATED_P (tmp) = 1;
14182 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14183 }
14184
14185 j++;
14186 }
14187 }
14188
14189 par = emit_insn (par);
14190
14191 tmp = gen_rtx_SET (VOIDmode,
14192 stack_pointer_rtx,
14193 plus_constant (stack_pointer_rtx, -4 * num_regs));
14194 RTX_FRAME_RELATED_P (tmp) = 1;
14195 XVECEXP (dwarf, 0, 0) = tmp;
14196
14197 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14198
14199 return par;
14200 }
14201
14202 /* Calculate the size of the return value that is passed in registers. */
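/* For example (illustrative only): a function returning long long uses
   DImode and yields 8, while one returning int uses SImode and yields 4.  */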
14203 static unsigned
14204 arm_size_return_regs (void)
14205 {
14206 enum machine_mode mode;
14207
14208 if (crtl->return_rtx != 0)
14209 mode = GET_MODE (crtl->return_rtx);
14210 else
14211 mode = DECL_MODE (DECL_RESULT (current_function_decl));
14212
14213 return GET_MODE_SIZE (mode);
14214 }
14215
14216 static rtx
14217 emit_sfm (int base_reg, int count)
14218 {
14219 rtx par;
14220 rtx dwarf;
14221 rtx tmp, reg;
14222 int i;
14223
14224 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
14225 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
14226
14227 reg = gen_rtx_REG (XFmode, base_reg++);
14228
14229 XVECEXP (par, 0, 0)
14230 = gen_rtx_SET (VOIDmode,
14231 gen_frame_mem
14232 (BLKmode,
14233 gen_rtx_PRE_MODIFY (Pmode,
14234 stack_pointer_rtx,
14235 plus_constant
14236 (stack_pointer_rtx,
14237 -12 * count))
14238 ),
14239 gen_rtx_UNSPEC (BLKmode,
14240 gen_rtvec (1, reg),
14241 UNSPEC_PUSH_MULT));
14242 tmp = gen_rtx_SET (VOIDmode,
14243 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14244 RTX_FRAME_RELATED_P (tmp) = 1;
14245 XVECEXP (dwarf, 0, 1) = tmp;
14246
14247 for (i = 1; i < count; i++)
14248 {
14249 reg = gen_rtx_REG (XFmode, base_reg++);
14250 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
14251
14252 tmp = gen_rtx_SET (VOIDmode,
14253 gen_frame_mem (XFmode,
14254 plus_constant (stack_pointer_rtx,
14255 i * 12)),
14256 reg);
14257 RTX_FRAME_RELATED_P (tmp) = 1;
14258 XVECEXP (dwarf, 0, i + 1) = tmp;
14259 }
14260
14261 tmp = gen_rtx_SET (VOIDmode,
14262 stack_pointer_rtx,
14263 plus_constant (stack_pointer_rtx, -12 * count));
14264
14265 RTX_FRAME_RELATED_P (tmp) = 1;
14266 XVECEXP (dwarf, 0, 0) = tmp;
14267
14268 par = emit_insn (par);
14269 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
14270
14271 return par;
14272 }
14273
14274
14275 /* Return true if the current function needs to save/restore LR. */
14276
14277 static bool
14278 thumb_force_lr_save (void)
14279 {
14280 return !cfun->machine->lr_save_eliminated
14281 && (!leaf_function_p ()
14282 || thumb_far_jump_used_p ()
14283 || df_regs_ever_live_p (LR_REGNUM));
14284 }
14285
14286
14287 /* Compute the distance from register FROM to register TO.
14288 These can be the arg pointer (26), the soft frame pointer (25),
14289 the stack pointer (13) or the hard frame pointer (11).
14290 In thumb mode r7 is used as the soft frame pointer, if needed.
14291 Typical stack layout looks like this:
14292
14293 old stack pointer -> | |
14294 ----
14295 | | \
14296 | | saved arguments for
14297 | | vararg functions
14298 | | /
14299 --
14300 hard FP & arg pointer -> | | \
14301 | | stack
14302 | | frame
14303 | | /
14304 --
14305 | | \
14306 | | call saved
14307 | | registers
14308 soft frame pointer -> | | /
14309 --
14310 | | \
14311 | | local
14312 | | variables
14313 locals base pointer -> | | /
14314 --
14315 | | \
14316 | | outgoing
14317 | | arguments
14318 current stack pointer -> | | /
14319 --
14320
14321 For a given function some or all of these stack components
14322 may not be needed, giving rise to the possibility of
14323 eliminating some of the registers.
14324
14325 The values returned by this function must reflect the behavior
14326 of arm_expand_prologue() and arm_compute_save_reg_mask().
14327
14328 The sign of the number returned reflects the direction of stack
14329 growth, so the values are positive for all eliminations except
14330 from the soft frame pointer to the hard frame pointer.
14331
14332 SFP may point just inside the local variables block to ensure correct
14333 alignment. */
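/* Worked example (illustrative only, assuming CALLER_INTERWORKING_SLOT_SIZE
   and the static chain size are both zero): for a function with no pretend
   args, four core registers saved (16 bytes), 8 bytes of locals and no
   outgoing arguments, arm_get_frame_offsets () below gives saved_args = 0,
   saved_regs = 16, soft_frame = 16, locals_base = 24 and outgoing_args = 24,
   so eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   24 - (0 + 4) = 20.  */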
14334
14335
14336 /* Calculate stack offsets. These are used to calculate register elimination
14337 offsets and in prologue/epilogue code. Also calculates which registers
14338 should be saved. */
14339
14340 static arm_stack_offsets *
14341 arm_get_frame_offsets (void)
14342 {
14343 struct arm_stack_offsets *offsets;
14344 unsigned long func_type;
14345 int leaf;
14346 int saved;
14347 int core_saved;
14348 HOST_WIDE_INT frame_size;
14349 int i;
14350
14351 offsets = &cfun->machine->stack_offsets;
14352
14353 /* We need to know if we are a leaf function. Unfortunately, it
14354 is possible to be called after start_sequence has been called,
14355 which causes get_insns to return the insns for the sequence,
14356 not the function, which will cause leaf_function_p to return
14357 the incorrect result.
14358
14359 Fortunately, we only need to know about leaf functions once reload has completed, and the
14360 frame size cannot be changed after that time, so we can safely
14361 use the cached value. */
14362
14363 if (reload_completed)
14364 return offsets;
14365
14366 /* Initially this is the size of the local variables. It will be translated
14367 into an offset once we have determined the size of preceding data. */
14368 frame_size = ROUND_UP_WORD (get_frame_size ());
14369
14370 leaf = leaf_function_p ();
14371
14372 /* Space for variadic functions. */
14373 offsets->saved_args = crtl->args.pretend_args_size;
14374
14375 /* In Thumb mode this is incorrect, but never used. */
14376 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
14377 arm_compute_static_chain_stack_bytes();
14378
14379 if (TARGET_32BIT)
14380 {
14381 unsigned int regno;
14382
14383 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
14384 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14385 saved = core_saved;
14386
14387 /* We know that SP will be doubleword aligned on entry, and we must
14388 preserve that condition at any subroutine call. We also require the
14389 soft frame pointer to be doubleword aligned. */
14390
14391 if (TARGET_REALLY_IWMMXT)
14392 {
14393 /* Check for the call-saved iWMMXt registers. */
14394 for (regno = FIRST_IWMMXT_REGNUM;
14395 regno <= LAST_IWMMXT_REGNUM;
14396 regno++)
14397 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14398 saved += 8;
14399 }
14400
14401 func_type = arm_current_func_type ();
14402 if (! IS_VOLATILE (func_type))
14403 {
14404 /* Space for saved FPA registers. */
14405 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
14406 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
14407 saved += 12;
14408
14409 /* Space for saved VFP registers. */
14410 if (TARGET_HARD_FLOAT && TARGET_VFP)
14411 saved += arm_get_vfp_saved_size ();
14412 }
14413 }
14414 else /* TARGET_THUMB1 */
14415 {
14416 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
14417 core_saved = bit_count (offsets->saved_regs_mask) * 4;
14418 saved = core_saved;
14419 if (TARGET_BACKTRACE)
14420 saved += 16;
14421 }
14422
14423 /* Saved registers include the stack frame. */
14424 offsets->saved_regs = offsets->saved_args + saved +
14425 arm_compute_static_chain_stack_bytes();
14426 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
14427 /* A leaf function does not need any stack alignment if it has nothing
14428 on the stack. */
14429 if (leaf && frame_size == 0)
14430 {
14431 offsets->outgoing_args = offsets->soft_frame;
14432 offsets->locals_base = offsets->soft_frame;
14433 return offsets;
14434 }
14435
14436 /* Ensure SFP has the correct alignment. */
14437 if (ARM_DOUBLEWORD_ALIGN
14438 && (offsets->soft_frame & 7))
14439 {
14440 offsets->soft_frame += 4;
14441 /* Try to align stack by pushing an extra reg. Don't bother doing this
14442 when there is a stack frame as the alignment will be rolled into
14443 the normal stack adjustment. */
14444 if (frame_size + crtl->outgoing_args_size == 0)
14445 {
14446 int reg = -1;
14447
14448 /* If it is safe to use r3, then do so. This sometimes
14449 generates better code on Thumb-2 by avoiding the need to
14450 use 32-bit push/pop instructions. */
14451 if (!crtl->tail_call_emit
14452 && arm_size_return_regs () <= 12)
14453 {
14454 reg = 3;
14455 }
14456 else
14457 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14458 {
14459 if ((offsets->saved_regs_mask & (1 << i)) == 0)
14460 {
14461 reg = i;
14462 break;
14463 }
14464 }
14465
14466 if (reg != -1)
14467 {
14468 offsets->saved_regs += 4;
14469 offsets->saved_regs_mask |= (1 << reg);
14470 }
14471 }
14472 }
14473
14474 offsets->locals_base = offsets->soft_frame + frame_size;
14475 offsets->outgoing_args = (offsets->locals_base
14476 + crtl->outgoing_args_size);
14477
14478 if (ARM_DOUBLEWORD_ALIGN)
14479 {
14480 /* Ensure SP remains doubleword aligned. */
14481 if (offsets->outgoing_args & 7)
14482 offsets->outgoing_args += 4;
14483 gcc_assert (!(offsets->outgoing_args & 7));
14484 }
14485
14486 return offsets;
14487 }
14488
14489
14490 /* Calculate the relative offsets for the different stack pointers. Positive
14491 offsets are in the direction of stack growth. */
14492
14493 HOST_WIDE_INT
14494 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
14495 {
14496 arm_stack_offsets *offsets;
14497
14498 offsets = arm_get_frame_offsets ();
14499
14500 /* OK, now we have enough information to compute the distances.
14501 There must be an entry in these switch tables for each pair
14502 of registers in ELIMINABLE_REGS, even if some of the entries
14503 seem to be redundant or useless. */
14504 switch (from)
14505 {
14506 case ARG_POINTER_REGNUM:
14507 switch (to)
14508 {
14509 case THUMB_HARD_FRAME_POINTER_REGNUM:
14510 return 0;
14511
14512 case FRAME_POINTER_REGNUM:
14513 /* This is the reverse of the soft frame pointer
14514 to hard frame pointer elimination below. */
14515 return offsets->soft_frame - offsets->saved_args;
14516
14517 case ARM_HARD_FRAME_POINTER_REGNUM:
14518 /* This is only non-zero in the case where the static chain register
14519 is stored above the frame. */
14520 return offsets->frame - offsets->saved_args - 4;
14521
14522 case STACK_POINTER_REGNUM:
14523 /* If nothing has been pushed on the stack at all
14524 then this will return -4. This *is* correct! */
14525 return offsets->outgoing_args - (offsets->saved_args + 4);
14526
14527 default:
14528 gcc_unreachable ();
14529 }
14530 gcc_unreachable ();
14531
14532 case FRAME_POINTER_REGNUM:
14533 switch (to)
14534 {
14535 case THUMB_HARD_FRAME_POINTER_REGNUM:
14536 return 0;
14537
14538 case ARM_HARD_FRAME_POINTER_REGNUM:
14539 /* The hard frame pointer points to the top entry in the
14540 stack frame. The soft frame pointer to the bottom entry
14541 in the stack frame. If there is no stack frame at all,
14542 then they are identical. */
14543
14544 return offsets->frame - offsets->soft_frame;
14545
14546 case STACK_POINTER_REGNUM:
14547 return offsets->outgoing_args - offsets->soft_frame;
14548
14549 default:
14550 gcc_unreachable ();
14551 }
14552 gcc_unreachable ();
14553
14554 default:
14555 /* You cannot eliminate from the stack pointer.
14556 In theory you could eliminate from the hard frame
14557 pointer to the stack pointer, but this will never
14558 happen, since if a stack frame is not needed the
14559 hard frame pointer will never be used. */
14560 gcc_unreachable ();
14561 }
14562 }
14563
14564 /* Given FROM and TO register numbers, say whether this elimination is
14565 allowed. Frame pointer elimination is automatically handled.
14566
14567 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
14568 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
14569 pointer, we must eliminate FRAME_POINTER_REGNUM into
14570 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
14571 ARG_POINTER_REGNUM. */
14572
14573 bool
14574 arm_can_eliminate (const int from, const int to)
14575 {
14576 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
14577 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
14578 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
14579 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
14580 true);
14581 }
14582
14583 /* Emit RTL to save coprocessor registers on function entry. Returns the
14584 number of bytes pushed. */
14585
14586 static int
14587 arm_save_coproc_regs(void)
14588 {
14589 int saved_size = 0;
14590 unsigned reg;
14591 unsigned start_reg;
14592 rtx insn;
14593
14594 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14595 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14596 {
14597 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14598 insn = gen_rtx_MEM (V2SImode, insn);
14599 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14600 RTX_FRAME_RELATED_P (insn) = 1;
14601 saved_size += 8;
14602 }
14603
14604 /* Save any floating point call-saved registers used by this
14605 function. */
14606 if (TARGET_FPA_EMU2)
14607 {
14608 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14609 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14610 {
14611 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14612 insn = gen_rtx_MEM (XFmode, insn);
14613 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14614 RTX_FRAME_RELATED_P (insn) = 1;
14615 saved_size += 12;
14616 }
14617 }
14618 else
14619 {
14620 start_reg = LAST_FPA_REGNUM;
14621
14622 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14623 {
14624 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14625 {
14626 if (start_reg - reg == 3)
14627 {
14628 insn = emit_sfm (reg, 4);
14629 RTX_FRAME_RELATED_P (insn) = 1;
14630 saved_size += 48;
14631 start_reg = reg - 1;
14632 }
14633 }
14634 else
14635 {
14636 if (start_reg != reg)
14637 {
14638 insn = emit_sfm (reg + 1, start_reg - reg);
14639 RTX_FRAME_RELATED_P (insn) = 1;
14640 saved_size += (start_reg - reg) * 12;
14641 }
14642 start_reg = reg - 1;
14643 }
14644 }
14645
14646 if (start_reg != reg)
14647 {
14648 insn = emit_sfm (reg + 1, start_reg - reg);
14649 saved_size += (start_reg - reg) * 12;
14650 RTX_FRAME_RELATED_P (insn) = 1;
14651 }
14652 }
14653 if (TARGET_HARD_FLOAT && TARGET_VFP)
14654 {
14655 start_reg = FIRST_VFP_REGNUM;
14656
14657 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14658 {
14659 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14660 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14661 {
14662 if (start_reg != reg)
14663 saved_size += vfp_emit_fstmd (start_reg,
14664 (reg - start_reg) / 2);
14665 start_reg = reg + 2;
14666 }
14667 }
14668 if (start_reg != reg)
14669 saved_size += vfp_emit_fstmd (start_reg,
14670 (reg - start_reg) / 2);
14671 }
14672 return saved_size;
14673 }
14674
14675
14676 /* Set the Thumb frame pointer from the stack pointer. */
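/* For example (illustrative only): when the offset fits, a single
   "add fp, sp, #16" style instruction is emitted below; for amounts of
   1024 or more the constant is first moved into FP and SP is then added
   to it.  */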
14677
14678 static void
14679 thumb_set_frame_pointer (arm_stack_offsets *offsets)
14680 {
14681 HOST_WIDE_INT amount;
14682 rtx insn, dwarf;
14683
14684 amount = offsets->outgoing_args - offsets->locals_base;
14685 if (amount < 1024)
14686 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14687 stack_pointer_rtx, GEN_INT (amount)));
14688 else
14689 {
14690 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
14691 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
14692 expects the first two operands to be the same. */
14693 if (TARGET_THUMB2)
14694 {
14695 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14696 stack_pointer_rtx,
14697 hard_frame_pointer_rtx));
14698 }
14699 else
14700 {
14701 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14702 hard_frame_pointer_rtx,
14703 stack_pointer_rtx));
14704 }
14705 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
14706 plus_constant (stack_pointer_rtx, amount));
14707 RTX_FRAME_RELATED_P (dwarf) = 1;
14708 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14709 }
14710
14711 RTX_FRAME_RELATED_P (insn) = 1;
14712 }
14713
14714 /* Generate the prologue instructions for entry into an ARM or Thumb-2
14715 function. */
14716 void
14717 arm_expand_prologue (void)
14718 {
14719 rtx amount;
14720 rtx insn;
14721 rtx ip_rtx;
14722 unsigned long live_regs_mask;
14723 unsigned long func_type;
14724 int fp_offset = 0;
14725 int saved_pretend_args = 0;
14726 int saved_regs = 0;
14727 unsigned HOST_WIDE_INT args_to_push;
14728 arm_stack_offsets *offsets;
14729
14730 func_type = arm_current_func_type ();
14731
14732 /* Naked functions don't have prologues. */
14733 if (IS_NAKED (func_type))
14734 return;
14735
14736 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
14737 args_to_push = crtl->args.pretend_args_size;
14738
14739 /* Compute which register we will have to save onto the stack. */
14740 offsets = arm_get_frame_offsets ();
14741 live_regs_mask = offsets->saved_regs_mask;
14742
14743 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
14744
14745 if (IS_STACKALIGN (func_type))
14746 {
14747 rtx dwarf;
14748 rtx r0;
14749 rtx r1;
14750 /* Handle a word-aligned stack pointer. We generate the following:
14751
14752 mov r0, sp
14753 bic r1, r0, #7
14754 mov sp, r1
14755 <save and restore r0 in normal prologue/epilogue>
14756 mov sp, r0
14757 bx lr
14758
14759 The unwinder doesn't need to know about the stack realignment.
14760 Just tell it we saved SP in r0. */
14761 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
14762
14763 r0 = gen_rtx_REG (SImode, 0);
14764 r1 = gen_rtx_REG (SImode, 1);
14765 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
14766 compiler won't choke. */
14767 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
14768 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
14769 insn = gen_movsi (r0, stack_pointer_rtx);
14770 RTX_FRAME_RELATED_P (insn) = 1;
14771 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14772 emit_insn (insn);
14773 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
14774 emit_insn (gen_movsi (stack_pointer_rtx, r1));
14775 }
14776
14777 /* For APCS frames, if the IP register is clobbered
14778 when creating the frame, save that register in a special
14779 way. */
14780 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14781 {
14782 if (IS_INTERRUPT (func_type))
14783 {
14784 /* Interrupt functions must not corrupt any registers.
14785 Creating a frame pointer however, corrupts the IP
14786 register, so we must push it first. */
14787 insn = emit_multi_reg_push (1 << IP_REGNUM);
14788
14789 /* Do not set RTX_FRAME_RELATED_P on this insn.
14790 The dwarf stack unwinding code only wants to see one
14791 stack decrement per function, and this is not it. If
14792 this instruction is labeled as being part of the frame
14793 creation sequence then dwarf2out_frame_debug_expr will
14794 die when it encounters the assignment of IP to FP
14795 later on, since the use of SP here establishes SP as
14796 the CFA register and not IP.
14797
14798 Anyway this instruction is not really part of the stack
14799 frame creation although it is part of the prologue. */
14800 }
14801 else if (IS_NESTED (func_type))
14802 {
14803 /* The static chain register is the same as the IP register
14804 used as a scratch register during stack frame creation.
14805 To get around this, we need to find somewhere to store IP
14806 whilst the frame is being created. We try the following
14807 places in order:
14808
14809 1. The last argument register.
14810 2. A slot on the stack above the frame. (This only
14811 works if the function is not a varargs function).
14812 3. Register r3, after pushing the argument registers
14813 onto the stack.
14814
14815 Note - we only need to tell the dwarf2 backend about the SP
14816 adjustment in the second variant; the static chain register
14817 doesn't need to be unwound, as it doesn't contain a value
14818 inherited from the caller. */
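	     /* Mapping the cases above onto the code below (illustrative
		note): the df_regs_ever_live_p (3) test implements case 1,
		the args_to_push == 0 branch case 2, and the final else
		branch case 3.  */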
14819
14820 if (df_regs_ever_live_p (3) == false)
14821 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14822 else if (args_to_push == 0)
14823 {
14824 rtx dwarf;
14825
14826 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
14827 saved_regs += 4;
14828
14829 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
14830 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
14831 fp_offset = 4;
14832
14833 /* Just tell the dwarf backend that we adjusted SP. */
14834 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14835 plus_constant (stack_pointer_rtx,
14836 -fp_offset));
14837 RTX_FRAME_RELATED_P (insn) = 1;
14838 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
14839 }
14840 else
14841 {
14842 /* Store the args on the stack. */
14843 if (cfun->machine->uses_anonymous_args)
14844 insn = emit_multi_reg_push
14845 ((0xf0 >> (args_to_push / 4)) & 0xf);
14846 else
14847 insn = emit_insn
14848 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14849 GEN_INT (- args_to_push)));
14850
14851 RTX_FRAME_RELATED_P (insn) = 1;
14852
14853 saved_pretend_args = 1;
14854 fp_offset = args_to_push;
14855 args_to_push = 0;
14856
14857 /* Now reuse r3 to preserve IP. */
14858 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
14859 }
14860 }
14861
14862 insn = emit_set_insn (ip_rtx,
14863 plus_constant (stack_pointer_rtx, fp_offset));
14864 RTX_FRAME_RELATED_P (insn) = 1;
14865 }
14866
14867 if (args_to_push)
14868 {
14869 /* Push the argument registers, or reserve space for them. */
14870 if (cfun->machine->uses_anonymous_args)
14871 insn = emit_multi_reg_push
14872 ((0xf0 >> (args_to_push / 4)) & 0xf);
14873 else
14874 insn = emit_insn
14875 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14876 GEN_INT (- args_to_push)));
14877 RTX_FRAME_RELATED_P (insn) = 1;
14878 }
14879
14880 /* If this is an interrupt service routine, and the link register
14881 is going to be pushed, and we're not generating extra
14882 push of IP (needed when a frame is required and the frame layout is APCS),
14883 subtracting four from LR now will mean that the function return
14884 can be done with a single instruction. */
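  /* Concretely (illustrative note): with LR pre-decremented here, the
     epilogue can pop the saved value straight into PC with a single
     "ldmfd sp!, {..., pc}^" (the trailing "^" also restores the CPSR),
     instead of needing a separate "subs pc, lr, #4".  */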
14885 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
14886 && (live_regs_mask & (1 << LR_REGNUM)) != 0
14887 && !(frame_pointer_needed && TARGET_APCS_FRAME)
14888 && TARGET_ARM)
14889 {
14890 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
14891
14892 emit_set_insn (lr, plus_constant (lr, -4));
14893 }
14894
14895 if (live_regs_mask)
14896 {
14897 saved_regs += bit_count (live_regs_mask) * 4;
14898 if (optimize_size && !frame_pointer_needed
14899 && saved_regs == offsets->saved_regs - offsets->saved_args)
14900 {
14901 /* If no coprocessor registers are being pushed and we don't have
14902 to worry about a frame pointer then push extra registers to
14903 create the stack frame. This is done in a way that does not
14904 alter the frame layout, so is independent of the epilogue. */
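	  /* Example (illustrative only): with an 8 byte local frame and r4
	     the lowest register already being pushed, r0 and r1 are added
	     to the push below, so no separate "sub sp, sp, #8" is needed
	     and the epilogue is unaffected.  */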
14905 int n;
14906 int frame;
14907 n = 0;
14908 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
14909 n++;
14910 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
14911 if (frame && n * 4 >= frame)
14912 {
14913 n = frame / 4;
14914 live_regs_mask |= (1 << n) - 1;
14915 saved_regs += frame;
14916 }
14917 }
14918 insn = emit_multi_reg_push (live_regs_mask);
14919 RTX_FRAME_RELATED_P (insn) = 1;
14920 }
14921
14922 if (! IS_VOLATILE (func_type))
14923 saved_regs += arm_save_coproc_regs ();
14924
14925 if (frame_pointer_needed && TARGET_ARM)
14926 {
14927 /* Create the new frame pointer. */
14928 if (TARGET_APCS_FRAME)
14929 {
14930 insn = GEN_INT (-(4 + args_to_push + fp_offset));
14931 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
14932 RTX_FRAME_RELATED_P (insn) = 1;
14933
14934 if (IS_NESTED (func_type))
14935 {
14936 /* Recover the static chain register. */
14937 if (!df_regs_ever_live_p (3)
14938 || saved_pretend_args)
14939 insn = gen_rtx_REG (SImode, 3);
14940 else /* if (crtl->args.pretend_args_size == 0) */
14941 {
14942 insn = plus_constant (hard_frame_pointer_rtx, 4);
14943 insn = gen_frame_mem (SImode, insn);
14944 }
14945 emit_set_insn (ip_rtx, insn);
14946 /* Add a USE to stop propagate_one_insn() from barfing. */
14947 emit_insn (gen_prologue_use (ip_rtx));
14948 }
14949 }
14950 else
14951 {
14952 insn = GEN_INT (saved_regs - 4);
14953 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
14954 stack_pointer_rtx, insn));
14955 RTX_FRAME_RELATED_P (insn) = 1;
14956 }
14957 }
14958
14959 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
14960 {
14961 /* This add can produce multiple insns for a large constant, so we
14962 need to get tricky. */
14963 rtx last = get_last_insn ();
14964
14965 amount = GEN_INT (offsets->saved_args + saved_regs
14966 - offsets->outgoing_args);
14967
14968 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
14969 amount));
14970 do
14971 {
14972 last = last ? NEXT_INSN (last) : get_insns ();
14973 RTX_FRAME_RELATED_P (last) = 1;
14974 }
14975 while (last != insn);
14976
14977 /* If the frame pointer is needed, emit a special barrier that
14978 will prevent the scheduler from moving stores to the frame
14979 before the stack adjustment. */
14980 if (frame_pointer_needed)
14981 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
14982 hard_frame_pointer_rtx));
14983 }
14984
14985
14986 if (frame_pointer_needed && TARGET_THUMB2)
14987 thumb_set_frame_pointer (offsets);
14988
14989 if (flag_pic && arm_pic_register != INVALID_REGNUM)
14990 {
14991 unsigned long mask;
14992
14993 mask = live_regs_mask;
14994 mask &= THUMB2_WORK_REGS;
14995 if (!IS_NESTED (func_type))
14996 mask |= (1 << IP_REGNUM);
14997 arm_load_pic_register (mask);
14998 }
14999
15000 /* If we are profiling, make sure no instructions are scheduled before
15001 the call to mcount. Similarly if the user has requested no
15002 scheduling in the prolog. Similarly if we want non-call exceptions
15003 using the EABI unwinder, to prevent faulting instructions from being
15004 swapped with a stack adjustment. */
15005 if (crtl->profile || !TARGET_SCHED_PROLOG
15006 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
15007 emit_insn (gen_blockage ());
15008
15009 /* If the link register is being kept alive, with the return address in it,
15010 then make sure that it does not get reused by the ce2 pass. */
15011 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15012 cfun->machine->lr_save_eliminated = 1;
15013 }
15014 \f
15015 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15016 static void
15017 arm_print_condition (FILE *stream)
15018 {
15019 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15020 {
15021 /* Branch conversion is not implemented for Thumb-2. */
15022 if (TARGET_THUMB)
15023 {
15024 output_operand_lossage ("predicated Thumb instruction");
15025 return;
15026 }
15027 if (current_insn_predicate != NULL)
15028 {
15029 output_operand_lossage
15030 ("predicated instruction in conditional sequence");
15031 return;
15032 }
15033
15034 fputs (arm_condition_codes[arm_current_cc], stream);
15035 }
15036 else if (current_insn_predicate)
15037 {
15038 enum arm_cond_code code;
15039
15040 if (TARGET_THUMB1)
15041 {
15042 output_operand_lossage ("predicated Thumb instruction");
15043 return;
15044 }
15045
15046 code = get_arm_condition_code (current_insn_predicate);
15047 fputs (arm_condition_codes[code], stream);
15048 }
15049 }
15050
15051
15052 /* If CODE is 'd', then X is a condition operand and the instruction
15053 should only be executed if the condition is true.
15054 If CODE is 'D', then X is a condition operand and the instruction
15055 should only be executed if the condition is false: however, if the mode
15056 of the comparison is CCFPEmode, then always execute the instruction -- we
15057 do this because in these circumstances !GE does not necessarily imply LT;
15058 in these cases the instruction pattern will take care to make sure that
15059 an instruction containing %d will follow, thereby undoing the effects of
15060 doing this instruction unconditionally.
15061 If CODE is 'N' then X is a floating point operand that must be negated
15062 before output.
15063 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15064 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15065 void
15066 arm_print_operand (FILE *stream, rtx x, int code)
15067 {
15068 switch (code)
15069 {
15070 case '@':
15071 fputs (ASM_COMMENT_START, stream);
15072 return;
15073
15074 case '_':
15075 fputs (user_label_prefix, stream);
15076 return;
15077
15078 case '|':
15079 fputs (REGISTER_PREFIX, stream);
15080 return;
15081
15082 case '?':
15083 arm_print_condition (stream);
15084 return;
15085
15086 case '(':
15087 /* Nothing in unified syntax, otherwise the current condition code. */
15088 if (!TARGET_UNIFIED_ASM)
15089 arm_print_condition (stream);
15090 break;
15091
15092 case ')':
15093 /* The current condition code in unified syntax, otherwise nothing. */
15094 if (TARGET_UNIFIED_ASM)
15095 arm_print_condition (stream);
15096 break;
15097
15098 case '.':
15099 /* The current condition code for a condition code setting instruction.
15100 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15101 if (TARGET_UNIFIED_ASM)
15102 {
15103 fputc('s', stream);
15104 arm_print_condition (stream);
15105 }
15106 else
15107 {
15108 arm_print_condition (stream);
15109 fputc('s', stream);
15110 }
15111 return;
15112
15113 case '!':
15114 /* If the instruction is conditionally executed then print
15115 the current condition code, otherwise print 's'. */
15116 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15117 if (current_insn_predicate)
15118 arm_print_condition (stream);
15119 else
15120 fputc('s', stream);
15121 break;
15122
15123 /* %# is a "break" sequence. It doesn't output anything, but is used to
15124 separate e.g. operand numbers from following text, if that text consists
15125 of further digits which we don't want to be part of the operand
15126 number. */
15127 case '#':
15128 return;
15129
15130 case 'N':
15131 {
15132 REAL_VALUE_TYPE r;
15133 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15134 r = REAL_VALUE_NEGATE (r);
15135 fprintf (stream, "%s", fp_const_from_val (&r));
15136 }
15137 return;
15138
15139 /* An integer or symbol address without a preceding # sign. */
15140 case 'c':
15141 switch (GET_CODE (x))
15142 {
15143 case CONST_INT:
15144 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15145 break;
15146
15147 case SYMBOL_REF:
15148 output_addr_const (stream, x);
15149 break;
15150
15151 default:
15152 gcc_unreachable ();
15153 }
15154 return;
15155
15156 case 'B':
15157 if (GET_CODE (x) == CONST_INT)
15158 {
15159 HOST_WIDE_INT val;
15160 val = ARM_SIGN_EXTEND (~INTVAL (x));
15161 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
15162 }
15163 else
15164 {
15165 putc ('~', stream);
15166 output_addr_const (stream, x);
15167 }
15168 return;
15169
15170 case 'L':
15171 /* The low 16 bits of an immediate constant. */
15172 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
15173 return;
15174
15175 case 'i':
15176 fprintf (stream, "%s", arithmetic_instr (x, 1));
15177 return;
15178
15179 /* Truncate Cirrus shift counts. */
15180 case 's':
15181 if (GET_CODE (x) == CONST_INT)
15182 {
15183 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
15184 return;
15185 }
15186 arm_print_operand (stream, x, 0);
15187 return;
15188
15189 case 'I':
15190 fprintf (stream, "%s", arithmetic_instr (x, 0));
15191 return;
15192
15193 case 'S':
15194 {
15195 HOST_WIDE_INT val;
15196 const char *shift;
15197
15198 if (!shift_operator (x, SImode))
15199 {
15200 output_operand_lossage ("invalid shift operand");
15201 break;
15202 }
15203
15204 shift = shift_op (x, &val);
15205
15206 if (shift)
15207 {
15208 fprintf (stream, ", %s ", shift);
15209 if (val == -1)
15210 arm_print_operand (stream, XEXP (x, 1), 0);
15211 else
15212 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
15213 }
15214 }
15215 return;
15216
15217 /* An explanation of the 'Q', 'R' and 'H' register operands:
15218
15219 In a pair of registers containing a DI or DF value the 'Q'
15220 operand returns the register number of the register containing
15221 the least significant part of the value. The 'R' operand returns
15222 the register number of the register containing the most
15223 significant part of the value.
15224
15225 The 'H' operand returns the higher of the two register numbers.
15226 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15227 same as the 'Q' operand, since the most significant part of the
15228 value is held in the lower number register. The reverse is true
15229 on systems where WORDS_BIG_ENDIAN is false.
15230
15231 The purpose of these operands is to distinguish between cases
15232 where the endian-ness of the values is important (for example
15233 when they are added together), and cases where the endian-ness
15234 is irrelevant, but the order of register operations is important.
15235 For example when loading a value from memory into a register
15236 pair, the endian-ness does not matter. Provided that the value
15237 from the lower memory address is put into the lower numbered
15238 register, and the value from the higher address is put into the
15239 higher numbered register, the load will work regardless of whether
15240 the value being loaded is big-wordian or little-wordian. The
15241 order of the two register loads can matter however, if the address
15242 of the memory location is actually held in one of the registers
15243 being overwritten by the load. */
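/* For illustration, assuming a little-endian target and a DImode value held
   in the pair {r0, r1}: %Q prints r0 (least significant word), %R prints r1
   (most significant word) and %H prints r1 (the higher register number).  */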
15244 case 'Q':
15245 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15246 {
15247 output_operand_lossage ("invalid operand for code '%c'", code);
15248 return;
15249 }
15250
15251 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15252 return;
15253
15254 case 'R':
15255 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15256 {
15257 output_operand_lossage ("invalid operand for code '%c'", code);
15258 return;
15259 }
15260
15261 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
15262 return;
15263
15264 case 'H':
15265 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15266 {
15267 output_operand_lossage ("invalid operand for code '%c'", code);
15268 return;
15269 }
15270
15271 asm_fprintf (stream, "%r", REGNO (x) + 1);
15272 return;
15273
15274 case 'J':
15275 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15276 {
15277 output_operand_lossage ("invalid operand for code '%c'", code);
15278 return;
15279 }
15280
15281 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
15282 return;
15283
15284 case 'K':
15285 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15286 {
15287 output_operand_lossage ("invalid operand for code '%c'", code);
15288 return;
15289 }
15290
15291 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
15292 return;
15293
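/* Print the base register of a memory operand: the address register itself,
   or the register inside a more complex (e.g. autoincrement) address.  */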
15294 case 'm':
15295 asm_fprintf (stream, "%r",
15296 GET_CODE (XEXP (x, 0)) == REG
15297 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
15298 return;
15299
15300 case 'M':
15301 asm_fprintf (stream, "{%r-%r}",
15302 REGNO (x),
15303 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
15304 return;
15305
15306 /* Like 'M', but writing doubleword vector registers, for use by Neon
15307 insns. */
15308 case 'h':
15309 {
15310 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
15311 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
15312 if (numregs == 1)
15313 asm_fprintf (stream, "{d%d}", regno);
15314 else
15315 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
15316 }
15317 return;
15318
15319 case 'd':
15320 /* CONST_TRUE_RTX means always -- that's the default. */
15321 if (x == const_true_rtx)
15322 return;
15323
15324 if (!COMPARISON_P (x))
15325 {
15326 output_operand_lossage ("invalid operand for code '%c'", code);
15327 return;
15328 }
15329
15330 fputs (arm_condition_codes[get_arm_condition_code (x)],
15331 stream);
15332 return;
15333
15334 case 'D':
15335 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
15336 want to do that. */
15337 if (x == const_true_rtx)
15338 {
15339 output_operand_lossage ("instruction never executed");
15340 return;
15341 }
15342 if (!COMPARISON_P (x))
15343 {
15344 output_operand_lossage ("invalid operand for code '%c'", code);
15345 return;
15346 }
15347
15348 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
15349 (get_arm_condition_code (x))],
15350 stream);
15351 return;
15352
15353 /* Cirrus registers can be accessed in a variety of ways:
15354 single floating point (f)
15355 double floating point (d)
15356 32-bit integer (fx)
15357 64-bit integer (dx). */
15358 case 'W': /* Cirrus register in F mode. */
15359 case 'X': /* Cirrus register in D mode. */
15360 case 'Y': /* Cirrus register in FX mode. */
15361 case 'Z': /* Cirrus register in DX mode. */
15362 gcc_assert (GET_CODE (x) == REG
15363 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
15364
15365 fprintf (stream, "mv%s%s",
15366 code == 'W' ? "f"
15367 : code == 'X' ? "d"
15368 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
15369
15370 return;
15371
15372 /* Print a Cirrus register in the mode specified by the register's mode. */
15373 case 'V':
15374 {
15375 int mode = GET_MODE (x);
15376
15377 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
15378 {
15379 output_operand_lossage ("invalid operand for code '%c'", code);
15380 return;
15381 }
15382
15383 fprintf (stream, "mv%s%s",
15384 mode == DFmode ? "d"
15385 : mode == SImode ? "fx"
15386 : mode == DImode ? "dx"
15387 : "f", reg_names[REGNO (x)] + 2);
15388
15389 return;
15390 }
15391
15392 case 'U':
15393 if (GET_CODE (x) != REG
15394 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
15395 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
15396 /* Bad value for wCG register number. */
15397 {
15398 output_operand_lossage ("invalid operand for code '%c'", code);
15399 return;
15400 }
15401
15402 else
15403 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
15404 return;
15405
15406 /* Print an iWMMXt control register name. */
15407 case 'w':
15408 if (GET_CODE (x) != CONST_INT
15409 || INTVAL (x) < 0
15410 || INTVAL (x) >= 16)
15411 /* Bad value for wC register number. */
15412 {
15413 output_operand_lossage ("invalid operand for code '%c'", code);
15414 return;
15415 }
15416
15417 else
15418 {
15419 static const char * wc_reg_names [16] =
15420 {
15421 "wCID", "wCon", "wCSSF", "wCASF",
15422 "wC4", "wC5", "wC6", "wC7",
15423 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
15424 "wC12", "wC13", "wC14", "wC15"
15425 };
15426
15427 fprintf (stream, wc_reg_names [INTVAL (x)]);
15428 }
15429 return;
15430
15431 /* Print the high single-precision register of a VFP double-precision
15432 register. */
15433 case 'p':
15434 {
15435 int mode = GET_MODE (x);
15436 int regno;
15437
15438 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
15439 {
15440 output_operand_lossage ("invalid operand for code '%c'", code);
15441 return;
15442 }
15443
15444 regno = REGNO (x);
15445 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
15446 {
15447 output_operand_lossage ("invalid operand for code '%c'", code);
15448 return;
15449 }
15450
15451 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
15452 }
15453 return;
15454
15455 /* Print a VFP/Neon double precision or quad precision register name. */
15456 case 'P':
15457 case 'q':
15458 {
15459 int mode = GET_MODE (x);
15460 int is_quad = (code == 'q');
15461 int regno;
15462
15463 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
15464 {
15465 output_operand_lossage ("invalid operand for code '%c'", code);
15466 return;
15467 }
15468
15469 if (GET_CODE (x) != REG
15470 || !IS_VFP_REGNUM (REGNO (x)))
15471 {
15472 output_operand_lossage ("invalid operand for code '%c'", code);
15473 return;
15474 }
15475
15476 regno = REGNO (x);
15477 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
15478 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
15479 {
15480 output_operand_lossage ("invalid operand for code '%c'", code);
15481 return;
15482 }
15483
15484 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
15485 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
15486 }
15487 return;
15488
15489 /* These two codes print the low/high doubleword register of a Neon quad
15490 register, respectively. For pair-structure types, can also print
15491 low/high quadword registers. */
15492 case 'e':
15493 case 'f':
15494 {
15495 int mode = GET_MODE (x);
15496 int regno;
15497
15498 if ((GET_MODE_SIZE (mode) != 16
15499 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
15500 {
15501 output_operand_lossage ("invalid operand for code '%c'", code);
15502 return;
15503 }
15504
15505 regno = REGNO (x);
15506 if (!NEON_REGNO_OK_FOR_QUAD (regno))
15507 {
15508 output_operand_lossage ("invalid operand for code '%c'", code);
15509 return;
15510 }
15511
15512 if (GET_MODE_SIZE (mode) == 16)
15513 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
15514 + (code == 'f' ? 1 : 0));
15515 else
15516 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
15517 + (code == 'f' ? 1 : 0));
15518 }
15519 return;
15520
15521 /* Print a VFPv3 floating-point constant, represented as an integer
15522 index. */
15523 case 'G':
15524 {
15525 int index = vfp3_const_double_index (x);
15526 gcc_assert (index != -1);
15527 fprintf (stream, "%d", index);
15528 }
15529 return;
15530
15531 /* Print bits representing opcode features for Neon.
15532
15533 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
15534 and polynomials as unsigned.
15535
15536 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
15537
15538 Bit 2 is 1 for rounding functions, 0 otherwise. */
15539
15540 /* Identify the type as 's', 'u', 'p' or 'f'. */
15541 case 'T':
15542 {
15543 HOST_WIDE_INT bits = INTVAL (x);
15544 fputc ("uspf"[bits & 3], stream);
15545 }
15546 return;
15547
15548 /* Likewise, but signed and unsigned integers are both 'i'. */
15549 case 'F':
15550 {
15551 HOST_WIDE_INT bits = INTVAL (x);
15552 fputc ("iipf"[bits & 3], stream);
15553 }
15554 return;
15555
15556 /* As for 'T', but emit 'u' instead of 'p'. */
15557 case 't':
15558 {
15559 HOST_WIDE_INT bits = INTVAL (x);
15560 fputc ("usuf"[bits & 3], stream);
15561 }
15562 return;
15563
15564 /* Bit 2: rounding (vs none). */
15565 case 'O':
15566 {
15567 HOST_WIDE_INT bits = INTVAL (x);
15568 fputs ((bits & 4) != 0 ? "r" : "", stream);
15569 }
15570 return;
15571
15572 /* Memory operand for vld1/vst1 instruction. */
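/* E.g. a plain register address prints as "[r0]", and a post-increment
   address as "[r0]!".  */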
15573 case 'A':
15574 {
15575 rtx addr;
15576 bool postinc = FALSE;
15577 gcc_assert (GET_CODE (x) == MEM);
15578 addr = XEXP (x, 0);
15579 if (GET_CODE (addr) == POST_INC)
15580 {
15581 postinc = 1;
15582 addr = XEXP (addr, 0);
15583 }
15584 asm_fprintf (stream, "[%r]", REGNO (addr));
15585 if (postinc)
15586 fputs("!", stream);
15587 }
15588 return;
15589
15590 /* Translate an S register number into a D register number and element index. */
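/* E.g. the single-precision register s5 prints as "d2[1]".  */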
15591 case 'y':
15592 {
15593 int mode = GET_MODE (x);
15594 int regno;
15595
15596 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
15597 {
15598 output_operand_lossage ("invalid operand for code '%c'", code);
15599 return;
15600 }
15601
15602 regno = REGNO (x);
15603 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15604 {
15605 output_operand_lossage ("invalid operand for code '%c'", code);
15606 return;
15607 }
15608
15609 regno = regno - FIRST_VFP_REGNUM;
15610 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
15611 }
15612 return;
15613
15614 /* Register specifier for vld1.16/vst1.16. Translate the S register
15615 number into a D register number and element index. */
15616 case 'z':
15617 {
15618 int mode = GET_MODE (x);
15619 int regno;
15620
15621 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
15622 {
15623 output_operand_lossage ("invalid operand for code '%c'", code);
15624 return;
15625 }
15626
15627 regno = REGNO (x);
15628 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
15629 {
15630 output_operand_lossage ("invalid operand for code '%c'", code);
15631 return;
15632 }
15633
15634 regno = regno - FIRST_VFP_REGNUM;
15635 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
15636 }
15637 return;
15638
15639 default:
15640 if (x == 0)
15641 {
15642 output_operand_lossage ("missing operand");
15643 return;
15644 }
15645
15646 switch (GET_CODE (x))
15647 {
15648 case REG:
15649 asm_fprintf (stream, "%r", REGNO (x));
15650 break;
15651
15652 case MEM:
15653 output_memory_reference_mode = GET_MODE (x);
15654 output_address (XEXP (x, 0));
15655 break;
15656
15657 case CONST_DOUBLE:
15658 if (TARGET_NEON)
15659 {
15660 char fpstr[20];
15661 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
15662 sizeof (fpstr), 0, 1);
15663 fprintf (stream, "#%s", fpstr);
15664 }
15665 else
15666 fprintf (stream, "#%s", fp_immediate_constant (x));
15667 break;
15668
15669 default:
15670 gcc_assert (GET_CODE (x) != NEG);
15671 fputc ('#', stream);
15672 if (GET_CODE (x) == HIGH)
15673 {
15674 fputs (":lower16:", stream);
15675 x = XEXP (x, 0);
15676 }
15677
15678 output_addr_const (stream, x);
15679 break;
15680 }
15681 }
15682 }
15683 \f
15684 /* Target hook for assembling integer objects. The ARM version needs to
15685 handle word-sized values specially. */
15686 static bool
15687 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
15688 {
15689 enum machine_mode mode;
15690
15691 if (size == UNITS_PER_WORD && aligned_p)
15692 {
15693 fputs ("\t.word\t", asm_out_file);
15694 output_addr_const (asm_out_file, x);
15695
15696 /* Mark symbols as position independent. We only do this in the
15697 .text segment, not in the .data segment. */
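/* E.g. this emits "\t.word\tfoo(GOTOFF)" for a local symbol foo, or
   "foo(GOT)" for a non-local one (foo being a hypothetical name).  */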
15698 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
15699 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
15700 {
15701 /* See legitimize_pic_address for an explanation of the
15702 TARGET_VXWORKS_RTP check. */
15703 if (TARGET_VXWORKS_RTP
15704 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
15705 fputs ("(GOT)", asm_out_file);
15706 else
15707 fputs ("(GOTOFF)", asm_out_file);
15708 }
15709 fputc ('\n', asm_out_file);
15710 return true;
15711 }
15712
15713 mode = GET_MODE (x);
15714
15715 if (arm_vector_mode_supported_p (mode))
15716 {
15717 int i, units;
15718
15719 gcc_assert (GET_CODE (x) == CONST_VECTOR);
15720
15721 units = CONST_VECTOR_NUNITS (x);
15722 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
15723
15724 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15725 for (i = 0; i < units; i++)
15726 {
15727 rtx elt = CONST_VECTOR_ELT (x, i);
15728 assemble_integer
15729 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
15730 }
15731 else
15732 for (i = 0; i < units; i++)
15733 {
15734 rtx elt = CONST_VECTOR_ELT (x, i);
15735 REAL_VALUE_TYPE rval;
15736
15737 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
15738
15739 assemble_real
15740 (rval, GET_MODE_INNER (mode),
15741 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
15742 }
15743
15744 return true;
15745 }
15746
15747 return default_assemble_integer (x, size, aligned_p);
15748 }
15749
15750 static void
15751 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
15752 {
15753 section *s;
15754
15755 if (!TARGET_AAPCS_BASED)
15756 {
15757 (is_ctor ?
15758 default_named_section_asm_out_constructor
15759 : default_named_section_asm_out_destructor) (symbol, priority);
15760 return;
15761 }
15762
15763 /* Put these in the .init_array section, using a special relocation. */
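/* E.g. a constructor with priority 12345 goes into a section named
   ".init_array.12345"; the "%.5u" below zero-pads shorter priorities.  */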
15764 if (priority != DEFAULT_INIT_PRIORITY)
15765 {
15766 char buf[18];
15767 sprintf (buf, "%s.%.5u",
15768 is_ctor ? ".init_array" : ".fini_array",
15769 priority);
15770 s = get_section (buf, SECTION_WRITE, NULL_TREE);
15771 }
15772 else if (is_ctor)
15773 s = ctors_section;
15774 else
15775 s = dtors_section;
15776
15777 switch_to_section (s);
15778 assemble_align (POINTER_SIZE);
15779 fputs ("\t.word\t", asm_out_file);
15780 output_addr_const (asm_out_file, symbol);
15781 fputs ("(target1)\n", asm_out_file);
15782 }
15783
15784 /* Add a function to the list of static constructors. */
15785
15786 static void
15787 arm_elf_asm_constructor (rtx symbol, int priority)
15788 {
15789 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
15790 }
15791
15792 /* Add a function to the list of static destructors. */
15793
15794 static void
15795 arm_elf_asm_destructor (rtx symbol, int priority)
15796 {
15797 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
15798 }
15799 \f
15800 /* A finite state machine takes care of noticing whether or not instructions
15801 can be conditionally executed, thus decreasing execution time and code
15802 size by deleting branch instructions. The fsm is controlled by
15803 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
15804
15805 /* The states of the fsm controlling condition codes are:
15806 0: normal, do nothing special
15807 1: make ASM_OUTPUT_OPCODE not output this instruction
15808 2: make ASM_OUTPUT_OPCODE not output this instruction
15809 3: make instructions conditional
15810 4: make instructions conditional
15811
15812 State transitions (state->state by whom under condition):
15813 0 -> 1 final_prescan_insn if the `target' is a label
15814 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
15815 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
15816 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
15817 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
15818 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
15819 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
15820 (the target insn is arm_target_insn).
15821
15822 If the jump clobbers the conditions then we use states 2 and 4.
15823
15824 A similar thing can be done with conditional return insns.
15825
15826 XXX In case the `target' is an unconditional branch, this conditionalising
15827 of the instructions always reduces code size, but not always execution
15828 time. But then, I want to reduce the code size to somewhere near what
15829 /bin/cc produces. */
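
/* For illustration (a hypothetical fragment, not produced by this file): a
   conditional branch over a single instruction, such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   is emitted by this machinery as

	cmp	r0, #0
	addne	r1, r1, #1

   i.e. the branch is deleted and the skipped instruction is predicated with
   the inverse condition.  */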
15830
15831 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
15832 instructions. When a COND_EXEC instruction is seen the subsequent
15833 instructions are scanned so that multiple conditional instructions can be
15834 combined into a single IT block. arm_condexec_count and arm_condexec_mask
15835 specify the length and true/false mask for the IT block. These will be
15836 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
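
/* For illustration: with arm_current_cc == ARM_EQ, arm_condexec_masklen == 3
   and arm_condexec_mask == 0x3 (bit N set means instruction N executes when
   the condition holds), thumb2_asm_output_opcode below emits "itte eq", so
   the first two instructions execute if EQ and the third if NE.  */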
15837
15838 /* Returns the index of the ARM condition code string in
15839 `arm_condition_codes'. COMPARISON should be an rtx like
15840 `(eq (...) (...))'. */
15841 static enum arm_cond_code
15842 get_arm_condition_code (rtx comparison)
15843 {
15844 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
15845 enum arm_cond_code code;
15846 enum rtx_code comp_code = GET_CODE (comparison);
15847
15848 if (GET_MODE_CLASS (mode) != MODE_CC)
15849 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
15850 XEXP (comparison, 1));
15851
15852 switch (mode)
15853 {
15854 case CC_DNEmode: code = ARM_NE; goto dominance;
15855 case CC_DEQmode: code = ARM_EQ; goto dominance;
15856 case CC_DGEmode: code = ARM_GE; goto dominance;
15857 case CC_DGTmode: code = ARM_GT; goto dominance;
15858 case CC_DLEmode: code = ARM_LE; goto dominance;
15859 case CC_DLTmode: code = ARM_LT; goto dominance;
15860 case CC_DGEUmode: code = ARM_CS; goto dominance;
15861 case CC_DGTUmode: code = ARM_HI; goto dominance;
15862 case CC_DLEUmode: code = ARM_LS; goto dominance;
15863 case CC_DLTUmode: code = ARM_CC;
15864
15865 dominance:
15866 gcc_assert (comp_code == EQ || comp_code == NE);
15867
15868 if (comp_code == EQ)
15869 return ARM_INVERSE_CONDITION_CODE (code);
15870 return code;
15871
15872 case CC_NOOVmode:
15873 switch (comp_code)
15874 {
15875 case NE: return ARM_NE;
15876 case EQ: return ARM_EQ;
15877 case GE: return ARM_PL;
15878 case LT: return ARM_MI;
15879 default: gcc_unreachable ();
15880 }
15881
15882 case CC_Zmode:
15883 switch (comp_code)
15884 {
15885 case NE: return ARM_NE;
15886 case EQ: return ARM_EQ;
15887 default: gcc_unreachable ();
15888 }
15889
15890 case CC_Nmode:
15891 switch (comp_code)
15892 {
15893 case NE: return ARM_MI;
15894 case EQ: return ARM_PL;
15895 default: gcc_unreachable ();
15896 }
15897
15898 case CCFPEmode:
15899 case CCFPmode:
15900 /* These encodings assume that AC=1 in the FPA system control
15901 byte. This allows us to handle all cases except UNEQ and
15902 LTGT. */
15903 switch (comp_code)
15904 {
15905 case GE: return ARM_GE;
15906 case GT: return ARM_GT;
15907 case LE: return ARM_LS;
15908 case LT: return ARM_MI;
15909 case NE: return ARM_NE;
15910 case EQ: return ARM_EQ;
15911 case ORDERED: return ARM_VC;
15912 case UNORDERED: return ARM_VS;
15913 case UNLT: return ARM_LT;
15914 case UNLE: return ARM_LE;
15915 case UNGT: return ARM_HI;
15916 case UNGE: return ARM_PL;
15917 /* UNEQ and LTGT do not have a representation. */
15918 case UNEQ: /* Fall through. */
15919 case LTGT: /* Fall through. */
15920 default: gcc_unreachable ();
15921 }
15922
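/* The operands of the original comparison were swapped when the CC value
   was computed, so each condition must be swapped too (e.g. GE becomes LE
   and GTU becomes CC, i.e. unsigned lower).  */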
15923 case CC_SWPmode:
15924 switch (comp_code)
15925 {
15926 case NE: return ARM_NE;
15927 case EQ: return ARM_EQ;
15928 case GE: return ARM_LE;
15929 case GT: return ARM_LT;
15930 case LE: return ARM_GE;
15931 case LT: return ARM_GT;
15932 case GEU: return ARM_LS;
15933 case GTU: return ARM_CC;
15934 case LEU: return ARM_CS;
15935 case LTU: return ARM_HI;
15936 default: gcc_unreachable ();
15937 }
15938
15939 case CC_Cmode:
15940 switch (comp_code)
15941 {
15942 case LTU: return ARM_CS;
15943 case GEU: return ARM_CC;
15944 default: gcc_unreachable ();
15945 }
15946
15947 case CCmode:
15948 switch (comp_code)
15949 {
15950 case NE: return ARM_NE;
15951 case EQ: return ARM_EQ;
15952 case GE: return ARM_GE;
15953 case GT: return ARM_GT;
15954 case LE: return ARM_LE;
15955 case LT: return ARM_LT;
15956 case GEU: return ARM_CS;
15957 case GTU: return ARM_HI;
15958 case LEU: return ARM_LS;
15959 case LTU: return ARM_CC;
15960 default: gcc_unreachable ();
15961 }
15962
15963 default: gcc_unreachable ();
15964 }
15965 }
15966
15967 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
15968 instructions. */
15969 void
15970 thumb2_final_prescan_insn (rtx insn)
15971 {
15972 rtx first_insn = insn;
15973 rtx body = PATTERN (insn);
15974 rtx predicate;
15975 enum arm_cond_code code;
15976 int n;
15977 int mask;
15978
15979 /* Remove the previous insn from the count of insns to be output. */
15980 if (arm_condexec_count)
15981 arm_condexec_count--;
15982
15983 /* Nothing to do if we are already inside a conditional block. */
15984 if (arm_condexec_count)
15985 return;
15986
15987 if (GET_CODE (body) != COND_EXEC)
15988 return;
15989
15990 /* Conditional jumps are implemented directly. */
15991 if (GET_CODE (insn) == JUMP_INSN)
15992 return;
15993
15994 predicate = COND_EXEC_TEST (body);
15995 arm_current_cc = get_arm_condition_code (predicate);
15996
15997 n = get_attr_ce_count (insn);
15998 arm_condexec_count = 1;
15999 arm_condexec_mask = (1 << n) - 1;
16000 arm_condexec_masklen = n;
16001 /* See if subsequent instructions can be combined into the same block. */
16002 for (;;)
16003 {
16004 insn = next_nonnote_insn (insn);
16005
16006 /* Jumping into the middle of an IT block is illegal, so a label or
16007 barrier terminates the block. */
16008 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
16009 break;
16010
16011 body = PATTERN (insn);
16012 /* USE and CLOBBER aren't really insns, so just skip them. */
16013 if (GET_CODE (body) == USE
16014 || GET_CODE (body) == CLOBBER)
16015 continue;
16016
16017 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
16018 if (GET_CODE (body) != COND_EXEC)
16019 break;
16020 /* Allow up to 4 conditionally executed instructions in a block. */
16021 n = get_attr_ce_count (insn);
16022 if (arm_condexec_masklen + n > 4)
16023 break;
16024
16025 predicate = COND_EXEC_TEST (body);
16026 code = get_arm_condition_code (predicate);
16027 mask = (1 << n) - 1;
16028 if (arm_current_cc == code)
16029 arm_condexec_mask |= (mask << arm_condexec_masklen);
16030 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
16031 break;
16032
16033 arm_condexec_count++;
16034 arm_condexec_masklen += n;
16035
16036 /* A jump must be the last instruction in a conditional block. */
16037 if (GET_CODE(insn) == JUMP_INSN)
16038 break;
16039 }
16040 /* Restore recog_data (getting the attributes of other insns can
16041 destroy this array, but final.c assumes that it remains intact
16042 across this call). */
16043 extract_constrain_insn_cached (first_insn);
16044 }
16045
16046 void
16047 arm_final_prescan_insn (rtx insn)
16048 {
16049 /* BODY will hold the body of INSN. */
16050 rtx body = PATTERN (insn);
16051
16052 /* This will be 1 if trying to repeat the trick, and things need to be
16053 reversed if it appears to fail. */
16054 int reverse = 0;
16055
16056 /* If we start with a return insn, we only succeed if we find another one. */
16057 int seeking_return = 0;
16058
16059 /* START_INSN will hold the insn from where we start looking. This is the
16060 first insn after the following code_label if REVERSE is true. */
16061 rtx start_insn = insn;
16062
16063 /* If in state 4, check if the target branch is reached, in order to
16064 change back to state 0. */
16065 if (arm_ccfsm_state == 4)
16066 {
16067 if (insn == arm_target_insn)
16068 {
16069 arm_target_insn = NULL;
16070 arm_ccfsm_state = 0;
16071 }
16072 return;
16073 }
16074
16075 /* If in state 3, it is possible to repeat the trick, if this insn is an
16076 unconditional branch to a label, and immediately following this branch
16077 is the previous target label which is only used once, and the label this
16078 branch jumps to is not too far off. */
16079 if (arm_ccfsm_state == 3)
16080 {
16081 if (simplejump_p (insn))
16082 {
16083 start_insn = next_nonnote_insn (start_insn);
16084 if (GET_CODE (start_insn) == BARRIER)
16085 {
16086 /* XXX Isn't this always a barrier? */
16087 start_insn = next_nonnote_insn (start_insn);
16088 }
16089 if (GET_CODE (start_insn) == CODE_LABEL
16090 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16091 && LABEL_NUSES (start_insn) == 1)
16092 reverse = TRUE;
16093 else
16094 return;
16095 }
16096 else if (GET_CODE (body) == RETURN)
16097 {
16098 start_insn = next_nonnote_insn (start_insn);
16099 if (GET_CODE (start_insn) == BARRIER)
16100 start_insn = next_nonnote_insn (start_insn);
16101 if (GET_CODE (start_insn) == CODE_LABEL
16102 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
16103 && LABEL_NUSES (start_insn) == 1)
16104 {
16105 reverse = TRUE;
16106 seeking_return = 1;
16107 }
16108 else
16109 return;
16110 }
16111 else
16112 return;
16113 }
16114
16115 gcc_assert (!arm_ccfsm_state || reverse);
16116 if (GET_CODE (insn) != JUMP_INSN)
16117 return;
16118
16119 /* This jump might be paralleled with a clobber of the condition codes;
16120 the jump should always come first. */
16121 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
16122 body = XVECEXP (body, 0, 0);
16123
16124 if (reverse
16125 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
16126 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
16127 {
16128 int insns_skipped;
16129 int fail = FALSE, succeed = FALSE;
16130 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
16131 int then_not_else = TRUE;
16132 rtx this_insn = start_insn, label = 0;
16133
16134 /* Register the insn jumped to. */
16135 if (reverse)
16136 {
16137 if (!seeking_return)
16138 label = XEXP (SET_SRC (body), 0);
16139 }
16140 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
16141 label = XEXP (XEXP (SET_SRC (body), 1), 0);
16142 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
16143 {
16144 label = XEXP (XEXP (SET_SRC (body), 2), 0);
16145 then_not_else = FALSE;
16146 }
16147 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
16148 seeking_return = 1;
16149 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
16150 {
16151 seeking_return = 1;
16152 then_not_else = FALSE;
16153 }
16154 else
16155 gcc_unreachable ();
16156
16157 /* See how many insns this branch skips, and what kind of insns. If all
16158 insns are okay, and the label or unconditional branch to the same
16159 label is not too far away, succeed. */
16160 for (insns_skipped = 0;
16161 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
16162 {
16163 rtx scanbody;
16164
16165 this_insn = next_nonnote_insn (this_insn);
16166 if (!this_insn)
16167 break;
16168
16169 switch (GET_CODE (this_insn))
16170 {
16171 case CODE_LABEL:
16172 /* Succeed if it is the target label, otherwise fail since
16173 control falls in from somewhere else. */
16174 if (this_insn == label)
16175 {
16176 arm_ccfsm_state = 1;
16177 succeed = TRUE;
16178 }
16179 else
16180 fail = TRUE;
16181 break;
16182
16183 case BARRIER:
16184 /* Succeed if the following insn is the target label.
16185 Otherwise fail.
16186 If return insns are used then the last insn in a function
16187 will be a barrier. */
16188 this_insn = next_nonnote_insn (this_insn);
16189 if (this_insn && this_insn == label)
16190 {
16191 arm_ccfsm_state = 1;
16192 succeed = TRUE;
16193 }
16194 else
16195 fail = TRUE;
16196 break;
16197
16198 case CALL_INSN:
16199 /* The AAPCS says that conditional calls should not be
16200 used since they make interworking inefficient (the
16201 linker can't transform BL<cond> into BLX). That's
16202 only a problem if the machine has BLX. */
16203 if (arm_arch5)
16204 {
16205 fail = TRUE;
16206 break;
16207 }
16208
16209 /* Succeed if the following insn is the target label, or
16210 if the following two insns are a barrier and the
16211 target label. */
16212 this_insn = next_nonnote_insn (this_insn);
16213 if (this_insn && GET_CODE (this_insn) == BARRIER)
16214 this_insn = next_nonnote_insn (this_insn);
16215
16216 if (this_insn && this_insn == label
16217 && insns_skipped < max_insns_skipped)
16218 {
16219 arm_ccfsm_state = 1;
16220 succeed = TRUE;
16221 }
16222 else
16223 fail = TRUE;
16224 break;
16225
16226 case JUMP_INSN:
16227 /* If this is an unconditional branch to the same label, succeed.
16228 If it is to another label, do nothing. If it is conditional,
16229 fail. */
16230 /* XXX Probably, the tests for SET and the PC are
16231 unnecessary. */
16232
16233 scanbody = PATTERN (this_insn);
16234 if (GET_CODE (scanbody) == SET
16235 && GET_CODE (SET_DEST (scanbody)) == PC)
16236 {
16237 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
16238 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
16239 {
16240 arm_ccfsm_state = 2;
16241 succeed = TRUE;
16242 }
16243 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
16244 fail = TRUE;
16245 }
16246 /* Fail if a conditional return is undesirable (e.g. on a
16247 StrongARM), but still allow this if optimizing for size. */
16248 else if (GET_CODE (scanbody) == RETURN
16249 && !use_return_insn (TRUE, NULL)
16250 && !optimize_size)
16251 fail = TRUE;
16252 else if (GET_CODE (scanbody) == RETURN
16253 && seeking_return)
16254 {
16255 arm_ccfsm_state = 2;
16256 succeed = TRUE;
16257 }
16258 else if (GET_CODE (scanbody) == PARALLEL)
16259 {
16260 switch (get_attr_conds (this_insn))
16261 {
16262 case CONDS_NOCOND:
16263 break;
16264 default:
16265 fail = TRUE;
16266 break;
16267 }
16268 }
16269 else
16270 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
16271
16272 break;
16273
16274 case INSN:
16275 /* Instructions using or affecting the condition codes make it
16276 fail. */
16277 scanbody = PATTERN (this_insn);
16278 if (!(GET_CODE (scanbody) == SET
16279 || GET_CODE (scanbody) == PARALLEL)
16280 || get_attr_conds (this_insn) != CONDS_NOCOND)
16281 fail = TRUE;
16282
16283 /* A conditional Cirrus instruction must be followed by
16284 a non-Cirrus instruction. However, since we
16285 conditionalize instructions in this function and by
16286 the time we get here we can't add instructions
16287 (nops), because shorten_branches() has already been
16288 called, we will disable conditionalizing Cirrus
16289 instructions to be safe. */
16290 if (GET_CODE (scanbody) != USE
16291 && GET_CODE (scanbody) != CLOBBER
16292 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
16293 fail = TRUE;
16294 break;
16295
16296 default:
16297 break;
16298 }
16299 }
16300 if (succeed)
16301 {
16302 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
16303 arm_target_label = CODE_LABEL_NUMBER (label);
16304 else
16305 {
16306 gcc_assert (seeking_return || arm_ccfsm_state == 2);
16307
16308 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
16309 {
16310 this_insn = next_nonnote_insn (this_insn);
16311 gcc_assert (!this_insn
16312 || (GET_CODE (this_insn) != BARRIER
16313 && GET_CODE (this_insn) != CODE_LABEL));
16314 }
16315 if (!this_insn)
16316 {
16317 /* Oh dear!  We ran off the end; give up. */
16318 extract_constrain_insn_cached (insn);
16319 arm_ccfsm_state = 0;
16320 arm_target_insn = NULL;
16321 return;
16322 }
16323 arm_target_insn = this_insn;
16324 }
16325
16326 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
16327 what it was. */
16328 if (!reverse)
16329 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
16330
16331 if (reverse || then_not_else)
16332 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
16333 }
16334
16335 /* Restore recog_data (getting the attributes of other insns can
16336 destroy this array, but final.c assumes that it remains intact
16337 across this call). */
16338 extract_constrain_insn_cached (insn);
16339 }
16340 }
16341
16342 /* Output IT instructions. */
16343 void
16344 thumb2_asm_output_opcode (FILE * stream)
16345 {
16346 char buff[5];
16347 int n;
16348
16349 if (arm_condexec_mask)
16350 {
16351 for (n = 0; n < arm_condexec_masklen; n++)
16352 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
16353 buff[n] = 0;
16354 asm_fprintf(stream, "i%s\t%s\n\t", buff,
16355 arm_condition_codes[arm_current_cc]);
16356 arm_condexec_mask = 0;
16357 }
16358 }
16359
16360 /* Returns true if REGNO is a valid register
16361 for holding a quantity of type MODE. */
16362 int
16363 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
16364 {
16365 if (GET_MODE_CLASS (mode) == MODE_CC)
16366 return (regno == CC_REGNUM
16367 || (TARGET_HARD_FLOAT && TARGET_VFP
16368 && regno == VFPCC_REGNUM));
16369
16370 if (TARGET_THUMB1)
16371 /* For the Thumb we only allow values bigger than SImode in
16372 registers 0 - 6, so that there is always a second low
16373 register available to hold the upper part of the value.
16374 We probably ought to ensure that the register is the
16375 start of an even-numbered register pair. */
16376 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
16377
16378 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
16379 && IS_CIRRUS_REGNUM (regno))
16380 /* We have outlawed SI values in Cirrus registers because they
16381 reside in the lower 32 bits, but SF values reside in the
16382 upper 32 bits. This causes gcc all sorts of grief. We can't
16383 even split the registers into pairs because Cirrus SI values
16384 get sign extended to 64 bits -- aldyh. */
16385 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
16386
16387 if (TARGET_HARD_FLOAT && TARGET_VFP
16388 && IS_VFP_REGNUM (regno))
16389 {
16390 if (mode == SFmode || mode == SImode)
16391 return VFP_REGNO_OK_FOR_SINGLE (regno);
16392
16393 if (mode == DFmode)
16394 return VFP_REGNO_OK_FOR_DOUBLE (regno);
16395
16396 /* VFP registers can hold HFmode values, but there is no point in
16397 putting them there unless we have hardware conversion insns. */
16398 if (mode == HFmode)
16399 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
16400
16401 if (TARGET_NEON)
16402 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
16403 || (VALID_NEON_QREG_MODE (mode)
16404 && NEON_REGNO_OK_FOR_QUAD (regno))
16405 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
16406 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
16407 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
16408 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
16409 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
16410
16411 return FALSE;
16412 }
16413
16414 if (TARGET_REALLY_IWMMXT)
16415 {
16416 if (IS_IWMMXT_GR_REGNUM (regno))
16417 return mode == SImode;
16418
16419 if (IS_IWMMXT_REGNUM (regno))
16420 return VALID_IWMMXT_REG_MODE (mode);
16421 }
16422
16423 /* We allow almost any value to be stored in the general registers.
16424 Restrict doubleword quantities to even register pairs so that we can
16425 use ldrd. Do not allow very large Neon structure opaque modes in
16426 general registers; they would use too many. */
16427 if (regno <= LAST_ARM_REGNUM)
16428 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
16429 && ARM_NUM_REGS (mode) <= 4;
16430
16431 if (regno == FRAME_POINTER_REGNUM
16432 || regno == ARG_POINTER_REGNUM)
16433 /* We only allow integers in the fake hard registers. */
16434 return GET_MODE_CLASS (mode) == MODE_INT;
16435
16436 /* The only registers left are the FPA registers
16437 which we only allow to hold FP values. */
16438 return (TARGET_HARD_FLOAT && TARGET_FPA
16439 && GET_MODE_CLASS (mode) == MODE_FLOAT
16440 && regno >= FIRST_FPA_REGNUM
16441 && regno <= LAST_FPA_REGNUM);
16442 }
16443
16444 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
16445 not used in arm mode. */
16446
16447 enum reg_class
16448 arm_regno_class (int regno)
16449 {
16450 if (TARGET_THUMB1)
16451 {
16452 if (regno == STACK_POINTER_REGNUM)
16453 return STACK_REG;
16454 if (regno == CC_REGNUM)
16455 return CC_REG;
16456 if (regno < 8)
16457 return LO_REGS;
16458 return HI_REGS;
16459 }
16460
16461 if (TARGET_THUMB2 && regno < 8)
16462 return LO_REGS;
16463
16464 if ( regno <= LAST_ARM_REGNUM
16465 || regno == FRAME_POINTER_REGNUM
16466 || regno == ARG_POINTER_REGNUM)
16467 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
16468
16469 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
16470 return TARGET_THUMB2 ? CC_REG : NO_REGS;
16471
16472 if (IS_CIRRUS_REGNUM (regno))
16473 return CIRRUS_REGS;
16474
16475 if (IS_VFP_REGNUM (regno))
16476 {
16477 if (regno <= D7_VFP_REGNUM)
16478 return VFP_D0_D7_REGS;
16479 else if (regno <= LAST_LO_VFP_REGNUM)
16480 return VFP_LO_REGS;
16481 else
16482 return VFP_HI_REGS;
16483 }
16484
16485 if (IS_IWMMXT_REGNUM (regno))
16486 return IWMMXT_REGS;
16487
16488 if (IS_IWMMXT_GR_REGNUM (regno))
16489 return IWMMXT_GR_REGS;
16490
16491 return FPA_REGS;
16492 }
16493
16494 /* Handle a special case when computing the offset
16495 of an argument from the frame pointer. */
16496 int
16497 arm_debugger_arg_offset (int value, rtx addr)
16498 {
16499 rtx insn;
16500
16501 /* We are only interested in the case where dbxout_parms() failed to compute the offset. */
16502 if (value != 0)
16503 return 0;
16504
16505 /* We can only cope with the case where the address is held in a register. */
16506 if (GET_CODE (addr) != REG)
16507 return 0;
16508
16509 /* If we are using the frame pointer to point at the argument, then
16510 an offset of 0 is correct. */
16511 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
16512 return 0;
16513
16514 /* If we are using the stack pointer to point at the
16515 argument, then an offset of 0 is correct. */
16516 /* ??? Check this is consistent with thumb2 frame layout. */
16517 if ((TARGET_THUMB || !frame_pointer_needed)
16518 && REGNO (addr) == SP_REGNUM)
16519 return 0;
16520
16521 /* Oh dear. The argument is pointed to by a register rather
16522 than being held in a register, or being stored at a known
16523 offset from the frame pointer. Since GDB only understands
16524 those two kinds of argument we must translate the address
16525 held in the register into an offset from the frame pointer.
16526 We do this by searching through the insns for the function
16527 looking to see where this register gets its value. If the
16528 register is initialized from the frame pointer plus an offset
16529 then we are in luck and we can continue, otherwise we give up.
16530
16531 This code is exercised by producing debugging information
16532 for a function with arguments like this:
16533
16534 double func (double a, double b, int c, double d) {return d;}
16535
16536 Without this code the stab for parameter 'd' will be set to
16537 an offset of 0 from the frame pointer, rather than 8. */
16538
16539 /* The if() statement says:
16540
16541 If the insn is a normal instruction
16542 and if the insn is setting the value in a register
16543 and if the register being set is the register holding the address of the argument
16544 and if the address is computed by an addition
16545 that involves adding to a register
16546 which is the frame pointer
16547 a constant integer
16548
16549 then... */
16550
16551 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16552 {
16553 if ( GET_CODE (insn) == INSN
16554 && GET_CODE (PATTERN (insn)) == SET
16555 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
16556 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
16557 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
16558 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
16559 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
16560 )
16561 {
16562 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
16563
16564 break;
16565 }
16566 }
16567
16568 if (value == 0)
16569 {
16570 debug_rtx (addr);
16571 warning (0, "unable to compute real location of stacked parameter");
16572 value = 8; /* XXX magic hack */
16573 }
16574
16575 return value;
16576 }
16577 \f
16578 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
16579 do \
16580 { \
16581 if ((MASK) & insn_flags) \
16582 add_builtin_function ((NAME), (TYPE), (CODE), \
16583 BUILT_IN_MD, NULL, NULL_TREE); \
16584 } \
16585 while (0)
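
/* Illustrative use (a hypothetical call, for exposition only):
     def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx",
		   int_ftype_int, ARM_BUILTIN_GETWCX);
   registers the builtin only when the FL_IWMMXT bit is set in insn_flags.  */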
16586
16587 struct builtin_description
16588 {
16589 const unsigned int mask;
16590 const enum insn_code icode;
16591 const char * const name;
16592 const enum arm_builtins code;
16593 const enum rtx_code comparison;
16594 const unsigned int flag;
16595 };
16596
16597 static const struct builtin_description bdesc_2arg[] =
16598 {
16599 #define IWMMXT_BUILTIN(code, string, builtin) \
16600 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
16601 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
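
/* For example, IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) expands to
   { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
     ARM_BUILTIN_WADDB, UNKNOWN, 0 },  */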
16602
16603 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
16604 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
16605 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
16606 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
16607 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
16608 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
16609 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
16610 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
16611 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
16612 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
16613 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
16614 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
16615 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
16616 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
16617 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
16618 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
16619 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
16620 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
16621 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
16622 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
16623 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
16624 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
16625 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
16626 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
16627 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
16628 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
16629 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
16630 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
16631 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
16632 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
16633 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
16634 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
16635 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
16636 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
16637 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
16638 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
16639 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
16640 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
16641 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
16642 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
16643 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
16644 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
16645 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
16646 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
16647 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
16648 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
16649 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
16650 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
16651 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
16652 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
16653 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
16654 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
16655 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
16656 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
16657 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
16658 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
16659 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
16660 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
16661
16662 #define IWMMXT_BUILTIN2(code, builtin) \
16663 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
16664
16665 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
16666 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
16667 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
16668 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
16669 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
16670 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
16671 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
16672 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
16673 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
16674 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
16675 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
16676 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
16677 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
16678 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
16679 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
16680 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
16681 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
16682 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
16683 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
16684 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
16685 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
16686 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
16687 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
16688 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
16689 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
16690 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
16691 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
16692 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
16693 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
16694 IWMMXT_BUILTIN2 (rordi3, WRORDI)
16695 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
16696 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
16697 };
16698
16699 static const struct builtin_description bdesc_1arg[] =
16700 {
16701 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
16702 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
16703 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
16704 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
16705 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
16706 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
16707 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
16708 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
16709 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
16710 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
16711 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
16712 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
16713 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
16714 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
16715 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
16716 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
16717 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
16718 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
16719 };
16720
16721 /* Set up all the iWMMXt builtins. This is
16722 not called if TARGET_IWMMXT is zero. */
16723
16724 static void
16725 arm_init_iwmmxt_builtins (void)
16726 {
16727 const struct builtin_description * d;
16728 size_t i;
16729 tree endlink = void_list_node;
16730
16731 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16732 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16733 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
16734
16735 tree int_ftype_int
16736 = build_function_type (integer_type_node,
16737 tree_cons (NULL_TREE, integer_type_node, endlink));
16738 tree v8qi_ftype_v8qi_v8qi_int
16739 = build_function_type (V8QI_type_node,
16740 tree_cons (NULL_TREE, V8QI_type_node,
16741 tree_cons (NULL_TREE, V8QI_type_node,
16742 tree_cons (NULL_TREE,
16743 integer_type_node,
16744 endlink))));
16745 tree v4hi_ftype_v4hi_int
16746 = build_function_type (V4HI_type_node,
16747 tree_cons (NULL_TREE, V4HI_type_node,
16748 tree_cons (NULL_TREE, integer_type_node,
16749 endlink)));
16750 tree v2si_ftype_v2si_int
16751 = build_function_type (V2SI_type_node,
16752 tree_cons (NULL_TREE, V2SI_type_node,
16753 tree_cons (NULL_TREE, integer_type_node,
16754 endlink)));
16755 tree v2si_ftype_di_di
16756 = build_function_type (V2SI_type_node,
16757 tree_cons (NULL_TREE, long_long_integer_type_node,
16758 tree_cons (NULL_TREE, long_long_integer_type_node,
16759 endlink)));
16760 tree di_ftype_di_int
16761 = build_function_type (long_long_integer_type_node,
16762 tree_cons (NULL_TREE, long_long_integer_type_node,
16763 tree_cons (NULL_TREE, integer_type_node,
16764 endlink)));
16765 tree di_ftype_di_int_int
16766 = build_function_type (long_long_integer_type_node,
16767 tree_cons (NULL_TREE, long_long_integer_type_node,
16768 tree_cons (NULL_TREE, integer_type_node,
16769 tree_cons (NULL_TREE,
16770 integer_type_node,
16771 endlink))));
16772 tree int_ftype_v8qi
16773 = build_function_type (integer_type_node,
16774 tree_cons (NULL_TREE, V8QI_type_node,
16775 endlink));
16776 tree int_ftype_v4hi
16777 = build_function_type (integer_type_node,
16778 tree_cons (NULL_TREE, V4HI_type_node,
16779 endlink));
16780 tree int_ftype_v2si
16781 = build_function_type (integer_type_node,
16782 tree_cons (NULL_TREE, V2SI_type_node,
16783 endlink));
16784 tree int_ftype_v8qi_int
16785 = build_function_type (integer_type_node,
16786 tree_cons (NULL_TREE, V8QI_type_node,
16787 tree_cons (NULL_TREE, integer_type_node,
16788 endlink)));
16789 tree int_ftype_v4hi_int
16790 = build_function_type (integer_type_node,
16791 tree_cons (NULL_TREE, V4HI_type_node,
16792 tree_cons (NULL_TREE, integer_type_node,
16793 endlink)));
16794 tree int_ftype_v2si_int
16795 = build_function_type (integer_type_node,
16796 tree_cons (NULL_TREE, V2SI_type_node,
16797 tree_cons (NULL_TREE, integer_type_node,
16798 endlink)));
16799 tree v8qi_ftype_v8qi_int_int
16800 = build_function_type (V8QI_type_node,
16801 tree_cons (NULL_TREE, V8QI_type_node,
16802 tree_cons (NULL_TREE, integer_type_node,
16803 tree_cons (NULL_TREE,
16804 integer_type_node,
16805 endlink))));
16806 tree v4hi_ftype_v4hi_int_int
16807 = build_function_type (V4HI_type_node,
16808 tree_cons (NULL_TREE, V4HI_type_node,
16809 tree_cons (NULL_TREE, integer_type_node,
16810 tree_cons (NULL_TREE,
16811 integer_type_node,
16812 endlink))));
16813 tree v2si_ftype_v2si_int_int
16814 = build_function_type (V2SI_type_node,
16815 tree_cons (NULL_TREE, V2SI_type_node,
16816 tree_cons (NULL_TREE, integer_type_node,
16817 tree_cons (NULL_TREE,
16818 integer_type_node,
16819 endlink))));
16820 /* Miscellaneous. */
16821 tree v8qi_ftype_v4hi_v4hi
16822 = build_function_type (V8QI_type_node,
16823 tree_cons (NULL_TREE, V4HI_type_node,
16824 tree_cons (NULL_TREE, V4HI_type_node,
16825 endlink)));
16826 tree v4hi_ftype_v2si_v2si
16827 = build_function_type (V4HI_type_node,
16828 tree_cons (NULL_TREE, V2SI_type_node,
16829 tree_cons (NULL_TREE, V2SI_type_node,
16830 endlink)));
16831 tree v2si_ftype_v4hi_v4hi
16832 = build_function_type (V2SI_type_node,
16833 tree_cons (NULL_TREE, V4HI_type_node,
16834 tree_cons (NULL_TREE, V4HI_type_node,
16835 endlink)));
16836 tree v2si_ftype_v8qi_v8qi
16837 = build_function_type (V2SI_type_node,
16838 tree_cons (NULL_TREE, V8QI_type_node,
16839 tree_cons (NULL_TREE, V8QI_type_node,
16840 endlink)));
16841 tree v4hi_ftype_v4hi_di
16842 = build_function_type (V4HI_type_node,
16843 tree_cons (NULL_TREE, V4HI_type_node,
16844 tree_cons (NULL_TREE,
16845 long_long_integer_type_node,
16846 endlink)));
16847 tree v2si_ftype_v2si_di
16848 = build_function_type (V2SI_type_node,
16849 tree_cons (NULL_TREE, V2SI_type_node,
16850 tree_cons (NULL_TREE,
16851 long_long_integer_type_node,
16852 endlink)));
16853 tree void_ftype_int_int
16854 = build_function_type (void_type_node,
16855 tree_cons (NULL_TREE, integer_type_node,
16856 tree_cons (NULL_TREE, integer_type_node,
16857 endlink)));
16858 tree di_ftype_void
16859 = build_function_type (long_long_unsigned_type_node, endlink);
16860 tree di_ftype_v8qi
16861 = build_function_type (long_long_integer_type_node,
16862 tree_cons (NULL_TREE, V8QI_type_node,
16863 endlink));
16864 tree di_ftype_v4hi
16865 = build_function_type (long_long_integer_type_node,
16866 tree_cons (NULL_TREE, V4HI_type_node,
16867 endlink));
16868 tree di_ftype_v2si
16869 = build_function_type (long_long_integer_type_node,
16870 tree_cons (NULL_TREE, V2SI_type_node,
16871 endlink));
16872 tree v2si_ftype_v4hi
16873 = build_function_type (V2SI_type_node,
16874 tree_cons (NULL_TREE, V4HI_type_node,
16875 endlink));
16876 tree v4hi_ftype_v8qi
16877 = build_function_type (V4HI_type_node,
16878 tree_cons (NULL_TREE, V8QI_type_node,
16879 endlink));
16880
16881 tree di_ftype_di_v4hi_v4hi
16882 = build_function_type (long_long_unsigned_type_node,
16883 tree_cons (NULL_TREE,
16884 long_long_unsigned_type_node,
16885 tree_cons (NULL_TREE, V4HI_type_node,
16886 tree_cons (NULL_TREE,
16887 V4HI_type_node,
16888 endlink))));
16889
16890 tree di_ftype_v4hi_v4hi
16891 = build_function_type (long_long_unsigned_type_node,
16892 tree_cons (NULL_TREE, V4HI_type_node,
16893 tree_cons (NULL_TREE, V4HI_type_node,
16894 endlink)));
16895
16896 /* Normal vector binops. */
16897 tree v8qi_ftype_v8qi_v8qi
16898 = build_function_type (V8QI_type_node,
16899 tree_cons (NULL_TREE, V8QI_type_node,
16900 tree_cons (NULL_TREE, V8QI_type_node,
16901 endlink)));
16902 tree v4hi_ftype_v4hi_v4hi
16903 = build_function_type (V4HI_type_node,
16904 tree_cons (NULL_TREE, V4HI_type_node,
16905 tree_cons (NULL_TREE, V4HI_type_node,
16906 endlink)));
16907 tree v2si_ftype_v2si_v2si
16908 = build_function_type (V2SI_type_node,
16909 tree_cons (NULL_TREE, V2SI_type_node,
16910 tree_cons (NULL_TREE, V2SI_type_node,
16911 endlink)));
16912 tree di_ftype_di_di
16913 = build_function_type (long_long_unsigned_type_node,
16914 tree_cons (NULL_TREE, long_long_unsigned_type_node,
16915 tree_cons (NULL_TREE,
16916 long_long_unsigned_type_node,
16917 endlink)));
16918
16919 /* Add all builtins that are more or less simple operations on two
16920 operands. */
16921 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16922 {
16923 /* Use one of the operands; the target can have a different mode for
16924 mask-generating compares. */
16925 enum machine_mode mode;
16926 tree type;
16927
16928 if (d->name == 0)
16929 continue;
16930
16931 mode = insn_data[d->icode].operand[1].mode;
16932
16933 switch (mode)
16934 {
16935 case V8QImode:
16936 type = v8qi_ftype_v8qi_v8qi;
16937 break;
16938 case V4HImode:
16939 type = v4hi_ftype_v4hi_v4hi;
16940 break;
16941 case V2SImode:
16942 type = v2si_ftype_v2si_v2si;
16943 break;
16944 case DImode:
16945 type = di_ftype_di_di;
16946 break;
16947
16948 default:
16949 gcc_unreachable ();
16950 }
16951
16952 def_mbuiltin (d->mask, d->name, type, d->code);
16953 }
16954
16955 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
16956 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
16957 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
16958 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
16959
16960 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
16961 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
16962 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
16963 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
16964 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
16965 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
16966
16967 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
16968 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
16969 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
16970 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
16971 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
16972 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
16973
16974 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
16975 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
16976 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
16977 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
16978 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
16979 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
16980
16981 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
16982 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
16983 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
16984 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
16985 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
16986 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
16987
16988 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
16989
16990 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
16991 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
16992 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
16993 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
16994
16995 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
16996 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
16997 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
16998 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
16999 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
17000 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
17001 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
17002 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
17003 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
17004
17005 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
17006 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
17007 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
17008
17009 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
17010 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
17011 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
17012
17013 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
17014 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
17015 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
17016 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
17017 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
17018 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
17019
17020 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
17021 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
17022 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
17023 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
17024 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
17025 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
17026 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
17027 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
17028 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
17029 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
17030 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
17031 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
17032
17033 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
17034 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
17035 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
17036 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
17037
17038 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
17039 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
17040 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
17041 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
17042 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
17043 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
17044 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
17045 }
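
/* Illustrative sketch only (not part of this file): once the builtins
   above are registered, code compiled for an iWMMXt target can call them
   directly by name, for example

     long long acc = __builtin_arm_wzero ();
     acc = __builtin_arm_wmacuz (a, b);

   where a and b are hypothetical V4HI vector values.  The mmintrin.h
   intrinsics provided for iWMMXt are essentially thin wrappers around
   these raw builtin names.  */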
17046
17047 static void
17048 arm_init_tls_builtins (void)
17049 {
17050 tree ftype, decl;
17051
17052 ftype = build_function_type (ptr_type_node, void_list_node);
17053 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
17054 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
17055 NULL, NULL_TREE);
17056 TREE_NOTHROW (decl) = 1;
17057 TREE_READONLY (decl) = 1;
17058 }
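
/* Illustrative usage (hypothetical, not part of this file):

     void *tp = __builtin_thread_pointer ();

   reads the thread pointer used for TLS.  The decl is marked nothrow and
   readonly above, so repeated calls may be combined by the optimizers.  */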
17059
17060 enum neon_builtin_type_bits {
17061 T_V8QI = 0x0001,
17062 T_V4HI = 0x0002,
17063 T_V2SI = 0x0004,
17064 T_V2SF = 0x0008,
17065 T_DI = 0x0010,
17066 T_V16QI = 0x0020,
17067 T_V8HI = 0x0040,
17068 T_V4SI = 0x0080,
17069 T_V4SF = 0x0100,
17070 T_V2DI = 0x0200,
17071 T_TI = 0x0400,
17072 T_EI = 0x0800,
17073 T_OI = 0x1000
17074 };
17075
17076 #define v8qi_UP T_V8QI
17077 #define v4hi_UP T_V4HI
17078 #define v2si_UP T_V2SI
17079 #define v2sf_UP T_V2SF
17080 #define di_UP T_DI
17081 #define v16qi_UP T_V16QI
17082 #define v8hi_UP T_V8HI
17083 #define v4si_UP T_V4SI
17084 #define v4sf_UP T_V4SF
17085 #define v2di_UP T_V2DI
17086 #define ti_UP T_TI
17087 #define ei_UP T_EI
17088 #define oi_UP T_OI
17089
17090 #define UP(X) X##_UP
17091
17092 #define T_MAX 13
17093
17094 typedef enum {
17095 NEON_BINOP,
17096 NEON_TERNOP,
17097 NEON_UNOP,
17098 NEON_GETLANE,
17099 NEON_SETLANE,
17100 NEON_CREATE,
17101 NEON_DUP,
17102 NEON_DUPLANE,
17103 NEON_COMBINE,
17104 NEON_SPLIT,
17105 NEON_LANEMUL,
17106 NEON_LANEMULL,
17107 NEON_LANEMULH,
17108 NEON_LANEMAC,
17109 NEON_SCALARMUL,
17110 NEON_SCALARMULL,
17111 NEON_SCALARMULH,
17112 NEON_SCALARMAC,
17113 NEON_CONVERT,
17114 NEON_FIXCONV,
17115 NEON_SELECT,
17116 NEON_RESULTPAIR,
17117 NEON_REINTERP,
17118 NEON_VTBL,
17119 NEON_VTBX,
17120 NEON_LOAD1,
17121 NEON_LOAD1LANE,
17122 NEON_STORE1,
17123 NEON_STORE1LANE,
17124 NEON_LOADSTRUCT,
17125 NEON_LOADSTRUCTLANE,
17126 NEON_STORESTRUCT,
17127 NEON_STORESTRUCTLANE,
17128 NEON_LOGICBINOP,
17129 NEON_SHIFTINSERT,
17130 NEON_SHIFTIMM,
17131 NEON_SHIFTACC
17132 } neon_itype;
17133
17134 typedef struct {
17135 const char *name;
17136 const neon_itype itype;
17137 const int bits;
17138 const enum insn_code codes[T_MAX];
17139 const unsigned int num_vars;
17140 unsigned int base_fcode;
17141 } neon_builtin_datum;
17142
17143 #define CF(N,X) CODE_FOR_neon_##N##X
17144
17145 #define VAR1(T, N, A) \
17146 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
17147 #define VAR2(T, N, A, B) \
17148 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
17149 #define VAR3(T, N, A, B, C) \
17150 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
17151 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
17152 #define VAR4(T, N, A, B, C, D) \
17153 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
17154 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
17155 #define VAR5(T, N, A, B, C, D, E) \
17156 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
17157 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
17158 #define VAR6(T, N, A, B, C, D, E, F) \
17159 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
17160 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
17161 #define VAR7(T, N, A, B, C, D, E, F, G) \
17162 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
17163 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17164 CF (N, G) }, 7, 0
17165 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17166 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17167 | UP (H), \
17168 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17169 CF (N, G), CF (N, H) }, 8, 0
17170 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17171 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17172 | UP (H) | UP (I), \
17173 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17174 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
17175 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17176 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
17177 | UP (H) | UP (I) | UP (J), \
17178 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
17179 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
17180
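/* For illustration (derived from the macros above, not a separate
   definition): an entry such as

     { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) }

   expands to roughly

     { "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
       { CODE_FOR_neon_vaddlv8qi, CODE_FOR_neon_vaddlv4hi,
         CODE_FOR_neon_vaddlv2si }, 3, 0 }

   i.e. one neon_builtin_datum describing three instruction variants.  */
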
17181 /* The mode entries in the following table correspond to the "key" type of the
17182 instruction variant, i.e. equivalent to that which would be specified after
17183 the assembler mnemonic, which usually refers to the last vector operand.
16684 (Signed, unsigned and polynomial types are not differentiated, though;
16685 they are all mapped onto the same mode for a given element size.)  The modes
17186 listed per instruction should be the same as those defined for that
17187 instruction's pattern in neon.md.
17188 WARNING: Variants should be listed in the same increasing order as
17189 neon_builtin_type_bits. */
17190
17191 static neon_builtin_datum neon_builtin_data[] =
17192 {
17193 { VAR10 (BINOP, vadd,
17194 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17195 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
17196 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
17197 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17198 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17199 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
17200 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17201 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17202 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
17203 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17204 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
17205 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
17206 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
17207 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
17208 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
17209 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
17210 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
17211 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
17212 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
17213 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
17214 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
17215 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
17216 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17217 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17218 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17219 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
17220 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
17221 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
17222 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17223 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17224 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17225 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
17226 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17227 { VAR10 (BINOP, vsub,
17228 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17229 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
17230 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
17231 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17232 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17233 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
17234 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17235 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17236 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17237 { VAR2 (BINOP, vcage, v2sf, v4sf) },
17238 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
17239 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17240 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17241 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
17242 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17243 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
17244 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17245 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17246 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
17247 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17248 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17249 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
17250 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
17251 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
17252 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
17253 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17254 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
17255 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17256 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17257 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17258 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17259 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17260 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17261 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
17262 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
17263 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
17264 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
17265 /* FIXME: vget_lane supports more variants than this! */
17266 { VAR10 (GETLANE, vget_lane,
17267 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17268 { VAR10 (SETLANE, vset_lane,
17269 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17270 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
17271 { VAR10 (DUP, vdup_n,
17272 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17273 { VAR10 (DUPLANE, vdup_lane,
17274 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17275 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
17276 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
17277 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
17278 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
17279 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
17280 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
17281 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
17282 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17283 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17284 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
17285 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
17286 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17287 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
17288 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
17289 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17290 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17291 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
17292 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
17293 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17294 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
17295 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
17296 { VAR10 (BINOP, vext,
17297 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17298 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17299 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
17300 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
17301 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
17302 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
17303 { VAR10 (SELECT, vbsl,
17304 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17305 { VAR1 (VTBL, vtbl1, v8qi) },
17306 { VAR1 (VTBL, vtbl2, v8qi) },
17307 { VAR1 (VTBL, vtbl3, v8qi) },
17308 { VAR1 (VTBL, vtbl4, v8qi) },
17309 { VAR1 (VTBX, vtbx1, v8qi) },
17310 { VAR1 (VTBX, vtbx2, v8qi) },
17311 { VAR1 (VTBX, vtbx3, v8qi) },
17312 { VAR1 (VTBX, vtbx4, v8qi) },
17313 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17314 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17315 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
17316 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
17317 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
17318 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
17319 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
17320 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
17321 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
17322 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
17323 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
17324 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
17325 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
17326 { VAR10 (LOAD1, vld1,
17327 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17328 { VAR10 (LOAD1LANE, vld1_lane,
17329 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17330 { VAR10 (LOAD1, vld1_dup,
17331 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17332 { VAR10 (STORE1, vst1,
17333 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17334 { VAR10 (STORE1LANE, vst1_lane,
17335 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17336 { VAR9 (LOADSTRUCT,
17337 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17338 { VAR7 (LOADSTRUCTLANE, vld2_lane,
17339 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17340 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
17341 { VAR9 (STORESTRUCT, vst2,
17342 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17343 { VAR7 (STORESTRUCTLANE, vst2_lane,
17344 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17345 { VAR9 (LOADSTRUCT,
17346 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17347 { VAR7 (LOADSTRUCTLANE, vld3_lane,
17348 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17349 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
17350 { VAR9 (STORESTRUCT, vst3,
17351 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17352 { VAR7 (STORESTRUCTLANE, vst3_lane,
17353 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17354 { VAR9 (LOADSTRUCT, vld4,
17355 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17356 { VAR7 (LOADSTRUCTLANE, vld4_lane,
17357 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17358 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
17359 { VAR9 (STORESTRUCT, vst4,
17360 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
17361 { VAR7 (STORESTRUCTLANE, vst4_lane,
17362 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
17363 { VAR10 (LOGICBINOP, vand,
17364 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17365 { VAR10 (LOGICBINOP, vorr,
17366 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17367 { VAR10 (BINOP, veor,
17368 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17369 { VAR10 (LOGICBINOP, vbic,
17370 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
17371 { VAR10 (LOGICBINOP, vorn,
17372 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
17373 };
17374
17375 #undef CF
17376 #undef VAR1
17377 #undef VAR2
17378 #undef VAR3
17379 #undef VAR4
17380 #undef VAR5
17381 #undef VAR6
17382 #undef VAR7
17383 #undef VAR8
17384 #undef VAR9
17385 #undef VAR10
17386
17387 static void
17388 arm_init_neon_builtins (void)
17389 {
17390 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
17391
17392 tree neon_intQI_type_node;
17393 tree neon_intHI_type_node;
17394 tree neon_polyQI_type_node;
17395 tree neon_polyHI_type_node;
17396 tree neon_intSI_type_node;
17397 tree neon_intDI_type_node;
17398 tree neon_float_type_node;
17399
17400 tree intQI_pointer_node;
17401 tree intHI_pointer_node;
17402 tree intSI_pointer_node;
17403 tree intDI_pointer_node;
17404 tree float_pointer_node;
17405
17406 tree const_intQI_node;
17407 tree const_intHI_node;
17408 tree const_intSI_node;
17409 tree const_intDI_node;
17410 tree const_float_node;
17411
17412 tree const_intQI_pointer_node;
17413 tree const_intHI_pointer_node;
17414 tree const_intSI_pointer_node;
17415 tree const_intDI_pointer_node;
17416 tree const_float_pointer_node;
17417
17418 tree V8QI_type_node;
17419 tree V4HI_type_node;
17420 tree V2SI_type_node;
17421 tree V2SF_type_node;
17422 tree V16QI_type_node;
17423 tree V8HI_type_node;
17424 tree V4SI_type_node;
17425 tree V4SF_type_node;
17426 tree V2DI_type_node;
17427
17428 tree intUQI_type_node;
17429 tree intUHI_type_node;
17430 tree intUSI_type_node;
17431 tree intUDI_type_node;
17432
17433 tree intEI_type_node;
17434 tree intOI_type_node;
17435 tree intCI_type_node;
17436 tree intXI_type_node;
17437
17438 tree V8QI_pointer_node;
17439 tree V4HI_pointer_node;
17440 tree V2SI_pointer_node;
17441 tree V2SF_pointer_node;
17442 tree V16QI_pointer_node;
17443 tree V8HI_pointer_node;
17444 tree V4SI_pointer_node;
17445 tree V4SF_pointer_node;
17446 tree V2DI_pointer_node;
17447
17448 tree void_ftype_pv8qi_v8qi_v8qi;
17449 tree void_ftype_pv4hi_v4hi_v4hi;
17450 tree void_ftype_pv2si_v2si_v2si;
17451 tree void_ftype_pv2sf_v2sf_v2sf;
17452 tree void_ftype_pdi_di_di;
17453 tree void_ftype_pv16qi_v16qi_v16qi;
17454 tree void_ftype_pv8hi_v8hi_v8hi;
17455 tree void_ftype_pv4si_v4si_v4si;
17456 tree void_ftype_pv4sf_v4sf_v4sf;
17457 tree void_ftype_pv2di_v2di_v2di;
17458
17459 tree reinterp_ftype_dreg[5][5];
17460 tree reinterp_ftype_qreg[5][5];
17461 tree dreg_types[5], qreg_types[5];
17462
17463 /* Create distinguished type nodes for NEON vector element types,
17464 and pointers to values of such types, so we can detect them later. */
17465 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17466 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17467 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
17468 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
17469 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
17470 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
17471 neon_float_type_node = make_node (REAL_TYPE);
17472 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
17473 layout_type (neon_float_type_node);
17474
17475 /* Define typedefs which exactly correspond to the modes we are basing vector
17476 types on. If you change these names you'll need to change
17477 the table used by arm_mangle_type too. */
17478 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
17479 "__builtin_neon_qi");
17480 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
17481 "__builtin_neon_hi");
17482 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
17483 "__builtin_neon_si");
17484 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
17485 "__builtin_neon_sf");
17486 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
17487 "__builtin_neon_di");
17488 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
17489 "__builtin_neon_poly8");
17490 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
17491 "__builtin_neon_poly16");
17492
17493 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
17494 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
17495 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
17496 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
17497 float_pointer_node = build_pointer_type (neon_float_type_node);
17498
17499 /* Next create constant-qualified versions of the above types. */
17500 const_intQI_node = build_qualified_type (neon_intQI_type_node,
17501 TYPE_QUAL_CONST);
17502 const_intHI_node = build_qualified_type (neon_intHI_type_node,
17503 TYPE_QUAL_CONST);
17504 const_intSI_node = build_qualified_type (neon_intSI_type_node,
17505 TYPE_QUAL_CONST);
17506 const_intDI_node = build_qualified_type (neon_intDI_type_node,
17507 TYPE_QUAL_CONST);
17508 const_float_node = build_qualified_type (neon_float_type_node,
17509 TYPE_QUAL_CONST);
17510
17511 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
17512 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
17513 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
17514 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
17515 const_float_pointer_node = build_pointer_type (const_float_node);
17516
17517 /* Now create vector types based on our NEON element types. */
17518 /* 64-bit vectors. */
17519 V8QI_type_node =
17520 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
17521 V4HI_type_node =
17522 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
17523 V2SI_type_node =
17524 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
17525 V2SF_type_node =
17526 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
17527 /* 128-bit vectors. */
17528 V16QI_type_node =
17529 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
17530 V8HI_type_node =
17531 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
17532 V4SI_type_node =
17533 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
17534 V4SF_type_node =
17535 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
17536 V2DI_type_node =
17537 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
17538
17539 /* Unsigned integer types for various mode sizes. */
17540 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
17541 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
17542 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
17543 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
17544
17545 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
17546 "__builtin_neon_uqi");
17547 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
17548 "__builtin_neon_uhi");
17549 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
17550 "__builtin_neon_usi");
17551 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
17552 "__builtin_neon_udi");
17553
17554 /* Opaque integer types for structures of vectors. */
17555 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
17556 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
17557 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
17558 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
17559
17560 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
17561 "__builtin_neon_ti");
17562 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
17563 "__builtin_neon_ei");
17564 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
17565 "__builtin_neon_oi");
17566 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
17567 "__builtin_neon_ci");
17568 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
17569 "__builtin_neon_xi");
17570
17571 /* Pointers to vector types. */
17572 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
17573 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
17574 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
17575 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
17576 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
17577 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
17578 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
17579 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
17580 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
17581
17582 /* Operations which return results as pairs. */
17583 void_ftype_pv8qi_v8qi_v8qi =
17584 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
17585 V8QI_type_node, NULL);
17586 void_ftype_pv4hi_v4hi_v4hi =
17587 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
17588 V4HI_type_node, NULL);
17589 void_ftype_pv2si_v2si_v2si =
17590 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
17591 V2SI_type_node, NULL);
17592 void_ftype_pv2sf_v2sf_v2sf =
17593 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
17594 V2SF_type_node, NULL);
17595 void_ftype_pdi_di_di =
17596 build_function_type_list (void_type_node, intDI_pointer_node,
17597 neon_intDI_type_node, neon_intDI_type_node, NULL);
17598 void_ftype_pv16qi_v16qi_v16qi =
17599 build_function_type_list (void_type_node, V16QI_pointer_node,
17600 V16QI_type_node, V16QI_type_node, NULL);
17601 void_ftype_pv8hi_v8hi_v8hi =
17602 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
17603 V8HI_type_node, NULL);
17604 void_ftype_pv4si_v4si_v4si =
17605 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
17606 V4SI_type_node, NULL);
17607 void_ftype_pv4sf_v4sf_v4sf =
17608 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
17609 V4SF_type_node, NULL);
17610 void_ftype_pv2di_v2di_v2di =
17611 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
17612 V2DI_type_node, NULL);
17613
17614 dreg_types[0] = V8QI_type_node;
17615 dreg_types[1] = V4HI_type_node;
17616 dreg_types[2] = V2SI_type_node;
17617 dreg_types[3] = V2SF_type_node;
17618 dreg_types[4] = neon_intDI_type_node;
17619
17620 qreg_types[0] = V16QI_type_node;
17621 qreg_types[1] = V8HI_type_node;
17622 qreg_types[2] = V4SI_type_node;
17623 qreg_types[3] = V4SF_type_node;
17624 qreg_types[4] = V2DI_type_node;
17625
17626 for (i = 0; i < 5; i++)
17627 {
17628 int j;
17629 for (j = 0; j < 5; j++)
17630 {
17631 reinterp_ftype_dreg[i][j]
17632 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
17633 reinterp_ftype_qreg[i][j]
17634 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
17635 }
17636 }
17637
17638 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
17639 {
17640 neon_builtin_datum *d = &neon_builtin_data[i];
17641 unsigned int j, codeidx = 0;
17642
17643 d->base_fcode = fcode;
17644
17645 for (j = 0; j < T_MAX; j++)
17646 {
17647 const char* const modenames[] = {
17648 "v8qi", "v4hi", "v2si", "v2sf", "di",
17649 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
17650 };
17651 char namebuf[60];
17652 tree ftype = NULL;
17653 enum insn_code icode;
17654 int is_load = 0, is_store = 0;
17655
17656 if ((d->bits & (1 << j)) == 0)
17657 continue;
17658
17659 icode = d->codes[codeidx++];
17660
17661 switch (d->itype)
17662 {
17663 case NEON_LOAD1:
17664 case NEON_LOAD1LANE:
17665 case NEON_LOADSTRUCT:
17666 case NEON_LOADSTRUCTLANE:
17667 is_load = 1;
17668 /* Fall through. */
17669 case NEON_STORE1:
17670 case NEON_STORE1LANE:
17671 case NEON_STORESTRUCT:
17672 case NEON_STORESTRUCTLANE:
17673 if (!is_load)
17674 is_store = 1;
17675 /* Fall through. */
17676 case NEON_UNOP:
17677 case NEON_BINOP:
17678 case NEON_LOGICBINOP:
17679 case NEON_SHIFTINSERT:
17680 case NEON_TERNOP:
17681 case NEON_GETLANE:
17682 case NEON_SETLANE:
17683 case NEON_CREATE:
17684 case NEON_DUP:
17685 case NEON_DUPLANE:
17686 case NEON_SHIFTIMM:
17687 case NEON_SHIFTACC:
17688 case NEON_COMBINE:
17689 case NEON_SPLIT:
17690 case NEON_CONVERT:
17691 case NEON_FIXCONV:
17692 case NEON_LANEMUL:
17693 case NEON_LANEMULL:
17694 case NEON_LANEMULH:
17695 case NEON_LANEMAC:
17696 case NEON_SCALARMUL:
17697 case NEON_SCALARMULL:
17698 case NEON_SCALARMULH:
17699 case NEON_SCALARMAC:
17700 case NEON_SELECT:
17701 case NEON_VTBL:
17702 case NEON_VTBX:
17703 {
17704 int k;
17705 tree return_type = void_type_node, args = void_list_node;
17706
17707 /* Build a function type directly from the insn_data for this
17708 builtin. The build_function_type() function takes care of
17709 removing duplicates for us. */
17710 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
17711 {
17712 tree eltype;
17713
17714 if (is_load && k == 1)
17715 {
17716 /* Neon load patterns always have the memory operand
17717 (a SImode pointer) in the operand 1 position. We
17718 want a const pointer to the element type in that
17719 position. */
17720 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17721
17722 switch (1 << j)
17723 {
17724 case T_V8QI:
17725 case T_V16QI:
17726 eltype = const_intQI_pointer_node;
17727 break;
17728
17729 case T_V4HI:
17730 case T_V8HI:
17731 eltype = const_intHI_pointer_node;
17732 break;
17733
17734 case T_V2SI:
17735 case T_V4SI:
17736 eltype = const_intSI_pointer_node;
17737 break;
17738
17739 case T_V2SF:
17740 case T_V4SF:
17741 eltype = const_float_pointer_node;
17742 break;
17743
17744 case T_DI:
17745 case T_V2DI:
17746 eltype = const_intDI_pointer_node;
17747 break;
17748
17749 default: gcc_unreachable ();
17750 }
17751 }
17752 else if (is_store && k == 0)
17753 {
17754 /* Similarly, Neon store patterns use operand 0 as
17755 the memory location to store to (a SImode pointer).
17756 Use a pointer to the element type of the store in
17757 that position. */
17758 gcc_assert (insn_data[icode].operand[k].mode == SImode);
17759
17760 switch (1 << j)
17761 {
17762 case T_V8QI:
17763 case T_V16QI:
17764 eltype = intQI_pointer_node;
17765 break;
17766
17767 case T_V4HI:
17768 case T_V8HI:
17769 eltype = intHI_pointer_node;
17770 break;
17771
17772 case T_V2SI:
17773 case T_V4SI:
17774 eltype = intSI_pointer_node;
17775 break;
17776
17777 case T_V2SF:
17778 case T_V4SF:
17779 eltype = float_pointer_node;
17780 break;
17781
17782 case T_DI:
17783 case T_V2DI:
17784 eltype = intDI_pointer_node;
17785 break;
17786
17787 default: gcc_unreachable ();
17788 }
17789 }
17790 else
17791 {
17792 switch (insn_data[icode].operand[k].mode)
17793 {
17794 case VOIDmode: eltype = void_type_node; break;
17795 /* Scalars. */
17796 case QImode: eltype = neon_intQI_type_node; break;
17797 case HImode: eltype = neon_intHI_type_node; break;
17798 case SImode: eltype = neon_intSI_type_node; break;
17799 case SFmode: eltype = neon_float_type_node; break;
17800 case DImode: eltype = neon_intDI_type_node; break;
17801 case TImode: eltype = intTI_type_node; break;
17802 case EImode: eltype = intEI_type_node; break;
17803 case OImode: eltype = intOI_type_node; break;
17804 case CImode: eltype = intCI_type_node; break;
17805 case XImode: eltype = intXI_type_node; break;
17806 /* 64-bit vectors. */
17807 case V8QImode: eltype = V8QI_type_node; break;
17808 case V4HImode: eltype = V4HI_type_node; break;
17809 case V2SImode: eltype = V2SI_type_node; break;
17810 case V2SFmode: eltype = V2SF_type_node; break;
17811 /* 128-bit vectors. */
17812 case V16QImode: eltype = V16QI_type_node; break;
17813 case V8HImode: eltype = V8HI_type_node; break;
17814 case V4SImode: eltype = V4SI_type_node; break;
17815 case V4SFmode: eltype = V4SF_type_node; break;
17816 case V2DImode: eltype = V2DI_type_node; break;
17817 default: gcc_unreachable ();
17818 }
17819 }
17820
17821 if (k == 0 && !is_store)
17822 return_type = eltype;
17823 else
17824 args = tree_cons (NULL_TREE, eltype, args);
17825 }
17826
17827 ftype = build_function_type (return_type, args);
17828 }
17829 break;
17830
17831 case NEON_RESULTPAIR:
17832 {
17833 switch (insn_data[icode].operand[1].mode)
17834 {
17835 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
17836 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
17837 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
17838 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
17839 case DImode: ftype = void_ftype_pdi_di_di; break;
17840 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
17841 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
17842 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
17843 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
17844 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
17845 default: gcc_unreachable ();
17846 }
17847 }
17848 break;
17849
17850 case NEON_REINTERP:
17851 {
17852 /* We iterate over 5 doubleword types, then 5 quadword
17853 types. */
17854 int rhs = j % 5;
17855 switch (insn_data[icode].operand[0].mode)
17856 {
17857 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
17858 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
17859 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
17860 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
17861 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
17862 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
17863 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
17864 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
17865 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
17866 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
17867 default: gcc_unreachable ();
17868 }
17869 }
17870 break;
17871
17872 default:
17873 gcc_unreachable ();
17874 }
17875
17876 gcc_assert (ftype != NULL);
17877
17878 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
17879
17880 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
17881 NULL_TREE);
17882 }
17883 }
17884 }
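
/* A sketch of the loop's effect (for illustration): every enabled variant
   of a table entry becomes one builtin named "__builtin_neon_" followed by
   the entry name and the mode suffix, e.g. the vadd entry yields
   __builtin_neon_vaddv8qi, __builtin_neon_vaddv4hi, ...,
   __builtin_neon_vaddv2di, with consecutive function codes starting at
   that entry's base_fcode.  The arm_neon.h intrinsics wrap these names.  */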
17885
17886 static void
17887 arm_init_fp16_builtins (void)
17888 {
17889 tree fp16_type = make_node (REAL_TYPE);
17890 TYPE_PRECISION (fp16_type) = 16;
17891 layout_type (fp16_type);
17892 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
17893 }
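
/* Illustrative only (assumes an __fp16 format has been selected, e.g. via
   -mfp16-format): with the __fp16 type registered above, user code can write

     __fp16 h = 1.0;
     float f = h + 2.0f;

   Values of type __fp16 are promoted to float before arithmetic; see
   arm_promoted_type below.  */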
17894
17895 static void
17896 arm_init_builtins (void)
17897 {
17898 arm_init_tls_builtins ();
17899
17900 if (TARGET_REALLY_IWMMXT)
17901 arm_init_iwmmxt_builtins ();
17902
17903 if (TARGET_NEON)
17904 arm_init_neon_builtins ();
17905
17906 if (arm_fp16_format)
17907 arm_init_fp16_builtins ();
17908 }
17909
17910 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
17911
17912 static const char *
17913 arm_invalid_parameter_type (const_tree t)
17914 {
17915 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17916 return N_("function parameters cannot have __fp16 type");
17917 return NULL;
17918 }
17919
17920 /* Implement TARGET_INVALID_RETURN_TYPE. */
17921
17922 static const char *
17923 arm_invalid_return_type (const_tree t)
17924 {
17925 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17926 return N_("functions cannot return __fp16 type");
17927 return NULL;
17928 }
17929
17930 /* Implement TARGET_PROMOTED_TYPE. */
17931
17932 static tree
17933 arm_promoted_type (const_tree t)
17934 {
17935 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
17936 return float_type_node;
17937 return NULL_TREE;
17938 }
17939
17940 /* Implement TARGET_CONVERT_TO_TYPE.
17941 Specifically, this hook implements the peculiarity of the ARM
17942 half-precision floating-point C semantics that requires conversions
17943 between __fp16 and double to go through an intermediate conversion to float. */
17944
17945 static tree
17946 arm_convert_to_type (tree type, tree expr)
17947 {
17948 tree fromtype = TREE_TYPE (expr);
17949 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
17950 return NULL_TREE;
17951 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
17952 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
17953 return convert (type, convert (float_type_node, expr));
17954 return NULL_TREE;
17955 }
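
/* Worked example (illustrative): for

     __fp16 h;
     double d = h;

   the hook above rewrites the conversion as (double) (float) h, and a
   double-to-__fp16 conversion is likewise routed through float.
   Conversions where neither precision exceeds 32 bits (e.g. __fp16 to
   float) return NULL_TREE and take the normal path.  */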
17956
17957 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
17958 This simply adds HFmode as a supported mode; even though we don't
17959 implement arithmetic on this type directly, it's supported by
17960 optabs conversions, much the same way that double-word arithmetic is
17961 special-cased in the default hook. */
17962
17963 static bool
17964 arm_scalar_mode_supported_p (enum machine_mode mode)
17965 {
17966 if (mode == HFmode)
17967 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
17968 else
17969 return default_scalar_mode_supported_p (mode);
17970 }
17971
17972 /* Errors in the source file can cause expand_expr to return const0_rtx
17973 where we expect a vector. To avoid crashing, use one of the vector
17974 clear instructions. */
17975
17976 static rtx
17977 safe_vector_operand (rtx x, enum machine_mode mode)
17978 {
17979 if (x != const0_rtx)
17980 return x;
17981 x = gen_reg_rtx (mode);
17982
17983 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
17984 : gen_rtx_SUBREG (DImode, x, 0)));
17985 return x;
17986 }
17987
17988 /* Subroutine of arm_expand_builtin to take care of binop insns. */
17989
17990 static rtx
17991 arm_expand_binop_builtin (enum insn_code icode,
17992 tree exp, rtx target)
17993 {
17994 rtx pat;
17995 tree arg0 = CALL_EXPR_ARG (exp, 0);
17996 tree arg1 = CALL_EXPR_ARG (exp, 1);
17997 rtx op0 = expand_normal (arg0);
17998 rtx op1 = expand_normal (arg1);
17999 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18000 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18001 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18002
18003 if (VECTOR_MODE_P (mode0))
18004 op0 = safe_vector_operand (op0, mode0);
18005 if (VECTOR_MODE_P (mode1))
18006 op1 = safe_vector_operand (op1, mode1);
18007
18008 if (! target
18009 || GET_MODE (target) != tmode
18010 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18011 target = gen_reg_rtx (tmode);
18012
18013 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
18014
18015 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18016 op0 = copy_to_mode_reg (mode0, op0);
18017 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18018 op1 = copy_to_mode_reg (mode1, op1);
18019
18020 pat = GEN_FCN (icode) (target, op0, op1);
18021 if (! pat)
18022 return 0;
18023 emit_insn (pat);
18024 return target;
18025 }
18026
18027 /* Subroutine of arm_expand_builtin to take care of unop insns. */
18028
18029 static rtx
18030 arm_expand_unop_builtin (enum insn_code icode,
18031 tree exp, rtx target, int do_load)
18032 {
18033 rtx pat;
18034 tree arg0 = CALL_EXPR_ARG (exp, 0);
18035 rtx op0 = expand_normal (arg0);
18036 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18037 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18038
18039 if (! target
18040 || GET_MODE (target) != tmode
18041 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18042 target = gen_reg_rtx (tmode);
18043 if (do_load)
18044 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18045 else
18046 {
18047 if (VECTOR_MODE_P (mode0))
18048 op0 = safe_vector_operand (op0, mode0);
18049
18050 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18051 op0 = copy_to_mode_reg (mode0, op0);
18052 }
18053
18054 pat = GEN_FCN (icode) (target, op0);
18055 if (! pat)
18056 return 0;
18057 emit_insn (pat);
18058 return target;
18059 }
18060
18061 static int
18062 neon_builtin_compare (const void *a, const void *b)
18063 {
18064 const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
18065 const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
18066 unsigned int soughtcode = key->base_fcode;
18067
18068 if (soughtcode >= memb->base_fcode
18069 && soughtcode < memb->base_fcode + memb->num_vars)
18070 return 0;
18071 else if (soughtcode < memb->base_fcode)
18072 return -1;
18073 else
18074 return 1;
18075 }
18076
18077 static enum insn_code
18078 locate_neon_builtin_icode (int fcode, neon_itype *itype)
18079 {
18080 neon_builtin_datum key, *found;
18081 int idx;
18082
18083 key.base_fcode = fcode;
18084 found = (neon_builtin_datum *)
18085 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
18086 sizeof (neon_builtin_data[0]), neon_builtin_compare);
18087 gcc_assert (found);
18088 idx = fcode - (int) found->base_fcode;
18089 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
18090
18091 if (itype)
18092 *itype = found->itype;
18093
18094 return found->codes[idx];
18095 }
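
/* Sketch of the lookup above (illustrative): arm_init_neon_builtins hands
   out base_fcode values in increasing order, so neon_builtin_data is
   already sorted for the bsearch.  For a function code equal to an entry's
   base_fcode + 2, the comparator reports a match for that entry and the
   offset 2 selects the third enabled variant's insn code.  */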
18096
18097 typedef enum {
18098 NEON_ARG_COPY_TO_REG,
18099 NEON_ARG_CONSTANT,
18100 NEON_ARG_STOP
18101 } builtin_arg;
18102
18103 #define NEON_MAX_BUILTIN_ARGS 5
18104
18105 /* Expand a Neon builtin. */
18106 static rtx
18107 arm_expand_neon_args (rtx target, int icode, int have_retval,
18108 tree exp, ...)
18109 {
18110 va_list ap;
18111 rtx pat;
18112 tree arg[NEON_MAX_BUILTIN_ARGS];
18113 rtx op[NEON_MAX_BUILTIN_ARGS];
18114 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18115 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
18116 int argc = 0;
18117
18118 if (have_retval
18119 && (!target
18120 || GET_MODE (target) != tmode
18121 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
18122 target = gen_reg_rtx (tmode);
18123
18124 va_start (ap, exp);
18125
18126 for (;;)
18127 {
18128 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
18129
18130 if (thisarg == NEON_ARG_STOP)
18131 break;
18132 else
18133 {
18134 arg[argc] = CALL_EXPR_ARG (exp, argc);
18135 op[argc] = expand_normal (arg[argc]);
18136 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
18137
18138 switch (thisarg)
18139 {
18140 case NEON_ARG_COPY_TO_REG:
18141 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
18142 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18143 (op[argc], mode[argc]))
18144 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
18145 break;
18146
18147 case NEON_ARG_CONSTANT:
18148 /* FIXME: This error message is somewhat unhelpful. */
18149 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
18150 (op[argc], mode[argc]))
18151 error ("argument must be a constant");
18152 break;
18153
18154 case NEON_ARG_STOP:
18155 gcc_unreachable ();
18156 }
18157
18158 argc++;
18159 }
18160 }
18161
18162 va_end (ap);
18163
18164 if (have_retval)
18165 switch (argc)
18166 {
18167 case 1:
18168 pat = GEN_FCN (icode) (target, op[0]);
18169 break;
18170
18171 case 2:
18172 pat = GEN_FCN (icode) (target, op[0], op[1]);
18173 break;
18174
18175 case 3:
18176 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
18177 break;
18178
18179 case 4:
18180 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
18181 break;
18182
18183 case 5:
18184 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
18185 break;
18186
18187 default:
18188 gcc_unreachable ();
18189 }
18190 else
18191 switch (argc)
18192 {
18193 case 1:
18194 pat = GEN_FCN (icode) (op[0]);
18195 break;
18196
18197 case 2:
18198 pat = GEN_FCN (icode) (op[0], op[1]);
18199 break;
18200
18201 case 3:
18202 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
18203 break;
18204
18205 case 4:
18206 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
18207 break;
18208
18209 case 5:
18210 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
18211 break;
18212
18213 default:
18214 gcc_unreachable ();
18215 }
18216
18217 if (!pat)
18218 return 0;
18219
18220 emit_insn (pat);
18221
18222 return target;
18223 }
18224
18225 /* Expand a Neon builtin. These are "special" because they don't have symbolic
18226 constants defined per-instruction or per instruction-variant. Instead, the
18227 required info is looked up in the table neon_builtin_data. */
18228 static rtx
18229 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
18230 {
18231 neon_itype itype;
18232 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
18233
18234 switch (itype)
18235 {
18236 case NEON_UNOP:
18237 case NEON_CONVERT:
18238 case NEON_DUPLANE:
18239 return arm_expand_neon_args (target, icode, 1, exp,
18240 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18241
18242 case NEON_BINOP:
18243 case NEON_SETLANE:
18244 case NEON_SCALARMUL:
18245 case NEON_SCALARMULL:
18246 case NEON_SCALARMULH:
18247 case NEON_SHIFTINSERT:
18248 case NEON_LOGICBINOP:
18249 return arm_expand_neon_args (target, icode, 1, exp,
18250 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18251 NEON_ARG_STOP);
18252
18253 case NEON_TERNOP:
18254 return arm_expand_neon_args (target, icode, 1, exp,
18255 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18256 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18257
18258 case NEON_GETLANE:
18259 case NEON_FIXCONV:
18260 case NEON_SHIFTIMM:
18261 return arm_expand_neon_args (target, icode, 1, exp,
18262 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
18263 NEON_ARG_STOP);
18264
18265 case NEON_CREATE:
18266 return arm_expand_neon_args (target, icode, 1, exp,
18267 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18268
18269 case NEON_DUP:
18270 case NEON_SPLIT:
18271 case NEON_REINTERP:
18272 return arm_expand_neon_args (target, icode, 1, exp,
18273 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18274
18275 case NEON_COMBINE:
18276 case NEON_VTBL:
18277 return arm_expand_neon_args (target, icode, 1, exp,
18278 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18279
18280 case NEON_RESULTPAIR:
18281 return arm_expand_neon_args (target, icode, 0, exp,
18282 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18283 NEON_ARG_STOP);
18284
18285 case NEON_LANEMUL:
18286 case NEON_LANEMULL:
18287 case NEON_LANEMULH:
18288 return arm_expand_neon_args (target, icode, 1, exp,
18289 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18290 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18291
18292 case NEON_LANEMAC:
18293 return arm_expand_neon_args (target, icode, 1, exp,
18294 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18295 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
18296
18297 case NEON_SHIFTACC:
18298 return arm_expand_neon_args (target, icode, 1, exp,
18299 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18300 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18301
18302 case NEON_SCALARMAC:
18303 return arm_expand_neon_args (target, icode, 1, exp,
18304 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18305 NEON_ARG_CONSTANT, NEON_ARG_STOP);
18306
18307 case NEON_SELECT:
18308 case NEON_VTBX:
18309 return arm_expand_neon_args (target, icode, 1, exp,
18310 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
18311 NEON_ARG_STOP);
18312
18313 case NEON_LOAD1:
18314 case NEON_LOADSTRUCT:
18315 return arm_expand_neon_args (target, icode, 1, exp,
18316 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18317
18318 case NEON_LOAD1LANE:
18319 case NEON_LOADSTRUCTLANE:
18320 return arm_expand_neon_args (target, icode, 1, exp,
18321 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18322 NEON_ARG_STOP);
18323
18324 case NEON_STORE1:
18325 case NEON_STORESTRUCT:
18326 return arm_expand_neon_args (target, icode, 0, exp,
18327 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
18328
18329 case NEON_STORE1LANE:
18330 case NEON_STORESTRUCTLANE:
18331 return arm_expand_neon_args (target, icode, 0, exp,
18332 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
18333 NEON_ARG_STOP);
18334 }
18335
18336 gcc_unreachable ();
18337 }
18338
18339 /* Emit code to reinterpret one Neon type as another, without altering bits. */
18340 void
18341 neon_reinterpret (rtx dest, rtx src)
18342 {
18343 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
18344 }
18345
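/* Illustrative note (not part of the original source): gen_lowpart changes
   only the mode, not the bits, so reinterpreting e.g. a V8QImode value as
   V4HImode simply relabels the same 64-bit register contents.  */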
18346 /* Emit code to place a Neon pair result in memory locations (with equal
18347 registers). */
18348 void
18349 neon_emit_pair_result_insn (enum machine_mode mode,
18350 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
18351 rtx op1, rtx op2)
18352 {
18353 rtx mem = gen_rtx_MEM (mode, destaddr);
18354 rtx tmp1 = gen_reg_rtx (mode);
18355 rtx tmp2 = gen_reg_rtx (mode);
18356
18357 emit_insn (intfn (tmp1, op1, tmp2, op2));
18358
18359 emit_move_insn (mem, tmp1);
18360 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
18361 emit_move_insn (mem, tmp2);
18362 }
18363
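/* Illustrative note (not part of the original source): with MODE == V2SImode
   (8 bytes), the function above stores TMP1 at [DESTADDR] and TMP2 at
   [DESTADDR + 8]; the two halves of the pair always land in memory
   GET_MODE_SIZE (mode) bytes apart.  */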
18364 /* Set up operands for a register copy from src to dest, taking care not to
18365 clobber registers in the process.
18366 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
18367 be called with a large N, so that should be OK. */
18368
18369 void
18370 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
18371 {
18372 unsigned int copied = 0, opctr = 0;
18373 unsigned int done = (1 << count) - 1;
18374 unsigned int i, j;
18375
18376 while (copied != done)
18377 {
18378 for (i = 0; i < count; i++)
18379 {
18380 int good = 1;
18381
18382 for (j = 0; good && j < count; j++)
18383 if (i != j && (copied & (1 << j)) == 0
18384 && reg_overlap_mentioned_p (src[j], dest[i]))
18385 good = 0;
18386
18387 if (good)
18388 {
18389 operands[opctr++] = dest[i];
18390 operands[opctr++] = src[i];
18391 copied |= 1 << i;
18392 }
18393 }
18394 }
18395
18396 gcc_assert (opctr == count * 2);
18397 }
18398
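/* Illustrative note (not part of the original source): the loop above only
   schedules the copy dest[i] = src[i] once no still-pending src[j] overlaps
   dest[i].  For example, with count == 2, dest = {d0, d1} and
   src = {d1, d2}, it emits d0 = d1 first and d1 = d2 second, so the old
   value of d1 is read before it is overwritten.  */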
18399 /* Expand an expression EXP that calls a built-in function,
18400 with result going to TARGET if that's convenient
18401 (and in mode MODE if that's convenient).
18402 SUBTARGET may be used as the target for computing one of EXP's operands.
18403 IGNORE is nonzero if the value is to be ignored. */
18404
18405 static rtx
18406 arm_expand_builtin (tree exp,
18407 rtx target,
18408 rtx subtarget ATTRIBUTE_UNUSED,
18409 enum machine_mode mode ATTRIBUTE_UNUSED,
18410 int ignore ATTRIBUTE_UNUSED)
18411 {
18412 const struct builtin_description * d;
18413 enum insn_code icode;
18414 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18415 tree arg0;
18416 tree arg1;
18417 tree arg2;
18418 rtx op0;
18419 rtx op1;
18420 rtx op2;
18421 rtx pat;
18422 int fcode = DECL_FUNCTION_CODE (fndecl);
18423 size_t i;
18424 enum machine_mode tmode;
18425 enum machine_mode mode0;
18426 enum machine_mode mode1;
18427 enum machine_mode mode2;
18428
18429 if (fcode >= ARM_BUILTIN_NEON_BASE)
18430 return arm_expand_neon_builtin (fcode, exp, target);
18431
18432 switch (fcode)
18433 {
18434 case ARM_BUILTIN_TEXTRMSB:
18435 case ARM_BUILTIN_TEXTRMUB:
18436 case ARM_BUILTIN_TEXTRMSH:
18437 case ARM_BUILTIN_TEXTRMUH:
18438 case ARM_BUILTIN_TEXTRMSW:
18439 case ARM_BUILTIN_TEXTRMUW:
18440 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
18441 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
18442 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
18443 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
18444 : CODE_FOR_iwmmxt_textrmw);
18445
18446 arg0 = CALL_EXPR_ARG (exp, 0);
18447 arg1 = CALL_EXPR_ARG (exp, 1);
18448 op0 = expand_normal (arg0);
18449 op1 = expand_normal (arg1);
18450 tmode = insn_data[icode].operand[0].mode;
18451 mode0 = insn_data[icode].operand[1].mode;
18452 mode1 = insn_data[icode].operand[2].mode;
18453
18454 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18455 op0 = copy_to_mode_reg (mode0, op0);
18456 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18457 {
18458 /* @@@ better error message */
18459 error ("selector must be an immediate");
18460 return gen_reg_rtx (tmode);
18461 }
18462 if (target == 0
18463 || GET_MODE (target) != tmode
18464 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18465 target = gen_reg_rtx (tmode);
18466 pat = GEN_FCN (icode) (target, op0, op1);
18467 if (! pat)
18468 return 0;
18469 emit_insn (pat);
18470 return target;
18471
18472 case ARM_BUILTIN_TINSRB:
18473 case ARM_BUILTIN_TINSRH:
18474 case ARM_BUILTIN_TINSRW:
18475 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
18476 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
18477 : CODE_FOR_iwmmxt_tinsrw);
18478 arg0 = CALL_EXPR_ARG (exp, 0);
18479 arg1 = CALL_EXPR_ARG (exp, 1);
18480 arg2 = CALL_EXPR_ARG (exp, 2);
18481 op0 = expand_normal (arg0);
18482 op1 = expand_normal (arg1);
18483 op2 = expand_normal (arg2);
18484 tmode = insn_data[icode].operand[0].mode;
18485 mode0 = insn_data[icode].operand[1].mode;
18486 mode1 = insn_data[icode].operand[2].mode;
18487 mode2 = insn_data[icode].operand[3].mode;
18488
18489 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18490 op0 = copy_to_mode_reg (mode0, op0);
18491 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18492 op1 = copy_to_mode_reg (mode1, op1);
18493 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18494 {
18495 /* @@@ better error message */
18496 error ("selector must be an immediate");
18497 return const0_rtx;
18498 }
18499 if (target == 0
18500 || GET_MODE (target) != tmode
18501 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18502 target = gen_reg_rtx (tmode);
18503 pat = GEN_FCN (icode) (target, op0, op1, op2);
18504 if (! pat)
18505 return 0;
18506 emit_insn (pat);
18507 return target;
18508
18509 case ARM_BUILTIN_SETWCX:
18510 arg0 = CALL_EXPR_ARG (exp, 0);
18511 arg1 = CALL_EXPR_ARG (exp, 1);
18512 op0 = force_reg (SImode, expand_normal (arg0));
18513 op1 = expand_normal (arg1);
18514 emit_insn (gen_iwmmxt_tmcr (op1, op0));
18515 return 0;
18516
18517 case ARM_BUILTIN_GETWCX:
18518 arg0 = CALL_EXPR_ARG (exp, 0);
18519 op0 = expand_normal (arg0);
18520 target = gen_reg_rtx (SImode);
18521 emit_insn (gen_iwmmxt_tmrc (target, op0));
18522 return target;
18523
18524 case ARM_BUILTIN_WSHUFH:
18525 icode = CODE_FOR_iwmmxt_wshufh;
18526 arg0 = CALL_EXPR_ARG (exp, 0);
18527 arg1 = CALL_EXPR_ARG (exp, 1);
18528 op0 = expand_normal (arg0);
18529 op1 = expand_normal (arg1);
18530 tmode = insn_data[icode].operand[0].mode;
18531 mode1 = insn_data[icode].operand[1].mode;
18532 mode2 = insn_data[icode].operand[2].mode;
18533
18534 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18535 op0 = copy_to_mode_reg (mode1, op0);
18536 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18537 {
18538 /* @@@ better error message */
18539 error ("mask must be an immediate");
18540 return const0_rtx;
18541 }
18542 if (target == 0
18543 || GET_MODE (target) != tmode
18544 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18545 target = gen_reg_rtx (tmode);
18546 pat = GEN_FCN (icode) (target, op0, op1);
18547 if (! pat)
18548 return 0;
18549 emit_insn (pat);
18550 return target;
18551
18552 case ARM_BUILTIN_WSADB:
18553 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
18554 case ARM_BUILTIN_WSADH:
18555 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
18556 case ARM_BUILTIN_WSADBZ:
18557 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
18558 case ARM_BUILTIN_WSADHZ:
18559 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
18560
18561 /* Several three-argument builtins. */
18562 case ARM_BUILTIN_WMACS:
18563 case ARM_BUILTIN_WMACU:
18564 case ARM_BUILTIN_WALIGN:
18565 case ARM_BUILTIN_TMIA:
18566 case ARM_BUILTIN_TMIAPH:
18567 case ARM_BUILTIN_TMIATT:
18568 case ARM_BUILTIN_TMIATB:
18569 case ARM_BUILTIN_TMIABT:
18570 case ARM_BUILTIN_TMIABB:
18571 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
18572 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
18573 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
18574 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
18575 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
18576 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
18577 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
18578 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
18579 : CODE_FOR_iwmmxt_walign);
18580 arg0 = CALL_EXPR_ARG (exp, 0);
18581 arg1 = CALL_EXPR_ARG (exp, 1);
18582 arg2 = CALL_EXPR_ARG (exp, 2);
18583 op0 = expand_normal (arg0);
18584 op1 = expand_normal (arg1);
18585 op2 = expand_normal (arg2);
18586 tmode = insn_data[icode].operand[0].mode;
18587 mode0 = insn_data[icode].operand[1].mode;
18588 mode1 = insn_data[icode].operand[2].mode;
18589 mode2 = insn_data[icode].operand[3].mode;
18590
18591 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18592 op0 = copy_to_mode_reg (mode0, op0);
18593 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18594 op1 = copy_to_mode_reg (mode1, op1);
18595 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18596 op2 = copy_to_mode_reg (mode2, op2);
18597 if (target == 0
18598 || GET_MODE (target) != tmode
18599 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18600 target = gen_reg_rtx (tmode);
18601 pat = GEN_FCN (icode) (target, op0, op1, op2);
18602 if (! pat)
18603 return 0;
18604 emit_insn (pat);
18605 return target;
18606
18607 case ARM_BUILTIN_WZERO:
18608 target = gen_reg_rtx (DImode);
18609 emit_insn (gen_iwmmxt_clrdi (target));
18610 return target;
18611
18612 case ARM_BUILTIN_THREAD_POINTER:
18613 return arm_load_tp (target);
18614
18615 default:
18616 break;
18617 }
18618
18619 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18620 if (d->code == (const enum arm_builtins) fcode)
18621 return arm_expand_binop_builtin (d->icode, exp, target);
18622
18623 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18624 if (d->code == (const enum arm_builtins) fcode)
18625 return arm_expand_unop_builtin (d->icode, exp, target, 0);
18626
18627 /* @@@ Should really do something sensible here. */
18628 return NULL_RTX;
18629 }
18630 \f
18631 /* Return the number (counting from 0) of
18632 the least significant set bit in MASK. */
18633
18634 inline static int
18635 number_of_first_bit_set (unsigned mask)
18636 {
18637 int bit;
18638
18639 for (bit = 0;
18640 (mask & (1 << bit)) == 0;
18641 ++bit)
18642 continue;
18643
18644 return bit;
18645 }
18646
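/* Illustrative note (not part of the original source): for MASK == 0x0c
   (bits 2 and 3 set) the function above returns 2.  Callers must guarantee
   that at least one bit is set, or the loop would never terminate.  */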
18647 /* Emit code to push or pop registers to or from the stack. F is the
18648 assembly file. MASK is the registers to push or pop. PUSH is
18649 nonzero if we should push, and zero if we should pop. For debugging
18650 output, if pushing, adjust CFA_OFFSET by the amount of space added
18651 to the stack. REAL_REGS should have the same number of bits set as
18652 MASK, and will be used instead (in the same order) to describe which
18653 registers were saved - this is used to mark the save slots when we
18654 push high registers after moving them to low registers. */
18655 static void
18656 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
18657 unsigned long real_regs)
18658 {
18659 int regno;
18660 int lo_mask = mask & 0xFF;
18661 int pushed_words = 0;
18662
18663 gcc_assert (mask);
18664
18665 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
18666 {
18667 /* Special case. Do not generate a POP PC statement here; do it in
18668 thumb_exit (). */
18669 thumb_exit (f, -1);
18670 return;
18671 }
18672
18673 if (ARM_EABI_UNWIND_TABLES && push)
18674 {
18675 fprintf (f, "\t.save\t{");
18676 for (regno = 0; regno < 15; regno++)
18677 {
18678 if (real_regs & (1 << regno))
18679 {
18680 if (real_regs & ((1 << regno) -1))
18681 fprintf (f, ", ");
18682 asm_fprintf (f, "%r", regno);
18683 }
18684 }
18685 fprintf (f, "}\n");
18686 }
18687
18688 fprintf (f, "\t%s\t{", push ? "push" : "pop");
18689
18690 /* Look at the low registers first. */
18691 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
18692 {
18693 if (lo_mask & 1)
18694 {
18695 asm_fprintf (f, "%r", regno);
18696
18697 if ((lo_mask & ~1) != 0)
18698 fprintf (f, ", ");
18699
18700 pushed_words++;
18701 }
18702 }
18703
18704 if (push && (mask & (1 << LR_REGNUM)))
18705 {
18706 /* Catch pushing the LR. */
18707 if (mask & 0xFF)
18708 fprintf (f, ", ");
18709
18710 asm_fprintf (f, "%r", LR_REGNUM);
18711
18712 pushed_words++;
18713 }
18714 else if (!push && (mask & (1 << PC_REGNUM)))
18715 {
18716 /* Catch popping the PC. */
18717 if (TARGET_INTERWORK || TARGET_BACKTRACE
18718 || crtl->calls_eh_return)
18719 {
18720 /* The PC is never popped directly; instead
18721 it is popped into r3 and then BX is used. */
18722 fprintf (f, "}\n");
18723
18724 thumb_exit (f, -1);
18725
18726 return;
18727 }
18728 else
18729 {
18730 if (mask & 0xFF)
18731 fprintf (f, ", ");
18732
18733 asm_fprintf (f, "%r", PC_REGNUM);
18734 }
18735 }
18736
18737 fprintf (f, "}\n");
18738
18739 if (push && pushed_words && dwarf2out_do_frame ())
18740 {
18741 char *l = dwarf2out_cfi_label (false);
18742 int pushed_mask = real_regs;
18743
18744 *cfa_offset += pushed_words * 4;
18745 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
18746
18747 pushed_words = 0;
18748 pushed_mask = real_regs;
18749 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
18750 {
18751 if (pushed_mask & 1)
18752 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
18753 }
18754 }
18755 }
18756
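/* Illustrative note (not part of the original source): for example, a call
   with PUSH == 1 and MASK covering r4, r5 and LR prints
       push    {r4, r5, lr}
   and, when frame debug info is wanted, advances *CFA_OFFSET by 12 (three
   words) and records a save slot for each register in REAL_REGS.  */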
18757 /* Generate code to return from a thumb function.
18758 If 'reg_containing_return_addr' is -1, then the return address is
18759 actually on the stack, at the stack pointer. */
18760 static void
18761 thumb_exit (FILE *f, int reg_containing_return_addr)
18762 {
18763 unsigned regs_available_for_popping;
18764 unsigned regs_to_pop;
18765 int pops_needed;
18766 unsigned available;
18767 unsigned required;
18768 int mode;
18769 int size;
18770 int restore_a4 = FALSE;
18771
18772 /* Compute the registers we need to pop. */
18773 regs_to_pop = 0;
18774 pops_needed = 0;
18775
18776 if (reg_containing_return_addr == -1)
18777 {
18778 regs_to_pop |= 1 << LR_REGNUM;
18779 ++pops_needed;
18780 }
18781
18782 if (TARGET_BACKTRACE)
18783 {
18784 /* Restore the (ARM) frame pointer and stack pointer. */
18785 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
18786 pops_needed += 2;
18787 }
18788
18789 /* If there is nothing to pop then just emit the BX instruction and
18790 return. */
18791 if (pops_needed == 0)
18792 {
18793 if (crtl->calls_eh_return)
18794 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
18795
18796 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
18797 return;
18798 }
18799 /* Otherwise if we are not supporting interworking and we have not created
18800 a backtrace structure and the function was not entered in ARM mode then
18801 just pop the return address straight into the PC. */
18802 else if (!TARGET_INTERWORK
18803 && !TARGET_BACKTRACE
18804 && !is_called_in_ARM_mode (current_function_decl)
18805 && !crtl->calls_eh_return)
18806 {
18807 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
18808 return;
18809 }
18810
18811 /* Find out how many of the (return) argument registers we can corrupt. */
18812 regs_available_for_popping = 0;
18813
18814 /* If returning via __builtin_eh_return, the bottom three registers
18815 all contain information needed for the return. */
18816 if (crtl->calls_eh_return)
18817 size = 12;
18818 else
18819 {
18820 /* We can deduce the registers used from the function's
18821 return value. This is more reliable than examining
18822 df_regs_ever_live_p () because that will be set if the register is
18823 ever used in the function, not just if the register is used
18824 to hold a return value. */
18825
18826 if (crtl->return_rtx != 0)
18827 mode = GET_MODE (crtl->return_rtx);
18828 else
18829 mode = DECL_MODE (DECL_RESULT (current_function_decl));
18830
18831 size = GET_MODE_SIZE (mode);
18832
18833 if (size == 0)
18834 {
18835 /* In a void function we can use any argument register.
18836 In a function that returns a structure on the stack
18837 we can use the second and third argument registers. */
18838 if (mode == VOIDmode)
18839 regs_available_for_popping =
18840 (1 << ARG_REGISTER (1))
18841 | (1 << ARG_REGISTER (2))
18842 | (1 << ARG_REGISTER (3));
18843 else
18844 regs_available_for_popping =
18845 (1 << ARG_REGISTER (2))
18846 | (1 << ARG_REGISTER (3));
18847 }
18848 else if (size <= 4)
18849 regs_available_for_popping =
18850 (1 << ARG_REGISTER (2))
18851 | (1 << ARG_REGISTER (3));
18852 else if (size <= 8)
18853 regs_available_for_popping =
18854 (1 << ARG_REGISTER (3));
18855 }
18856
18857 /* Match registers to be popped with registers into which we pop them. */
18858 for (available = regs_available_for_popping,
18859 required = regs_to_pop;
18860 required != 0 && available != 0;
18861 available &= ~(available & - available),
18862 required &= ~(required & - required))
18863 -- pops_needed;
18864
18865 /* If we have any popping registers left over, remove them. */
18866 if (available > 0)
18867 regs_available_for_popping &= ~available;
18868
18869 /* Otherwise if we need another popping register we can use
18870 the fourth argument register. */
18871 else if (pops_needed)
18872 {
18873 /* If we have not found any free argument registers and
18874 reg a4 contains the return address, we must move it. */
18875 if (regs_available_for_popping == 0
18876 && reg_containing_return_addr == LAST_ARG_REGNUM)
18877 {
18878 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
18879 reg_containing_return_addr = LR_REGNUM;
18880 }
18881 else if (size > 12)
18882 {
18883 /* Register a4 is being used to hold part of the return value,
18884 but we have dire need of a free, low register. */
18885 restore_a4 = TRUE;
18886
18887 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
18888 }
18889
18890 if (reg_containing_return_addr != LAST_ARG_REGNUM)
18891 {
18892 /* The fourth argument register is available. */
18893 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
18894
18895 --pops_needed;
18896 }
18897 }
18898
18899 /* Pop as many registers as we can. */
18900 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18901 regs_available_for_popping);
18902
18903 /* Process the registers we popped. */
18904 if (reg_containing_return_addr == -1)
18905 {
18906 /* The return address was popped into the lowest numbered register. */
18907 regs_to_pop &= ~(1 << LR_REGNUM);
18908
18909 reg_containing_return_addr =
18910 number_of_first_bit_set (regs_available_for_popping);
18911
18912 /* Remove this register from the mask of available registers, so that
18913 the return address will not be corrupted by further pops. */
18914 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
18915 }
18916
18917 /* If we popped other registers then handle them here. */
18918 if (regs_available_for_popping)
18919 {
18920 int frame_pointer;
18921
18922 /* Work out which register currently contains the frame pointer. */
18923 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
18924
18925 /* Move it into the correct place. */
18926 asm_fprintf (f, "\tmov\t%r, %r\n",
18927 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
18928
18929 /* (Temporarily) remove it from the mask of popped registers. */
18930 regs_available_for_popping &= ~(1 << frame_pointer);
18931 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
18932
18933 if (regs_available_for_popping)
18934 {
18935 int stack_pointer;
18936
18937 /* We popped the stack pointer as well, so
18938 find the register that contains it. */
18939 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
18940
18941 /* Move it into the stack register. */
18942 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
18943
18944 /* At this point we have popped all necessary registers, so
18945 do not worry about restoring regs_available_for_popping
18946 to its correct value:
18947
18948 assert (pops_needed == 0)
18949 assert (regs_available_for_popping == (1 << frame_pointer))
18950 assert (regs_to_pop == (1 << STACK_POINTER)) */
18951 }
18952 else
18953 {
18954 /* Since we have just moved the popped value into the frame
18955 pointer, the popping register is available for reuse, and
18956 we know that we still have the stack pointer left to pop. */
18957 regs_available_for_popping |= (1 << frame_pointer);
18958 }
18959 }
18960
18961 /* If we still have registers left on the stack, but we no longer have
18962 any registers into which we can pop them, then we must move the return
18963 address into the link register and make available the register that
18964 contained it. */
18965 if (regs_available_for_popping == 0 && pops_needed > 0)
18966 {
18967 regs_available_for_popping |= 1 << reg_containing_return_addr;
18968
18969 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
18970 reg_containing_return_addr);
18971
18972 reg_containing_return_addr = LR_REGNUM;
18973 }
18974
18975 /* If we have registers left on the stack then pop some more.
18976 We know that at most we will want to pop FP and SP. */
18977 if (pops_needed > 0)
18978 {
18979 int popped_into;
18980 int move_to;
18981
18982 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
18983 regs_available_for_popping);
18984
18985 /* We have popped either FP or SP.
18986 Move whichever one it is into the correct register. */
18987 popped_into = number_of_first_bit_set (regs_available_for_popping);
18988 move_to = number_of_first_bit_set (regs_to_pop);
18989
18990 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
18991
18992 regs_to_pop &= ~(1 << move_to);
18993
18994 --pops_needed;
18995 }
18996
18997 /* If we still have not popped everything then we must have only
18998 had one register available to us and we are now popping the SP. */
18999 if (pops_needed > 0)
19000 {
19001 int popped_into;
19002
19003 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
19004 regs_available_for_popping);
19005
19006 popped_into = number_of_first_bit_set (regs_available_for_popping);
19007
19008 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
19009 /*
19010 assert (regs_to_pop == (1 << STACK_POINTER))
19011 assert (pops_needed == 1)
19012 */
19013 }
19014
19015 /* If necessary restore the a4 register. */
19016 if (restore_a4)
19017 {
19018 if (reg_containing_return_addr != LR_REGNUM)
19019 {
19020 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
19021 reg_containing_return_addr = LR_REGNUM;
19022 }
19023
19024 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
19025 }
19026
19027 if (crtl->calls_eh_return)
19028 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19029
19030 /* Return to caller. */
19031 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19032 }
19033
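/* Illustrative note (not part of the original source): in the simplest case,
   where nothing needs popping and the return address is already in a
   register (say LR), thumb_exit just prints
       bx      lr
   preceded by the EH stack adjustment when __builtin_eh_return is used.  */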
19034 \f
19035 void
19036 thumb1_final_prescan_insn (rtx insn)
19037 {
19038 if (flag_print_asm_name)
19039 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
19040 INSN_ADDRESSES (INSN_UID (insn)));
19041 }
19042
19043 int
19044 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
19045 {
19046 unsigned HOST_WIDE_INT mask = 0xff;
19047 int i;
19048
19049 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
19050 if (val == 0) /* XXX */
19051 return 0;
19052
19053 for (i = 0; i < 25; i++)
19054 if ((val & (mask << i)) == val)
19055 return 1;
19056
19057 return 0;
19058 }
19059
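/* Illustrative note (not part of the original source): the test above accepts
   any constant whose set bits fit within a single byte somewhere in the low
   32 bits, e.g. 0x00ff0000 (0xff << 16) yields 1 while 0x00ff00ff yields 0.  */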
19060 /* Returns nonzero if the current function contains,
19061 or might contain, a far jump. */
19062 static int
19063 thumb_far_jump_used_p (void)
19064 {
19065 rtx insn;
19066
19067 /* This test is only important for leaf functions. */
19068 /* assert (!leaf_function_p ()); */
19069
19070 /* If we have already decided that far jumps may be used,
19071 do not bother checking again, and always return true even if
19072 it turns out that they are not being used. Once we have made
19073 the decision that far jumps are present (and that hence the link
19074 register will be pushed onto the stack) we cannot go back on it. */
19075 if (cfun->machine->far_jump_used)
19076 return 1;
19077
19078 /* If this function is not being called from the prologue/epilogue
19079 generation code then it must be being called from the
19080 INITIAL_ELIMINATION_OFFSET macro. */
19081 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
19082 {
19083 /* In this case we know that we are being asked about the elimination
19084 of the arg pointer register. If that register is not being used,
19085 then there are no arguments on the stack, and we do not have to
19086 worry that a far jump might force the prologue to push the link
19087 register, changing the stack offsets. In this case we can just
19088 return false, since the presence of far jumps in the function will
19089 not affect stack offsets.
19090
19091 If the arg pointer is live (or if it was live, but has now been
19092 eliminated and so set to dead) then we do have to test to see if
19093 the function might contain a far jump. This test can lead to some
19094 false negatives, since before reload is completed the length of
19095 branch instructions is not known, so GCC defaults to returning their
19096 longest length, which in turn sets the far jump attribute to true.
19097
19098 A false negative will not result in bad code being generated, but it
19099 will result in a needless push and pop of the link register. We
19100 hope that this does not occur too often.
19101
19102 If we need doubleword stack alignment this could affect the other
19103 elimination offsets so we can't risk getting it wrong. */
19104 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
19105 cfun->machine->arg_pointer_live = 1;
19106 else if (!cfun->machine->arg_pointer_live)
19107 return 0;
19108 }
19109
19110 /* Check to see if the function contains a branch
19111 insn with the far jump attribute set. */
19112 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19113 {
19114 if (GET_CODE (insn) == JUMP_INSN
19115 /* Ignore tablejump patterns. */
19116 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19117 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
19118 && get_attr_far_jump (insn) == FAR_JUMP_YES
19119 )
19120 {
19121 /* Record the fact that we have decided that
19122 the function does use far jumps. */
19123 cfun->machine->far_jump_used = 1;
19124 return 1;
19125 }
19126 }
19127
19128 return 0;
19129 }
19130
19131 /* Return nonzero if FUNC must be entered in ARM mode. */
19132 int
19133 is_called_in_ARM_mode (tree func)
19134 {
19135 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
19136
19137 /* Ignore the problem about functions whose address is taken. */
19138 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
19139 return TRUE;
19140
19141 #ifdef ARM_PE
19142 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
19143 #else
19144 return FALSE;
19145 #endif
19146 }
19147
19148 /* The bits which aren't usefully expanded as rtl. */
19149 const char *
19150 thumb_unexpanded_epilogue (void)
19151 {
19152 arm_stack_offsets *offsets;
19153 int regno;
19154 unsigned long live_regs_mask = 0;
19155 int high_regs_pushed = 0;
19156 int had_to_push_lr;
19157 int size;
19158
19159 if (cfun->machine->return_used_this_function != 0)
19160 return "";
19161
19162 if (IS_NAKED (arm_current_func_type ()))
19163 return "";
19164
19165 offsets = arm_get_frame_offsets ();
19166 live_regs_mask = offsets->saved_regs_mask;
19167 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19168
19169 /* We can deduce the registers used from the function's return value.
19170 This is more reliable than examining df_regs_ever_live_p () because that
19171 will be set if the register is ever used in the function, not just if
19172 the register is used to hold a return value. */
19173 size = arm_size_return_regs ();
19174
19175 /* The prolog may have pushed some high registers to use as
19176 work registers. For example, the testsuite file:
19177 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
19178 compiles to produce:
19179 push {r4, r5, r6, r7, lr}
19180 mov r7, r9
19181 mov r6, r8
19182 push {r6, r7}
19183 as part of the prolog. We have to undo that pushing here. */
19184
19185 if (high_regs_pushed)
19186 {
19187 unsigned long mask = live_regs_mask & 0xff;
19188 int next_hi_reg;
19189
19190 /* The available low registers depend on the size of the value we are
19191 returning. */
19192 if (size <= 12)
19193 mask |= 1 << 3;
19194 if (size <= 8)
19195 mask |= 1 << 2;
19196
19197 if (mask == 0)
19198 /* Oh dear! We have no low registers into which we can pop
19199 high registers! */
19200 internal_error
19201 ("no low registers available for popping high registers");
19202
19203 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
19204 if (live_regs_mask & (1 << next_hi_reg))
19205 break;
19206
19207 while (high_regs_pushed)
19208 {
19209 /* Find lo register(s) into which the high register(s) can
19210 be popped. */
19211 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19212 {
19213 if (mask & (1 << regno))
19214 high_regs_pushed--;
19215 if (high_regs_pushed == 0)
19216 break;
19217 }
19218
19219 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
19220
19221 /* Pop the values into the low register(s). */
19222 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
19223
19224 /* Move the value(s) into the high registers. */
19225 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
19226 {
19227 if (mask & (1 << regno))
19228 {
19229 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
19230 regno);
19231
19232 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
19233 if (live_regs_mask & (1 << next_hi_reg))
19234 break;
19235 }
19236 }
19237 }
19238 live_regs_mask &= ~0x0f00;
19239 }
19240
19241 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
19242 live_regs_mask &= 0xff;
19243
19244 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
19245 {
19246 /* Pop the return address into the PC. */
19247 if (had_to_push_lr)
19248 live_regs_mask |= 1 << PC_REGNUM;
19249
19250 /* Either no argument registers were pushed or a backtrace
19251 structure was created which includes an adjusted stack
19252 pointer, so just pop everything. */
19253 if (live_regs_mask)
19254 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19255 live_regs_mask);
19256
19257 /* We have either just popped the return address into the
19258 PC or it was kept in LR for the entire function. */
19259 if (!had_to_push_lr)
19260 thumb_exit (asm_out_file, LR_REGNUM);
19261 }
19262 else
19263 {
19264 /* Pop everything but the return address. */
19265 if (live_regs_mask)
19266 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19267 live_regs_mask);
19268
19269 if (had_to_push_lr)
19270 {
19271 if (size > 12)
19272 {
19273 /* We have no free low regs, so save one. */
19274 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
19275 LAST_ARG_REGNUM);
19276 }
19277
19278 /* Get the return address into a temporary register. */
19279 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
19280 1 << LAST_ARG_REGNUM);
19281
19282 if (size > 12)
19283 {
19284 /* Move the return address to lr. */
19285 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
19286 LAST_ARG_REGNUM);
19287 /* Restore the low register. */
19288 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
19289 IP_REGNUM);
19290 regno = LR_REGNUM;
19291 }
19292 else
19293 regno = LAST_ARG_REGNUM;
19294 }
19295 else
19296 regno = LR_REGNUM;
19297
19298 /* Remove the argument registers that were pushed onto the stack. */
19299 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
19300 SP_REGNUM, SP_REGNUM,
19301 crtl->args.pretend_args_size);
19302
19303 thumb_exit (asm_out_file, regno);
19304 }
19305
19306 return "";
19307 }
19308
19309 /* Functions to save and restore machine-specific function data. */
19310 static struct machine_function *
19311 arm_init_machine_status (void)
19312 {
19313 struct machine_function *machine;
19314 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
19315
19316 #if ARM_FT_UNKNOWN != 0
19317 machine->func_type = ARM_FT_UNKNOWN;
19318 #endif
19319 return machine;
19320 }
19321
19322 /* Return an RTX indicating where the return address to the
19323 calling function can be found. */
19324 rtx
19325 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
19326 {
19327 if (count != 0)
19328 return NULL_RTX;
19329
19330 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
19331 }
19332
19333 /* Do anything needed before RTL is emitted for each function. */
19334 void
19335 arm_init_expanders (void)
19336 {
19337 /* Arrange to initialize and mark the machine per-function status. */
19338 init_machine_status = arm_init_machine_status;
19339
19340 /* This is to stop the combine pass optimizing away the alignment
19341 adjustment of va_arg. */
19342 /* ??? It is claimed that this should not be necessary. */
19343 if (cfun)
19344 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
19345 }
19346
19347
19348 /* Like arm_compute_initial_elimination_offset. Simpler because there
19349 isn't an ABI specified frame pointer for Thumb. Instead, we set it
19350 to point at the base of the local variables after static stack
19351 space for a function has been allocated. */
19352
19353 HOST_WIDE_INT
19354 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
19355 {
19356 arm_stack_offsets *offsets;
19357
19358 offsets = arm_get_frame_offsets ();
19359
19360 switch (from)
19361 {
19362 case ARG_POINTER_REGNUM:
19363 switch (to)
19364 {
19365 case STACK_POINTER_REGNUM:
19366 return offsets->outgoing_args - offsets->saved_args;
19367
19368 case FRAME_POINTER_REGNUM:
19369 return offsets->soft_frame - offsets->saved_args;
19370
19371 case ARM_HARD_FRAME_POINTER_REGNUM:
19372 return offsets->saved_regs - offsets->saved_args;
19373
19374 case THUMB_HARD_FRAME_POINTER_REGNUM:
19375 return offsets->locals_base - offsets->saved_args;
19376
19377 default:
19378 gcc_unreachable ();
19379 }
19380 break;
19381
19382 case FRAME_POINTER_REGNUM:
19383 switch (to)
19384 {
19385 case STACK_POINTER_REGNUM:
19386 return offsets->outgoing_args - offsets->soft_frame;
19387
19388 case ARM_HARD_FRAME_POINTER_REGNUM:
19389 return offsets->saved_regs - offsets->soft_frame;
19390
19391 case THUMB_HARD_FRAME_POINTER_REGNUM:
19392 return offsets->locals_base - offsets->soft_frame;
19393
19394 default:
19395 gcc_unreachable ();
19396 }
19397 break;
19398
19399 default:
19400 gcc_unreachable ();
19401 }
19402 }
19403
19404 /* Given the stack offsets and register mask in OFFSETS, decide
19405 how many additional registers to push instead of subtracting
19406 a constant from SP. */
19407 static int
19408 thumb1_extra_regs_pushed (arm_stack_offsets *offsets)
19409 {
19410 HOST_WIDE_INT amount = offsets->outgoing_args - offsets->saved_regs;
19411 unsigned long live_regs_mask = offsets->saved_regs_mask;
19412 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19413 unsigned long l_mask = live_regs_mask & 0x40ff;
19414 /* Then count how many other high registers will need to be pushed. */
19415 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19416 int n_free;
19417
19418 /* If the stack frame size is 512 exactly, we can save one load
19419 instruction, which should make this a win even when optimizing
19420 for speed. */
19421 if (!optimize_size && amount != 512)
19422 return 0;
19423
19424 /* Can't do this if there are high registers to push, or if we
19425 are not going to do a push at all. */
19426 if (high_regs_pushed != 0 || l_mask == 0)
19427 return 0;
19428
19429 /* Don't do this if thumb1_expand_prologue wants to emit instructions
19430 between the push and the stack frame allocation. */
19431 if ((flag_pic && arm_pic_register != INVALID_REGNUM)
19432 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))
19433 return 0;
19434
19435 for (n_free = 0; n_free < 8 && !(live_regs_mask & 1); live_regs_mask >>= 1)
19436 n_free++;
19437
19438 if (n_free == 0)
19439 return 0;
19440 gcc_assert (amount / 4 * 4 == amount);
19441
19442 if (amount >= 512 && (amount - n_free * 4) < 512)
19443 return (amount - 508) / 4;
19444 if (amount <= n_free * 4)
19445 return amount / 4;
19446 return 0;
19447 }
19448
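/* Illustrative note (not part of the original source): if the frame needs
   512 bytes and exactly one low register is free, the function above returns
   (512 - 508) / 4 == 1, i.e. one extra register is pushed so that the
   remaining 508-byte adjustment still fits a single Thumb "sub sp" insn.  */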
19449 /* Generate the rest of a function's prologue. */
19450 void
19451 thumb1_expand_prologue (void)
19452 {
19453 rtx insn, dwarf;
19454
19455 HOST_WIDE_INT amount;
19456 arm_stack_offsets *offsets;
19457 unsigned long func_type;
19458 int regno;
19459 unsigned long live_regs_mask;
19460
19461 func_type = arm_current_func_type ();
19462
19463 /* Naked functions don't have prologues. */
19464 if (IS_NAKED (func_type))
19465 return;
19466
19467 if (IS_INTERRUPT (func_type))
19468 {
19469 error ("interrupt Service Routines cannot be coded in Thumb mode");
19470 return;
19471 }
19472
19473 offsets = arm_get_frame_offsets ();
19474 live_regs_mask = offsets->saved_regs_mask;
19475 /* Load the pic register before setting the frame pointer,
19476 so we can use r7 as a temporary work register. */
19477 if (flag_pic && arm_pic_register != INVALID_REGNUM)
19478 arm_load_pic_register (live_regs_mask);
19479
19480 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19481 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19482 stack_pointer_rtx);
19483
19484 amount = offsets->outgoing_args - offsets->saved_regs;
19485 amount -= 4 * thumb1_extra_regs_pushed (offsets);
19486 if (amount)
19487 {
19488 if (amount < 512)
19489 {
19490 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19491 GEN_INT (- amount)));
19492 RTX_FRAME_RELATED_P (insn) = 1;
19493 }
19494 else
19495 {
19496 rtx reg;
19497
19498 /* The stack decrement is too big for an immediate value in a single
19499 insn. In theory we could issue multiple subtracts, but after
19500 three of them it becomes more space efficient to place the full
19501 value in the constant pool and load into a register. (Also the
19502 ARM debugger really likes to see only one stack decrement per
19503 function). So instead we look for a scratch register into which
19504 we can load the decrement, and then we subtract this from the
19505 stack pointer. Unfortunately on the thumb the only available
19506 scratch registers are the argument registers, and we cannot use
19507 these as they may hold arguments to the function. Instead we
19508 attempt to locate a call preserved register which is used by this
19509 function. If we can find one, then we know that it will have
19510 been pushed at the start of the prologue and so we can corrupt
19511 it now. */
19512 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
19513 if (live_regs_mask & (1 << regno))
19514 break;
19515
19516 gcc_assert(regno <= LAST_LO_REGNUM);
19517
19518 reg = gen_rtx_REG (SImode, regno);
19519
19520 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
19521
19522 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
19523 stack_pointer_rtx, reg));
19524 RTX_FRAME_RELATED_P (insn) = 1;
19525 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19526 plus_constant (stack_pointer_rtx,
19527 -amount));
19528 RTX_FRAME_RELATED_P (dwarf) = 1;
19529 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19530 }
19531 }
19532
19533 if (frame_pointer_needed)
19534 thumb_set_frame_pointer (offsets);
19535
19536 /* If we are profiling, make sure no instructions are scheduled before
19537 the call to mcount. Similarly if the user has requested no
19538 scheduling in the prolog. Similarly if we want non-call exceptions
19539 using the EABI unwinder, to prevent faulting instructions from being
19540 swapped with a stack adjustment. */
19541 if (crtl->profile || !TARGET_SCHED_PROLOG
19542 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
19543 emit_insn (gen_blockage ());
19544
19545 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
19546 if (live_regs_mask & 0xff)
19547 cfun->machine->lr_save_eliminated = 0;
19548 }
19549
19550
19551 void
19552 thumb1_expand_epilogue (void)
19553 {
19554 HOST_WIDE_INT amount;
19555 arm_stack_offsets *offsets;
19556 int regno;
19557
19558 /* Naked functions don't have epilogues. */
19559 if (IS_NAKED (arm_current_func_type ()))
19560 return;
19561
19562 offsets = arm_get_frame_offsets ();
19563 amount = offsets->outgoing_args - offsets->saved_regs;
19564
19565 if (frame_pointer_needed)
19566 {
19567 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
19568 amount = offsets->locals_base - offsets->saved_regs;
19569 }
19570
19571 gcc_assert (amount >= 0);
19572 if (amount)
19573 {
19574 if (amount < 512)
19575 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19576 GEN_INT (amount)));
19577 else
19578 {
19579 /* r3 is always free in the epilogue. */
19580 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
19581
19582 emit_insn (gen_movsi (reg, GEN_INT (amount)));
19583 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
19584 }
19585 }
19586
19587 /* Emit a USE (stack_pointer_rtx), so that
19588 the stack adjustment will not be deleted. */
19589 emit_insn (gen_prologue_use (stack_pointer_rtx));
19590
19591 if (crtl->profile || !TARGET_SCHED_PROLOG)
19592 emit_insn (gen_blockage ());
19593
19594 /* Emit a clobber for each register that will be restored in the epilogue,
19595 so that flow2 will get register lifetimes correct. */
19596 for (regno = 0; regno < 13; regno++)
19597 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
19598 emit_clobber (gen_rtx_REG (SImode, regno));
19599
19600 if (! df_regs_ever_live_p (LR_REGNUM))
19601 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
19602 }
19603
19604 static void
19605 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
19606 {
19607 arm_stack_offsets *offsets;
19608 unsigned long live_regs_mask = 0;
19609 unsigned long l_mask;
19610 unsigned high_regs_pushed = 0;
19611 int cfa_offset = 0;
19612 int regno;
19613
19614 if (IS_NAKED (arm_current_func_type ()))
19615 return;
19616
19617 if (is_called_in_ARM_mode (current_function_decl))
19618 {
19619 const char * name;
19620
19621 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
19622 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
19623 == SYMBOL_REF);
19624 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
19625
19626 /* Generate code sequence to switch us into Thumb mode. */
19627 /* The .code 32 directive has already been emitted by
19628 ASM_DECLARE_FUNCTION_NAME. */
19629 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
19630 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
19631
19632 /* Generate a label, so that the debugger will notice the
19633 change in instruction sets. This label is also used by
19634 the assembler to bypass the ARM code when this function
19635 is called from a Thumb encoded function elsewhere in the
19636 same file. Hence the definition of STUB_NAME here must
19637 agree with the definition in gas/config/tc-arm.c. */
19638
19639 #define STUB_NAME ".real_start_of"
19640
19641 fprintf (f, "\t.code\t16\n");
19642 #ifdef ARM_PE
19643 if (arm_dllexport_name_p (name))
19644 name = arm_strip_name_encoding (name);
19645 #endif
19646 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
19647 fprintf (f, "\t.thumb_func\n");
19648 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
19649 }
19650
19651 if (crtl->args.pretend_args_size)
19652 {
19653 /* Output unwind directive for the stack adjustment. */
19654 if (ARM_EABI_UNWIND_TABLES)
19655 fprintf (f, "\t.pad #%d\n",
19656 crtl->args.pretend_args_size);
19657
19658 if (cfun->machine->uses_anonymous_args)
19659 {
19660 int num_pushes;
19661
19662 fprintf (f, "\tpush\t{");
19663
19664 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
19665
19666 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
19667 regno <= LAST_ARG_REGNUM;
19668 regno++)
19669 asm_fprintf (f, "%r%s", regno,
19670 regno == LAST_ARG_REGNUM ? "" : ", ");
19671
19672 fprintf (f, "}\n");
19673 }
19674 else
19675 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
19676 SP_REGNUM, SP_REGNUM,
19677 crtl->args.pretend_args_size);
19678
19679 /* We don't need to record the stores for unwinding (would it
19680 help the debugger any if we did?), but record the change in
19681 the stack pointer. */
19682 if (dwarf2out_do_frame ())
19683 {
19684 char *l = dwarf2out_cfi_label (false);
19685
19686 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
19687 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19688 }
19689 }
19690
19691 /* Get the registers we are going to push. */
19692 offsets = arm_get_frame_offsets ();
19693 live_regs_mask = offsets->saved_regs_mask;
19694 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19695 l_mask = live_regs_mask & 0x40ff;
19696 /* Then count how many other high registers will need to be pushed. */
19697 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19698
19699 if (TARGET_BACKTRACE)
19700 {
19701 unsigned offset;
19702 unsigned work_register;
19703
19704 /* We have been asked to create a stack backtrace structure.
19705 The code looks like this:
19706
19707 0 .align 2
19708 0 func:
19709 0 sub SP, #16 Reserve space for 4 registers.
19710 2 push {R7} Push low registers.
19711 4 add R7, SP, #20 Get the stack pointer before the push.
19712 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
19713 8 mov R7, PC Get hold of the start of this code plus 12.
19714 10 str R7, [SP, #16] Store it.
19715 12 mov R7, FP Get hold of the current frame pointer.
19716 14 str R7, [SP, #4] Store it.
19717 16 mov R7, LR Get hold of the current return address.
19718 18 str R7, [SP, #12] Store it.
19719 20 add R7, SP, #16 Point at the start of the backtrace structure.
19720 22 mov FP, R7 Put this value into the frame pointer. */
19721
19722 work_register = thumb_find_work_register (live_regs_mask);
19723
19724 if (ARM_EABI_UNWIND_TABLES)
19725 asm_fprintf (f, "\t.pad #16\n");
19726
19727 asm_fprintf
19728 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19729 SP_REGNUM, SP_REGNUM);
19730
19731 if (dwarf2out_do_frame ())
19732 {
19733 char *l = dwarf2out_cfi_label (false);
19734
19735 cfa_offset = cfa_offset + 16;
19736 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
19737 }
19738
19739 if (l_mask)
19740 {
19741 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
19742 offset = bit_count (l_mask) * UNITS_PER_WORD;
19743 }
19744 else
19745 offset = 0;
19746
19747 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19748 offset + 16 + crtl->args.pretend_args_size);
19749
19750 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19751 offset + 4);
19752
19753 /* Make sure that the instruction fetching the PC is in the right place
19754 to calculate "start of backtrace creation code + 12". */
19755 if (l_mask)
19756 {
19757 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19758 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19759 offset + 12);
19760 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19761 ARM_HARD_FRAME_POINTER_REGNUM);
19762 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19763 offset);
19764 }
19765 else
19766 {
19767 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
19768 ARM_HARD_FRAME_POINTER_REGNUM);
19769 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19770 offset);
19771 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
19772 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19773 offset + 12);
19774 }
19775
19776 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
19777 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
19778 offset + 8);
19779 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
19780 offset + 12);
19781 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
19782 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
19783 }
19784 /* Optimization: If we are not pushing any low registers but we are going
19785 to push some high registers then delay our first push. This will just
19786 be a push of LR and we can combine it with the push of the first high
19787 register. */
19788 else if ((l_mask & 0xff) != 0
19789 || (high_regs_pushed == 0 && l_mask))
19790 {
19791 unsigned long mask = l_mask;
19792 mask |= (1 << thumb1_extra_regs_pushed (offsets)) - 1;
19793 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
19794 }
19795
19796 if (high_regs_pushed)
19797 {
19798 unsigned pushable_regs;
19799 unsigned next_hi_reg;
19800
19801 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
19802 if (live_regs_mask & (1 << next_hi_reg))
19803 break;
19804
19805 pushable_regs = l_mask & 0xff;
19806
19807 if (pushable_regs == 0)
19808 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
19809
19810 while (high_regs_pushed > 0)
19811 {
19812 unsigned long real_regs_mask = 0;
19813
19814 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
19815 {
19816 if (pushable_regs & (1 << regno))
19817 {
19818 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
19819
19820 high_regs_pushed --;
19821 real_regs_mask |= (1 << next_hi_reg);
19822
19823 if (high_regs_pushed)
19824 {
19825 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
19826 next_hi_reg --)
19827 if (live_regs_mask & (1 << next_hi_reg))
19828 break;
19829 }
19830 else
19831 {
19832 pushable_regs &= ~((1 << regno) - 1);
19833 break;
19834 }
19835 }
19836 }
19837
19838 /* If we had to find a work register and we have not yet
19839 saved the LR then add it to the list of regs to push. */
19840 if (l_mask == (1 << LR_REGNUM))
19841 {
19842 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
19843 1, &cfa_offset,
19844 real_regs_mask | (1 << LR_REGNUM));
19845 l_mask = 0;
19846 }
19847 else
19848 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
19849 }
19850 }
19851 }
19852
19853 /* Handle the case of a double word load into a low register from
19854 a computed memory address. The computed address may involve a
19855 register which is overwritten by the load. */
19856 const char *
19857 thumb_load_double_from_address (rtx *operands)
19858 {
19859 rtx addr;
19860 rtx base;
19861 rtx offset;
19862 rtx arg1;
19863 rtx arg2;
19864
19865 gcc_assert (GET_CODE (operands[0]) == REG);
19866 gcc_assert (GET_CODE (operands[1]) == MEM);
19867
19868 /* Get the memory address. */
19869 addr = XEXP (operands[1], 0);
19870
19871 /* Work out how the memory address is computed. */
19872 switch (GET_CODE (addr))
19873 {
19874 case REG:
19875 operands[2] = adjust_address (operands[1], SImode, 4);
19876
19877 if (REGNO (operands[0]) == REGNO (addr))
19878 {
19879 output_asm_insn ("ldr\t%H0, %2", operands);
19880 output_asm_insn ("ldr\t%0, %1", operands);
19881 }
19882 else
19883 {
19884 output_asm_insn ("ldr\t%0, %1", operands);
19885 output_asm_insn ("ldr\t%H0, %2", operands);
19886 }
19887 break;
19888
19889 case CONST:
19890 /* Compute <address> + 4 for the high order load. */
19891 operands[2] = adjust_address (operands[1], SImode, 4);
19892
19893 output_asm_insn ("ldr\t%0, %1", operands);
19894 output_asm_insn ("ldr\t%H0, %2", operands);
19895 break;
19896
19897 case PLUS:
19898 arg1 = XEXP (addr, 0);
19899 arg2 = XEXP (addr, 1);
19900
19901 if (CONSTANT_P (arg1))
19902 base = arg2, offset = arg1;
19903 else
19904 base = arg1, offset = arg2;
19905
19906 gcc_assert (GET_CODE (base) == REG);
19907
19908 /* Catch the case of <address> = <reg> + <reg> */
19909 if (GET_CODE (offset) == REG)
19910 {
19911 int reg_offset = REGNO (offset);
19912 int reg_base = REGNO (base);
19913 int reg_dest = REGNO (operands[0]);
19914
19915 /* Add the base and offset registers together into the
19916 higher destination register. */
19917 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
19918 reg_dest + 1, reg_base, reg_offset);
19919
19920 /* Load the lower destination register from the address in
19921 the higher destination register. */
19922 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
19923 reg_dest, reg_dest + 1);
19924
19925 /* Load the higher destination register from its own address
19926 plus 4. */
19927 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
19928 reg_dest + 1, reg_dest + 1);
19929 }
19930 else
19931 {
19932 /* Compute <address> + 4 for the high order load. */
19933 operands[2] = adjust_address (operands[1], SImode, 4);
19934
19935 /* If the computed address is held in the low order register
19936 then load the high order register first, otherwise always
19937 load the low order register first. */
19938 if (REGNO (operands[0]) == REGNO (base))
19939 {
19940 output_asm_insn ("ldr\t%H0, %2", operands);
19941 output_asm_insn ("ldr\t%0, %1", operands);
19942 }
19943 else
19944 {
19945 output_asm_insn ("ldr\t%0, %1", operands);
19946 output_asm_insn ("ldr\t%H0, %2", operands);
19947 }
19948 }
19949 break;
19950
19951 case LABEL_REF:
19952 /* With no registers to worry about we can just load the value
19953 directly. */
19954 operands[2] = adjust_address (operands[1], SImode, 4);
19955
19956 output_asm_insn ("ldr\t%H0, %2", operands);
19957 output_asm_insn ("ldr\t%0, %1", operands);
19958 break;
19959
19960 default:
19961 gcc_unreachable ();
19962 }
19963
19964 return "";
19965 }
19966
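/* Output an ldmia/stmia pair that copies N (2 or 3) words from the address
   in operands[1] to the address in operands[0], using operands[4..6] as
   scratch registers.  The scratch registers are sorted into ascending order
   first, because ldmia/stmia register lists must be ascending.  */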
19967 const char *
19968 thumb_output_move_mem_multiple (int n, rtx *operands)
19969 {
19970 rtx tmp;
19971
19972 switch (n)
19973 {
19974 case 2:
19975 if (REGNO (operands[4]) > REGNO (operands[5]))
19976 {
19977 tmp = operands[4];
19978 operands[4] = operands[5];
19979 operands[5] = tmp;
19980 }
19981 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
19982 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
19983 break;
19984
19985 case 3:
19986 if (REGNO (operands[4]) > REGNO (operands[5]))
19987 {
19988 tmp = operands[4];
19989 operands[4] = operands[5];
19990 operands[5] = tmp;
19991 }
19992 if (REGNO (operands[5]) > REGNO (operands[6]))
19993 {
19994 tmp = operands[5];
19995 operands[5] = operands[6];
19996 operands[6] = tmp;
19997 }
19998 if (REGNO (operands[4]) > REGNO (operands[5]))
19999 {
20000 tmp = operands[4];
20001 operands[4] = operands[5];
20002 operands[5] = tmp;
20003 }
20004
20005 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
20006 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
20007 break;
20008
20009 default:
20010 gcc_unreachable ();
20011 }
20012
20013 return "";
20014 }
20015
20016 /* Output a call-via instruction for thumb state. */
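/* The call is emitted as a "bl" to a per-register stub that simply does
   "bx <reg>"; for code in the plain text section the stubs are emitted once
   per compilation unit by arm_file_end below.  */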
20017 const char *
20018 thumb_call_via_reg (rtx reg)
20019 {
20020 int regno = REGNO (reg);
20021 rtx *labelp;
20022
20023 gcc_assert (regno < LR_REGNUM);
20024
20025 /* If we are in the normal text section we can use a single instance
20026 per compilation unit. If we are doing function sections, then we need
20027 an entry per section, since we can't rely on reachability. */
20028 if (in_section == text_section)
20029 {
20030 thumb_call_reg_needed = 1;
20031
20032 if (thumb_call_via_label[regno] == NULL)
20033 thumb_call_via_label[regno] = gen_label_rtx ();
20034 labelp = thumb_call_via_label + regno;
20035 }
20036 else
20037 {
20038 if (cfun->machine->call_via[regno] == NULL)
20039 cfun->machine->call_via[regno] = gen_label_rtx ();
20040 labelp = cfun->machine->call_via + regno;
20041 }
20042
20043 output_asm_insn ("bl\t%a0", labelp);
20044 return "";
20045 }
20046
20047 /* Routines for generating rtl. */
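/* Expand a constant-length memory copy for Thumb.  The bulk of the copy is
   done with 12-byte and 8-byte ldmia/stmia sequences (the movmem12b and
   movmem8b patterns); the remainder, at most seven bytes, is copied with
   individual word, half-word and byte moves.  */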
20048 void
20049 thumb_expand_movmemqi (rtx *operands)
20050 {
20051 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
20052 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
20053 HOST_WIDE_INT len = INTVAL (operands[2]);
20054 HOST_WIDE_INT offset = 0;
20055
20056 while (len >= 12)
20057 {
20058 emit_insn (gen_movmem12b (out, in, out, in));
20059 len -= 12;
20060 }
20061
20062 if (len >= 8)
20063 {
20064 emit_insn (gen_movmem8b (out, in, out, in));
20065 len -= 8;
20066 }
20067
20068 if (len >= 4)
20069 {
20070 rtx reg = gen_reg_rtx (SImode);
20071 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
20072 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
20073 len -= 4;
20074 offset += 4;
20075 }
20076
20077 if (len >= 2)
20078 {
20079 rtx reg = gen_reg_rtx (HImode);
20080 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
20081 plus_constant (in, offset))));
20082 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
20083 reg));
20084 len -= 2;
20085 offset += 2;
20086 }
20087
20088 if (len)
20089 {
20090 rtx reg = gen_reg_rtx (QImode);
20091 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
20092 plus_constant (in, offset))));
20093 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
20094 reg));
20095 }
20096 }
20097
20098 void
20099 thumb_reload_out_hi (rtx *operands)
20100 {
20101 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
20102 }
20103
20104 /* Handle reading a half-word from memory during reload. */
20105 void
20106 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
20107 {
20108 gcc_unreachable ();
20109 }
20110
20111 /* Return the length of a function name prefix
20112 that starts with the character 'c'. */
20113 static int
20114 arm_get_strip_length (int c)
20115 {
20116 switch (c)
20117 {
20118 ARM_NAME_ENCODING_LENGTHS
20119 default: return 0;
20120 }
20121 }
20122
20123 /* Return a pointer to a function's name with any
20124 and all prefix encodings stripped from it. */
20125 const char *
20126 arm_strip_name_encoding (const char *name)
20127 {
20128 int skip;
20129
20130 while ((skip = arm_get_strip_length (* name)))
20131 name += skip;
20132
20133 return name;
20134 }
20135
20136 /* If there is a '*' anywhere in the name's prefix, then
20137 emit the stripped name verbatim, otherwise prepend an
20138 underscore if leading underscores are being used. */
20139 void
20140 arm_asm_output_labelref (FILE *stream, const char *name)
20141 {
20142 int skip;
20143 int verbatim = 0;
20144
20145 while ((skip = arm_get_strip_length (* name)))
20146 {
20147 verbatim |= (*name == '*');
20148 name += skip;
20149 }
20150
20151 if (verbatim)
20152 fputs (name, stream);
20153 else
20154 asm_fprintf (stream, "%U%s", name);
20155 }
20156
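/* Output the assembly file preamble: the unified-syntax directive when it
   is in use and, for BPABI targets, the .cpu/.arch, .fpu and EABI build
   attribute directives computed below.  A typical preamble (purely
   illustrative; the exact output depends on the options used) might be:

	.syntax unified
	.cpu cortex-a8
	.fpu neon
	.eabi_attribute 23, 3  */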
20157 static void
20158 arm_file_start (void)
20159 {
20160 int val;
20161
20162 if (TARGET_UNIFIED_ASM)
20163 asm_fprintf (asm_out_file, "\t.syntax unified\n");
20164
20165 if (TARGET_BPABI)
20166 {
20167 const char *fpu_name;
20168 if (arm_select[0].string)
20169 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
20170 else if (arm_select[1].string)
20171 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
20172 else
20173 asm_fprintf (asm_out_file, "\t.cpu %s\n",
20174 all_cores[arm_default_cpu].name);
20175
20176 if (TARGET_SOFT_FLOAT)
20177 {
20178 if (TARGET_VFP)
20179 fpu_name = "softvfp";
20180 else
20181 fpu_name = "softfpa";
20182 }
20183 else
20184 {
20185 fpu_name = arm_fpu_desc->name;
20186 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
20187 {
20188 if (TARGET_HARD_FLOAT)
20189 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
20190 if (TARGET_HARD_FLOAT_ABI)
20191 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
20192 }
20193 }
20194 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
20195
20196 /* Some of these attributes only apply when the corresponding features
20197 	 are used.  However, we don't have any easy way of figuring this out.
20198 Conservatively record the setting that would have been used. */
20199
20200 /* Tag_ABI_FP_rounding. */
20201 if (flag_rounding_math)
20202 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
20203 if (!flag_unsafe_math_optimizations)
20204 {
20205 	  /* Tag_ABI_FP_denormal.  */
20206 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
20207 /* Tag_ABI_FP_exceptions. */
20208 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
20209 }
20210 /* Tag_ABI_FP_user_exceptions. */
20211 if (flag_signaling_nans)
20212 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
20213 /* Tag_ABI_FP_number_model. */
20214 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
20215 flag_finite_math_only ? 1 : 3);
20216
20217 /* Tag_ABI_align8_needed. */
20218 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
20219 /* Tag_ABI_align8_preserved. */
20220 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
20221 /* Tag_ABI_enum_size. */
20222 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
20223 flag_short_enums ? 1 : 2);
20224
20225 /* Tag_ABI_optimization_goals. */
20226 if (optimize_size)
20227 val = 4;
20228 else if (optimize >= 2)
20229 val = 2;
20230 else if (optimize)
20231 val = 1;
20232 else
20233 val = 6;
20234 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
20235
20236 /* Tag_ABI_FP_16bit_format. */
20237 if (arm_fp16_format)
20238 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
20239 (int)arm_fp16_format);
20240
20241 if (arm_lang_output_object_attributes_hook)
20242 arm_lang_output_object_attributes_hook();
20243 }
20244 default_file_start();
20245 }
20246
20247 static void
20248 arm_file_end (void)
20249 {
20250 int regno;
20251
20252 if (NEED_INDICATE_EXEC_STACK)
20253 /* Add .note.GNU-stack. */
20254 file_end_indicate_exec_stack ();
20255
20256 if (! thumb_call_reg_needed)
20257 return;
20258
20259 switch_to_section (text_section);
20260 asm_fprintf (asm_out_file, "\t.code 16\n");
20261 ASM_OUTPUT_ALIGN (asm_out_file, 1);
20262
20263 for (regno = 0; regno < LR_REGNUM; regno++)
20264 {
20265 rtx label = thumb_call_via_label[regno];
20266
20267 if (label != 0)
20268 {
20269 targetm.asm_out.internal_label (asm_out_file, "L",
20270 CODE_LABEL_NUMBER (label));
20271 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20272 }
20273 }
20274 }
20275
20276 #ifndef ARM_PE
20277 /* Symbols in the text segment can be accessed without indirecting via the
20278 constant pool; it may take an extra binary operation, but this is still
20279 faster than indirecting via memory. Don't do this when not optimizing,
20280    since we won't be calculating all of the offsets necessary to do this
20281 simplification. */
20282
20283 static void
20284 arm_encode_section_info (tree decl, rtx rtl, int first)
20285 {
20286 if (optimize > 0 && TREE_CONSTANT (decl))
20287 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
20288
20289 default_encode_section_info (decl, rtl, first);
20290 }
20291 #endif /* !ARM_PE */
20292
20293 static void
20294 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
20295 {
20296 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
20297 && !strcmp (prefix, "L"))
20298 {
20299 arm_ccfsm_state = 0;
20300 arm_target_insn = NULL;
20301 }
20302 default_internal_label (stream, prefix, labelno);
20303 }
20304
20305 /* Output code to add DELTA to the first argument, and then jump
20306 to FUNCTION. Used for C++ multiple inheritance. */
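/* For ARM and Thumb-2 the thunk is simply an add (or sub) of the constant
   DELTA to the "this" register followed by a direct branch to FUNCTION.
   Thumb-1 is handled differently: the target address is loaded from a
   literal word emitted after the thunk and the jump is done with
   "bx r12".  */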
20307 static void
20308 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
20309 HOST_WIDE_INT delta,
20310 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
20311 tree function)
20312 {
20313 static int thunk_label = 0;
20314 char label[256];
20315 char labelpc[256];
20316 int mi_delta = delta;
20317 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
20318 int shift = 0;
20319 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
20320 ? 1 : 0);
20321 if (mi_delta < 0)
20322 mi_delta = - mi_delta;
20323
20324 if (TARGET_THUMB1)
20325 {
20326 int labelno = thunk_label++;
20327 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
20328       /* Thunks are entered in ARM mode when available.  */
20329 if (TARGET_THUMB1_ONLY)
20330 {
20331 /* push r3 so we can use it as a temporary. */
20332 /* TODO: Omit this save if r3 is not used. */
20333 fputs ("\tpush {r3}\n", file);
20334 fputs ("\tldr\tr3, ", file);
20335 }
20336 else
20337 {
20338 fputs ("\tldr\tr12, ", file);
20339 }
20340 assemble_name (file, label);
20341 fputc ('\n', file);
20342 if (flag_pic)
20343 {
20344 /* If we are generating PIC, the ldr instruction below loads
20345 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
20346 the address of the add + 8, so we have:
20347
20348 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
20349 = target + 1.
20350
20351 Note that we have "+ 1" because some versions of GNU ld
20352 don't set the low bit of the result for R_ARM_REL32
20353 relocations against thumb function symbols.
20354 On ARMv6M this is +4, not +8. */
20355 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
20356 assemble_name (file, labelpc);
20357 fputs (":\n", file);
20358 if (TARGET_THUMB1_ONLY)
20359 {
20360 /* This is 2 insns after the start of the thunk, so we know it
20361 is 4-byte aligned. */
20362 fputs ("\tadd\tr3, pc, r3\n", file);
20363 fputs ("\tmov r12, r3\n", file);
20364 }
20365 else
20366 fputs ("\tadd\tr12, pc, r12\n", file);
20367 }
20368 else if (TARGET_THUMB1_ONLY)
20369 fputs ("\tmov r12, r3\n", file);
20370 }
20371 if (TARGET_THUMB1_ONLY)
20372 {
20373 if (mi_delta > 255)
20374 {
20375 fputs ("\tldr\tr3, ", file);
20376 assemble_name (file, label);
20377 fputs ("+4\n", file);
20378 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
20379 mi_op, this_regno, this_regno);
20380 }
20381 else if (mi_delta != 0)
20382 {
20383 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20384 mi_op, this_regno, this_regno,
20385 mi_delta);
20386 }
20387 }
20388 else
20389 {
20390 /* TODO: Use movw/movt for large constants when available. */
20391 while (mi_delta != 0)
20392 {
20393 if ((mi_delta & (3 << shift)) == 0)
20394 shift += 2;
20395 else
20396 {
20397 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
20398 mi_op, this_regno, this_regno,
20399 mi_delta & (0xff << shift));
20400 mi_delta &= ~(0xff << shift);
20401 shift += 8;
20402 }
20403 }
20404 }
20405 if (TARGET_THUMB1)
20406 {
20407 if (TARGET_THUMB1_ONLY)
20408 fputs ("\tpop\t{r3}\n", file);
20409
20410 fprintf (file, "\tbx\tr12\n");
20411 ASM_OUTPUT_ALIGN (file, 2);
20412 assemble_name (file, label);
20413 fputs (":\n", file);
20414 if (flag_pic)
20415 {
20416 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
20417 rtx tem = XEXP (DECL_RTL (function), 0);
20418 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
20419 tem = gen_rtx_MINUS (GET_MODE (tem),
20420 tem,
20421 gen_rtx_SYMBOL_REF (Pmode,
20422 ggc_strdup (labelpc)));
20423 assemble_integer (tem, 4, BITS_PER_WORD, 1);
20424 }
20425 else
20426 /* Output ".word .LTHUNKn". */
20427 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
20428
20429 if (TARGET_THUMB1_ONLY && mi_delta > 255)
20430 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
20431 }
20432 else
20433 {
20434 fputs ("\tb\t", file);
20435 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
20436 if (NEED_PLT_RELOC)
20437 fputs ("(PLT)", file);
20438 fputc ('\n', file);
20439 }
20440 }
20441
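/* Output a CONST_VECTOR as a single hexadecimal literal, printing the
   elements from the highest-numbered one down to element 0.  For example
   (illustrative), a V4HImode vector with elements {1, 2, 3, 4} is printed
   as 0x0004000300020001.  */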
20442 int
20443 arm_emit_vector_const (FILE *file, rtx x)
20444 {
20445 int i;
20446 const char * pattern;
20447
20448 gcc_assert (GET_CODE (x) == CONST_VECTOR);
20449
20450 switch (GET_MODE (x))
20451 {
20452 case V2SImode: pattern = "%08x"; break;
20453 case V4HImode: pattern = "%04x"; break;
20454 case V8QImode: pattern = "%02x"; break;
20455 default: gcc_unreachable ();
20456 }
20457
20458 fprintf (file, "0x");
20459 for (i = CONST_VECTOR_NUNITS (x); i--;)
20460 {
20461 rtx element;
20462
20463 element = CONST_VECTOR_ELT (x, i);
20464 fprintf (file, pattern, INTVAL (element));
20465 }
20466
20467 return 1;
20468 }
20469
20470 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20471 HFmode constant pool entries are actually loaded with ldr. */
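/* For example, on a little-endian target the constant 1.0 (half-precision
   bit pattern 0x3c00) is emitted as the 16-bit value 0x3c00 followed by two
   bytes of zero padding.  */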
20472 void
20473 arm_emit_fp16_const (rtx c)
20474 {
20475 REAL_VALUE_TYPE r;
20476 long bits;
20477
20478 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
20479 bits = real_to_target (NULL, &r, HFmode);
20480 if (WORDS_BIG_ENDIAN)
20481 assemble_zeros (2);
20482 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
20483 if (!WORDS_BIG_ENDIAN)
20484 assemble_zeros (2);
20485 }
20486
20487 const char *
20488 arm_output_load_gr (rtx *operands)
20489 {
20490 rtx reg;
20491 rtx offset;
20492 rtx wcgr;
20493 rtx sum;
20494
20495 if (GET_CODE (operands [1]) != MEM
20496 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
20497 || GET_CODE (reg = XEXP (sum, 0)) != REG
20498 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
20499 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
20500 return "wldrw%?\t%0, %1";
20501
20502 /* Fix up an out-of-range load of a GR register. */
20503 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
20504 wcgr = operands[0];
20505 operands[0] = reg;
20506 output_asm_insn ("ldr%?\t%0, %1", operands);
20507
20508 operands[0] = wcgr;
20509 operands[1] = reg;
20510 output_asm_insn ("tmcr%?\t%0, %1", operands);
20511 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
20512
20513 return "";
20514 }
20515
20516 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20517
20518 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20519 named arg and all anonymous args onto the stack.
20520 XXX I know the prologue shouldn't be pushing registers, but it is faster
20521 that way. */
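/* For example (illustrative), for a function such as "int f (int a, ...)"
   only r0 carries a named argument, so the three remaining argument
   registers (r1-r3) are flushed to the stack and *pretend_size becomes
   12 bytes.  */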
20522
20523 static void
20524 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
20525 enum machine_mode mode,
20526 tree type,
20527 int *pretend_size,
20528 int second_time ATTRIBUTE_UNUSED)
20529 {
20530 int nregs;
20531
20532 cfun->machine->uses_anonymous_args = 1;
20533 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
20534 {
20535 nregs = pcum->aapcs_ncrn;
20536 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
20537 nregs++;
20538 }
20539 else
20540 nregs = pcum->nregs;
20541
20542 if (nregs < NUM_ARG_REGS)
20543 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
20544 }
20545
20546 /* Return nonzero if the CONSUMER instruction (a store) does not need
20547 PRODUCER's value to calculate the address. */
20548
20549 int
20550 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
20551 {
20552 rtx value = PATTERN (producer);
20553 rtx addr = PATTERN (consumer);
20554
20555 if (GET_CODE (value) == COND_EXEC)
20556 value = COND_EXEC_CODE (value);
20557 if (GET_CODE (value) == PARALLEL)
20558 value = XVECEXP (value, 0, 0);
20559 value = XEXP (value, 0);
20560 if (GET_CODE (addr) == COND_EXEC)
20561 addr = COND_EXEC_CODE (addr);
20562 if (GET_CODE (addr) == PARALLEL)
20563 addr = XVECEXP (addr, 0, 0);
20564 addr = XEXP (addr, 0);
20565
20566 return !reg_overlap_mentioned_p (value, addr);
20567 }
20568
20569 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20570 have an early register shift value or amount dependency on the
20571 result of PRODUCER. */
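/* For example (illustrative), for a consumer such as
	add	r0, r1, r2, lsl r3
   the shifted value is r2 and the shift amount is r3; the consumer has an
   early dependency if the producer sets either of them.  */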
20572
20573 int
20574 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
20575 {
20576 rtx value = PATTERN (producer);
20577 rtx op = PATTERN (consumer);
20578 rtx early_op;
20579
20580 if (GET_CODE (value) == COND_EXEC)
20581 value = COND_EXEC_CODE (value);
20582 if (GET_CODE (value) == PARALLEL)
20583 value = XVECEXP (value, 0, 0);
20584 value = XEXP (value, 0);
20585 if (GET_CODE (op) == COND_EXEC)
20586 op = COND_EXEC_CODE (op);
20587 if (GET_CODE (op) == PARALLEL)
20588 op = XVECEXP (op, 0, 0);
20589 op = XEXP (op, 1);
20590
20591 early_op = XEXP (op, 0);
20592 /* This is either an actual independent shift, or a shift applied to
20593 the first operand of another operation. We want the whole shift
20594 operation. */
20595 if (GET_CODE (early_op) == REG)
20596 early_op = op;
20597
20598 return !reg_overlap_mentioned_p (value, early_op);
20599 }
20600
20601 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20602 have an early register shift value dependency on the result of
20603 PRODUCER. */
20604
20605 int
20606 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
20607 {
20608 rtx value = PATTERN (producer);
20609 rtx op = PATTERN (consumer);
20610 rtx early_op;
20611
20612 if (GET_CODE (value) == COND_EXEC)
20613 value = COND_EXEC_CODE (value);
20614 if (GET_CODE (value) == PARALLEL)
20615 value = XVECEXP (value, 0, 0);
20616 value = XEXP (value, 0);
20617 if (GET_CODE (op) == COND_EXEC)
20618 op = COND_EXEC_CODE (op);
20619 if (GET_CODE (op) == PARALLEL)
20620 op = XVECEXP (op, 0, 0);
20621 op = XEXP (op, 1);
20622
20623 early_op = XEXP (op, 0);
20624
20625 /* This is either an actual independent shift, or a shift applied to
20626 the first operand of another operation. We want the value being
20627 shifted, in either case. */
20628 if (GET_CODE (early_op) != REG)
20629 early_op = XEXP (early_op, 0);
20630
20631 return !reg_overlap_mentioned_p (value, early_op);
20632 }
20633
20634 /* Return nonzero if the CONSUMER (a mul or mac op) does not
20635 have an early register mult dependency on the result of
20636 PRODUCER. */
20637
20638 int
20639 arm_no_early_mul_dep (rtx producer, rtx consumer)
20640 {
20641 rtx value = PATTERN (producer);
20642 rtx op = PATTERN (consumer);
20643
20644 if (GET_CODE (value) == COND_EXEC)
20645 value = COND_EXEC_CODE (value);
20646 if (GET_CODE (value) == PARALLEL)
20647 value = XVECEXP (value, 0, 0);
20648 value = XEXP (value, 0);
20649 if (GET_CODE (op) == COND_EXEC)
20650 op = COND_EXEC_CODE (op);
20651 if (GET_CODE (op) == PARALLEL)
20652 op = XVECEXP (op, 0, 0);
20653 op = XEXP (op, 1);
20654
20655 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
20656 {
20657 if (GET_CODE (XEXP (op, 0)) == MULT)
20658 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
20659 else
20660 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
20661 }
20662
20663 return 0;
20664 }
20665
20666 /* We can't rely on the caller doing the proper promotion when
20667 using APCS or ATPCS. */
20668
20669 static bool
20670 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
20671 {
20672 return !TARGET_AAPCS_BASED;
20673 }
20674
20675 static enum machine_mode
20676 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
20677 enum machine_mode mode,
20678 int *punsignedp ATTRIBUTE_UNUSED,
20679 const_tree fntype ATTRIBUTE_UNUSED,
20680 int for_return ATTRIBUTE_UNUSED)
20681 {
20682 if (GET_MODE_CLASS (mode) == MODE_INT
20683 && GET_MODE_SIZE (mode) < 4)
20684 return SImode;
20685
20686 return mode;
20687 }
20688
20689 /* AAPCS based ABIs use short enums by default. */
20690
20691 static bool
20692 arm_default_short_enums (void)
20693 {
20694 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
20695 }
20696
20697
20698 /* AAPCS requires that anonymous bitfields affect structure alignment. */
20699
20700 static bool
20701 arm_align_anon_bitfield (void)
20702 {
20703 return TARGET_AAPCS_BASED;
20704 }
20705
20706
20707 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
20708
20709 static tree
20710 arm_cxx_guard_type (void)
20711 {
20712 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
20713 }
20714
20715 /* Return non-zero if the consumer (a multiply-accumulate instruction)
20716 has an accumulator dependency on the result of the producer (a
20717 multiplication instruction) and no other dependency on that result. */
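/* For example (illustrative), for the pair
	mul	r4, r1, r2
	mla	r5, r3, r0, r4
   the accumulator r4 is exactly the mul result and neither multiplicand of
   the mla overlaps it, so this function returns nonzero.  */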
20718 int
20719 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
20720 {
20721 rtx mul = PATTERN (producer);
20722 rtx mac = PATTERN (consumer);
20723 rtx mul_result;
20724 rtx mac_op0, mac_op1, mac_acc;
20725
20726 if (GET_CODE (mul) == COND_EXEC)
20727 mul = COND_EXEC_CODE (mul);
20728 if (GET_CODE (mac) == COND_EXEC)
20729 mac = COND_EXEC_CODE (mac);
20730
20731 /* Check that mul is of the form (set (...) (mult ...))
20732 and mla is of the form (set (...) (plus (mult ...) (...))). */
20733 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
20734 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
20735 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
20736 return 0;
20737
20738 mul_result = XEXP (mul, 0);
20739 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
20740 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
20741 mac_acc = XEXP (XEXP (mac, 1), 1);
20742
20743 return (reg_overlap_mentioned_p (mul_result, mac_acc)
20744 && !reg_overlap_mentioned_p (mul_result, mac_op0)
20745 && !reg_overlap_mentioned_p (mul_result, mac_op1));
20746 }
20747
20748
20749 /* The EABI says test the least significant bit of a guard variable. */
20750
20751 static bool
20752 arm_cxx_guard_mask_bit (void)
20753 {
20754 return TARGET_AAPCS_BASED;
20755 }
20756
20757
20758 /* The EABI specifies that all array cookies are 8 bytes long. */
20759
20760 static tree
20761 arm_get_cookie_size (tree type)
20762 {
20763 tree size;
20764
20765 if (!TARGET_AAPCS_BASED)
20766 return default_cxx_get_cookie_size (type);
20767
20768 size = build_int_cst (sizetype, 8);
20769 return size;
20770 }
20771
20772
20773 /* The EABI says that array cookies should also contain the element size. */
20774
20775 static bool
20776 arm_cookie_has_size (void)
20777 {
20778 return TARGET_AAPCS_BASED;
20779 }
20780
20781
20782 /* The EABI says constructors and destructors should return a pointer to
20783 the object constructed/destroyed. */
20784
20785 static bool
20786 arm_cxx_cdtor_returns_this (void)
20787 {
20788 return TARGET_AAPCS_BASED;
20789 }
20790
20791 /* The EABI says that an inline function may never be the key
20792 method. */
20793
20794 static bool
20795 arm_cxx_key_method_may_be_inline (void)
20796 {
20797 return !TARGET_AAPCS_BASED;
20798 }
20799
20800 static void
20801 arm_cxx_determine_class_data_visibility (tree decl)
20802 {
20803 if (!TARGET_AAPCS_BASED
20804 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
20805 return;
20806
20807 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
20808 is exported. However, on systems without dynamic vague linkage,
20809 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
20810 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
20811 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
20812 else
20813 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
20814 DECL_VISIBILITY_SPECIFIED (decl) = 1;
20815 }
20816
20817 static bool
20818 arm_cxx_class_data_always_comdat (void)
20819 {
20820 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
20821 vague linkage if the class has no key function. */
20822 return !TARGET_AAPCS_BASED;
20823 }
20824
20825
20826 /* The EABI says __aeabi_atexit should be used to register static
20827 destructors. */
20828
20829 static bool
20830 arm_cxx_use_aeabi_atexit (void)
20831 {
20832 return TARGET_AAPCS_BASED;
20833 }
20834
20835
20836 void
20837 arm_set_return_address (rtx source, rtx scratch)
20838 {
20839 arm_stack_offsets *offsets;
20840 HOST_WIDE_INT delta;
20841 rtx addr;
20842 unsigned long saved_regs;
20843
20844 offsets = arm_get_frame_offsets ();
20845 saved_regs = offsets->saved_regs_mask;
20846
20847 if ((saved_regs & (1 << LR_REGNUM)) == 0)
20848 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20849 else
20850 {
20851 if (frame_pointer_needed)
20852 addr = plus_constant(hard_frame_pointer_rtx, -4);
20853 else
20854 {
20855 /* LR will be the first saved register. */
20856 delta = offsets->outgoing_args - (offsets->frame + 4);
20857
20858
20859 if (delta >= 4096)
20860 {
20861 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
20862 GEN_INT (delta & ~4095)));
20863 addr = scratch;
20864 delta &= 4095;
20865 }
20866 else
20867 addr = stack_pointer_rtx;
20868
20869 addr = plus_constant (addr, delta);
20870 }
20871 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20872 }
20873 }
20874
20875
20876 void
20877 thumb_set_return_address (rtx source, rtx scratch)
20878 {
20879 arm_stack_offsets *offsets;
20880 HOST_WIDE_INT delta;
20881 HOST_WIDE_INT limit;
20882 int reg;
20883 rtx addr;
20884 unsigned long mask;
20885
20886 emit_use (source);
20887
20888 offsets = arm_get_frame_offsets ();
20889 mask = offsets->saved_regs_mask;
20890 if (mask & (1 << LR_REGNUM))
20891 {
20892 limit = 1024;
20893 /* Find the saved regs. */
20894 if (frame_pointer_needed)
20895 {
20896 delta = offsets->soft_frame - offsets->saved_args;
20897 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
20898 if (TARGET_THUMB1)
20899 limit = 128;
20900 }
20901 else
20902 {
20903 delta = offsets->outgoing_args - offsets->saved_args;
20904 reg = SP_REGNUM;
20905 }
20906 /* Allow for the stack frame. */
20907 if (TARGET_THUMB1 && TARGET_BACKTRACE)
20908 delta -= 16;
20909 /* The link register is always the first saved register. */
20910 delta -= 4;
20911
20912 /* Construct the address. */
20913 addr = gen_rtx_REG (SImode, reg);
20914 if (delta > limit)
20915 {
20916 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
20917 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
20918 addr = scratch;
20919 }
20920 else
20921 addr = plus_constant (addr, delta);
20922
20923 emit_move_insn (gen_frame_mem (Pmode, addr), source);
20924 }
20925 else
20926 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
20927 }
20928
20929 /* Implements target hook vector_mode_supported_p. */
20930 bool
20931 arm_vector_mode_supported_p (enum machine_mode mode)
20932 {
20933 /* Neon also supports V2SImode, etc. listed in the clause below. */
20934 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
20935 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
20936 return true;
20937
20938 if ((TARGET_NEON || TARGET_IWMMXT)
20939 && ((mode == V2SImode)
20940 || (mode == V4HImode)
20941 || (mode == V8QImode)))
20942 return true;
20943
20944 return false;
20945 }
20946
20947 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
20948 ARM insns and therefore guarantee that the shift count is modulo 256.
20949 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
20950 guarantee no particular behavior for out-of-range counts. */
20951
20952 static unsigned HOST_WIDE_INT
20953 arm_shift_truncation_mask (enum machine_mode mode)
20954 {
20955 return mode == SImode ? 255 : 0;
20956 }
20957
20958
20959 /* Map internal gcc register numbers to DWARF2 register numbers. */
20960
20961 unsigned int
20962 arm_dbx_register_number (unsigned int regno)
20963 {
20964 if (regno < 16)
20965 return regno;
20966
20967 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
20968 compatibility. The EABI defines them as registers 96-103. */
20969 if (IS_FPA_REGNUM (regno))
20970 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
20971
20972 if (IS_VFP_REGNUM (regno))
20973 {
20974 /* See comment in arm_dwarf_register_span. */
20975 if (VFP_REGNO_OK_FOR_SINGLE (regno))
20976 return 64 + regno - FIRST_VFP_REGNUM;
20977 else
20978 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
20979 }
20980
20981 if (IS_IWMMXT_GR_REGNUM (regno))
20982 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
20983
20984 if (IS_IWMMXT_REGNUM (regno))
20985 return 112 + regno - FIRST_IWMMXT_REGNUM;
20986
20987 gcc_unreachable ();
20988 }
20989
20990 /* Dwarf models VFPv3 registers as 32 64-bit registers.
20991    GCC models them as 64 32-bit registers, so we need to describe this to
20992 the DWARF generation code. Other registers can use the default. */
20993 static rtx
20994 arm_dwarf_register_span (rtx rtl)
20995 {
20996 unsigned regno;
20997 int nregs;
20998 int i;
20999 rtx p;
21000
21001 regno = REGNO (rtl);
21002 if (!IS_VFP_REGNUM (regno))
21003 return NULL_RTX;
21004
21005 /* XXX FIXME: The EABI defines two VFP register ranges:
21006 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
21007 256-287: D0-D31
21008 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
21009 corresponding D register. Until GDB supports this, we shall use the
21010 legacy encodings. We also use these encodings for D0-D15 for
21011 compatibility with older debuggers. */
21012 if (VFP_REGNO_OK_FOR_SINGLE (regno))
21013 return NULL_RTX;
21014
21015 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
21016 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
21017 regno = (regno - FIRST_VFP_REGNUM) / 2;
21018 for (i = 0; i < nregs; i++)
21019 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
21020
21021 return p;
21022 }
21023
21024 #ifdef TARGET_UNWIND_INFO
21025 /* Emit unwind directives for a store-multiple instruction or stack pointer
21026 push during alignment.
21027 These should only ever be generated by the function prologue code, so
21028 expect them to have a particular form. */
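/* For example (illustrative), a prologue "push {r4, r5, lr}" is annotated
   with ".save {r4, r5, lr}", and a VFP store-multiple of d8-d9 with
   ".vsave {d8, d9}".  */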
21029
21030 static void
21031 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
21032 {
21033 int i;
21034 HOST_WIDE_INT offset;
21035 HOST_WIDE_INT nregs;
21036 int reg_size;
21037 unsigned reg;
21038 unsigned lastreg;
21039 rtx e;
21040
21041 e = XVECEXP (p, 0, 0);
21042 if (GET_CODE (e) != SET)
21043 abort ();
21044
21045 /* First insn will adjust the stack pointer. */
21046 if (GET_CODE (e) != SET
21047 || GET_CODE (XEXP (e, 0)) != REG
21048 || REGNO (XEXP (e, 0)) != SP_REGNUM
21049 || GET_CODE (XEXP (e, 1)) != PLUS)
21050 abort ();
21051
21052 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
21053 nregs = XVECLEN (p, 0) - 1;
21054
21055 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
21056 if (reg < 16)
21057 {
21058       /* The function prologue may also push pc, but does not annotate it, as
21059 	 it is never restored.  We turn this into a stack pointer adjustment.  */
21060 if (nregs * 4 == offset - 4)
21061 {
21062 fprintf (asm_out_file, "\t.pad #4\n");
21063 offset -= 4;
21064 }
21065 reg_size = 4;
21066 fprintf (asm_out_file, "\t.save {");
21067 }
21068 else if (IS_VFP_REGNUM (reg))
21069 {
21070 reg_size = 8;
21071 fprintf (asm_out_file, "\t.vsave {");
21072 }
21073 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
21074 {
21075 /* FPA registers are done differently. */
21076 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
21077 return;
21078 }
21079 else
21080 /* Unknown register type. */
21081 abort ();
21082
21083 /* If the stack increment doesn't match the size of the saved registers,
21084 something has gone horribly wrong. */
21085 if (offset != nregs * reg_size)
21086 abort ();
21087
21088 offset = 0;
21089 lastreg = 0;
21090 /* The remaining insns will describe the stores. */
21091 for (i = 1; i <= nregs; i++)
21092 {
21093 /* Expect (set (mem <addr>) (reg)).
21094 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
21095 e = XVECEXP (p, 0, i);
21096 if (GET_CODE (e) != SET
21097 || GET_CODE (XEXP (e, 0)) != MEM
21098 || GET_CODE (XEXP (e, 1)) != REG)
21099 abort ();
21100
21101 reg = REGNO (XEXP (e, 1));
21102 if (reg < lastreg)
21103 abort ();
21104
21105 if (i != 1)
21106 fprintf (asm_out_file, ", ");
21107 /* We can't use %r for vfp because we need to use the
21108 double precision register names. */
21109 if (IS_VFP_REGNUM (reg))
21110 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
21111 else
21112 asm_fprintf (asm_out_file, "%r", reg);
21113
21114 #ifdef ENABLE_CHECKING
21115 /* Check that the addresses are consecutive. */
21116 e = XEXP (XEXP (e, 0), 0);
21117 if (GET_CODE (e) == PLUS)
21118 {
21119 offset += reg_size;
21120 if (GET_CODE (XEXP (e, 0)) != REG
21121 || REGNO (XEXP (e, 0)) != SP_REGNUM
21122 || GET_CODE (XEXP (e, 1)) != CONST_INT
21123 || offset != INTVAL (XEXP (e, 1)))
21124 abort ();
21125 }
21126 else if (i != 1
21127 || GET_CODE (e) != REG
21128 || REGNO (e) != SP_REGNUM)
21129 abort ();
21130 #endif
21131 }
21132 fprintf (asm_out_file, "}\n");
21133 }
21134
21135 /* Emit unwind directives for a SET. */
21136
21137 static void
21138 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
21139 {
21140 rtx e0;
21141 rtx e1;
21142 unsigned reg;
21143
21144 e0 = XEXP (p, 0);
21145 e1 = XEXP (p, 1);
21146 switch (GET_CODE (e0))
21147 {
21148 case MEM:
21149 /* Pushing a single register. */
21150 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
21151 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
21152 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
21153 abort ();
21154
21155 asm_fprintf (asm_out_file, "\t.save ");
21156 if (IS_VFP_REGNUM (REGNO (e1)))
21157 asm_fprintf(asm_out_file, "{d%d}\n",
21158 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
21159 else
21160 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
21161 break;
21162
21163 case REG:
21164 if (REGNO (e0) == SP_REGNUM)
21165 {
21166 /* A stack increment. */
21167 if (GET_CODE (e1) != PLUS
21168 || GET_CODE (XEXP (e1, 0)) != REG
21169 || REGNO (XEXP (e1, 0)) != SP_REGNUM
21170 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21171 abort ();
21172
21173 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
21174 -INTVAL (XEXP (e1, 1)));
21175 }
21176 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
21177 {
21178 HOST_WIDE_INT offset;
21179
21180 if (GET_CODE (e1) == PLUS)
21181 {
21182 if (GET_CODE (XEXP (e1, 0)) != REG
21183 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
21184 abort ();
21185 reg = REGNO (XEXP (e1, 0));
21186 offset = INTVAL (XEXP (e1, 1));
21187 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
21188 HARD_FRAME_POINTER_REGNUM, reg,
21189 offset);
21190 }
21191 else if (GET_CODE (e1) == REG)
21192 {
21193 reg = REGNO (e1);
21194 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
21195 HARD_FRAME_POINTER_REGNUM, reg);
21196 }
21197 else
21198 abort ();
21199 }
21200 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
21201 {
21202 /* Move from sp to reg. */
21203 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
21204 }
21205 else if (GET_CODE (e1) == PLUS
21206 && GET_CODE (XEXP (e1, 0)) == REG
21207 && REGNO (XEXP (e1, 0)) == SP_REGNUM
21208 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
21209 {
21210 /* Set reg to offset from sp. */
21211 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
21212 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
21213 }
21214 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
21215 {
21216 /* Stack pointer save before alignment. */
21217 reg = REGNO (e0);
21218 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
21219 reg + 0x90, reg);
21220 }
21221 else
21222 abort ();
21223 break;
21224
21225 default:
21226 abort ();
21227 }
21228 }
21229
21230
21231 /* Emit unwind directives for the given insn. */
21232
21233 static void
21234 arm_unwind_emit (FILE * asm_out_file, rtx insn)
21235 {
21236 rtx pat;
21237
21238 if (!ARM_EABI_UNWIND_TABLES)
21239 return;
21240
21241 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21242 && (TREE_NOTHROW (current_function_decl)
21243 || crtl->all_throwers_are_sibcalls))
21244 return;
21245
21246 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
21247 return;
21248
21249 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
21250 if (pat)
21251 pat = XEXP (pat, 0);
21252 else
21253 pat = PATTERN (insn);
21254
21255 switch (GET_CODE (pat))
21256 {
21257 case SET:
21258 arm_unwind_emit_set (asm_out_file, pat);
21259 break;
21260
21261 case SEQUENCE:
21262 /* Store multiple. */
21263 arm_unwind_emit_sequence (asm_out_file, pat);
21264 break;
21265
21266 default:
21267 abort();
21268 }
21269 }
21270
21271
21272 /* Output a reference from a function exception table to the type_info
21273 object X. The EABI specifies that the symbol should be relocated by
21274 an R_ARM_TARGET2 relocation. */
21275
21276 static bool
21277 arm_output_ttype (rtx x)
21278 {
21279 fputs ("\t.word\t", asm_out_file);
21280 output_addr_const (asm_out_file, x);
21281 /* Use special relocations for symbol references. */
21282 if (GET_CODE (x) != CONST_INT)
21283 fputs ("(TARGET2)", asm_out_file);
21284 fputc ('\n', asm_out_file);
21285
21286 return TRUE;
21287 }
21288 #endif /* TARGET_UNWIND_INFO */
21289
21290
21291 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
21292 stack alignment. */
21293
21294 static void
21295 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
21296 {
21297 rtx unspec = SET_SRC (pattern);
21298 gcc_assert (GET_CODE (unspec) == UNSPEC);
21299
21300 switch (index)
21301 {
21302 case UNSPEC_STACK_ALIGN:
21303 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
21304 put anything on the stack, so hopefully it won't matter.
21305 CFA = SP will be correct after alignment. */
21306 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
21307 SET_DEST (pattern));
21308 break;
21309 default:
21310 gcc_unreachable ();
21311 }
21312 }
21313
21314
21315 /* Output unwind directives for the start/end of a function. */
21316
21317 void
21318 arm_output_fn_unwind (FILE * f, bool prologue)
21319 {
21320 if (!ARM_EABI_UNWIND_TABLES)
21321 return;
21322
21323 if (prologue)
21324 fputs ("\t.fnstart\n", f);
21325 else
21326 {
21327 /* If this function will never be unwound, then mark it as such.
21328 	 The same condition is used in arm_unwind_emit to suppress
21329 the frame annotations. */
21330 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21331 && (TREE_NOTHROW (current_function_decl)
21332 || crtl->all_throwers_are_sibcalls))
21333 fputs("\t.cantunwind\n", f);
21334
21335 fputs ("\t.fnend\n", f);
21336 }
21337 }
21338
21339 static bool
21340 arm_emit_tls_decoration (FILE *fp, rtx x)
21341 {
21342 enum tls_reloc reloc;
21343 rtx val;
21344
21345 val = XVECEXP (x, 0, 0);
21346 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
21347
21348 output_addr_const (fp, val);
21349
21350 switch (reloc)
21351 {
21352 case TLS_GD32:
21353 fputs ("(tlsgd)", fp);
21354 break;
21355 case TLS_LDM32:
21356 fputs ("(tlsldm)", fp);
21357 break;
21358 case TLS_LDO32:
21359 fputs ("(tlsldo)", fp);
21360 break;
21361 case TLS_IE32:
21362 fputs ("(gottpoff)", fp);
21363 break;
21364 case TLS_LE32:
21365 fputs ("(tpoff)", fp);
21366 break;
21367 default:
21368 gcc_unreachable ();
21369 }
21370
21371 switch (reloc)
21372 {
21373 case TLS_GD32:
21374 case TLS_LDM32:
21375 case TLS_IE32:
21376 fputs (" + (. - ", fp);
21377 output_addr_const (fp, XVECEXP (x, 0, 2));
21378 fputs (" - ", fp);
21379 output_addr_const (fp, XVECEXP (x, 0, 3));
21380 fputc (')', fp);
21381 break;
21382 default:
21383 break;
21384 }
21385
21386 return TRUE;
21387 }
21388
21389 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
21390
21391 static void
21392 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
21393 {
21394 gcc_assert (size == 4);
21395 fputs ("\t.word\t", file);
21396 output_addr_const (file, x);
21397 fputs ("(tlsldo)", file);
21398 }
21399
21400 bool
21401 arm_output_addr_const_extra (FILE *fp, rtx x)
21402 {
21403 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
21404 return arm_emit_tls_decoration (fp, x);
21405 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
21406 {
21407 char label[256];
21408 int labelno = INTVAL (XVECEXP (x, 0, 0));
21409
21410 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
21411 assemble_name_raw (fp, label);
21412
21413 return TRUE;
21414 }
21415 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
21416 {
21417 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
21418 if (GOT_PCREL)
21419 fputs ("+.", fp);
21420 fputs ("-(", fp);
21421 output_addr_const (fp, XVECEXP (x, 0, 0));
21422 fputc (')', fp);
21423 return TRUE;
21424 }
21425 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
21426 {
21427 output_addr_const (fp, XVECEXP (x, 0, 0));
21428 if (GOT_PCREL)
21429 fputs ("+.", fp);
21430 fputs ("-(", fp);
21431 output_addr_const (fp, XVECEXP (x, 0, 1));
21432 fputc (')', fp);
21433 return TRUE;
21434 }
21435 else if (GET_CODE (x) == CONST_VECTOR)
21436 return arm_emit_vector_const (fp, x);
21437
21438 return FALSE;
21439 }
21440
21441 /* Output assembly for a shift instruction.
21442 SET_FLAGS determines how the instruction modifies the condition codes.
21443 0 - Do not set condition codes.
21444 1 - Set condition codes.
21445 2 - Use smallest instruction. */
21446 const char *
21447 arm_output_shift(rtx * operands, int set_flags)
21448 {
21449 char pattern[100];
21450 static const char flag_chars[3] = {'?', '.', '!'};
21451 const char *shift;
21452 HOST_WIDE_INT val;
21453 char c;
21454
21455 c = flag_chars[set_flags];
21456 if (TARGET_UNIFIED_ASM)
21457 {
21458 shift = shift_op(operands[3], &val);
21459 if (shift)
21460 {
21461 if (val != -1)
21462 operands[2] = GEN_INT(val);
21463 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
21464 }
21465 else
21466 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
21467 }
21468 else
21469 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
21470 output_asm_insn (pattern, operands);
21471 return "";
21472 }
21473
21474 /* Output a Thumb-1 casesi dispatch sequence. */
21475 const char *
21476 thumb1_output_casesi (rtx *operands)
21477 {
21478 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
21479
21480 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21481
21482 switch (GET_MODE(diff_vec))
21483 {
21484 case QImode:
21485 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21486 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
21487 case HImode:
21488 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21489 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
21490 case SImode:
21491 return "bl\t%___gnu_thumb1_case_si";
21492 default:
21493 gcc_unreachable ();
21494 }
21495 }
21496
21497 /* Output a Thumb-2 casesi instruction. */
21498 const char *
21499 thumb2_output_casesi (rtx *operands)
21500 {
21501 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
21502
21503 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21504
21505 output_asm_insn ("cmp\t%0, %1", operands);
21506 output_asm_insn ("bhi\t%l3", operands);
21507 switch (GET_MODE(diff_vec))
21508 {
21509 case QImode:
21510 return "tbb\t[%|pc, %0]";
21511 case HImode:
21512 return "tbh\t[%|pc, %0, lsl #1]";
21513 case SImode:
21514 if (flag_pic)
21515 {
21516 output_asm_insn ("adr\t%4, %l2", operands);
21517 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
21518 output_asm_insn ("add\t%4, %4, %5", operands);
21519 return "bx\t%4";
21520 }
21521 else
21522 {
21523 output_asm_insn ("adr\t%4, %l2", operands);
21524 return "ldr\t%|pc, [%4, %0, lsl #2]";
21525 }
21526 default:
21527 gcc_unreachable ();
21528 }
21529 }
21530
21531 /* Most ARM cores are single issue, but some newer ones can dual issue.
21532 The scheduler descriptions rely on this being correct. */
21533 static int
21534 arm_issue_rate (void)
21535 {
21536 switch (arm_tune)
21537 {
21538 case cortexr4:
21539 case cortexr4f:
21540 case cortexa8:
21541 case cortexa9:
21542 return 2;
21543
21544 default:
21545 return 1;
21546 }
21547 }
21548
21549 /* A table and a function to perform ARM-specific name mangling for
21550 NEON vector types in order to conform to the AAPCS (see "Procedure
21551 Call Standard for the ARM Architecture", Appendix A). To qualify
21552 for emission with the mangled names defined in that document, a
21553 vector type must not only be of the correct mode but also be
21554 composed of NEON vector element types (e.g. __builtin_neon_qi). */
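/* The mangled strings below are Itanium C++ ABI <source-name>s, i.e. an
   identifier preceded by its length: "15__simd64_int8_t", for instance, is
   the 15-character identifier __simd64_int8_t.  */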
21555 typedef struct
21556 {
21557 enum machine_mode mode;
21558 const char *element_type_name;
21559 const char *aapcs_name;
21560 } arm_mangle_map_entry;
21561
21562 static arm_mangle_map_entry arm_mangle_map[] = {
21563 /* 64-bit containerized types. */
21564 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
21565 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
21566 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
21567 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
21568 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
21569 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
21570 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
21571 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
21572 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
21573 /* 128-bit containerized types. */
21574 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
21575 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
21576 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
21577 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
21578 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
21579 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
21580 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
21581 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
21582 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
21583 { VOIDmode, NULL, NULL }
21584 };
21585
21586 const char *
21587 arm_mangle_type (const_tree type)
21588 {
21589 arm_mangle_map_entry *pos = arm_mangle_map;
21590
21591 /* The ARM ABI documents (10th October 2008) say that "__va_list"
21592    has to be mangled as if it were in the "std" namespace.  */
21593 if (TARGET_AAPCS_BASED
21594 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
21595 {
21596 static bool warned;
21597 if (!warned && warn_psabi && !in_system_header)
21598 {
21599 warned = true;
21600 inform (input_location,
21601 "the mangling of %<va_list%> has changed in GCC 4.4");
21602 }
21603 return "St9__va_list";
21604 }
21605
21606 /* Half-precision float. */
21607 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
21608 return "Dh";
21609
21610 if (TREE_CODE (type) != VECTOR_TYPE)
21611 return NULL;
21612
21613 /* Check the mode of the vector type, and the name of the vector
21614 element type, against the table. */
21615 while (pos->mode != VOIDmode)
21616 {
21617 tree elt_type = TREE_TYPE (type);
21618
21619 if (pos->mode == TYPE_MODE (type)
21620 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
21621 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
21622 pos->element_type_name))
21623 return pos->aapcs_name;
21624
21625 pos++;
21626 }
21627
21628 /* Use the default mangling for unrecognized (possibly user-defined)
21629 vector types. */
21630 return NULL;
21631 }
21632
21633 /* Order of allocation of core registers for Thumb: this allocation is
21634 written over the corresponding initial entries of the array
21635 initialized with REG_ALLOC_ORDER. We allocate all low registers
21636 first. Saving and restoring a low register is usually cheaper than
21637 using a call-clobbered high register. */
21638
21639 static const int thumb_core_reg_alloc_order[] =
21640 {
21641 3, 2, 1, 0, 4, 5, 6, 7,
21642 14, 12, 8, 9, 10, 11, 13, 15
21643 };
21644
21645 /* Adjust register allocation order when compiling for Thumb. */
21646
21647 void
21648 arm_order_regs_for_local_alloc (void)
21649 {
21650 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
21651 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
21652 if (TARGET_THUMB)
21653 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
21654 sizeof (thumb_core_reg_alloc_order));
21655 }
21656
21657 /* Set default optimization options. */
21658 void
21659 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
21660 {
21661 /* Enable section anchors by default at -O1 or higher.
21662 Use 2 to distinguish from an explicit -fsection-anchors
21663 given on the command line. */
21664 if (level > 0)
21665 flag_section_anchors = 2;
21666 }
21667
21668 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
21669
21670 bool
21671 arm_frame_pointer_required (void)
21672 {
21673 return (cfun->has_nonlocal_label
21674 || SUBTARGET_FRAME_POINTER_REQUIRED
21675 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
21676 }
21677
21678 /* Only Thumb-1 lacks support for conditional execution, so return true
21679    if the target is not Thumb-1.  */
21680 static bool
21681 arm_have_conditional_execution (void)
21682 {
21683 return !TARGET_THUMB1;
21684 }
21685
21686 #include "gt-arm.h"