1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
55 #include "df.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60
61 /* Forward definitions of types. */
62 typedef struct minipool_node Mnode;
63 typedef struct minipool_fixup Mfix;
64
65 void (*arm_lang_output_object_attributes_hook)(void);
66
67 /* Forward function declarations. */
68 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
69 static int arm_compute_static_chain_stack_bytes (void);
70 static arm_stack_offsets *arm_get_frame_offsets (void);
71 static void arm_add_gc_roots (void);
72 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
73 HOST_WIDE_INT, rtx, rtx, int, int);
74 static unsigned bit_count (unsigned long);
75 static int arm_address_register_rtx_p (rtx, int);
76 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
77 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
78 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
79 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
80 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
81 inline static int thumb1_index_register_rtx_p (rtx, int);
82 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
83 static int thumb_far_jump_used_p (void);
84 static bool thumb_force_lr_save (void);
85 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
86 static rtx emit_sfm (int, int);
87 static unsigned arm_size_return_regs (void);
88 static bool arm_assemble_integer (rtx, unsigned int, int);
89 static void arm_print_operand (FILE *, rtx, int);
90 static void arm_print_operand_address (FILE *, rtx);
91 static bool arm_print_operand_punct_valid_p (unsigned char code);
92 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
93 static arm_cc get_arm_condition_code (rtx);
94 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
95 static rtx is_jump_table (rtx);
96 static const char *output_multi_immediate (rtx *, const char *, const char *,
97 int, HOST_WIDE_INT);
98 static const char *shift_op (rtx, HOST_WIDE_INT *);
99 static struct machine_function *arm_init_machine_status (void);
100 static void thumb_exit (FILE *, int);
101 static rtx is_jump_table (rtx);
102 static HOST_WIDE_INT get_jump_table_size (rtx);
103 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
104 static Mnode *add_minipool_forward_ref (Mfix *);
105 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
106 static Mnode *add_minipool_backward_ref (Mfix *);
107 static void assign_minipool_offsets (Mfix *);
108 static void arm_print_value (FILE *, rtx);
109 static void dump_minipool (rtx);
110 static int arm_barrier_cost (rtx);
111 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
112 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
113 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
114 rtx);
115 static void arm_reorg (void);
116 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
117 static unsigned long arm_compute_save_reg0_reg12_mask (void);
118 static unsigned long arm_compute_save_reg_mask (void);
119 static unsigned long arm_isr_value (tree);
120 static unsigned long arm_compute_func_type (void);
121 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
122 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
123 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
124 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
125 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
126 #endif
127 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
128 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
129 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
130 static int arm_comp_type_attributes (const_tree, const_tree);
131 static void arm_set_default_type_attributes (tree);
132 static int arm_adjust_cost (rtx, rtx, rtx, int);
133 static int count_insns_for_constant (HOST_WIDE_INT, int);
134 static int arm_get_strip_length (int);
135 static bool arm_function_ok_for_sibcall (tree, tree);
136 static enum machine_mode arm_promote_function_mode (const_tree,
137 enum machine_mode, int *,
138 const_tree, int);
139 static bool arm_return_in_memory (const_tree, const_tree);
140 static rtx arm_function_value (const_tree, const_tree, bool);
141 static rtx arm_libcall_value (enum machine_mode, const_rtx);
142
143 static void arm_internal_label (FILE *, const char *, unsigned long);
144 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
145 tree);
146 static bool arm_have_conditional_execution (void);
147 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
148 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
149 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
150 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
151 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
152 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
153 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
154 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
155 static bool arm_rtx_costs (rtx, int, int, int *, bool);
156 static int arm_address_cost (rtx, bool);
157 static bool arm_memory_load_p (rtx);
158 static bool arm_cirrus_insn_p (rtx);
159 static void cirrus_reorg (rtx);
160 static void arm_init_builtins (void);
161 static void arm_init_iwmmxt_builtins (void);
162 static rtx safe_vector_operand (rtx, enum machine_mode);
163 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
164 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
165 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
166 static tree arm_builtin_decl (unsigned, bool);
167 static void emit_constant_insn (rtx cond, rtx pattern);
168 static rtx emit_set_insn (rtx, rtx);
169 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
170 tree, bool);
171 static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
172 const_tree, bool);
173 static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
174 const_tree, bool);
175 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
176 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
177 const_tree);
178 static int aapcs_select_return_coproc (const_tree, const_tree);
179
180 #ifdef OBJECT_FORMAT_ELF
181 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
182 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
183 #endif
184 #ifndef ARM_PE
185 static void arm_encode_section_info (tree, rtx, int);
186 #endif
187
188 static void arm_file_end (void);
189 static void arm_file_start (void);
190
191 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
192 tree, int *, int);
193 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
194 enum machine_mode, const_tree, bool);
195 static bool arm_promote_prototypes (const_tree);
196 static bool arm_default_short_enums (void);
197 static bool arm_align_anon_bitfield (void);
198 static bool arm_return_in_msb (const_tree);
199 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
200 static bool arm_return_in_memory (const_tree, const_tree);
201 #if ARM_UNWIND_INFO
202 static void arm_unwind_emit (FILE *, rtx);
203 static bool arm_output_ttype (rtx);
204 static void arm_asm_emit_except_personality (rtx);
205 static void arm_asm_init_sections (void);
206 #endif
207 static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
208 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
209 static rtx arm_dwarf_register_span (rtx);
210
211 static tree arm_cxx_guard_type (void);
212 static bool arm_cxx_guard_mask_bit (void);
213 static tree arm_get_cookie_size (tree);
214 static bool arm_cookie_has_size (void);
215 static bool arm_cxx_cdtor_returns_this (void);
216 static bool arm_cxx_key_method_may_be_inline (void);
217 static void arm_cxx_determine_class_data_visibility (tree);
218 static bool arm_cxx_class_data_always_comdat (void);
219 static bool arm_cxx_use_aeabi_atexit (void);
220 static void arm_init_libfuncs (void);
221 static tree arm_build_builtin_va_list (void);
222 static void arm_expand_builtin_va_start (tree, rtx);
223 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
224 static void arm_option_override (void);
225 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
226 static bool arm_cannot_copy_insn_p (rtx);
227 static bool arm_tls_symbol_p (rtx x);
228 static int arm_issue_rate (void);
229 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
230 static bool arm_output_addr_const_extra (FILE *, rtx);
231 static bool arm_allocate_stack_slots_for_args (void);
232 static const char *arm_invalid_parameter_type (const_tree t);
233 static const char *arm_invalid_return_type (const_tree t);
234 static tree arm_promoted_type (const_tree t);
235 static tree arm_convert_to_type (tree type, tree expr);
236 static bool arm_scalar_mode_supported_p (enum machine_mode);
237 static bool arm_frame_pointer_required (void);
238 static bool arm_can_eliminate (const int, const int);
239 static void arm_asm_trampoline_template (FILE *);
240 static void arm_trampoline_init (rtx, tree, rtx);
241 static rtx arm_trampoline_adjust_address (rtx);
242 static rtx arm_pic_static_addr (rtx orig, rtx reg);
243 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
244 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
245 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
246 static bool arm_array_mode_supported_p (enum machine_mode,
247 unsigned HOST_WIDE_INT);
248 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
249 static bool arm_class_likely_spilled_p (reg_class_t);
250 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
251 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
252 const_tree type,
253 int misalignment,
254 bool is_packed);
255 static void arm_conditional_register_usage (void);
256 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
257 static unsigned int arm_autovectorize_vector_sizes (void);
258
259 \f
260 /* Table of machine attributes. */
261 static const struct attribute_spec arm_attribute_table[] =
262 {
263 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
264 affects_type_identity } */
265 /* Function calls made to this symbol must be done indirectly, because
266 it may lie outside of the 26 bit addressing range of a normal function
267 call. */
268 { "long_call", 0, 0, false, true, true, NULL, false },
269 /* Whereas these functions are always known to reside within the 26 bit
270 addressing range. */
271 { "short_call", 0, 0, false, true, true, NULL, false },
272 /* Specify the procedure call conventions for a function. */
273 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
274 false },
275 /* Interrupt Service Routines have special prologue and epilogue requirements. */
276 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
277 false },
278 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
279 false },
280 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
281 false },
282 #ifdef ARM_PE
283 /* ARM/PE has three new attributes:
284 interfacearm - ?
285 dllexport - for exporting a function/variable that will live in a dll
286 dllimport - for importing a function/variable from a dll
287
288 Microsoft allows multiple declspecs in one __declspec, separating
289 them with spaces. We do NOT support this. Instead, use __declspec
290 multiple times.
291 */
292 { "dllimport", 0, 0, true, false, false, NULL, false },
293 { "dllexport", 0, 0, true, false, false, NULL, false },
294 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
295 false },
296 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
297 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
298 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
299 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
300 false },
301 #endif
302 { NULL, 0, 0, false, false, false, NULL, false }
303 };
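/* Illustrative usage of the table above (a sketch, not part of this file):

     void far_handler (void) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   "long_call" forces an indirect call sequence, while "interrupt"/"isr"
   requests the special ISR prologue and epilogue handled by
   arm_handle_isr_attribute.  */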
304
305 /* Set default optimization options. */
306 static const struct default_options arm_option_optimization_table[] =
307 {
308 /* Enable section anchors by default at -O1 or higher. */
309 { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
310 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
311 { OPT_LEVELS_NONE, 0, NULL, 0 }
312 };
313 \f
314 /* Initialize the GCC target structure. */
315 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
316 #undef TARGET_MERGE_DECL_ATTRIBUTES
317 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
318 #endif
319
320 #undef TARGET_LEGITIMIZE_ADDRESS
321 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
322
323 #undef TARGET_ATTRIBUTE_TABLE
324 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
325
326 #undef TARGET_ASM_FILE_START
327 #define TARGET_ASM_FILE_START arm_file_start
328 #undef TARGET_ASM_FILE_END
329 #define TARGET_ASM_FILE_END arm_file_end
330
331 #undef TARGET_ASM_ALIGNED_SI_OP
332 #define TARGET_ASM_ALIGNED_SI_OP NULL
333 #undef TARGET_ASM_INTEGER
334 #define TARGET_ASM_INTEGER arm_assemble_integer
335
336 #undef TARGET_PRINT_OPERAND
337 #define TARGET_PRINT_OPERAND arm_print_operand
338 #undef TARGET_PRINT_OPERAND_ADDRESS
339 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
340 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
341 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
342
343 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
344 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
345
346 #undef TARGET_ASM_FUNCTION_PROLOGUE
347 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
348
349 #undef TARGET_ASM_FUNCTION_EPILOGUE
350 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
351
352 #undef TARGET_DEFAULT_TARGET_FLAGS
353 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
354 #undef TARGET_OPTION_OVERRIDE
355 #define TARGET_OPTION_OVERRIDE arm_option_override
356 #undef TARGET_OPTION_OPTIMIZATION_TABLE
357 #define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table
358
359 #undef TARGET_COMP_TYPE_ATTRIBUTES
360 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
361
362 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
363 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
364
365 #undef TARGET_SCHED_ADJUST_COST
366 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
367
368 #undef TARGET_ENCODE_SECTION_INFO
369 #ifdef ARM_PE
370 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
371 #else
372 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
373 #endif
374
375 #undef TARGET_STRIP_NAME_ENCODING
376 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
377
378 #undef TARGET_ASM_INTERNAL_LABEL
379 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
380
381 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
382 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
383
384 #undef TARGET_FUNCTION_VALUE
385 #define TARGET_FUNCTION_VALUE arm_function_value
386
387 #undef TARGET_LIBCALL_VALUE
388 #define TARGET_LIBCALL_VALUE arm_libcall_value
389
390 #undef TARGET_ASM_OUTPUT_MI_THUNK
391 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
392 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
393 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
394
395 #undef TARGET_RTX_COSTS
396 #define TARGET_RTX_COSTS arm_rtx_costs
397 #undef TARGET_ADDRESS_COST
398 #define TARGET_ADDRESS_COST arm_address_cost
399
400 #undef TARGET_SHIFT_TRUNCATION_MASK
401 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
402 #undef TARGET_VECTOR_MODE_SUPPORTED_P
403 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
404 #undef TARGET_ARRAY_MODE_SUPPORTED_P
405 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
406 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
407 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
408 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
409 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
410 arm_autovectorize_vector_sizes
411
412 #undef TARGET_MACHINE_DEPENDENT_REORG
413 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
414
415 #undef TARGET_INIT_BUILTINS
416 #define TARGET_INIT_BUILTINS arm_init_builtins
417 #undef TARGET_EXPAND_BUILTIN
418 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
419 #undef TARGET_BUILTIN_DECL
420 #define TARGET_BUILTIN_DECL arm_builtin_decl
421
422 #undef TARGET_INIT_LIBFUNCS
423 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
424
425 #undef TARGET_PROMOTE_FUNCTION_MODE
426 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
427 #undef TARGET_PROMOTE_PROTOTYPES
428 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
429 #undef TARGET_PASS_BY_REFERENCE
430 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
431 #undef TARGET_ARG_PARTIAL_BYTES
432 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
433 #undef TARGET_FUNCTION_ARG
434 #define TARGET_FUNCTION_ARG arm_function_arg
435 #undef TARGET_FUNCTION_ARG_ADVANCE
436 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
437 #undef TARGET_FUNCTION_ARG_BOUNDARY
438 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
439
440 #undef TARGET_SETUP_INCOMING_VARARGS
441 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
442
443 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
444 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
445
446 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
447 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
448 #undef TARGET_TRAMPOLINE_INIT
449 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
450 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
451 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
452
453 #undef TARGET_DEFAULT_SHORT_ENUMS
454 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
455
456 #undef TARGET_ALIGN_ANON_BITFIELD
457 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
458
459 #undef TARGET_NARROW_VOLATILE_BITFIELD
460 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
461
462 #undef TARGET_CXX_GUARD_TYPE
463 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
464
465 #undef TARGET_CXX_GUARD_MASK_BIT
466 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
467
468 #undef TARGET_CXX_GET_COOKIE_SIZE
469 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
470
471 #undef TARGET_CXX_COOKIE_HAS_SIZE
472 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
473
474 #undef TARGET_CXX_CDTOR_RETURNS_THIS
475 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
476
477 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
478 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
479
480 #undef TARGET_CXX_USE_AEABI_ATEXIT
481 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
482
483 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
484 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
485 arm_cxx_determine_class_data_visibility
486
487 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
488 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
489
490 #undef TARGET_RETURN_IN_MSB
491 #define TARGET_RETURN_IN_MSB arm_return_in_msb
492
493 #undef TARGET_RETURN_IN_MEMORY
494 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
495
496 #undef TARGET_MUST_PASS_IN_STACK
497 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
498
499 #if ARM_UNWIND_INFO
500 #undef TARGET_ASM_UNWIND_EMIT
501 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
502
503 /* EABI unwinding tables use a different format for the typeinfo tables. */
504 #undef TARGET_ASM_TTYPE
505 #define TARGET_ASM_TTYPE arm_output_ttype
506
507 #undef TARGET_ARM_EABI_UNWINDER
508 #define TARGET_ARM_EABI_UNWINDER true
509
510 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
511 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
512
513 #undef TARGET_ASM_INIT_SECTIONS
514 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
515 #endif /* ARM_UNWIND_INFO */
516
517 #undef TARGET_EXCEPT_UNWIND_INFO
518 #define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
519
520 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
521 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
522
523 #undef TARGET_DWARF_REGISTER_SPAN
524 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
525
526 #undef TARGET_CANNOT_COPY_INSN_P
527 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
528
529 #ifdef HAVE_AS_TLS
530 #undef TARGET_HAVE_TLS
531 #define TARGET_HAVE_TLS true
532 #endif
533
534 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
535 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
536
537 #undef TARGET_LEGITIMATE_CONSTANT_P
538 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
539
540 #undef TARGET_CANNOT_FORCE_CONST_MEM
541 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
542
543 #undef TARGET_MAX_ANCHOR_OFFSET
544 #define TARGET_MAX_ANCHOR_OFFSET 4095
545
546 /* The minimum is set such that the total size of the block
547 for a particular anchor is 4088 + 1 + 4095 bytes, which is
548 divisible by eight, ensuring natural spacing of anchors. */
549 #undef TARGET_MIN_ANCHOR_OFFSET
550 #define TARGET_MIN_ANCHOR_OFFSET -4088
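/* (Arithmetic check: offsets -4088 .. +4095 span 4088 + 1 + 4095 = 8184
   bytes, and 8184 = 8 * 1023.)  */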
551
552 #undef TARGET_SCHED_ISSUE_RATE
553 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
554
555 #undef TARGET_MANGLE_TYPE
556 #define TARGET_MANGLE_TYPE arm_mangle_type
557
558 #undef TARGET_BUILD_BUILTIN_VA_LIST
559 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
560 #undef TARGET_EXPAND_BUILTIN_VA_START
561 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
562 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
563 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
564
565 #ifdef HAVE_AS_TLS
566 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
567 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
568 #endif
569
570 #undef TARGET_LEGITIMATE_ADDRESS_P
571 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
572
573 #undef TARGET_INVALID_PARAMETER_TYPE
574 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
575
576 #undef TARGET_INVALID_RETURN_TYPE
577 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
578
579 #undef TARGET_PROMOTED_TYPE
580 #define TARGET_PROMOTED_TYPE arm_promoted_type
581
582 #undef TARGET_CONVERT_TO_TYPE
583 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
584
585 #undef TARGET_SCALAR_MODE_SUPPORTED_P
586 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
587
588 #undef TARGET_FRAME_POINTER_REQUIRED
589 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
590
591 #undef TARGET_CAN_ELIMINATE
592 #define TARGET_CAN_ELIMINATE arm_can_eliminate
593
594 #undef TARGET_CONDITIONAL_REGISTER_USAGE
595 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
596
597 #undef TARGET_CLASS_LIKELY_SPILLED_P
598 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
599
600 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
601 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
602 arm_vector_alignment_reachable
603
604 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
605 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
606 arm_builtin_support_vector_misalignment
607
608 #undef TARGET_PREFERRED_RENAME_CLASS
609 #define TARGET_PREFERRED_RENAME_CLASS \
610 arm_preferred_rename_class
611
612 struct gcc_target targetm = TARGET_INITIALIZER;
613 \f
614 /* Obstack for minipool constant handling. */
615 static struct obstack minipool_obstack;
616 static char * minipool_startobj;
617
618 /* The maximum number of insns skipped which
619 will be conditionalised if possible. */
620 static int max_insns_skipped = 5;
621
622 extern FILE * asm_out_file;
623
624 /* True if we are currently building a constant table. */
625 int making_const_table;
626
627 /* The processor for which instructions should be scheduled. */
628 enum processor_type arm_tune = arm_none;
629
630 /* The current tuning set. */
631 const struct tune_params *current_tune;
632
633 /* Which floating point hardware to schedule for. */
634 int arm_fpu_attr;
635
636 /* Which floating point hardware to use. */
637 const struct arm_fpu_desc *arm_fpu_desc;
638
639 /* Used for Thumb call_via trampolines. */
640 rtx thumb_call_via_label[14];
641 static int thumb_call_reg_needed;
642
643 /* Bit values used to identify processor capabilities. */
644 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
645 #define FL_ARCH3M (1 << 1) /* Extended multiply */
646 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
647 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
648 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
649 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
650 #define FL_THUMB (1 << 6) /* Thumb aware */
651 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
652 #define FL_STRONG (1 << 8) /* StrongARM */
653 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
654 #define FL_XSCALE (1 << 10) /* XScale */
655 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
656 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
657 media instructions. */
658 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
659 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
660 Note: ARM6 & 7 derivatives only. */
661 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
662 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
663 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
664 profile. */
665 #define FL_DIV (1 << 18) /* Hardware divide. */
666 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
667 #define FL_NEON (1 << 20) /* Neon instructions. */
668 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
669 architecture. */
670 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
671
672 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
673
674 /* Flags that only affect tuning, not available instructions. */
675 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
676 | FL_CO_PROC)
677
678 #define FL_FOR_ARCH2 FL_NOTM
679 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
680 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
681 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
682 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
683 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
684 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
685 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
686 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
687 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
688 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
689 #define FL_FOR_ARCH6J FL_FOR_ARCH6
690 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
691 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
692 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
693 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
694 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
695 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
696 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
697 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
698 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
699 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
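/* As an illustration of how these compose, FL_FOR_ARCH5TE expands to
   (FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E
    | FL_THUMB), i.e. everything an ARMv5TE part is assumed to provide.  */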
700
701 /* The bits in this mask specify which
702 instructions we are allowed to generate. */
703 static unsigned long insn_flags = 0;
704
705 /* The bits in this mask specify which instruction scheduling options should
706 be used. */
707 static unsigned long tune_flags = 0;
708
709 /* The following are used in the arm.md file as equivalents to bits
710 in the above two flag variables. */
711
712 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
713 int arm_arch3m = 0;
714
715 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
716 int arm_arch4 = 0;
717
718 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
719 int arm_arch4t = 0;
720
721 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
722 int arm_arch5 = 0;
723
724 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
725 int arm_arch5e = 0;
726
727 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
728 int arm_arch6 = 0;
729
730 /* Nonzero if this chip supports the ARM 6K extensions. */
731 int arm_arch6k = 0;
732
733 /* Nonzero if this chip supports the ARM 7 extensions. */
734 int arm_arch7 = 0;
735
736 /* Nonzero if instructions not present in the 'M' profile can be used. */
737 int arm_arch_notm = 0;
738
739 /* Nonzero if instructions present in ARMv7E-M can be used. */
740 int arm_arch7em = 0;
741
742 /* Nonzero if this chip can benefit from load scheduling. */
743 int arm_ld_sched = 0;
744
745 /* Nonzero if this chip is a StrongARM. */
746 int arm_tune_strongarm = 0;
747
748 /* Nonzero if this chip is a Cirrus variant. */
749 int arm_arch_cirrus = 0;
750
751 /* Nonzero if this chip supports Intel Wireless MMX technology. */
752 int arm_arch_iwmmxt = 0;
753
754 /* Nonzero if this chip is an XScale. */
755 int arm_arch_xscale = 0;
756
757 /* Nonzero if tuning for XScale */
758 int arm_tune_xscale = 0;
759
760 /* Nonzero if we want to tune for stores that access the write-buffer.
761 This typically means an ARM6 or ARM7 with MMU or MPU. */
762 int arm_tune_wbuf = 0;
763
764 /* Nonzero if tuning for Cortex-A9. */
765 int arm_tune_cortex_a9 = 0;
766
767 /* Nonzero if generating Thumb instructions. */
768 int thumb_code = 0;
769
770 /* Nonzero if generating Thumb-1 instructions. */
771 int thumb1_code = 0;
772
773 /* Nonzero if we should define __THUMB_INTERWORK__ in the
774 preprocessor.
775 XXX This is a bit of a hack, it's intended to help work around
776 problems in GLD which doesn't understand that armv5t code is
777 interworking clean. */
778 int arm_cpp_interwork = 0;
779
780 /* Nonzero if chip supports Thumb 2. */
781 int arm_arch_thumb2;
782
783 /* Nonzero if chip supports integer division instruction. */
784 int arm_arch_hwdiv;
785
786 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
787 we must report the mode of the memory reference from
788 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
789 enum machine_mode output_memory_reference_mode;
790
791 /* The register number to be used for the PIC offset register. */
792 unsigned arm_pic_register = INVALID_REGNUM;
793
794 /* Set to 1 after arm_reorg has started. Reset to zero at the start
795 the next function. */
796 static int after_arm_reorg = 0;
797
798 enum arm_pcs arm_pcs_default;
799
800 /* For an explanation of these variables, see final_prescan_insn below. */
801 int arm_ccfsm_state;
802 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
803 enum arm_cond_code arm_current_cc;
804
805 rtx arm_target_insn;
806 int arm_target_label;
807 /* The number of conditionally executed insns, including the current insn. */
808 int arm_condexec_count = 0;
809 /* A bitmask specifying the patterns for the IT block.
810 Zero means do not output an IT block before this insn. */
811 int arm_condexec_mask = 0;
812 /* The number of bits used in arm_condexec_mask. */
813 int arm_condexec_masklen = 0;
814
815 /* The condition codes of the ARM, and the inverse function. */
816 static const char * const arm_condition_codes[] =
817 {
818 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
819 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
820 };
821
822 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
823 int arm_regs_in_sequence[] =
824 {
825 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
826 };
827
828 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
829 #define streq(string1, string2) (strcmp (string1, string2) == 0)
830
831 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
832 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
833 | (1 << PIC_OFFSET_TABLE_REGNUM)))
834 \f
835 /* Initialization code. */
836
837 struct processors
838 {
839 const char *const name;
840 enum processor_type core;
841 const char *arch;
842 const unsigned long flags;
843 const struct tune_params *const tune;
844 };
845
846
847 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
848 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
849 prefetch_slots, \
850 l1_size, \
851 l1_line_size
852
853 const struct tune_params arm_slowmul_tune =
854 {
855 arm_slowmul_rtx_costs,
856 NULL,
857 3,
858 ARM_PREFETCH_NOT_BENEFICIAL
859 };
860
861 const struct tune_params arm_fastmul_tune =
862 {
863 arm_fastmul_rtx_costs,
864 NULL,
865 1,
866 ARM_PREFETCH_NOT_BENEFICIAL
867 };
868
869 const struct tune_params arm_xscale_tune =
870 {
871 arm_xscale_rtx_costs,
872 xscale_sched_adjust_cost,
873 2,
874 ARM_PREFETCH_NOT_BENEFICIAL
875 };
876
877 const struct tune_params arm_9e_tune =
878 {
879 arm_9e_rtx_costs,
880 NULL,
881 1,
882 ARM_PREFETCH_NOT_BENEFICIAL
883 };
884
885 const struct tune_params arm_cortex_a9_tune =
886 {
887 arm_9e_rtx_costs,
888 cortex_a9_sched_adjust_cost,
889 1,
890 ARM_PREFETCH_BENEFICIAL(4,32,32)
891 };
892
893 const struct tune_params arm_fa726te_tune =
894 {
895 arm_9e_rtx_costs,
896 fa726te_sched_adjust_cost,
897 1,
898 ARM_PREFETCH_NOT_BENEFICIAL
899 };
900
901
902 /* Not all of these give usefully different compilation alternatives,
903 but there is no simple way of generalizing them. */
904 static const struct processors all_cores[] =
905 {
906 /* ARM Cores */
907 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
908 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
909 #include "arm-cores.def"
910 #undef ARM_CORE
911 {NULL, arm_none, NULL, 0, NULL}
912 };
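/* all_cores is built by X-macro expansion of arm-cores.def; e.g. an entry
   of the form ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e)
   (illustrative) would expand here to
     {"arm926ej-s", arm926ejs, "5TEJ", FL_LDSCHED | FL_FOR_ARCH5TEJ,
      &arm_9e_tune},
   picking up the architecture flags and tuning table automatically.  */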
913
914 static const struct processors all_architectures[] =
915 {
916 /* ARM Architectures */
917 /* We don't specify tuning costs here as it will be figured out
918 from the core. */
919
920 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
921 {NAME, CORE, #ARCH, FLAGS, NULL},
922 #include "arm-arches.def"
923 #undef ARM_ARCH
924 {NULL, arm_none, NULL, 0 , NULL}
925 };
926
927
928 /* These are populated as command-line arguments are processed, or NULL
929 if not specified. */
930 static const struct processors *arm_selected_arch;
931 static const struct processors *arm_selected_cpu;
932 static const struct processors *arm_selected_tune;
933
934 /* The name of the preprocessor macro to define for this architecture. */
935
936 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
937
938 /* Available values for -mfpu=. */
939
940 static const struct arm_fpu_desc all_fpus[] =
941 {
942 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
943 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
944 #include "arm-fpus.def"
945 #undef ARM_FPU
946 };
947
948
949 /* Supported TLS relocations. */
950
951 enum tls_reloc {
952 TLS_GD32,
953 TLS_LDM32,
954 TLS_LDO32,
955 TLS_IE32,
956 TLS_LE32
957 };
958
959 /* The maximum number of insns to be used when loading a constant. */
960 inline static int
961 arm_constant_limit (bool size_p)
962 {
963 return size_p ? 1 : current_tune->constant_limit;
964 }
965
966 /* Emit an insn that's a simple single-set. Both the operands must be known
967 to be valid. */
968 inline static rtx
969 emit_set_insn (rtx x, rtx y)
970 {
971 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
972 }
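/* A sketch of typical use: emit_set_insn (target, GEN_INT (0)) emits the
   single insn (set target (const_int 0)).  */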
973
974 /* Return the number of bits set in VALUE. */
975 static unsigned
976 bit_count (unsigned long value)
977 {
978 unsigned long count = 0;
979
980 while (value)
981 {
982 count++;
983 value &= value - 1; /* Clear the least-significant set bit. */
984 }
985
986 return count;
987 }
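/* The value &= value - 1 step is the classic trick that clears one set bit
   per iteration, so e.g. bit_count (0x2c) returns 3.  */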
988
989 /* Set up library functions unique to ARM. */
990
991 static void
992 arm_init_libfuncs (void)
993 {
994 /* There are no special library functions unless we are using the
995 ARM BPABI. */
996 if (!TARGET_BPABI)
997 return;
998
999 /* The functions below are described in Section 4 of the "Run-Time
1000 ABI for the ARM architecture", Version 1.0. */
1001
1002 /* Double-precision floating-point arithmetic. Table 2. */
1003 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1004 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1005 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1006 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1007 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1008
1009 /* Double-precision comparisons. Table 3. */
1010 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1011 set_optab_libfunc (ne_optab, DFmode, NULL);
1012 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1013 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1014 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1015 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1016 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1017
1018 /* Single-precision floating-point arithmetic. Table 4. */
1019 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1020 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1021 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1022 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1023 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1024
1025 /* Single-precision comparisons. Table 5. */
1026 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1027 set_optab_libfunc (ne_optab, SFmode, NULL);
1028 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1029 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1030 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1031 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1032 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1033
1034 /* Floating-point to integer conversions. Table 6. */
1035 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1036 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1037 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1038 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1039 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1040 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1041 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1042 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1043
1044 /* Conversions between floating types. Table 7. */
1045 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1046 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1047
1048 /* Integer to floating-point conversions. Table 8. */
1049 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1050 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1051 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1052 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1053 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1054 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1055 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1056 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1057
1058 /* Long long. Table 9. */
1059 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1060 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1061 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1062 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1063 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1064 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1065 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1066 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1067
1068 /* Integer (32/32->32) division. \S 4.3.1. */
1069 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1070 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1071
1072 /* The divmod functions are designed so that they can be used for
1073 plain division, even though they return both the quotient and the
1074 remainder. The quotient is returned in the usual location (i.e.,
1075 r0 for SImode, {r0, r1} for DImode), just as would be expected
1076 for an ordinary division routine. Because the AAPCS calling
1077 conventions specify that all of { r0, r1, r2, r3 } are
1078 call-clobbered registers, there is no need to tell the compiler
1079 explicitly that those registers are clobbered by these
1080 routines. */
1081 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1082 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1083
1084 /* For SImode division the ABI provides div-without-mod routines,
1085 which are faster. */
1086 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1087 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1088
1089 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1090 divmod libcalls instead. */
1091 set_optab_libfunc (smod_optab, DImode, NULL);
1092 set_optab_libfunc (umod_optab, DImode, NULL);
1093 set_optab_libfunc (smod_optab, SImode, NULL);
1094 set_optab_libfunc (umod_optab, SImode, NULL);
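  /* A sketch of the effect of the libcalls above: with no hardware divide,
     a 32-bit signed division such as

       int quot (int a, int b) { return a / b; }

     becomes a call to __aeabi_idiv with A in r0 and B in r1, the quotient
     returned in r0; a % b instead calls __aeabi_idivmod and reads the
     remainder from r1.  */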
1095
1096 /* Half-precision float operations. The compiler handles all operations
1097 with NULL libfuncs by converting to SFmode. */
1098 switch (arm_fp16_format)
1099 {
1100 case ARM_FP16_FORMAT_IEEE:
1101 case ARM_FP16_FORMAT_ALTERNATIVE:
1102
1103 /* Conversions. */
1104 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1105 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1106 ? "__gnu_f2h_ieee"
1107 : "__gnu_f2h_alternative"));
1108 set_conv_libfunc (sext_optab, SFmode, HFmode,
1109 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1110 ? "__gnu_h2f_ieee"
1111 : "__gnu_h2f_alternative"));
1112
1113 /* Arithmetic. */
1114 set_optab_libfunc (add_optab, HFmode, NULL);
1115 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1116 set_optab_libfunc (smul_optab, HFmode, NULL);
1117 set_optab_libfunc (neg_optab, HFmode, NULL);
1118 set_optab_libfunc (sub_optab, HFmode, NULL);
1119
1120 /* Comparisons. */
1121 set_optab_libfunc (eq_optab, HFmode, NULL);
1122 set_optab_libfunc (ne_optab, HFmode, NULL);
1123 set_optab_libfunc (lt_optab, HFmode, NULL);
1124 set_optab_libfunc (le_optab, HFmode, NULL);
1125 set_optab_libfunc (ge_optab, HFmode, NULL);
1126 set_optab_libfunc (gt_optab, HFmode, NULL);
1127 set_optab_libfunc (unord_optab, HFmode, NULL);
1128 break;
1129
1130 default:
1131 break;
1132 }
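  /* A worked example of the NULL-libfunc convention above: for __fp16
     values A and B, A + B is computed by widening both with
     __gnu_h2f_ieee (or the _alternative variant), adding in SFmode, and
     narrowing the result with __gnu_f2h_ieee, so no HFmode add routine
     is needed.  */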
1133
1134 if (TARGET_AAPCS_BASED)
1135 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1136 }
1137
1138 /* On AAPCS systems, this is the "struct __va_list". */
1139 static GTY(()) tree va_list_type;
1140
1141 /* Return the type to use as __builtin_va_list. */
1142 static tree
1143 arm_build_builtin_va_list (void)
1144 {
1145 tree va_list_name;
1146 tree ap_field;
1147
1148 if (!TARGET_AAPCS_BASED)
1149 return std_build_builtin_va_list ();
1150
1151 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1152 defined as:
1153
1154 struct __va_list
1155 {
1156 void *__ap;
1157 };
1158
1159 The C Library ABI further reinforces this definition in \S
1160 4.1.
1161
1162 We must follow this definition exactly. The structure tag
1163 name is visible in C++ mangled names, and thus forms a part
1164 of the ABI. The field name may be used by people who
1165 #include <stdarg.h>. */
1166 /* Create the type. */
1167 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1168 /* Give it the required name. */
1169 va_list_name = build_decl (BUILTINS_LOCATION,
1170 TYPE_DECL,
1171 get_identifier ("__va_list"),
1172 va_list_type);
1173 DECL_ARTIFICIAL (va_list_name) = 1;
1174 TYPE_NAME (va_list_type) = va_list_name;
1175 TYPE_STUB_DECL (va_list_type) = va_list_name;
1176 /* Create the __ap field. */
1177 ap_field = build_decl (BUILTINS_LOCATION,
1178 FIELD_DECL,
1179 get_identifier ("__ap"),
1180 ptr_type_node);
1181 DECL_ARTIFICIAL (ap_field) = 1;
1182 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1183 TYPE_FIELDS (va_list_type) = ap_field;
1184 /* Compute its layout. */
1185 layout_type (va_list_type);
1186
1187 return va_list_type;
1188 }
1189
1190 /* Return an expression of type "void *" pointing to the next
1191 available argument in a variable-argument list. VALIST is the
1192 user-level va_list object, of type __builtin_va_list. */
1193 static tree
1194 arm_extract_valist_ptr (tree valist)
1195 {
1196 if (TREE_TYPE (valist) == error_mark_node)
1197 return error_mark_node;
1198
1199 /* On an AAPCS target, the pointer is stored within "struct
1200 va_list". */
1201 if (TARGET_AAPCS_BASED)
1202 {
1203 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1204 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1205 valist, ap_field, NULL_TREE);
1206 }
1207
1208 return valist;
1209 }
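/* For instance, given the AAPCS __va_list layout above, a user-level
   "va_list ap;" is rewritten here so that later code operates on the
   expression ap.__ap rather than on ap itself.  */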
1210
1211 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1212 static void
1213 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1214 {
1215 valist = arm_extract_valist_ptr (valist);
1216 std_expand_builtin_va_start (valist, nextarg);
1217 }
1218
1219 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1220 static tree
1221 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1222 gimple_seq *post_p)
1223 {
1224 valist = arm_extract_valist_ptr (valist);
1225 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1226 }
1227
1228 /* Fix up any incompatible options that the user has specified. */
1229 static void
1230 arm_option_override (void)
1231 {
1232 if (global_options_set.x_arm_arch_option)
1233 arm_selected_arch = &all_architectures[arm_arch_option];
1234
1235 if (global_options_set.x_arm_cpu_option)
1236 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1237
1238 if (global_options_set.x_arm_tune_option)
1239 arm_selected_tune = &all_cores[(int) arm_tune_option];
1240
1241 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1242 SUBTARGET_OVERRIDE_OPTIONS;
1243 #endif
1244
1245 if (arm_selected_arch)
1246 {
1247 if (arm_selected_cpu)
1248 {
1249 /* Check for conflict between mcpu and march. */
1250 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1251 {
1252 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1253 arm_selected_cpu->name, arm_selected_arch->name);
1254 /* -march wins for code generation.
1255 -mcpu wins for default tuning. */
1256 if (!arm_selected_tune)
1257 arm_selected_tune = arm_selected_cpu;
1258
1259 arm_selected_cpu = arm_selected_arch;
1260 }
1261 else
1262 /* -mcpu wins. */
1263 arm_selected_arch = NULL;
1264 }
1265 else
1266 /* Pick a CPU based on the architecture. */
1267 arm_selected_cpu = arm_selected_arch;
1268 }
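  /* E.g. (a hypothetical command line) -mcpu=arm926ej-s -march=armv6 would
     warn about the conflict, generate ARMv6 code, and tune for the
     arm926ej-s.  */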
1269
1270 /* If the user did not specify a processor, choose one for them. */
1271 if (!arm_selected_cpu)
1272 {
1273 const struct processors * sel;
1274 unsigned int sought;
1275
1276 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1277 if (!arm_selected_cpu->name)
1278 {
1279 #ifdef SUBTARGET_CPU_DEFAULT
1280 /* Use the subtarget default CPU if none was specified by
1281 configure. */
1282 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1283 #endif
1284 /* Default to ARM6. */
1285 if (!arm_selected_cpu->name)
1286 arm_selected_cpu = &all_cores[arm6];
1287 }
1288
1289 sel = arm_selected_cpu;
1290 insn_flags = sel->flags;
1291
1292 /* Now check to see if the user has specified some command line
1293 switches that require certain abilities from the CPU. */
1294 sought = 0;
1295
1296 if (TARGET_INTERWORK || TARGET_THUMB)
1297 {
1298 sought |= (FL_THUMB | FL_MODE32);
1299
1300 /* There are no ARM processors that support both APCS-26 and
1301 interworking. Therefore we force FL_MODE26 to be removed
1302 from insn_flags here (if it was set), so that the search
1303 below will always be able to find a compatible processor. */
1304 insn_flags &= ~FL_MODE26;
1305 }
1306
1307 if (sought != 0 && ((sought & insn_flags) != sought))
1308 {
1309 /* Try to locate a CPU type that supports all of the abilities
1310 of the default CPU, plus the extra abilities requested by
1311 the user. */
1312 for (sel = all_cores; sel->name != NULL; sel++)
1313 if ((sel->flags & sought) == (sought | insn_flags))
1314 break;
1315
1316 if (sel->name == NULL)
1317 {
1318 unsigned current_bit_count = 0;
1319 const struct processors * best_fit = NULL;
1320
1321 /* Ideally we would like to issue an error message here
1322 saying that it was not possible to find a CPU compatible
1323 with the default CPU, but which also supports the command
1324 line options specified by the programmer, and so they
1325 ought to use the -mcpu=<name> command line option to
1326 override the default CPU type.
1327
1328 If we cannot find a cpu that has both the
1329 characteristics of the default cpu and the given
1330 command line options we scan the array again looking
1331 for a best match. */
1332 for (sel = all_cores; sel->name != NULL; sel++)
1333 if ((sel->flags & sought) == sought)
1334 {
1335 unsigned count;
1336
1337 count = bit_count (sel->flags & insn_flags);
1338
1339 if (count >= current_bit_count)
1340 {
1341 best_fit = sel;
1342 current_bit_count = count;
1343 }
1344 }
1345
1346 gcc_assert (best_fit);
1347 sel = best_fit;
1348 }
1349
1350 arm_selected_cpu = sel;
1351 }
1352 }
1353
1354 gcc_assert (arm_selected_cpu);
1355 /* The selected cpu may be an architecture, so look up tuning by core ID. */
1356 if (!arm_selected_tune)
1357 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1358
1359 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1360 insn_flags = arm_selected_cpu->flags;
1361
1362 arm_tune = arm_selected_tune->core;
1363 tune_flags = arm_selected_tune->flags;
1364 current_tune = arm_selected_tune->tune;
1365
1366 /* Make sure that the processor choice does not conflict with any of the
1367 other command line choices. */
1368 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1369 error ("target CPU does not support ARM mode");
1370
1371 /* BPABI targets use linker tricks to allow interworking on cores
1372 without thumb support. */
1373 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1374 {
1375 warning (0, "target CPU does not support interworking" );
1376 target_flags &= ~MASK_INTERWORK;
1377 }
1378
1379 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1380 {
1381 warning (0, "target CPU does not support THUMB instructions");
1382 target_flags &= ~MASK_THUMB;
1383 }
1384
1385 if (TARGET_APCS_FRAME && TARGET_THUMB)
1386 {
1387 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1388 target_flags &= ~MASK_APCS_FRAME;
1389 }
1390
1391 /* Callee super interworking implies thumb interworking. Adding
1392 this to the flags here simplifies the logic elsewhere. */
1393 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1394 target_flags |= MASK_INTERWORK;
1395
1396 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1397 from here where no function is being compiled currently. */
1398 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1399 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1400
1401 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1402 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1403
1404 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1405 {
1406 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1407 target_flags |= MASK_APCS_FRAME;
1408 }
1409
1410 if (TARGET_POKE_FUNCTION_NAME)
1411 target_flags |= MASK_APCS_FRAME;
1412
1413 if (TARGET_APCS_REENT && flag_pic)
1414 error ("-fpic and -mapcs-reent are incompatible");
1415
1416 if (TARGET_APCS_REENT)
1417 warning (0, "APCS reentrant code not supported. Ignored");
1418
1419 /* If this target is normally configured to use APCS frames, warn if they
1420 are turned off and debugging is turned on. */
1421 if (TARGET_ARM
1422 && write_symbols != NO_DEBUG
1423 && !TARGET_APCS_FRAME
1424 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1425 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1426
1427 if (TARGET_APCS_FLOAT)
1428 warning (0, "passing floating point arguments in fp regs not yet supported");
1429
1430 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1431 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1432 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1433 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1434 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1435 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1436 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1437 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1438 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1439 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1440 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1441 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1442 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1443 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1444
1445 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1446 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1447 thumb_code = TARGET_ARM == 0;
1448 thumb1_code = TARGET_THUMB1 != 0;
1449 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1450 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1451 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1452 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1453 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1454
1455 /* If we are not using the default (ARM mode) section anchor offset
1456 ranges, then set the correct ranges now. */
1457 if (TARGET_THUMB1)
1458 {
1459 /* Thumb-1 LDR instructions cannot have negative offsets.
1460 Permissible positive offset ranges are 5-bit (for byte loads),
1461 6-bit (for halfword loads), or 7-bit (for word loads).
1462 Empirical results suggest a 7-bit anchor range gives the best
1463 overall code size. */
1464 targetm.min_anchor_offset = 0;
1465 targetm.max_anchor_offset = 127;
1466 }
1467 else if (TARGET_THUMB2)
1468 {
1469 /* The minimum is set such that the total size of the block
1470 for a particular anchor is 248 + 1 + 4095 bytes, which is
1471 divisible by eight, ensuring natural spacing of anchors. */
1472 targetm.min_anchor_offset = -248;
1473 targetm.max_anchor_offset = 4095;
1474 }
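  /* (Arithmetic check: 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543.)  */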
1475
1476 /* V5 code we generate is completely interworking capable, so we turn off
1477 TARGET_INTERWORK here to avoid many tests later on. */
1478
1479 /* XXX However, we must pass the right pre-processor defines to CPP
1480 or GLD can get confused. This is a hack. */
1481 if (TARGET_INTERWORK)
1482 arm_cpp_interwork = 1;
1483
1484 if (arm_arch5)
1485 target_flags &= ~MASK_INTERWORK;
1486
1487 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1488 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1489
1490 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1491 error ("iwmmxt abi requires an iwmmxt capable cpu");
1492
1493 if (!global_options_set.x_arm_fpu_index)
1494 {
1495 const char *target_fpu_name;
1496 bool ok;
1497
1498 #ifdef FPUTYPE_DEFAULT
1499 target_fpu_name = FPUTYPE_DEFAULT;
1500 #else
1501 if (arm_arch_cirrus)
1502 target_fpu_name = "maverick";
1503 else
1504 target_fpu_name = "fpe2";
1505 #endif
1506
1507 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1508 CL_TARGET);
1509 gcc_assert (ok);
1510 }
1511
1512 arm_fpu_desc = &all_fpus[arm_fpu_index];
1513
1514 switch (arm_fpu_desc->model)
1515 {
1516 case ARM_FP_MODEL_FPA:
1517 if (arm_fpu_desc->rev == 2)
1518 arm_fpu_attr = FPU_FPE2;
1519 else if (arm_fpu_desc->rev == 3)
1520 arm_fpu_attr = FPU_FPE3;
1521 else
1522 arm_fpu_attr = FPU_FPA;
1523 break;
1524
1525 case ARM_FP_MODEL_MAVERICK:
1526 arm_fpu_attr = FPU_MAVERICK;
1527 break;
1528
1529 case ARM_FP_MODEL_VFP:
1530 arm_fpu_attr = FPU_VFP;
1531 break;
1532
1533 default:
1534 gcc_unreachable ();
1535 }
1536
1537 if (TARGET_AAPCS_BASED
1538 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1539 error ("FPA is unsupported in the AAPCS");
1540
1541 if (TARGET_AAPCS_BASED)
1542 {
1543 if (TARGET_CALLER_INTERWORKING)
1544 error ("AAPCS does not support -mcaller-super-interworking");
1545 else
1546 if (TARGET_CALLEE_INTERWORKING)
1547 error ("AAPCS does not support -mcallee-super-interworking");
1548 }
1549
1550 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1551 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1552 will ever exist. GCC makes no attempt to support this combination. */
1553 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1554 sorry ("iWMMXt and hardware floating point");
1555
1556 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1557 if (TARGET_THUMB2 && TARGET_IWMMXT)
1558 sorry ("Thumb-2 iWMMXt");
1559
1560 /* __fp16 support currently assumes the core has ldrh. */
1561 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1562 sorry ("__fp16 and no ldrh");
1563
1564 /* If soft-float is specified then don't use FPU. */
1565 if (TARGET_SOFT_FLOAT)
1566 arm_fpu_attr = FPU_NONE;
1567
1568 if (TARGET_AAPCS_BASED)
1569 {
1570 if (arm_abi == ARM_ABI_IWMMXT)
1571 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1572 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1573 && TARGET_HARD_FLOAT
1574 && TARGET_VFP)
1575 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1576 else
1577 arm_pcs_default = ARM_PCS_AAPCS;
1578 }
1579 else
1580 {
1581 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1582 sorry ("-mfloat-abi=hard and VFP");
1583
1584 if (arm_abi == ARM_ABI_APCS)
1585 arm_pcs_default = ARM_PCS_APCS;
1586 else
1587 arm_pcs_default = ARM_PCS_ATPCS;
1588 }
1589
1590 /* For arm2/3 there is no need to do any scheduling if there is only
1591 a floating point emulator, or we are doing software floating-point. */
1592 if ((TARGET_SOFT_FLOAT
1593 || (TARGET_FPA && arm_fpu_desc->rev))
1594 && (tune_flags & FL_MODE32) == 0)
1595 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1596
1597 /* Use the cp15 method if it is available. */
1598 if (target_thread_pointer == TP_AUTO)
1599 {
1600 if (arm_arch6k && !TARGET_THUMB1)
1601 target_thread_pointer = TP_CP15;
1602 else
1603 target_thread_pointer = TP_SOFT;
1604 }
1605
1606 if (TARGET_HARD_TP && TARGET_THUMB1)
1607 error ("can not use -mtp=cp15 with 16-bit Thumb");
1608
1609 /* Override the default structure alignment for AAPCS ABI. */
1610 if (!global_options_set.x_arm_structure_size_boundary)
1611 {
1612 if (TARGET_AAPCS_BASED)
1613 arm_structure_size_boundary = 8;
1614 }
1615 else
1616 {
1617 if (arm_structure_size_boundary != 8
1618 && arm_structure_size_boundary != 32
1619 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1620 {
1621 if (ARM_DOUBLEWORD_ALIGN)
1622 warning (0,
1623 "structure size boundary can only be set to 8, 32 or 64");
1624 else
1625 warning (0, "structure size boundary can only be set to 8 or 32");
1626 arm_structure_size_boundary
1627 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1628 }
1629 }
1630
1631 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1632 {
1633 error ("RTP PIC is incompatible with Thumb");
1634 flag_pic = 0;
1635 }
1636
1637 /* If stack checking is disabled, we can use r10 as the PIC register,
1638 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1639 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1640 {
1641 if (TARGET_VXWORKS_RTP)
1642 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1643 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1644 }
1645
1646 if (flag_pic && TARGET_VXWORKS_RTP)
1647 arm_pic_register = 9;
1648
1649 if (arm_pic_register_string != NULL)
1650 {
1651 int pic_register = decode_reg_name (arm_pic_register_string);
1652
1653 if (!flag_pic)
1654 warning (0, "-mpic-register= is useless without -fpic");
1655
1656 /* Prevent the user from choosing an obviously stupid PIC register. */
1657 else if (pic_register < 0 || call_used_regs[pic_register]
1658 || pic_register == HARD_FRAME_POINTER_REGNUM
1659 || pic_register == STACK_POINTER_REGNUM
1660 || pic_register >= PC_REGNUM
1661 || (TARGET_VXWORKS_RTP
1662 && (unsigned int) pic_register != arm_pic_register))
1663 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1664 else
1665 arm_pic_register = pic_register;
1666 }
1667
1668 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1669 if (fix_cm3_ldrd == 2)
1670 {
1671 if (arm_selected_cpu->core == cortexm3)
1672 fix_cm3_ldrd = 1;
1673 else
1674 fix_cm3_ldrd = 0;
1675 }
1676
1677 if (TARGET_THUMB1 && flag_schedule_insns)
1678 {
1679 /* Don't warn since it's on by default in -O2. */
1680 flag_schedule_insns = 0;
1681 }
1682
1683 if (optimize_size)
1684 {
1685 /* If optimizing for size, bump the number of instructions that we
1686 are prepared to conditionally execute (even on a StrongARM). */
1687 max_insns_skipped = 6;
1688 }
1689 else
1690 {
1691 /* StrongARM has early execution of branches, so a sequence
1692 that is worth skipping is shorter. */
1693 if (arm_tune_strongarm)
1694 max_insns_skipped = 3;
1695 }
1696
1697 /* Hot/Cold partitioning is not currently supported, since we can't
1698 handle literal pool placement in that case. */
1699 if (flag_reorder_blocks_and_partition)
1700 {
1701 inform (input_location,
1702 "-freorder-blocks-and-partition not supported on this architecture");
1703 flag_reorder_blocks_and_partition = 0;
1704 flag_reorder_blocks = 1;
1705 }
1706
1707 if (flag_pic)
1708 /* Hoisting PIC address calculations more aggressively provides a small,
1709 but measurable, size reduction for PIC code. Therefore, we decrease
1710 the bar for unrestricted expression hoisting to the cost of PIC address
1711 calculation, which is 2 instructions. */
1712 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1713 global_options.x_param_values,
1714 global_options_set.x_param_values);
1715
1716 /* ARM EABI defaults to strict volatile bitfields. */
1717 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
1718 flag_strict_volatile_bitfields = 1;
1719
1720 /* Enable software prefetching at -O3 for CPUs that have prefetch, and where
1721 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
1722 if (flag_prefetch_loop_arrays < 0
1723 && HAVE_prefetch
1724 && optimize >= 3
1725 && current_tune->num_prefetch_slots > 0)
1726 flag_prefetch_loop_arrays = 1;
1727
1728 /* Set up the parameters used by the prefetching algorithm. Do not override
1729 the defaults unless we are tuning for a core we have researched values for. */
1730 if (current_tune->num_prefetch_slots > 0)
1731 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1732 current_tune->num_prefetch_slots,
1733 global_options.x_param_values,
1734 global_options_set.x_param_values);
1735 if (current_tune->l1_cache_line_size >= 0)
1736 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1737 current_tune->l1_cache_line_size,
1738 global_options.x_param_values,
1739 global_options_set.x_param_values);
1740 if (current_tune->l1_cache_size >= 0)
1741 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1742 current_tune->l1_cache_size,
1743 global_options.x_param_values,
1744 global_options_set.x_param_values);
1745
1746 /* Register global variables with the garbage collector. */
1747 arm_add_gc_roots ();
1748 }
1749
1750 static void
1751 arm_add_gc_roots (void)
1752 {
1753 gcc_obstack_init(&minipool_obstack);
1754 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1755 }
1756 \f
1757 /* A table of known ARM exception types.
1758 For use with the interrupt function attribute. */
1759
1760 typedef struct
1761 {
1762 const char *const arg;
1763 const unsigned long return_value;
1764 }
1765 isr_attribute_arg;
1766
1767 static const isr_attribute_arg isr_attribute_args [] =
1768 {
1769 { "IRQ", ARM_FT_ISR },
1770 { "irq", ARM_FT_ISR },
1771 { "FIQ", ARM_FT_FIQ },
1772 { "fiq", ARM_FT_FIQ },
1773 { "ABORT", ARM_FT_ISR },
1774 { "abort", ARM_FT_ISR },
1777 { "UNDEF", ARM_FT_EXCEPTION },
1778 { "undef", ARM_FT_EXCEPTION },
1779 { "SWI", ARM_FT_EXCEPTION },
1780 { "swi", ARM_FT_EXCEPTION },
1781 { NULL, ARM_FT_NORMAL }
1782 };
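/* For example, a handler declared as
     void handler (void) __attribute__ ((interrupt ("IRQ")));
   is mapped to ARM_FT_ISR through this table by arm_isr_value below.  */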
1783
1784 /* Returns the (interrupt) function type of the current
1785 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1786
1787 static unsigned long
1788 arm_isr_value (tree argument)
1789 {
1790 const isr_attribute_arg * ptr;
1791 const char * arg;
1792
1793 if (!arm_arch_notm)
1794 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1795
1796 /* No argument - default to IRQ. */
1797 if (argument == NULL_TREE)
1798 return ARM_FT_ISR;
1799
1800 /* Get the value of the argument. */
1801 if (TREE_VALUE (argument) == NULL_TREE
1802 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1803 return ARM_FT_UNKNOWN;
1804
1805 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1806
1807 /* Check it against the list of known arguments. */
1808 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1809 if (streq (arg, ptr->arg))
1810 return ptr->return_value;
1811
1812 /* An unrecognized interrupt type. */
1813 return ARM_FT_UNKNOWN;
1814 }
1815
1816 /* Computes the type of the current function. */
1817
1818 static unsigned long
1819 arm_compute_func_type (void)
1820 {
1821 unsigned long type = ARM_FT_UNKNOWN;
1822 tree a;
1823 tree attr;
1824
1825 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1826
1827 /* Decide if the current function is volatile. Such functions
1828 never return, and many memory cycles can be saved by not storing
1829 register values that will never be needed again. This optimization
1830 was added to speed up context switching in a kernel application. */
1831 if (optimize > 0
1832 && (TREE_NOTHROW (current_function_decl)
1833 || !(flag_unwind_tables
1834 || (flag_exceptions
1835 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
1836 && TREE_THIS_VOLATILE (current_function_decl))
1837 type |= ARM_FT_VOLATILE;
1838
1839 if (cfun->static_chain_decl != NULL)
1840 type |= ARM_FT_NESTED;
1841
1842 attr = DECL_ATTRIBUTES (current_function_decl);
1843
1844 a = lookup_attribute ("naked", attr);
1845 if (a != NULL_TREE)
1846 type |= ARM_FT_NAKED;
1847
1848 a = lookup_attribute ("isr", attr);
1849 if (a == NULL_TREE)
1850 a = lookup_attribute ("interrupt", attr);
1851
1852 if (a == NULL_TREE)
1853 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1854 else
1855 type |= arm_isr_value (TREE_VALUE (a));
1856
1857 return type;
1858 }
1859
1860 /* Returns the type of the current function. */
1861
1862 unsigned long
1863 arm_current_func_type (void)
1864 {
1865 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1866 cfun->machine->func_type = arm_compute_func_type ();
1867
1868 return cfun->machine->func_type;
1869 }
1870
1871 bool
1872 arm_allocate_stack_slots_for_args (void)
1873 {
1874 /* Naked functions should not allocate stack slots for arguments. */
1875 return !IS_NAKED (arm_current_func_type ());
1876 }
1877
1878 \f
1879 /* Output assembler code for a block containing the constant parts
1880 of a trampoline, leaving space for the variable parts.
1881
1882 On the ARM, (if r8 is the static chain regnum, and remembering that
1883 referencing pc adds an offset of 8) the trampoline looks like:
1884 ldr r8, [pc, #0]
1885 ldr pc, [pc]
1886 .word static chain value
1887 .word function's address
1888 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
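/* The Thumb-1 variant emitted below cannot load the pc directly, so it
   stages the target through r0 and the stack: the chain value and the
   function's address sit at offsets 12 and 16 (see arm_trampoline_init),
   the saved copy of r1 is overwritten with the function's address, and
   the final pop loads that address into the pc.  */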
1889
1890 static void
1891 arm_asm_trampoline_template (FILE *f)
1892 {
1893 if (TARGET_ARM)
1894 {
1895 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
1896 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
1897 }
1898 else if (TARGET_THUMB2)
1899 {
1900 /* The Thumb-2 trampoline is similar to the ARM implementation.
1901 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
1902 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
1903 STATIC_CHAIN_REGNUM, PC_REGNUM);
1904 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
1905 }
1906 else
1907 {
1908 ASM_OUTPUT_ALIGN (f, 2);
1909 fprintf (f, "\t.code\t16\n");
1910 fprintf (f, ".Ltrampoline_start:\n");
1911 asm_fprintf (f, "\tpush\t{r0, r1}\n");
1912 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
1913 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
1914 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
1915 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
1916 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
1917 }
1918 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
1919 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
1920 }
1921
1922 /* Emit RTL insns to initialize the variable parts of a trampoline. */
1923
1924 static void
1925 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
1926 {
1927 rtx fnaddr, mem, a_tramp;
1928
1929 emit_block_move (m_tramp, assemble_trampoline_template (),
1930 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
1931
1932 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
1933 emit_move_insn (mem, chain_value);
1934
1935 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
1936 fnaddr = XEXP (DECL_RTL (fndecl), 0);
1937 emit_move_insn (mem, fnaddr);
1938
1939 a_tramp = XEXP (m_tramp, 0);
1940 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
1941 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
1942 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
1943 }
1944
1945 /* Thumb trampolines should be entered in thumb mode, so set
1946 the bottom bit of the address. */
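/* For example, a trampoline placed at 0x20000 is entered through the
   address 0x20001.  */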
1947
1948 static rtx
1949 arm_trampoline_adjust_address (rtx addr)
1950 {
1951 if (TARGET_THUMB)
1952 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
1953 NULL, 0, OPTAB_LIB_WIDEN);
1954 return addr;
1955 }
1956 \f
1957 /* Return 1 if it is possible to return using a single instruction.
1958 If SIBLING is non-null, this is a test for a return before a sibling
1959 call. SIBLING is the call insn, so we can examine its register usage. */
1960
1961 int
1962 use_return_insn (int iscond, rtx sibling)
1963 {
1964 int regno;
1965 unsigned int func_type;
1966 unsigned long saved_int_regs;
1967 unsigned HOST_WIDE_INT stack_adjust;
1968 arm_stack_offsets *offsets;
1969
1970 /* Never use a return instruction before reload has run. */
1971 if (!reload_completed)
1972 return 0;
1973
1974 func_type = arm_current_func_type ();
1975
1976 /* Naked, volatile and stack alignment functions need special
1977 consideration. */
1978 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1979 return 0;
1980
1981 /* So do interrupt functions that use the frame pointer and Thumb
1982 interrupt functions. */
1983 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1984 return 0;
1985
1986 offsets = arm_get_frame_offsets ();
1987 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1988
1989 /* As do variadic functions. */
1990 if (crtl->args.pretend_args_size
1991 || cfun->machine->uses_anonymous_args
1992 /* Or if the function calls __builtin_eh_return () */
1993 || crtl->calls_eh_return
1994 /* Or if the function calls alloca */
1995 || cfun->calls_alloca
1996 /* Or if there is a stack adjustment. However, if the stack pointer
1997 is saved on the stack, we can use a pre-incrementing stack load. */
1998 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
1999 && stack_adjust == 4)))
2000 return 0;
2001
2002 saved_int_regs = offsets->saved_regs_mask;
2003
2004 /* Unfortunately, the insn
2005
2006 ldmib sp, {..., sp, ...}
2007
2008 triggers a bug on most SA-110 based devices, such that the stack
2009 pointer won't be correctly restored if the instruction takes a
2010 page fault. We work around this problem by popping r3 along with
2011 the other registers, since that is never slower than executing
2012 another instruction.
2013
2014 We test for !arm_arch5 here, because code for any architecture
2015 less than this could potentially be run on one of the buggy
2016 chips. */
2017 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2018 {
2019 /* Validate that r3 is a call-clobbered register (always true in
2020 the default abi) ... */
2021 if (!call_used_regs[3])
2022 return 0;
2023
2024 /* ... that it isn't being used for a return value ... */
2025 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2026 return 0;
2027
2028 /* ... or for a tail-call argument ... */
2029 if (sibling)
2030 {
2031 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2032
2033 if (find_regno_fusage (sibling, USE, 3))
2034 return 0;
2035 }
2036
2037 /* ... and that there are no call-saved registers in r0-r2
2038 (always true in the default ABI). */
2039 if (saved_int_regs & 0x7)
2040 return 0;
2041 }
2042
2043 /* Can't be done if interworking with Thumb, and any registers have been
2044 stacked. */
2045 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2046 return 0;
2047
2048 /* On StrongARM, conditional returns are expensive if they aren't
2049 taken and multiple registers have been stacked. */
2050 if (iscond && arm_tune_strongarm)
2051 {
2052 /* A conditional return when just the LR is stored is a simple
2053 conditional-load instruction; that's not expensive. */
2054 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2055 return 0;
2056
2057 if (flag_pic
2058 && arm_pic_register != INVALID_REGNUM
2059 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2060 return 0;
2061 }
2062
2063 /* If there are saved registers but the LR isn't saved, then we need
2064 two instructions for the return. */
2065 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2066 return 0;
2067
2068 /* Can't be done if any of the FPA regs are pushed,
2069 since this also requires an insn. */
2070 if (TARGET_HARD_FLOAT && TARGET_FPA)
2071 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2072 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2073 return 0;
2074
2075 /* Likewise VFP regs. */
2076 if (TARGET_HARD_FLOAT && TARGET_VFP)
2077 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2078 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2079 return 0;
2080
2081 if (TARGET_REALLY_IWMMXT)
2082 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2083 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2084 return 0;
2085
2086 return 1;
2087 }
2088
2089 /* Return TRUE if int I is a valid immediate ARM constant. */
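/* For example (ARM mode), 0xff, 0xff000000 and 0xf000000f are valid
   (an 8-bit value rotated right by an even amount), whereas 0x101 and
   0x1fe00 are not.  Thumb-2 additionally accepts the replicated byte
   patterns handled below, e.g. 0x00ab00ab and 0xabababab.  */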
2090
2091 int
2092 const_ok_for_arm (HOST_WIDE_INT i)
2093 {
2094 int lowbit;
2095
2096 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2097 be all zero, or all one. */
2098 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2099 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2100 != ((~(unsigned HOST_WIDE_INT) 0)
2101 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2102 return FALSE;
2103
2104 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2105
2106 /* Fast return for 0 and small values. We must do this for zero, since
2107 the code below can't handle that one case. */
2108 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2109 return TRUE;
2110
2111 /* Get the number of trailing zeros. */
2112 lowbit = ffs((int) i) - 1;
2113
2114 /* Only even shifts are allowed in ARM mode so round down to the
2115 nearest even number. */
2116 if (TARGET_ARM)
2117 lowbit &= ~1;
2118
2119 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2120 return TRUE;
2121
2122 if (TARGET_ARM)
2123 {
2124 /* Allow rotated constants in ARM mode. */
2125 if (lowbit <= 4
2126 && ((i & ~0xc000003f) == 0
2127 || (i & ~0xf000000f) == 0
2128 || (i & ~0xfc000003) == 0))
2129 return TRUE;
2130 }
2131 else
2132 {
2133 HOST_WIDE_INT v;
2134
2135 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2136 v = i & 0xff;
2137 v |= v << 16;
2138 if (i == v || i == (v | (v << 8)))
2139 return TRUE;
2140
2141 /* Allow repeated pattern 0xXY00XY00. */
2142 v = i & 0xff00;
2143 v |= v << 16;
2144 if (i == v)
2145 return TRUE;
2146 }
2147
2148 return FALSE;
2149 }
2150
2151 /* Return true if I is a valid constant for the operation CODE. */
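/* For example, (AND x 0xffffff00) is accepted even though 0xffffff00 is
   not itself a valid immediate, because ~0xffffff00 == 0xff is; the
   operation can then be emitted as a BIC of 0xff.  */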
2152 static int
2153 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2154 {
2155 if (const_ok_for_arm (i))
2156 return 1;
2157
2158 switch (code)
2159 {
2160 case SET:
2161 /* See if we can use movw. */
2162 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2163 return 1;
2164 else
2165 return 0;
2166
2167 case PLUS:
2168 case COMPARE:
2169 case EQ:
2170 case NE:
2171 case GT:
2172 case LE:
2173 case LT:
2174 case GE:
2175 case GEU:
2176 case LTU:
2177 case GTU:
2178 case LEU:
2179 case UNORDERED:
2180 case ORDERED:
2181 case UNEQ:
2182 case UNGE:
2183 case UNLT:
2184 case UNGT:
2185 case UNLE:
2186 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2187
2188 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2189 case XOR:
2190 return 0;
2191
2192 case IOR:
2193 if (TARGET_THUMB2)
2194 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2195 return 0;
2196
2197 case AND:
2198 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2199
2200 default:
2201 gcc_unreachable ();
2202 }
2203 }
2204
2205 /* Emit a sequence of insns to handle a large constant.
2206 CODE is the code of the operation required, it can be any of SET, PLUS,
2207 IOR, AND, XOR, MINUS;
2208 MODE is the mode in which the operation is being performed;
2209 VAL is the integer to operate on;
2210 SOURCE is the other operand (a register, or a null-pointer for SET);
2211 SUBTARGETS means it is safe to create scratch registers if that will
2212 either produce a simpler sequence, or we will want to cse the values.
2213 Return value is the number of insns emitted. */
2214
2215 /* ??? Tweak this for thumb2. */
2216 int
2217 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2218 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2219 {
2220 rtx cond;
2221
2222 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2223 cond = COND_EXEC_TEST (PATTERN (insn));
2224 else
2225 cond = NULL_RTX;
2226
2227 if (subtargets || code == SET
2228 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2229 && REGNO (target) != REGNO (source)))
2230 {
2231 /* After arm_reorg has been called, we can't fix up expensive
2232 constants by pushing them into memory so we must synthesize
2233 them in-line, regardless of the cost. This is only likely to
2234 be more costly on chips that have load delay slots and we are
2235 compiling without running the scheduler (so no splitting
2236 occurred before the final instruction emission).
2237
2238 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2239 */
2240 if (!after_arm_reorg
2241 && !cond
2242 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2243 1, 0)
2244 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2245 + (code != SET))))
2246 {
2247 if (code == SET)
2248 {
2249 /* Currently SET is the only monadic value for CODE; all
2250 the rest are dyadic. */
2251 if (TARGET_USE_MOVT)
2252 arm_emit_movpair (target, GEN_INT (val));
2253 else
2254 emit_set_insn (target, GEN_INT (val));
2255
2256 return 1;
2257 }
2258 else
2259 {
2260 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2261
2262 if (TARGET_USE_MOVT)
2263 arm_emit_movpair (temp, GEN_INT (val));
2264 else
2265 emit_set_insn (temp, GEN_INT (val));
2266
2267 /* For MINUS, the constant is the value subtracted from, since we
2268 never have subtraction of a constant. */
2269 if (code == MINUS)
2270 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2271 else
2272 emit_set_insn (target,
2273 gen_rtx_fmt_ee (code, mode, source, temp));
2274 return 2;
2275 }
2276 }
2277 }
2278
2279 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2280 1);
2281 }
2282
2283 /* Return the number of instructions required to synthesize the given
2284 constant, if we start emitting them from bit-position I. */
2285 static int
2286 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
2287 {
2288 HOST_WIDE_INT temp1;
2289 int step_size = TARGET_ARM ? 2 : 1;
2290 int num_insns = 0;
2291
2292 gcc_assert (TARGET_ARM || i == 0);
2293
2294 do
2295 {
2296 int end;
2297
2298 if (i <= 0)
2299 i += 32;
2300 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2301 {
2302 end = i - 8;
2303 if (end < 0)
2304 end += 32;
2305 temp1 = remainder & ((0x0ff << end)
2306 | ((i < end) ? (0xff >> (32 - end)) : 0));
2307 remainder &= ~temp1;
2308 num_insns++;
2309 i -= 8 - step_size;
2310 }
2311 i -= step_size;
2312 } while (remainder);
2313 return num_insns;
2314 }
2315
2316 static int
2317 find_best_start (unsigned HOST_WIDE_INT remainder)
2318 {
2319 int best_consecutive_zeros = 0;
2320 int i;
2321 int best_start = 0;
2322
2323 /* If we aren't targeting ARM, the best place to start is always at
2324 the bottom. */
2325 if (! TARGET_ARM)
2326 return 0;
2327
2328 for (i = 0; i < 32; i += 2)
2329 {
2330 int consecutive_zeros = 0;
2331
2332 if (!(remainder & (3 << i)))
2333 {
2334 while ((i < 32) && !(remainder & (3 << i)))
2335 {
2336 consecutive_zeros += 2;
2337 i += 2;
2338 }
2339 if (consecutive_zeros > best_consecutive_zeros)
2340 {
2341 best_consecutive_zeros = consecutive_zeros;
2342 best_start = i - consecutive_zeros;
2343 }
2344 i -= 2;
2345 }
2346 }
2347
2348 /* So long as it won't require any more insns to do so, it's
2349 desirable to emit a small constant (in bits 0...9) in the last
2350 insn. This way there is more chance that it can be combined with
2351 a later addressing insn to form a pre-indexed load or store
2352 operation. Consider:
2353
2354 *((volatile int *)0xe0000100) = 1;
2355 *((volatile int *)0xe0000110) = 2;
2356
2357 We want this to wind up as:
2358
2359 mov rA, #0xe0000000
2360 mov rB, #1
2361 str rB, [rA, #0x100]
2362 mov rB, #2
2363 str rB, [rA, #0x110]
2364
2365 rather than having to synthesize both large constants from scratch.
2366
2367 Therefore, we calculate how many insns would be required to emit
2368 the constant starting from `best_start', and also starting from
2369 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2370 yield a shorter sequence, we may as well use zero. */
2371 if (best_start != 0
2372 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2373 && (count_insns_for_constant (remainder, 0) <=
2374 count_insns_for_constant (remainder, best_start)))
2375 best_start = 0;
2376
2377 return best_start;
2378 }
2379
2380 /* Emit an instruction with the indicated PATTERN. If COND is
2381 non-NULL, conditionalize the execution of the instruction on COND
2382 being true. */
2383
2384 static void
2385 emit_constant_insn (rtx cond, rtx pattern)
2386 {
2387 if (cond)
2388 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2389 emit_insn (pattern);
2390 }
2391
2392 /* As above, but extra parameter GENERATE which, if clear, suppresses
2393 RTL generation. */
2394 /* ??? This needs more work for thumb2. */
2395
2396 static int
2397 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2398 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2399 int generate)
2400 {
2401 int can_invert = 0;
2402 int can_negate = 0;
2403 int final_invert = 0;
2404 int i;
2405 int num_bits_set = 0;
2406 int set_sign_bit_copies = 0;
2407 int clear_sign_bit_copies = 0;
2408 int clear_zero_bit_copies = 0;
2409 int set_zero_bit_copies = 0;
2410 int insns = 0;
2411 unsigned HOST_WIDE_INT temp1, temp2;
2412 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2413 int step_size = TARGET_ARM ? 2 : 1;
2414
2415 /* Find out which operations are safe for a given CODE. Also do a quick
2416 check for degenerate cases; these can occur when DImode operations
2417 are split. */
2418 switch (code)
2419 {
2420 case SET:
2421 can_invert = 1;
2422 can_negate = 1;
2423 break;
2424
2425 case PLUS:
2426 can_negate = 1;
2427 break;
2428
2429 case IOR:
2430 if (remainder == 0xffffffff)
2431 {
2432 if (generate)
2433 emit_constant_insn (cond,
2434 gen_rtx_SET (VOIDmode, target,
2435 GEN_INT (ARM_SIGN_EXTEND (val))));
2436 return 1;
2437 }
2438
2439 if (remainder == 0)
2440 {
2441 if (reload_completed && rtx_equal_p (target, source))
2442 return 0;
2443
2444 if (generate)
2445 emit_constant_insn (cond,
2446 gen_rtx_SET (VOIDmode, target, source));
2447 return 1;
2448 }
2449 break;
2450
2451 case AND:
2452 if (remainder == 0)
2453 {
2454 if (generate)
2455 emit_constant_insn (cond,
2456 gen_rtx_SET (VOIDmode, target, const0_rtx));
2457 return 1;
2458 }
2459 if (remainder == 0xffffffff)
2460 {
2461 if (reload_completed && rtx_equal_p (target, source))
2462 return 0;
2463 if (generate)
2464 emit_constant_insn (cond,
2465 gen_rtx_SET (VOIDmode, target, source));
2466 return 1;
2467 }
2468 can_invert = 1;
2469 break;
2470
2471 case XOR:
2472 if (remainder == 0)
2473 {
2474 if (reload_completed && rtx_equal_p (target, source))
2475 return 0;
2476 if (generate)
2477 emit_constant_insn (cond,
2478 gen_rtx_SET (VOIDmode, target, source));
2479 return 1;
2480 }
2481
2482 if (remainder == 0xffffffff)
2483 {
2484 if (generate)
2485 emit_constant_insn (cond,
2486 gen_rtx_SET (VOIDmode, target,
2487 gen_rtx_NOT (mode, source)));
2488 return 1;
2489 }
2490 break;
2491
2492 case MINUS:
2493 /* We treat MINUS as (val - source), since (source - val) is always
2494 passed as (source + (-val)). */
2495 if (remainder == 0)
2496 {
2497 if (generate)
2498 emit_constant_insn (cond,
2499 gen_rtx_SET (VOIDmode, target,
2500 gen_rtx_NEG (mode, source)));
2501 return 1;
2502 }
2503 if (const_ok_for_arm (val))
2504 {
2505 if (generate)
2506 emit_constant_insn (cond,
2507 gen_rtx_SET (VOIDmode, target,
2508 gen_rtx_MINUS (mode, GEN_INT (val),
2509 source)));
2510 return 1;
2511 }
2512 can_negate = 1;
2513
2514 break;
2515
2516 default:
2517 gcc_unreachable ();
2518 }
2519
2520 /* If we can do it in one insn get out quickly. */
2521 if (const_ok_for_op (val, code))
2522 {
2523 if (generate)
2524 emit_constant_insn (cond,
2525 gen_rtx_SET (VOIDmode, target,
2526 (source
2527 ? gen_rtx_fmt_ee (code, mode, source,
2528 GEN_INT (val))
2529 : GEN_INT (val))));
2530 return 1;
2531 }
2532
2533 /* Calculate a few attributes that may be useful for specific
2534 optimizations. */
2535 /* Count number of leading zeros. */
2536 for (i = 31; i >= 0; i--)
2537 {
2538 if ((remainder & (1 << i)) == 0)
2539 clear_sign_bit_copies++;
2540 else
2541 break;
2542 }
2543
2544 /* Count number of leading 1's. */
2545 for (i = 31; i >= 0; i--)
2546 {
2547 if ((remainder & (1 << i)) != 0)
2548 set_sign_bit_copies++;
2549 else
2550 break;
2551 }
2552
2553 /* Count number of trailing zero's. */
2554 for (i = 0; i <= 31; i++)
2555 {
2556 if ((remainder & (1 << i)) == 0)
2557 clear_zero_bit_copies++;
2558 else
2559 break;
2560 }
2561
2562 /* Count number of trailing 1's. */
2563 for (i = 0; i <= 31; i++)
2564 {
2565 if ((remainder & (1 << i)) != 0)
2566 set_zero_bit_copies++;
2567 else
2568 break;
2569 }
2570
2571 switch (code)
2572 {
2573 case SET:
2574 /* See if we can do this by sign_extending a constant that is known
2575 to be negative. This is a good way of doing it, since the shift
2576 may well merge into a subsequent insn. */
2577 if (set_sign_bit_copies > 1)
2578 {
2579 if (const_ok_for_arm
2580 (temp1 = ARM_SIGN_EXTEND (remainder
2581 << (set_sign_bit_copies - 1))))
2582 {
2583 if (generate)
2584 {
2585 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2586 emit_constant_insn (cond,
2587 gen_rtx_SET (VOIDmode, new_src,
2588 GEN_INT (temp1)));
2589 emit_constant_insn (cond,
2590 gen_ashrsi3 (target, new_src,
2591 GEN_INT (set_sign_bit_copies - 1)));
2592 }
2593 return 2;
2594 }
2595 /* For an inverted constant, we will need to set the low bits;
2596 these will be shifted out of harm's way. */
2597 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2598 if (const_ok_for_arm (~temp1))
2599 {
2600 if (generate)
2601 {
2602 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2603 emit_constant_insn (cond,
2604 gen_rtx_SET (VOIDmode, new_src,
2605 GEN_INT (temp1)));
2606 emit_constant_insn (cond,
2607 gen_ashrsi3 (target, new_src,
2608 GEN_INT (set_sign_bit_copies - 1)));
2609 }
2610 return 2;
2611 }
2612 }
2613
2614 /* See if we can calculate the value as the difference between two
2615 valid immediates. */
2616 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2617 {
2618 int topshift = clear_sign_bit_copies & ~1;
2619
2620 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2621 & (0xff000000 >> topshift));
2622
2623 /* If temp1 is zero, then that means the 9 most significant
2624 bits of remainder were 1 and we've caused it to overflow.
2625 When topshift is 0 we don't need to do anything since we
2626 can borrow from 'bit 32'. */
2627 if (temp1 == 0 && topshift != 0)
2628 temp1 = 0x80000000 >> (topshift - 1);
2629
2630 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2631
2632 if (const_ok_for_arm (temp2))
2633 {
2634 if (generate)
2635 {
2636 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2637 emit_constant_insn (cond,
2638 gen_rtx_SET (VOIDmode, new_src,
2639 GEN_INT (temp1)));
2640 emit_constant_insn (cond,
2641 gen_addsi3 (target, new_src,
2642 GEN_INT (-temp2)));
2643 }
2644
2645 return 2;
2646 }
2647 }
2648
2649 /* See if we can generate this by setting the bottom (or the top)
2650 16 bits, and then shifting these into the other half of the
2651 word. We only look for the simplest cases; to do more would cost
2652 too much. Be careful, however, not to generate this when the
2653 alternative would take fewer insns. */
2654 if (val & 0xffff0000)
2655 {
2656 temp1 = remainder & 0xffff0000;
2657 temp2 = remainder & 0x0000ffff;
2658
2659 /* Overlaps outside this range are best done using other methods. */
2660 for (i = 9; i < 24; i++)
2661 {
2662 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2663 && !const_ok_for_arm (temp2))
2664 {
2665 rtx new_src = (subtargets
2666 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2667 : target);
2668 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2669 source, subtargets, generate);
2670 source = new_src;
2671 if (generate)
2672 emit_constant_insn
2673 (cond,
2674 gen_rtx_SET
2675 (VOIDmode, target,
2676 gen_rtx_IOR (mode,
2677 gen_rtx_ASHIFT (mode, source,
2678 GEN_INT (i)),
2679 source)));
2680 return insns + 1;
2681 }
2682 }
2683
2684 /* Don't duplicate cases already considered. */
2685 for (i = 17; i < 24; i++)
2686 {
2687 if (((temp1 | (temp1 >> i)) == remainder)
2688 && !const_ok_for_arm (temp1))
2689 {
2690 rtx new_src = (subtargets
2691 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2692 : target);
2693 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2694 source, subtargets, generate);
2695 source = new_src;
2696 if (generate)
2697 emit_constant_insn
2698 (cond,
2699 gen_rtx_SET (VOIDmode, target,
2700 gen_rtx_IOR
2701 (mode,
2702 gen_rtx_LSHIFTRT (mode, source,
2703 GEN_INT (i)),
2704 source)));
2705 return insns + 1;
2706 }
2707 }
2708 }
2709 break;
2710
2711 case IOR:
2712 case XOR:
2713 /* If we have IOR or XOR, and the constant can be loaded in a
2714 single instruction, and we can find a temporary to put it in,
2715 then this can be done in two instructions instead of 3-4. */
2716 if (subtargets
2717 /* TARGET can't be NULL if SUBTARGETS is 0 */
2718 || (reload_completed && !reg_mentioned_p (target, source)))
2719 {
2720 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2721 {
2722 if (generate)
2723 {
2724 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2725
2726 emit_constant_insn (cond,
2727 gen_rtx_SET (VOIDmode, sub,
2728 GEN_INT (val)));
2729 emit_constant_insn (cond,
2730 gen_rtx_SET (VOIDmode, target,
2731 gen_rtx_fmt_ee (code, mode,
2732 source, sub)));
2733 }
2734 return 2;
2735 }
2736 }
2737
2738 if (code == XOR)
2739 break;
2740
2741 /* Convert
2742 x = y | constant (composed of set_sign_bit_copies leading 1s followed
2743 by 0s, e.g. 0xfff00000)
2744 into
2745 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
2746
2747 This can be done in 2 instructions by using shifts with mvn.  E.g. for
2748 x = x | 0xfff00000;
2749 we generate:
2750 mvn r0, r0, asl #12
2751 mvn r0, r0, lsr #12 */
2752 if (set_sign_bit_copies > 8
2753 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2754 {
2755 if (generate)
2756 {
2757 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2758 rtx shift = GEN_INT (set_sign_bit_copies);
2759
2760 emit_constant_insn
2761 (cond,
2762 gen_rtx_SET (VOIDmode, sub,
2763 gen_rtx_NOT (mode,
2764 gen_rtx_ASHIFT (mode,
2765 source,
2766 shift))));
2767 emit_constant_insn
2768 (cond,
2769 gen_rtx_SET (VOIDmode, target,
2770 gen_rtx_NOT (mode,
2771 gen_rtx_LSHIFTRT (mode, sub,
2772 shift))));
2773 }
2774 return 2;
2775 }
2776
2777 /* Convert
2778 x = y | constant (which has set_zero_bit_copies trailing ones)
2779 to
2780 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2781
2782 E.g. for r0 = r0 | 0xfff we generate:
2783 mvn r0, r0, lsr #12
2784 mvn r0, r0, asl #12
2785
2786 */
2787 if (set_zero_bit_copies > 8
2788 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2789 {
2790 if (generate)
2791 {
2792 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2793 rtx shift = GEN_INT (set_zero_bit_copies);
2794
2795 emit_constant_insn
2796 (cond,
2797 gen_rtx_SET (VOIDmode, sub,
2798 gen_rtx_NOT (mode,
2799 gen_rtx_LSHIFTRT (mode,
2800 source,
2801 shift))));
2802 emit_constant_insn
2803 (cond,
2804 gen_rtx_SET (VOIDmode, target,
2805 gen_rtx_NOT (mode,
2806 gen_rtx_ASHIFT (mode, sub,
2807 shift))));
2808 }
2809 return 2;
2810 }
2811
2812 /* This will never be reached for Thumb-2 because orn is a valid
2813 instruction. This is for Thumb-1 and the 32-bit ARM cases.
2814
2815 x = y | constant (such that ~constant is a valid constant)
2816 Transform this to
2817 x = ~(~y & ~constant).
2818 */
2819 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2820 {
2821 if (generate)
2822 {
2823 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2824 emit_constant_insn (cond,
2825 gen_rtx_SET (VOIDmode, sub,
2826 gen_rtx_NOT (mode, source)));
2827 source = sub;
2828 if (subtargets)
2829 sub = gen_reg_rtx (mode);
2830 emit_constant_insn (cond,
2831 gen_rtx_SET (VOIDmode, sub,
2832 gen_rtx_AND (mode, source,
2833 GEN_INT (temp1))));
2834 emit_constant_insn (cond,
2835 gen_rtx_SET (VOIDmode, target,
2836 gen_rtx_NOT (mode, sub)));
2837 }
2838 return 3;
2839 }
2840 break;
2841
2842 case AND:
2843 /* See if two shifts will do 2 or more insns' worth of work. */
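/* For example (ARM mode), x &= 0xffff has clear_sign_bit_copies == 16,
   and neither 0xffff nor ~0xffff is a valid immediate, so it is emitted
   as a left shift by 16 followed by a logical right shift by 16.  */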
2844 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2845 {
2846 HOST_WIDE_INT shift_mask = ((0xffffffff
2847 << (32 - clear_sign_bit_copies))
2848 & 0xffffffff);
2849
2850 if ((remainder | shift_mask) != 0xffffffff)
2851 {
2852 if (generate)
2853 {
2854 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2855 insns = arm_gen_constant (AND, mode, cond,
2856 remainder | shift_mask,
2857 new_src, source, subtargets, 1);
2858 source = new_src;
2859 }
2860 else
2861 {
2862 rtx targ = subtargets ? NULL_RTX : target;
2863 insns = arm_gen_constant (AND, mode, cond,
2864 remainder | shift_mask,
2865 targ, source, subtargets, 0);
2866 }
2867 }
2868
2869 if (generate)
2870 {
2871 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2872 rtx shift = GEN_INT (clear_sign_bit_copies);
2873
2874 emit_insn (gen_ashlsi3 (new_src, source, shift));
2875 emit_insn (gen_lshrsi3 (target, new_src, shift));
2876 }
2877
2878 return insns + 2;
2879 }
2880
2881 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2882 {
2883 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2884
2885 if ((remainder | shift_mask) != 0xffffffff)
2886 {
2887 if (generate)
2888 {
2889 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2890
2891 insns = arm_gen_constant (AND, mode, cond,
2892 remainder | shift_mask,
2893 new_src, source, subtargets, 1);
2894 source = new_src;
2895 }
2896 else
2897 {
2898 rtx targ = subtargets ? NULL_RTX : target;
2899
2900 insns = arm_gen_constant (AND, mode, cond,
2901 remainder | shift_mask,
2902 targ, source, subtargets, 0);
2903 }
2904 }
2905
2906 if (generate)
2907 {
2908 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2909 rtx shift = GEN_INT (clear_zero_bit_copies);
2910
2911 emit_insn (gen_lshrsi3 (new_src, source, shift));
2912 emit_insn (gen_ashlsi3 (target, new_src, shift));
2913 }
2914
2915 return insns + 2;
2916 }
2917
2918 break;
2919
2920 default:
2921 break;
2922 }
2923
2924 for (i = 0; i < 32; i++)
2925 if (remainder & (1 << i))
2926 num_bits_set++;
2927
2928 if ((code == AND) || (can_invert && num_bits_set > 16))
2929 remainder ^= 0xffffffff;
2930 else if (code == PLUS && num_bits_set > 16)
2931 remainder = (-remainder) & 0xffffffff;
2932
2933 /* For XOR, if more than half the bits are set and there's a sequence
2934 of more than 8 consecutive ones in the pattern then we can XOR by the
2935 inverted constant and then invert the final result; this may save an
2936 instruction and might also lead to the final mvn being merged with
2937 some other operation. */
2938 else if (code == XOR && num_bits_set > 16
2939 && (count_insns_for_constant (remainder ^ 0xffffffff,
2940 find_best_start
2941 (remainder ^ 0xffffffff))
2942 < count_insns_for_constant (remainder,
2943 find_best_start (remainder))))
2944 {
2945 remainder ^= 0xffffffff;
2946 final_invert = 1;
2947 }
2948 else
2949 {
2950 can_invert = 0;
2951 can_negate = 0;
2952 }
2953
2954 /* Now try to find a way of doing the job in either two or three
2955 instructions.
2956 We start by looking for the largest block of zeros that is aligned on
2957 a 2-bit boundary; we then fill up the temps, wrapping around to the
2958 top of the word when we drop off the bottom.
2959 In the worst case this code should produce no more than four insns.
2960 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2961 best place to start. */
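/* For example, a SET of 0x12345678 hits the worst case: none of the
   two- or three-insn shortcuts above apply, so the loop below emits it
   as four insns, one 8-bit chunk at a time (a MOV followed by three
   further insns once CODE has been switched to PLUS).  */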
2962
2963 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2964 the same. */
2965 {
2966 /* Now start emitting the insns. */
2967 i = find_best_start (remainder);
2968 do
2969 {
2970 int end;
2971
2972 if (i <= 0)
2973 i += 32;
2974 if (remainder & (3 << (i - 2)))
2975 {
2976 end = i - 8;
2977 if (end < 0)
2978 end += 32;
2979 temp1 = remainder & ((0x0ff << end)
2980 | ((i < end) ? (0xff >> (32 - end)) : 0));
2981 remainder &= ~temp1;
2982
2983 if (generate)
2984 {
2985 rtx new_src, temp1_rtx;
2986
2987 if (code == SET || code == MINUS)
2988 {
2989 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2990 if (can_invert && code != MINUS)
2991 temp1 = ~temp1;
2992 }
2993 else
2994 {
2995 if ((final_invert || remainder) && subtargets)
2996 new_src = gen_reg_rtx (mode);
2997 else
2998 new_src = target;
2999 if (can_invert)
3000 temp1 = ~temp1;
3001 else if (can_negate)
3002 temp1 = -temp1;
3003 }
3004
3005 temp1 = trunc_int_for_mode (temp1, mode);
3006 temp1_rtx = GEN_INT (temp1);
3007
3008 if (code == SET)
3009 ;
3010 else if (code == MINUS)
3011 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3012 else
3013 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3014
3015 emit_constant_insn (cond,
3016 gen_rtx_SET (VOIDmode, new_src,
3017 temp1_rtx));
3018 source = new_src;
3019 }
3020
3021 if (code == SET)
3022 {
3023 can_invert = 0;
3024 code = PLUS;
3025 }
3026 else if (code == MINUS)
3027 code = PLUS;
3028
3029 insns++;
3030 i -= 8 - step_size;
3031 }
3032 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3033 shifts. */
3034 i -= step_size;
3035 }
3036 while (remainder);
3037 }
3038
3039 if (final_invert)
3040 {
3041 if (generate)
3042 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3043 gen_rtx_NOT (mode, source)));
3044 insns++;
3045 }
3046
3047 return insns;
3048 }
3049
3050 /* Canonicalize a comparison so that we are more likely to recognize it.
3051 This can be done for a few constant compares, where we can make the
3052 immediate value easier to load. */
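/* For example, in a 32-bit comparison (GT x 0x3ff) is rewritten as
   (GE x 0x400): 0x3ff is not a valid immediate, but 0x400 is.  */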
3053
3054 enum rtx_code
3055 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3056 {
3057 enum machine_mode mode;
3058 unsigned HOST_WIDE_INT i, maxval;
3059
3060 mode = GET_MODE (*op0);
3061 if (mode == VOIDmode)
3062 mode = GET_MODE (*op1);
3063
3064 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3065
3066 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3067 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3068 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3069 for GTU/LEU in Thumb mode. */
3070 if (mode == DImode)
3071 {
3072 rtx tem;
3073
3074 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3075 available. */
3076 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3077 return code;
3078
3079 if (code == GT || code == LE
3080 || (!TARGET_ARM && (code == GTU || code == LEU)))
3081 {
3082 /* Missing comparison. First try to use an available
3083 comparison. */
3084 if (GET_CODE (*op1) == CONST_INT)
3085 {
3086 i = INTVAL (*op1);
3087 switch (code)
3088 {
3089 case GT:
3090 case LE:
3091 if (i != maxval
3092 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3093 {
3094 *op1 = GEN_INT (i + 1);
3095 return code == GT ? GE : LT;
3096 }
3097 break;
3098 case GTU:
3099 case LEU:
3100 if (i != ~((unsigned HOST_WIDE_INT) 0)
3101 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3102 {
3103 *op1 = GEN_INT (i + 1);
3104 return code == GTU ? GEU : LTU;
3105 }
3106 break;
3107 default:
3108 gcc_unreachable ();
3109 }
3110 }
3111
3112 /* If that did not work, reverse the condition. */
3113 tem = *op0;
3114 *op0 = *op1;
3115 *op1 = tem;
3116 return swap_condition (code);
3117 }
3118
3119 return code;
3120 }
3121
3122 /* Comparisons smaller than DImode. Only adjust comparisons against
3123 an out-of-range constant. */
3124 if (GET_CODE (*op1) != CONST_INT
3125 || const_ok_for_arm (INTVAL (*op1))
3126 || const_ok_for_arm (- INTVAL (*op1)))
3127 return code;
3128
3129 i = INTVAL (*op1);
3130
3131 switch (code)
3132 {
3133 case EQ:
3134 case NE:
3135 return code;
3136
3137 case GT:
3138 case LE:
3139 if (i != maxval
3140 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3141 {
3142 *op1 = GEN_INT (i + 1);
3143 return code == GT ? GE : LT;
3144 }
3145 break;
3146
3147 case GE:
3148 case LT:
3149 if (i != ~maxval
3150 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3151 {
3152 *op1 = GEN_INT (i - 1);
3153 return code == GE ? GT : LE;
3154 }
3155 break;
3156
3157 case GTU:
3158 case LEU:
3159 if (i != ~((unsigned HOST_WIDE_INT) 0)
3160 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3161 {
3162 *op1 = GEN_INT (i + 1);
3163 return code == GTU ? GEU : LTU;
3164 }
3165 break;
3166
3167 case GEU:
3168 case LTU:
3169 if (i != 0
3170 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3171 {
3172 *op1 = GEN_INT (i - 1);
3173 return code == GEU ? GTU : LEU;
3174 }
3175 break;
3176
3177 default:
3178 gcc_unreachable ();
3179 }
3180
3181 return code;
3182 }
3183
3184
3185 /* Define how to find the value returned by a function. */
3186
3187 static rtx
3188 arm_function_value (const_tree type, const_tree func,
3189 bool outgoing ATTRIBUTE_UNUSED)
3190 {
3191 enum machine_mode mode;
3192 int unsignedp ATTRIBUTE_UNUSED;
3193 rtx r ATTRIBUTE_UNUSED;
3194
3195 mode = TYPE_MODE (type);
3196
3197 if (TARGET_AAPCS_BASED)
3198 return aapcs_allocate_return_reg (mode, type, func);
3199
3200 /* Promote integer types. */
3201 if (INTEGRAL_TYPE_P (type))
3202 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3203
3204 /* Promote small structs returned in a register to full-word size
3205 for big-endian AAPCS. */
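/* For example, a 6-byte structure is rounded up to 8 bytes and returned
   in DImode, so that the value occupies the most significant end of the
   register pair.  */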
3206 if (arm_return_in_msb (type))
3207 {
3208 HOST_WIDE_INT size = int_size_in_bytes (type);
3209 if (size % UNITS_PER_WORD != 0)
3210 {
3211 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3212 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3213 }
3214 }
3215
3216 return LIBCALL_VALUE (mode);
3217 }
3218
3219 static int
3220 libcall_eq (const void *p1, const void *p2)
3221 {
3222 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3223 }
3224
3225 static hashval_t
3226 libcall_hash (const void *p1)
3227 {
3228 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3229 }
3230
3231 static void
3232 add_libcall (htab_t htab, rtx libcall)
3233 {
3234 *htab_find_slot (htab, libcall, INSERT) = libcall;
3235 }
3236
3237 static bool
3238 arm_libcall_uses_aapcs_base (const_rtx libcall)
3239 {
3240 static bool init_done = false;
3241 static htab_t libcall_htab;
3242
3243 if (!init_done)
3244 {
3245 init_done = true;
3246
3247 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3248 NULL);
3249 add_libcall (libcall_htab,
3250 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3251 add_libcall (libcall_htab,
3252 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3253 add_libcall (libcall_htab,
3254 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3255 add_libcall (libcall_htab,
3256 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3257
3258 add_libcall (libcall_htab,
3259 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3260 add_libcall (libcall_htab,
3261 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3262 add_libcall (libcall_htab,
3263 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3264 add_libcall (libcall_htab,
3265 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3266
3267 add_libcall (libcall_htab,
3268 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3269 add_libcall (libcall_htab,
3270 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3271 add_libcall (libcall_htab,
3272 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3273 add_libcall (libcall_htab,
3274 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3275 add_libcall (libcall_htab,
3276 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3277 add_libcall (libcall_htab,
3278 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3279 }
3280
3281 return libcall && htab_find (libcall_htab, libcall) != NULL;
3282 }
3283
3284 rtx
3285 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3286 {
3287 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3288 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3289 {
3290 /* The following libcalls return their result in integer registers,
3291 even though they return a floating point value. */
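/* For example, the int-to-float conversion helper (__aeabi_i2f on EABI
   targets) is in the table above, so its SFmode result is taken from
   r0 rather than from a VFP register.  */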
3292 if (arm_libcall_uses_aapcs_base (libcall))
3293 return gen_rtx_REG (mode, ARG_REGISTER(1));
3294
3295 }
3296
3297 return LIBCALL_VALUE (mode);
3298 }
3299
3300 /* Determine the amount of memory needed to store the possible return
3301 registers of an untyped call. */
3302 int
3303 arm_apply_result_size (void)
3304 {
3305 int size = 16;
3306
3307 if (TARGET_32BIT)
3308 {
3309 if (TARGET_HARD_FLOAT_ABI)
3310 {
3311 if (TARGET_VFP)
3312 size += 32;
3313 if (TARGET_FPA)
3314 size += 12;
3315 if (TARGET_MAVERICK)
3316 size += 8;
3317 }
3318 if (TARGET_IWMMXT_ABI)
3319 size += 8;
3320 }
3321
3322 return size;
3323 }
3324
3325 /* Decide whether TYPE should be returned in memory (true)
3326 or in a register (false). FNTYPE is the type of the function making
3327 the call. */
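/* For example, under AAPCS a struct containing a single int is returned
   in r0, while a 12-byte struct of three ints (too large for r0 and not
   a co-processor candidate) is returned in memory.  */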
3328 static bool
3329 arm_return_in_memory (const_tree type, const_tree fntype)
3330 {
3331 HOST_WIDE_INT size;
3332
3333 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3334
3335 if (TARGET_AAPCS_BASED)
3336 {
3337 /* Simple, non-aggregate types (i.e. not including vectors and
3338 complex) are always returned in a register (or registers).
3339 We don't care about which register here, so we can short-cut
3340 some of the detail. */
3341 if (!AGGREGATE_TYPE_P (type)
3342 && TREE_CODE (type) != VECTOR_TYPE
3343 && TREE_CODE (type) != COMPLEX_TYPE)
3344 return false;
3345
3346 /* Any return value that is no larger than one word can be
3347 returned in r0. */
3348 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3349 return false;
3350
3351 /* Check any available co-processors to see if they accept the
3352 type as a register candidate (VFP, for example, can return
3353 some aggregates in consecutive registers). These aren't
3354 available if the call is variadic. */
3355 if (aapcs_select_return_coproc (type, fntype) >= 0)
3356 return false;
3357
3358 /* Vector values should be returned using ARM registers, not
3359 memory (unless they're over 16 bytes, which will break since
3360 we only have four call-clobbered registers to play with). */
3361 if (TREE_CODE (type) == VECTOR_TYPE)
3362 return (size < 0 || size > (4 * UNITS_PER_WORD));
3363
3364 /* The rest go in memory. */
3365 return true;
3366 }
3367
3368 if (TREE_CODE (type) == VECTOR_TYPE)
3369 return (size < 0 || size > (4 * UNITS_PER_WORD));
3370
3371 if (!AGGREGATE_TYPE_P (type)
3372 && (TREE_CODE (type) != VECTOR_TYPE))
3373 /* All simple types are returned in registers. */
3374 return false;
3375
3376 if (arm_abi != ARM_ABI_APCS)
3377 {
3378 /* ATPCS and later return aggregate types in memory only if they are
3379 larger than a word (or are variable size). */
3380 return (size < 0 || size > UNITS_PER_WORD);
3381 }
3382
3383 /* For the arm-wince targets we choose to be compatible with Microsoft's
3384 ARM and Thumb compilers, which always return aggregates in memory. */
3385 #ifndef ARM_WINCE
3386 /* All structures/unions bigger than one word are returned in memory.
3387 Also catch the case where int_size_in_bytes returns -1. In this case
3388 the aggregate is either huge or of variable size, and in either case
3389 we will want to return it via memory and not in a register. */
3390 if (size < 0 || size > UNITS_PER_WORD)
3391 return true;
3392
3393 if (TREE_CODE (type) == RECORD_TYPE)
3394 {
3395 tree field;
3396
3397 /* For a struct the APCS says that we only return in a register
3398 if the type is 'integer like' and every addressable element
3399 has an offset of zero. For practical purposes this means
3400 that the structure can have at most one non bit-field element
3401 and that this element must be the first one in the structure. */
3402
3403 /* Find the first field, ignoring non FIELD_DECL things which will
3404 have been created by C++. */
3405 for (field = TYPE_FIELDS (type);
3406 field && TREE_CODE (field) != FIELD_DECL;
3407 field = DECL_CHAIN (field))
3408 continue;
3409
3410 if (field == NULL)
3411 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3412
3413 /* Check that the first field is valid for returning in a register. */
3414
3415 /* ... Floats are not allowed */
3416 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3417 return true;
3418
3419 /* ... Aggregates that are not themselves valid for returning in
3420 a register are not allowed. */
3421 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3422 return true;
3423
3424 /* Now check the remaining fields, if any. Only bitfields are allowed,
3425 since they are not addressable. */
3426 for (field = DECL_CHAIN (field);
3427 field;
3428 field = DECL_CHAIN (field))
3429 {
3430 if (TREE_CODE (field) != FIELD_DECL)
3431 continue;
3432
3433 if (!DECL_BIT_FIELD_TYPE (field))
3434 return true;
3435 }
3436
3437 return false;
3438 }
3439
3440 if (TREE_CODE (type) == UNION_TYPE)
3441 {
3442 tree field;
3443
3444 /* Unions can be returned in registers if every element is
3445 integral, or can be returned in an integer register. */
3446 for (field = TYPE_FIELDS (type);
3447 field;
3448 field = DECL_CHAIN (field))
3449 {
3450 if (TREE_CODE (field) != FIELD_DECL)
3451 continue;
3452
3453 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3454 return true;
3455
3456 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3457 return true;
3458 }
3459
3460 return false;
3461 }
3462 #endif /* not ARM_WINCE */
3463
3464 /* Return all other types in memory. */
3465 return true;
3466 }
3467
3468 /* Indicate whether or not words of a double are in big-endian order. */
3469
3470 int
3471 arm_float_words_big_endian (void)
3472 {
3473 if (TARGET_MAVERICK)
3474 return 0;
3475
3476 /* For FPA, float words are always big-endian. For VFP, float words
3477 follow the memory system mode. */
3478
3479 if (TARGET_FPA)
3480 {
3481 return 1;
3482 }
3483
3484 if (TARGET_VFP)
3485 return (TARGET_BIG_END ? 1 : 0);
3486
3487 return 1;
3488 }
3489
3490 const struct pcs_attribute_arg
3491 {
3492 const char *arg;
3493 enum arm_pcs value;
3494 } pcs_attribute_args[] =
3495 {
3496 {"aapcs", ARM_PCS_AAPCS},
3497 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3498 #if 0
3499 /* We could recognize these, but changes would be needed elsewhere
3500 to implement them. */
3501 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3502 {"atpcs", ARM_PCS_ATPCS},
3503 {"apcs", ARM_PCS_APCS},
3504 #endif
3505 {NULL, ARM_PCS_UNKNOWN}
3506 };
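
/* For example, a specific function can be given the VFP variant of the
   AAPCS with the "pcs" type attribute (illustrative user code, not part
   of this file):

     double dot (double, double) __attribute__ ((pcs ("aapcs-vfp")));

   arm_pcs_from_attribute below maps the attribute's string argument
   onto the enumeration values above.  */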
3507
3508 static enum arm_pcs
3509 arm_pcs_from_attribute (tree attr)
3510 {
3511 const struct pcs_attribute_arg *ptr;
3512 const char *arg;
3513
3514 /* Get the value of the argument. */
3515 if (TREE_VALUE (attr) == NULL_TREE
3516 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3517 return ARM_PCS_UNKNOWN;
3518
3519 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3520
3521 /* Check it against the list of known arguments. */
3522 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
3523 if (streq (arg, ptr->arg))
3524 return ptr->value;
3525
3526 /* An unrecognized PCS variant. */
3527 return ARM_PCS_UNKNOWN;
3528 }
3529
3530 /* Get the PCS variant to use for this call. TYPE is the function's type
3531 specification, DECL is the specific declaration. DECL may be null if
3532 the call could be indirect or if this is a library call. */
3533 static enum arm_pcs
3534 arm_get_pcs_model (const_tree type, const_tree decl)
3535 {
3536 bool user_convention = false;
3537 enum arm_pcs user_pcs = arm_pcs_default;
3538 tree attr;
3539
3540 gcc_assert (type);
3541
3542 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
3543 if (attr)
3544 {
3545 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
3546 user_convention = true;
3547 }
3548
3549 if (TARGET_AAPCS_BASED)
3550 {
3551 /* Detect varargs functions. These always use the base rules
3552 (no argument is ever a candidate for a co-processor
3553 register). */
3554 bool base_rules = stdarg_p (type);
3555
3556 if (user_convention)
3557 {
3558 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3559 sorry ("non-AAPCS derived PCS variant");
3560 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3561 error ("variadic functions must use the base AAPCS variant");
3562 }
3563
3564 if (base_rules)
3565 return ARM_PCS_AAPCS;
3566 else if (user_convention)
3567 return user_pcs;
3568 else if (decl && flag_unit_at_a_time)
3569 {
3570 /* Local functions never leak outside this compilation unit,
3571 so we are free to use whatever conventions are
3572 appropriate. */
3573 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3574 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3575 if (i && i->local)
3576 return ARM_PCS_AAPCS_LOCAL;
3577 }
3578 }
3579 else if (user_convention && user_pcs != arm_pcs_default)
3580 sorry ("PCS variant");
3581
3582 /* For everything else we use the target's default. */
3583 return arm_pcs_default;
3584 }
3585
3586
3587 static void
3588 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3589 const_tree fntype ATTRIBUTE_UNUSED,
3590 rtx libcall ATTRIBUTE_UNUSED,
3591 const_tree fndecl ATTRIBUTE_UNUSED)
3592 {
3593 /* Record the unallocated VFP registers. */
3594 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
3595 pcum->aapcs_vfp_reg_alloc = 0;
3596 }
3597
3598 /* Walk down the type tree of TYPE counting consecutive base elements.
3599 If *MODEP is VOIDmode, then set it to the first valid floating point
3600 type. If a non-floating point type is found, or if a floating point
3601 type that doesn't match a non-VOIDmode *MODEP is found, then return -1;
3602 otherwise return the count in the sub-tree. */
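/* A few illustrative cases (assuming SFmode floats and DFmode doubles):

     struct hfa { float a, b, c; };       count 3, *MODEP == SFmode
     struct hda { double d[2]; };         count 2, *MODEP == DFmode
     struct mix { float a; double b; };   -1 (mixed base modes)

   Counts larger than four are returned as-is; the four-element limit of
   the VFP rules is enforced by the caller,
   aapcs_vfp_is_call_or_return_candidate.  */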
3603 static int
3604 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
3605 {
3606 enum machine_mode mode;
3607 HOST_WIDE_INT size;
3608
3609 switch (TREE_CODE (type))
3610 {
3611 case REAL_TYPE:
3612 mode = TYPE_MODE (type);
3613 if (mode != DFmode && mode != SFmode)
3614 return -1;
3615
3616 if (*modep == VOIDmode)
3617 *modep = mode;
3618
3619 if (*modep == mode)
3620 return 1;
3621
3622 break;
3623
3624 case COMPLEX_TYPE:
3625 mode = TYPE_MODE (TREE_TYPE (type));
3626 if (mode != DFmode && mode != SFmode)
3627 return -1;
3628
3629 if (*modep == VOIDmode)
3630 *modep = mode;
3631
3632 if (*modep == mode)
3633 return 2;
3634
3635 break;
3636
3637 case VECTOR_TYPE:
3638 /* Use V2SImode and V4SImode as representatives of all 64-bit
3639 and 128-bit vector types, whether or not those modes are
3640 supported with the present options. */
3641 size = int_size_in_bytes (type);
3642 switch (size)
3643 {
3644 case 8:
3645 mode = V2SImode;
3646 break;
3647 case 16:
3648 mode = V4SImode;
3649 break;
3650 default:
3651 return -1;
3652 }
3653
3654 if (*modep == VOIDmode)
3655 *modep = mode;
3656
3657 /* Vector modes are considered to be opaque: two vectors are
3658 equivalent for the purposes of being homogeneous aggregates
3659 if they are the same size. */
3660 if (*modep == mode)
3661 return 1;
3662
3663 break;
3664
3665 case ARRAY_TYPE:
3666 {
3667 int count;
3668 tree index = TYPE_DOMAIN (type);
3669
3670 /* Can't handle incomplete types. */
3671 if (!COMPLETE_TYPE_P(type))
3672 return -1;
3673
3674 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
3675 if (count == -1
3676 || !index
3677 || !TYPE_MAX_VALUE (index)
3678 || !host_integerp (TYPE_MAX_VALUE (index), 1)
3679 || !TYPE_MIN_VALUE (index)
3680 || !host_integerp (TYPE_MIN_VALUE (index), 1)
3681 || count < 0)
3682 return -1;
3683
3684 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
3685 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
3686
3687 /* There must be no padding. */
3688 if (!host_integerp (TYPE_SIZE (type), 1)
3689 || (tree_low_cst (TYPE_SIZE (type), 1)
3690 != count * GET_MODE_BITSIZE (*modep)))
3691 return -1;
3692
3693 return count;
3694 }
3695
3696 case RECORD_TYPE:
3697 {
3698 int count = 0;
3699 int sub_count;
3700 tree field;
3701
3702 /* Can't handle incomplete types. */
3703 if (!COMPLETE_TYPE_P(type))
3704 return -1;
3705
3706 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3707 {
3708 if (TREE_CODE (field) != FIELD_DECL)
3709 continue;
3710
3711 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3712 if (sub_count < 0)
3713 return -1;
3714 count += sub_count;
3715 }
3716
3717 /* There must be no padding. */
3718 if (!host_integerp (TYPE_SIZE (type), 1)
3719 || (tree_low_cst (TYPE_SIZE (type), 1)
3720 != count * GET_MODE_BITSIZE (*modep)))
3721 return -1;
3722
3723 return count;
3724 }
3725
3726 case UNION_TYPE:
3727 case QUAL_UNION_TYPE:
3728 {
3729 /* These aren't very interesting except in a degenerate case. */
3730 int count = 0;
3731 int sub_count;
3732 tree field;
3733
3734 /* Can't handle incomplete types. */
3735 if (!COMPLETE_TYPE_P(type))
3736 return -1;
3737
3738 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3739 {
3740 if (TREE_CODE (field) != FIELD_DECL)
3741 continue;
3742
3743 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3744 if (sub_count < 0)
3745 return -1;
3746 count = count > sub_count ? count : sub_count;
3747 }
3748
3749 /* There must be no padding. */
3750 if (!host_integerp (TYPE_SIZE (type), 1)
3751 || (tree_low_cst (TYPE_SIZE (type), 1)
3752 != count * GET_MODE_BITSIZE (*modep)))
3753 return -1;
3754
3755 return count;
3756 }
3757
3758 default:
3759 break;
3760 }
3761
3762 return -1;
3763 }
3764
3765 /* Return true if PCS_VARIANT should use VFP registers. */
3766 static bool
3767 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3768 {
3769 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3770 {
3771 static bool seen_thumb1_vfp = false;
3772
3773 if (TARGET_THUMB1 && !seen_thumb1_vfp)
3774 {
3775 sorry ("Thumb-1 hard-float VFP ABI");
3776 /* sorry() is not immediately fatal, so only display this once. */
3777 seen_thumb1_vfp = true;
3778 }
3779
3780 return true;
3781 }
3782
3783 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3784 return false;
3785
3786 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
3787 && (TARGET_VFP_DOUBLE || !is_double));
3788 }
3789
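/* Return true if an argument of mode MODE (or type TYPE, if MODE is
   BLKmode) may be passed or returned in VFP registers under
   PCS_VARIANT.  On success set *BASE_MODE to the mode of one element
   and *COUNT to the number of such elements that are needed.  */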
3790 static bool
3791 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
3792 enum machine_mode mode, const_tree type,
3793 enum machine_mode *base_mode, int *count)
3794 {
3795 enum machine_mode new_mode = VOIDmode;
3796
3797 if (GET_MODE_CLASS (mode) == MODE_FLOAT
3798 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3799 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3800 {
3801 *count = 1;
3802 new_mode = mode;
3803 }
3804 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
3805 {
3806 *count = 2;
3807 new_mode = (mode == DCmode ? DFmode : SFmode);
3808 }
3809 else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
3810 {
3811 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
3812
3813 if (ag_count > 0 && ag_count <= 4)
3814 *count = ag_count;
3815 else
3816 return false;
3817 }
3818 else
3819 return false;
3820
3821
3822 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
3823 return false;
3824
3825 *base_mode = new_mode;
3826 return true;
3827 }
3828
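/* Return true if a value of mode MODE (or type TYPE, if MODE is
   BLKmode) can be returned in VFP registers under PCS_VARIANT.  */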
3829 static bool
3830 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
3831 enum machine_mode mode, const_tree type)
3832 {
3833 int count ATTRIBUTE_UNUSED;
3834 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
3835
3836 if (!use_vfp_abi (pcs_variant, false))
3837 return false;
3838 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3839 &ag_mode, &count);
3840 }
3841
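/* Return true if the argument described by MODE and TYPE is a VFP
   candidate for the call described by PCUM; if so, record the element
   mode and count in PCUM for use by aapcs_vfp_allocate.  */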
3842 static bool
3843 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3844 const_tree type)
3845 {
3846 if (!use_vfp_abi (pcum->pcs_variant, false))
3847 return false;
3848
3849 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
3850 &pcum->aapcs_vfp_rmode,
3851 &pcum->aapcs_vfp_rcount);
3852 }
3853
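/* Try to allocate the argument described in PCUM to consecutive free
   VFP registers.  The local variable shift is the size of one element
   measured in S registers and mask covers every S register the whole
   argument needs, so the search advances in steps of shift.  As an
   illustration, a homogeneous aggregate of two doubles gives shift == 2
   and mask == 0xf and so can only start at s0, s2, s4, and so on.
   Return true and set aapcs_reg on success.  */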
3854 static bool
3855 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
3856 const_tree type ATTRIBUTE_UNUSED)
3857 {
3858 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
3859 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
3860 int regno;
3861
3862 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
3863 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
3864 {
3865 pcum->aapcs_vfp_reg_alloc = mask << regno;
3866 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3867 {
3868 int i;
3869 int rcount = pcum->aapcs_vfp_rcount;
3870 int rshift = shift;
3871 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
3872 rtx par;
3873 if (!TARGET_NEON)
3874 {
3875 /* Avoid using unsupported vector modes. */
3876 if (rmode == V2SImode)
3877 rmode = DImode;
3878 else if (rmode == V4SImode)
3879 {
3880 rmode = DImode;
3881 rcount *= 2;
3882 rshift /= 2;
3883 }
3884 }
3885 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
3886 for (i = 0; i < rcount; i++)
3887 {
3888 rtx tmp = gen_rtx_REG (rmode,
3889 FIRST_VFP_REGNUM + regno + i * rshift);
3890 tmp = gen_rtx_EXPR_LIST
3891 (VOIDmode, tmp,
3892 GEN_INT (i * GET_MODE_SIZE (rmode)));
3893 XVECEXP (par, 0, i) = tmp;
3894 }
3895
3896 pcum->aapcs_reg = par;
3897 }
3898 else
3899 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
3900 return true;
3901 }
3902 return false;
3903 }
3904
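/* Allocate the RTX for a value returned in VFP registers.  A single
   hard register is used where possible; for BLKmode results (and for
   TImode when NEON is unavailable) a PARALLEL of element registers is
   built instead.  */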
3905 static rtx
3906 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
3907 enum machine_mode mode,
3908 const_tree type ATTRIBUTE_UNUSED)
3909 {
3910 if (!use_vfp_abi (pcs_variant, false))
3911 return NULL_RTX;
3912
3913 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
3914 {
3915 int count;
3916 enum machine_mode ag_mode;
3917 int i;
3918 rtx par;
3919 int shift;
3920
3921 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
3922 &ag_mode, &count);
3923
3924 if (!TARGET_NEON)
3925 {
3926 if (ag_mode == V2SImode)
3927 ag_mode = DImode;
3928 else if (ag_mode == V4SImode)
3929 {
3930 ag_mode = DImode;
3931 count *= 2;
3932 }
3933 }
3934 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
3935 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
3936 for (i = 0; i < count; i++)
3937 {
3938 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
3939 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
3940 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
3941 XVECEXP (par, 0, i) = tmp;
3942 }
3943
3944 return par;
3945 }
3946
3947 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
3948 }
3949
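/* Mark the VFP registers allocated to the current argument as no longer
   free and clear the pending allocation, ready for the next argument.  */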
3950 static void
3951 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
3952 enum machine_mode mode ATTRIBUTE_UNUSED,
3953 const_tree type ATTRIBUTE_UNUSED)
3954 {
3955 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
3956 pcum->aapcs_vfp_reg_alloc = 0;
3957 return;
3958 }
3959
3960 #define AAPCS_CP(X) \
3961 { \
3962 aapcs_ ## X ## _cum_init, \
3963 aapcs_ ## X ## _is_call_candidate, \
3964 aapcs_ ## X ## _allocate, \
3965 aapcs_ ## X ## _is_return_candidate, \
3966 aapcs_ ## X ## _allocate_return_reg, \
3967 aapcs_ ## X ## _advance \
3968 }
3969
3970 /* Table of co-processors that can be used to pass arguments in
3971 registers. Ideally no argument should be a candidate for more than
3972 one co-processor table entry, but the table is processed in order
3973 and stops after the first match. If that entry then fails to put
3974 the argument into a co-processor register, the argument will go on
3975 the stack. */
3976 static struct
3977 {
3978 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
3979 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
3980
3981 /* Return true if an argument of mode MODE (or type TYPE if MODE is
3982 BLKmode) is a candidate for this co-processor's registers; this
3983 function should ignore any position-dependent state in
3984 CUMULATIVE_ARGS and only use call-type dependent information. */
3985 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
3986
3987 /* Return true if the argument does get a co-processor register; it
3988 should set aapcs_reg to an RTX of the register allocated as is
3989 required for a return from FUNCTION_ARG. */
3990 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
3991
3992 /* Return true if a result of mode MODE (or type TYPE if MODE is
3993 BLKmode) can be returned in this co-processor's registers. */
3994 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
3995
3996 /* Allocate and return an RTX element to hold the return type of a
3997 call; this routine must not fail and will only be called if
3998 is_return_candidate returned true with the same parameters. */
3999 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4000
4001 /* Finish processing this argument and prepare to start processing
4002 the next one. */
4003 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4004 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4005 {
4006 AAPCS_CP(vfp)
4007 };
4008
4009 #undef AAPCS_CP
4010
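/* Return the index of the co-processor slot (if any) that should be used
   to pass an argument of mode MODE and type TYPE, or -1 if the argument
   must follow the core-register rules.  */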
4011 static int
4012 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4013 const_tree type)
4014 {
4015 int i;
4016
4017 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4018 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4019 return i;
4020
4021 return -1;
4022 }
4023
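/* Return the index of the co-processor slot (if any) that can return a
   value of type TYPE from a function of type FNTYPE, or -1 if the core
   registers (or memory) must be used instead.  */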
4024 static int
4025 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4026 {
4027 /* We aren't passed a decl, so we can't check that a call is local.
4028 However, it isn't clear that that would be a win anyway, since it
4029 might limit some tail-calling opportunities. */
4030 enum arm_pcs pcs_variant;
4031
4032 if (fntype)
4033 {
4034 const_tree fndecl = NULL_TREE;
4035
4036 if (TREE_CODE (fntype) == FUNCTION_DECL)
4037 {
4038 fndecl = fntype;
4039 fntype = TREE_TYPE (fntype);
4040 }
4041
4042 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4043 }
4044 else
4045 pcs_variant = arm_pcs_default;
4046
4047 if (pcs_variant != ARM_PCS_AAPCS)
4048 {
4049 int i;
4050
4051 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4052 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4053 TYPE_MODE (type),
4054 type))
4055 return i;
4056 }
4057 return -1;
4058 }
4059
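/* Allocate the RTX used to return a value of mode MODE and type TYPE
   from a function of type FNTYPE, taking co-processor return slots,
   integer promotion and the big-endian padding of small aggregates into
   account.  */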
4060 static rtx
4061 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4062 const_tree fntype)
4063 {
4064 /* We aren't passed a decl, so we can't check that a call is local.
4065 However, it isn't clear that that would be a win anyway, since it
4066 might limit some tail-calling opportunities. */
4067 enum arm_pcs pcs_variant;
4068 int unsignedp ATTRIBUTE_UNUSED;
4069
4070 if (fntype)
4071 {
4072 const_tree fndecl = NULL_TREE;
4073
4074 if (TREE_CODE (fntype) == FUNCTION_DECL)
4075 {
4076 fndecl = fntype;
4077 fntype = TREE_TYPE (fntype);
4078 }
4079
4080 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4081 }
4082 else
4083 pcs_variant = arm_pcs_default;
4084
4085 /* Promote integer types. */
4086 if (type && INTEGRAL_TYPE_P (type))
4087 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4088
4089 if (pcs_variant != ARM_PCS_AAPCS)
4090 {
4091 int i;
4092
4093 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4094 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4095 type))
4096 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4097 mode, type);
4098 }
4099
4100 /* Promotes small structs returned in a register to full-word size
4101 for big-endian AAPCS. */
4102 if (type && arm_return_in_msb (type))
4103 {
4104 HOST_WIDE_INT size = int_size_in_bytes (type);
4105 if (size % UNITS_PER_WORD != 0)
4106 {
4107 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4108 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4109 }
4110 }
4111
4112 return gen_rtx_REG (mode, R0_REGNUM);
4113 }
4114
4115 rtx
4116 aapcs_libcall_value (enum machine_mode mode)
4117 {
4118 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4119 }
4120
4121 /* Lay out a function argument using the AAPCS rules. The rule
4122 numbers referred to here are those in the AAPCS. */
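/* As an illustrative example of the core-register rules below, consider
   passing (int, double): the int is allocated to r0 (C4); the double
   needs doubleword alignment, so the NCRN is rounded up from 1 to 2
   (C3) and the double occupies r2-r3 (C4); any further argument then
   goes on the stack (C6-C8).  */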
4123 static void
4124 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4125 const_tree type, bool named)
4126 {
4127 int nregs, nregs2;
4128 int ncrn;
4129
4130 /* We only need to do this once per argument. */
4131 if (pcum->aapcs_arg_processed)
4132 return;
4133
4134 pcum->aapcs_arg_processed = true;
4135
4136 /* Special case: if named is false then we are handling an incoming
4137 anonymous argument which is on the stack. */
4138 if (!named)
4139 return;
4140
4141 /* Is this a potential co-processor register candidate? */
4142 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4143 {
4144 int slot = aapcs_select_call_coproc (pcum, mode, type);
4145 pcum->aapcs_cprc_slot = slot;
4146
4147 /* We don't have to apply any of the rules from part B of the
4148 preparation phase; these are handled elsewhere in the
4149 compiler. */
4150
4151 if (slot >= 0)
4152 {
4153 /* A Co-processor register candidate goes either in its own
4154 class of registers or on the stack. */
4155 if (!pcum->aapcs_cprc_failed[slot])
4156 {
4157 /* C1.cp - Try to allocate the argument to co-processor
4158 registers. */
4159 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4160 return;
4161
4162 /* C2.cp - Put the argument on the stack and note that we
4163 can't assign any more candidates in this slot. We also
4164 need to note that we have allocated stack space, so that
4165 we won't later try to split a non-cprc candidate between
4166 core registers and the stack. */
4167 pcum->aapcs_cprc_failed[slot] = true;
4168 pcum->can_split = false;
4169 }
4170
4171 /* We didn't get a register, so this argument goes on the
4172 stack. */
4173 gcc_assert (pcum->can_split == false);
4174 return;
4175 }
4176 }
4177
4178 /* C3 - For double-word aligned arguments, round the NCRN up to the
4179 next even number. */
4180 ncrn = pcum->aapcs_ncrn;
4181 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4182 ncrn++;
4183
4184 nregs = ARM_NUM_REGS2(mode, type);
4185
4186 /* Sigh, this test should really assert that nregs > 0, but a GCC
4187 extension allows empty structs and then gives them zero size; it
4188 then allows such a structure to be passed by value. For some of
4189 the code below we have to pretend that such an argument has
4190 non-zero size so that we 'locate' it correctly either in
4191 registers or on the stack. */
4192 gcc_assert (nregs >= 0);
4193
4194 nregs2 = nregs ? nregs : 1;
4195
4196 /* C4 - Argument fits entirely in core registers. */
4197 if (ncrn + nregs2 <= NUM_ARG_REGS)
4198 {
4199 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4200 pcum->aapcs_next_ncrn = ncrn + nregs;
4201 return;
4202 }
4203
4204 /* C5 - Some core registers left and there are no arguments already
4205 on the stack: split this argument between the remaining core
4206 registers and the stack. */
4207 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4208 {
4209 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4210 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4211 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4212 return;
4213 }
4214
4215 /* C6 - NCRN is set to 4. */
4216 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4217
4218 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4219 return;
4220 }
4221
4222 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4223 for a call to a function whose data type is FNTYPE.
4224 For a library call, FNTYPE is NULL. */
4225 void
4226 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4227 rtx libname,
4228 tree fndecl ATTRIBUTE_UNUSED)
4229 {
4230 /* Determine the PCS variant to use for this call. */
4231 if (fntype)
4232 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4233 else
4234 pcum->pcs_variant = arm_pcs_default;
4235
4236 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4237 {
4238 if (arm_libcall_uses_aapcs_base (libname))
4239 pcum->pcs_variant = ARM_PCS_AAPCS;
4240
4241 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4242 pcum->aapcs_reg = NULL_RTX;
4243 pcum->aapcs_partial = 0;
4244 pcum->aapcs_arg_processed = false;
4245 pcum->aapcs_cprc_slot = -1;
4246 pcum->can_split = true;
4247
4248 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4249 {
4250 int i;
4251
4252 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4253 {
4254 pcum->aapcs_cprc_failed[i] = false;
4255 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4256 }
4257 }
4258 return;
4259 }
4260
4261 /* Legacy ABIs */
4262
4263 /* On the ARM, the offset starts at 0. */
4264 pcum->nregs = 0;
4265 pcum->iwmmxt_nregs = 0;
4266 pcum->can_split = true;
4267
4268 /* Varargs vectors are treated the same as long long.
4269 named_count avoids having to change the way arm handles 'named' arguments. */
4270 pcum->named_count = 0;
4271 pcum->nargs = 0;
4272
4273 if (TARGET_REALLY_IWMMXT && fntype)
4274 {
4275 tree fn_arg;
4276
4277 for (fn_arg = TYPE_ARG_TYPES (fntype);
4278 fn_arg;
4279 fn_arg = TREE_CHAIN (fn_arg))
4280 pcum->named_count += 1;
4281
4282 if (! pcum->named_count)
4283 pcum->named_count = INT_MAX;
4284 }
4285 }
4286
4287
4288 /* Return true if mode/type need doubleword alignment. */
4289 static bool
4290 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4291 {
4292 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4293 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4294 }
4295
4296
4297 /* Determine where to put an argument to a function.
4298 Value is zero to push the argument on the stack,
4299 or a hard register in which to store the argument.
4300
4301 MODE is the argument's machine mode.
4302 TYPE is the data type of the argument (as a tree).
4303 This is null for libcalls where that information may
4304 not be available.
4305 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4306 the preceding args and about the function being called.
4307 NAMED is nonzero if this argument is a named parameter
4308 (otherwise it is an extra parameter matching an ellipsis).
4309
4310 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4311 other arguments are passed on the stack. If (NAMED == 0) (which happens
4312 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4313 defined), say it is passed on the stack (function_prologue will
4314 indeed make sure it is passed on the stack if necessary). */
4315
4316 static rtx
4317 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4318 const_tree type, bool named)
4319 {
4320 int nregs;
4321
4322 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4323 a call insn (op3 of a call_value insn). */
4324 if (mode == VOIDmode)
4325 return const0_rtx;
4326
4327 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4328 {
4329 aapcs_layout_arg (pcum, mode, type, named);
4330 return pcum->aapcs_reg;
4331 }
4332
4333 /* Varargs vectors are treated the same as long long.
4334 named_count avoids having to change the way arm handles 'named' arguments. */
4335 if (TARGET_IWMMXT_ABI
4336 && arm_vector_mode_supported_p (mode)
4337 && pcum->named_count > pcum->nargs + 1)
4338 {
4339 if (pcum->iwmmxt_nregs <= 9)
4340 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4341 else
4342 {
4343 pcum->can_split = false;
4344 return NULL_RTX;
4345 }
4346 }
4347
4348 /* Put doubleword aligned quantities in even register pairs. */
4349 if (pcum->nregs & 1
4350 && ARM_DOUBLEWORD_ALIGN
4351 && arm_needs_doubleword_align (mode, type))
4352 pcum->nregs++;
4353
4354 /* Only allow splitting an arg between regs and memory if all preceding
4355 args were allocated to regs. For args passed by reference we only count
4356 the reference pointer. */
4357 if (pcum->can_split)
4358 nregs = 1;
4359 else
4360 nregs = ARM_NUM_REGS2 (mode, type);
4361
4362 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4363 return NULL_RTX;
4364
4365 return gen_rtx_REG (mode, pcum->nregs);
4366 }
4367
4368 static unsigned int
4369 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4370 {
4371 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4372 ? DOUBLEWORD_ALIGNMENT
4373 : PARM_BOUNDARY);
4374 }
4375
4376 static int
4377 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4378 tree type, bool named)
4379 {
4380 int nregs = pcum->nregs;
4381
4382 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4383 {
4384 aapcs_layout_arg (pcum, mode, type, named);
4385 return pcum->aapcs_partial;
4386 }
4387
4388 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4389 return 0;
4390
4391 if (NUM_ARG_REGS > nregs
4392 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4393 && pcum->can_split)
4394 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4395
4396 return 0;
4397 }
4398
4399 /* Update the data in PCUM to advance over an argument
4400 of mode MODE and data type TYPE.
4401 (TYPE is null for libcalls where that information may not be available.) */
4402
4403 static void
4404 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4405 const_tree type, bool named)
4406 {
4407 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4408 {
4409 aapcs_layout_arg (pcum, mode, type, named);
4410
4411 if (pcum->aapcs_cprc_slot >= 0)
4412 {
4413 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4414 type);
4415 pcum->aapcs_cprc_slot = -1;
4416 }
4417
4418 /* Generic stuff. */
4419 pcum->aapcs_arg_processed = false;
4420 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4421 pcum->aapcs_reg = NULL_RTX;
4422 pcum->aapcs_partial = 0;
4423 }
4424 else
4425 {
4426 pcum->nargs += 1;
4427 if (arm_vector_mode_supported_p (mode)
4428 && pcum->named_count > pcum->nargs
4429 && TARGET_IWMMXT_ABI)
4430 pcum->iwmmxt_nregs += 1;
4431 else
4432 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4433 }
4434 }
4435
4436 /* Variable sized types are passed by reference. This is a GCC
4437 extension to the ARM ABI. */
4438
4439 static bool
4440 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4441 enum machine_mode mode ATTRIBUTE_UNUSED,
4442 const_tree type, bool named ATTRIBUTE_UNUSED)
4443 {
4444 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4445 }
4446 \f
4447 /* Encode the current state of the #pragma [no_]long_calls. */
4448 typedef enum
4449 {
4450 OFF, /* No #pragma [no_]long_calls is in effect. */
4451 LONG, /* #pragma long_calls is in effect. */
4452 SHORT /* #pragma no_long_calls is in effect. */
4453 } arm_pragma_enum;
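
/* For example (illustrative user code):

     #pragma long_calls
     void far_away (void);
     #pragma long_calls_off

   Calls to far_away then use a 32-bit calling sequence; see
   arm_set_default_type_attributes and arm_is_long_call_p below.  */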
4454
4455 static arm_pragma_enum arm_pragma_long_calls = OFF;
4456
4457 void
4458 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4459 {
4460 arm_pragma_long_calls = LONG;
4461 }
4462
4463 void
4464 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4465 {
4466 arm_pragma_long_calls = SHORT;
4467 }
4468
4469 void
4470 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4471 {
4472 arm_pragma_long_calls = OFF;
4473 }
4474 \f
4475 /* Handle an attribute requiring a FUNCTION_DECL;
4476 arguments as in struct attribute_spec.handler. */
4477 static tree
4478 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4479 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4480 {
4481 if (TREE_CODE (*node) != FUNCTION_DECL)
4482 {
4483 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4484 name);
4485 *no_add_attrs = true;
4486 }
4487
4488 return NULL_TREE;
4489 }
4490
4491 /* Handle an "interrupt" or "isr" attribute;
4492 arguments as in struct attribute_spec.handler. */
4493 static tree
4494 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4495 bool *no_add_attrs)
4496 {
4497 if (DECL_P (*node))
4498 {
4499 if (TREE_CODE (*node) != FUNCTION_DECL)
4500 {
4501 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4502 name);
4503 *no_add_attrs = true;
4504 }
4505 /* FIXME: the argument if any is checked for type attributes;
4506 should it be checked for decl ones? */
4507 }
4508 else
4509 {
4510 if (TREE_CODE (*node) == FUNCTION_TYPE
4511 || TREE_CODE (*node) == METHOD_TYPE)
4512 {
4513 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
4514 {
4515 warning (OPT_Wattributes, "%qE attribute ignored",
4516 name);
4517 *no_add_attrs = true;
4518 }
4519 }
4520 else if (TREE_CODE (*node) == POINTER_TYPE
4521 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
4522 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
4523 && arm_isr_value (args) != ARM_FT_UNKNOWN)
4524 {
4525 *node = build_variant_type_copy (*node);
4526 TREE_TYPE (*node) = build_type_attribute_variant
4527 (TREE_TYPE (*node),
4528 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
4529 *no_add_attrs = true;
4530 }
4531 else
4532 {
4533 /* Possibly pass this attribute on from the type to a decl. */
4534 if (flags & ((int) ATTR_FLAG_DECL_NEXT
4535 | (int) ATTR_FLAG_FUNCTION_NEXT
4536 | (int) ATTR_FLAG_ARRAY_NEXT))
4537 {
4538 *no_add_attrs = true;
4539 return tree_cons (name, args, NULL_TREE);
4540 }
4541 else
4542 {
4543 warning (OPT_Wattributes, "%qE attribute ignored",
4544 name);
4545 }
4546 }
4547 }
4548
4549 return NULL_TREE;
4550 }
4551
4552 /* Handle a "pcs" attribute; arguments as in struct
4553 attribute_spec.handler. */
4554 static tree
4555 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
4556 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4557 {
4558 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
4559 {
4560 warning (OPT_Wattributes, "%qE attribute ignored", name);
4561 *no_add_attrs = true;
4562 }
4563 return NULL_TREE;
4564 }
4565
4566 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4567 /* Handle the "notshared" attribute. This attribute is another way of
4568 requesting hidden visibility. ARM's compiler supports
4569 "__declspec(notshared)"; we support the same thing via an
4570 attribute. */
4571
4572 static tree
4573 arm_handle_notshared_attribute (tree *node,
4574 tree name ATTRIBUTE_UNUSED,
4575 tree args ATTRIBUTE_UNUSED,
4576 int flags ATTRIBUTE_UNUSED,
4577 bool *no_add_attrs)
4578 {
4579 tree decl = TYPE_NAME (*node);
4580
4581 if (decl)
4582 {
4583 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
4584 DECL_VISIBILITY_SPECIFIED (decl) = 1;
4585 *no_add_attrs = false;
4586 }
4587 return NULL_TREE;
4588 }
4589 #endif
4590
4591 /* Return 0 if the attributes for two types are incompatible, 1 if they
4592 are compatible, and 2 if they are nearly compatible (which causes a
4593 warning to be generated). */
4594 static int
4595 arm_comp_type_attributes (const_tree type1, const_tree type2)
4596 {
4597 int l1, l2, s1, s2;
4598
4599 /* Check for mismatch of non-default calling convention. */
4600 if (TREE_CODE (type1) != FUNCTION_TYPE)
4601 return 1;
4602
4603 /* Check for mismatched call attributes. */
4604 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
4605 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
4606 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
4607 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
4608
4609 /* Only bother to check if an attribute is defined. */
4610 if (l1 | l2 | s1 | s2)
4611 {
4612 /* If one type has an attribute, the other must have the same attribute. */
4613 if ((l1 != l2) || (s1 != s2))
4614 return 0;
4615
4616 /* Disallow mixed attributes. */
4617 if ((l1 & s2) || (l2 & s1))
4618 return 0;
4619 }
4620
4621 /* Check for mismatched ISR attribute. */
4622 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
4623 if (! l1)
4624 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
4625 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
4626 if (! l2)
4627 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
4628 if (l1 != l2)
4629 return 0;
4630
4631 return 1;
4632 }
4633
4634 /* Assign default attributes to a newly defined type. This is used to
4635 set short_call/long_call attributes for function types of
4636 functions defined inside corresponding #pragma scopes. */
4637 static void
4638 arm_set_default_type_attributes (tree type)
4639 {
4640 /* Add __attribute__ ((long_call)) to all functions when inside
4641 #pragma long_calls, or __attribute__ ((short_call)) when inside
4642 #pragma no_long_calls. */
4643 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
4644 {
4645 tree type_attr_list, attr_name;
4646 type_attr_list = TYPE_ATTRIBUTES (type);
4647
4648 if (arm_pragma_long_calls == LONG)
4649 attr_name = get_identifier ("long_call");
4650 else if (arm_pragma_long_calls == SHORT)
4651 attr_name = get_identifier ("short_call");
4652 else
4653 return;
4654
4655 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
4656 TYPE_ATTRIBUTES (type) = type_attr_list;
4657 }
4658 }
4659 \f
4660 /* Return true if DECL is known to be linked into section SECTION. */
4661
4662 static bool
4663 arm_function_in_section_p (tree decl, section *section)
4664 {
4665 /* We can only be certain about functions defined in the same
4666 compilation unit. */
4667 if (!TREE_STATIC (decl))
4668 return false;
4669
4670 /* Make sure that SYMBOL always binds to the definition in this
4671 compilation unit. */
4672 if (!targetm.binds_local_p (decl))
4673 return false;
4674
4675 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4676 if (!DECL_SECTION_NAME (decl))
4677 {
4678 /* Make sure that we will not create a unique section for DECL. */
4679 if (flag_function_sections || DECL_ONE_ONLY (decl))
4680 return false;
4681 }
4682
4683 return function_section (decl) == section;
4684 }
4685
4686 /* Return nonzero if a 32-bit "long_call" should be generated for
4687 a call from the current function to DECL. We generate a long_call
4688 if the function:
4689
4690 a. has an __attribute__ ((long_call))
4691 or b. is within the scope of a #pragma long_calls
4692 or c. the -mlong-calls command line switch has been specified
4693
4694 However we do not generate a long call if the function:
4695
4696 d. has an __attribute__ ((short_call))
4697 or e. is inside the scope of a #pragma no_long_calls
4698 or f. is defined in the same section as the current function. */
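
/* For example (illustrative), condition "a" is satisfied by

     extern void far_away (void) __attribute__ ((long_call));

   while condition "c" is satisfied by compiling with -mlong-calls.  */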
4699
4700 bool
4701 arm_is_long_call_p (tree decl)
4702 {
4703 tree attrs;
4704
4705 if (!decl)
4706 return TARGET_LONG_CALLS;
4707
4708 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
4709 if (lookup_attribute ("short_call", attrs))
4710 return false;
4711
4712 /* For "f", be conservative, and only cater for cases in which the
4713 whole of the current function is placed in the same section. */
4714 if (!flag_reorder_blocks_and_partition
4715 && TREE_CODE (decl) == FUNCTION_DECL
4716 && arm_function_in_section_p (decl, current_function_section ()))
4717 return false;
4718
4719 if (lookup_attribute ("long_call", attrs))
4720 return true;
4721
4722 return TARGET_LONG_CALLS;
4723 }
4724
4725 /* Return nonzero if it is ok to make a tail-call to DECL. */
4726 static bool
4727 arm_function_ok_for_sibcall (tree decl, tree exp)
4728 {
4729 unsigned long func_type;
4730
4731 if (cfun->machine->sibcall_blocked)
4732 return false;
4733
4734 /* Never tailcall something for which we have no decl, or if we
4735 are generating code for Thumb-1. */
4736 if (decl == NULL || TARGET_THUMB1)
4737 return false;
4738
4739 /* The PIC register is live on entry to VxWorks PLT entries, so we
4740 must make the call before restoring the PIC register. */
4741 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4742 return false;
4743
4744 /* Cannot tail-call to long calls, since these are out of range of
4745 a branch instruction. */
4746 if (arm_is_long_call_p (decl))
4747 return false;
4748
4749 /* If we are interworking and the function is not declared static
4750 then we can't tail-call it unless we know that it exists in this
4751 compilation unit (since it might be a Thumb routine). */
4752 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
4753 return false;
4754
4755 func_type = arm_current_func_type ();
4756 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4757 if (IS_INTERRUPT (func_type))
4758 return false;
4759
4760 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4761 {
4762 /* Check that the return value locations are the same. For
4763 example that we aren't returning a value from the sibling in
4764 a VFP register but then need to transfer it to a core
4765 register. */
4766 rtx a, b;
4767
4768 a = arm_function_value (TREE_TYPE (exp), decl, false);
4769 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4770 cfun->decl, false);
4771 if (!rtx_equal_p (a, b))
4772 return false;
4773 }
4774
4775 /* Never tailcall if function may be called with a misaligned SP. */
4776 if (IS_STACKALIGN (func_type))
4777 return false;
4778
4779 /* Everything else is ok. */
4780 return true;
4781 }
4782
4783 \f
4784 /* Addressing mode support functions. */
4785
4786 /* Return nonzero if X is a legitimate immediate operand when compiling
4787 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4788 int
4789 legitimate_pic_operand_p (rtx x)
4790 {
4791 if (GET_CODE (x) == SYMBOL_REF
4792 || (GET_CODE (x) == CONST
4793 && GET_CODE (XEXP (x, 0)) == PLUS
4794 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4795 return 0;
4796
4797 return 1;
4798 }
4799
4800 /* Record that the current function needs a PIC register. Initialize
4801 cfun->machine->pic_reg if we have not already done so. */
4802
4803 static void
4804 require_pic_register (void)
4805 {
4806 /* A lot of the logic here is made obscure by the fact that this
4807 routine gets called as part of the rtx cost estimation process.
4808 We don't want those calls to affect any assumptions about the real
4809 function; and further, we can't call entry_of_function() until we
4810 start the real expansion process. */
4811 if (!crtl->uses_pic_offset_table)
4812 {
4813 gcc_assert (can_create_pseudo_p ());
4814 if (arm_pic_register != INVALID_REGNUM)
4815 {
4816 if (!cfun->machine->pic_reg)
4817 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
4818
4819 /* Play games to avoid marking the function as needing pic
4820 if we are being called as part of the cost-estimation
4821 process. */
4822 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4823 crtl->uses_pic_offset_table = 1;
4824 }
4825 else
4826 {
4827 rtx seq, insn;
4828
4829 if (!cfun->machine->pic_reg)
4830 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4831
4832 /* Play games to avoid marking the function as needing pic
4833 if we are being called as part of the cost-estimation
4834 process. */
4835 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4836 {
4837 crtl->uses_pic_offset_table = 1;
4838 start_sequence ();
4839
4840 arm_load_pic_register (0UL);
4841
4842 seq = get_insns ();
4843 end_sequence ();
4844
4845 for (insn = seq; insn; insn = NEXT_INSN (insn))
4846 if (INSN_P (insn))
4847 INSN_LOCATOR (insn) = prologue_locator;
4848
4849 /* We can be called during expansion of PHI nodes, where
4850 we can't yet emit instructions directly in the final
4851 insn stream. Queue the insns on the entry edge, they will
4852 be committed after everything else is expanded. */
4853 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4854 }
4855 }
4856 }
4857 }
4858
4859 rtx
4860 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4861 {
4862 if (GET_CODE (orig) == SYMBOL_REF
4863 || GET_CODE (orig) == LABEL_REF)
4864 {
4865 rtx insn;
4866
4867 if (reg == 0)
4868 {
4869 gcc_assert (can_create_pseudo_p ());
4870 reg = gen_reg_rtx (Pmode);
4871 }
4872
4873 /* VxWorks does not impose a fixed gap between segments; the run-time
4874 gap can be different from the object-file gap. We therefore can't
4875 use GOTOFF unless we are absolutely sure that the symbol is in the
4876 same segment as the GOT. Unfortunately, the flexibility of linker
4877 scripts means that we can't be sure of that in general, so assume
4878 that GOTOFF is never valid on VxWorks. */
4879 if ((GET_CODE (orig) == LABEL_REF
4880 || (GET_CODE (orig) == SYMBOL_REF
4881 && SYMBOL_REF_LOCAL_P (orig)))
4882 && NEED_GOT_RELOC
4883 && !TARGET_VXWORKS_RTP)
4884 insn = arm_pic_static_addr (orig, reg);
4885 else
4886 {
4887 rtx pat;
4888 rtx mem;
4889
4890 /* If this function doesn't have a pic register, create one now. */
4891 require_pic_register ();
4892
4893 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
4894
4895 /* Make the MEM as close to a constant as possible. */
4896 mem = SET_SRC (pat);
4897 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
4898 MEM_READONLY_P (mem) = 1;
4899 MEM_NOTRAP_P (mem) = 1;
4900
4901 insn = emit_insn (pat);
4902 }
4903
4904 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4905 by loop. */
4906 set_unique_reg_note (insn, REG_EQUAL, orig);
4907
4908 return reg;
4909 }
4910 else if (GET_CODE (orig) == CONST)
4911 {
4912 rtx base, offset;
4913
4914 if (GET_CODE (XEXP (orig, 0)) == PLUS
4915 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
4916 return orig;
4917
4918 /* Handle the case where we have: const (UNSPEC_TLS). */
4919 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
4920 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
4921 return orig;
4922
4923 /* Handle the case where we have:
4924 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4925 CONST_INT. */
4926 if (GET_CODE (XEXP (orig, 0)) == PLUS
4927 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
4928 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
4929 {
4930 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
4931 return orig;
4932 }
4933
4934 if (reg == 0)
4935 {
4936 gcc_assert (can_create_pseudo_p ());
4937 reg = gen_reg_rtx (Pmode);
4938 }
4939
4940 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4941
4942 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
4943 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
4944 base == reg ? 0 : reg);
4945
4946 if (GET_CODE (offset) == CONST_INT)
4947 {
4948 /* The base register doesn't really matter, we only want to
4949 test the index for the appropriate mode. */
4950 if (!arm_legitimate_index_p (mode, offset, SET, 0))
4951 {
4952 gcc_assert (can_create_pseudo_p ());
4953 offset = force_reg (Pmode, offset);
4954 }
4955
4956 if (GET_CODE (offset) == CONST_INT)
4957 return plus_constant (base, INTVAL (offset));
4958 }
4959
4960 if (GET_MODE_SIZE (mode) > 4
4961 && (GET_MODE_CLASS (mode) == MODE_INT
4962 || TARGET_SOFT_FLOAT))
4963 {
4964 emit_insn (gen_addsi3 (reg, base, offset));
4965 return reg;
4966 }
4967
4968 return gen_rtx_PLUS (Pmode, base, offset);
4969 }
4970
4971 return orig;
4972 }
4973
4974
4975 /* Find a spare register to use during the prolog of a function. */
4976
4977 static int
4978 thumb_find_work_register (unsigned long pushed_regs_mask)
4979 {
4980 int reg;
4981
4982 /* Check the argument registers first as these are call-used. The
4983 register allocation order means that sometimes r3 might be used
4984 but earlier argument registers might not, so check them all. */
4985 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
4986 if (!df_regs_ever_live_p (reg))
4987 return reg;
4988
4989 /* Before going on to check the call-saved registers we can try a couple
4990 more ways of deducing that r3 is available. The first is when we are
4991 pushing anonymous arguments onto the stack and we have less than 4
4992 registers worth of fixed arguments(*). In this case r3 will be part of
4993 the variable argument list and so we can be sure that it will be
4994 pushed right at the start of the function. Hence it will be available
4995 for the rest of the prologue.
4996 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
4997 if (cfun->machine->uses_anonymous_args
4998 && crtl->args.pretend_args_size > 0)
4999 return LAST_ARG_REGNUM;
5000
5001 /* The other case is when we have fixed arguments but less than 4 registers
5002 worth. In this case r3 might be used in the body of the function, but
5003 it is not being used to convey an argument into the function. In theory
5004 we could just check crtl->args.size to see how many bytes are
5005 being passed in argument registers, but it seems that it is unreliable.
5006 Sometimes it will have the value 0 when in fact arguments are being
5007 passed. (See testcase execute/20021111-1.c for an example). So we also
5008 check the args_info.nregs field as well. The problem with this field is
5009 that it makes no allowances for arguments that are passed to the
5010 function but which are not used. Hence we could miss an opportunity
5011 when a function has an unused argument in r3. But it is better to be
5012 safe than to be sorry. */
5013 if (! cfun->machine->uses_anonymous_args
5014 && crtl->args.size >= 0
5015 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5016 && crtl->args.info.nregs < 4)
5017 return LAST_ARG_REGNUM;
5018
5019 /* Otherwise look for a call-saved register that is going to be pushed. */
5020 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5021 if (pushed_regs_mask & (1 << reg))
5022 return reg;
5023
5024 if (TARGET_THUMB2)
5025 {
5026 /* Thumb-2 can use high regs. */
5027 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5028 if (pushed_regs_mask & (1 << reg))
5029 return reg;
5030 }
5031 /* Something went wrong - thumb_compute_save_reg_mask()
5032 should have arranged for a suitable register to be pushed. */
5033 gcc_unreachable ();
5034 }
5035
5036 static GTY(()) int pic_labelno;
5037
5038 /* Generate code to load the PIC register. In Thumb mode a scratch
5039 low register is chosen from SAVED_REGS when one is needed. */
5040
5041 void
5042 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5043 {
5044 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5045
5046 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5047 return;
5048
5049 gcc_assert (flag_pic);
5050
5051 pic_reg = cfun->machine->pic_reg;
5052 if (TARGET_VXWORKS_RTP)
5053 {
5054 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5055 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5056 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5057
5058 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5059
5060 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5061 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5062 }
5063 else
5064 {
5065 /* We use an UNSPEC rather than a LABEL_REF because this label
5066 never appears in the code stream. */
5067
5068 labelno = GEN_INT (pic_labelno++);
5069 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5070 l1 = gen_rtx_CONST (VOIDmode, l1);
5071
5072 /* On the ARM the PC register contains 'dot + 8' at the time of the
5073 addition; on the Thumb it is 'dot + 4'. */
5074 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5075 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5076 UNSPEC_GOTSYM_OFF);
5077 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5078
5079 if (TARGET_32BIT)
5080 {
5081 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5082 if (TARGET_ARM)
5083 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5084 else
5085 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5086 }
5087 else /* TARGET_THUMB1 */
5088 {
5089 if (arm_pic_register != INVALID_REGNUM
5090 && REGNO (pic_reg) > LAST_LO_REGNUM)
5091 {
5092 /* We will have pushed the pic register, so we should always be
5093 able to find a work register. */
5094 pic_tmp = gen_rtx_REG (SImode,
5095 thumb_find_work_register (saved_regs));
5096 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5097 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5098 }
5099 else
5100 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
5101 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5102 }
5103 }
5104
5105 /* Need to emit this whether or not we obey regdecls,
5106 since setjmp/longjmp can cause life info to screw up. */
5107 emit_use (pic_reg);
5108 }
5109
5110 /* Generate code to load the address of a static var when flag_pic is set. */
5111 static rtx
5112 arm_pic_static_addr (rtx orig, rtx reg)
5113 {
5114 rtx l1, labelno, offset_rtx, insn;
5115
5116 gcc_assert (flag_pic);
5117
5118 /* We use an UNSPEC rather than a LABEL_REF because this label
5119 never appears in the code stream. */
5120 labelno = GEN_INT (pic_labelno++);
5121 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5122 l1 = gen_rtx_CONST (VOIDmode, l1);
5123
5124 /* On the ARM the PC register contains 'dot + 8' at the time of the
5125 addition, on the Thumb it is 'dot + 4'. */
5126 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5127 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5128 UNSPEC_SYMBOL_OFFSET);
5129 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5130
5131 if (TARGET_32BIT)
5132 {
5133 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5134 if (TARGET_ARM)
5135 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5136 else
5137 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5138 }
5139 else /* TARGET_THUMB1 */
5140 {
5141 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5142 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5143 }
5144
5145 return insn;
5146 }
5147
5148 /* Return nonzero if X is valid as an ARM state addressing register. */
5149 static int
5150 arm_address_register_rtx_p (rtx x, int strict_p)
5151 {
5152 int regno;
5153
5154 if (GET_CODE (x) != REG)
5155 return 0;
5156
5157 regno = REGNO (x);
5158
5159 if (strict_p)
5160 return ARM_REGNO_OK_FOR_BASE_P (regno);
5161
5162 return (regno <= LAST_ARM_REGNUM
5163 || regno >= FIRST_PSEUDO_REGISTER
5164 || regno == FRAME_POINTER_REGNUM
5165 || regno == ARG_POINTER_REGNUM);
5166 }
5167
5168 /* Return TRUE if this rtx is the difference of a symbol and a label,
5169 and will reduce to a PC-relative relocation in the object file.
5170 Expressions like this can be left alone when generating PIC, rather
5171 than forced through the GOT. */
5172 static int
5173 pcrel_constant_p (rtx x)
5174 {
5175 if (GET_CODE (x) == MINUS)
5176 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5177
5178 return FALSE;
5179 }
5180
5181 /* Return true if X will surely end up in an index register after the next
5182 splitting pass. */
5183 static bool
5184 will_be_in_index_register (const_rtx x)
5185 {
5186 /* arm.md: calculate_pic_address will split this into a register. */
5187 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5188 }
5189
5190 /* Return nonzero if X is a valid ARM state address operand. */
5191 int
5192 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5193 int strict_p)
5194 {
5195 bool use_ldrd;
5196 enum rtx_code code = GET_CODE (x);
5197
5198 if (arm_address_register_rtx_p (x, strict_p))
5199 return 1;
5200
5201 use_ldrd = (TARGET_LDRD
5202 && (mode == DImode
5203 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5204
5205 if (code == POST_INC || code == PRE_DEC
5206 || ((code == PRE_INC || code == POST_DEC)
5207 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5208 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5209
5210 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5211 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5212 && GET_CODE (XEXP (x, 1)) == PLUS
5213 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5214 {
5215 rtx addend = XEXP (XEXP (x, 1), 1);
5216
5217 /* Don't allow ldrd post increment by register because it's hard
5218 to fixup invalid register choices. */
5219 if (use_ldrd
5220 && GET_CODE (x) == POST_MODIFY
5221 && GET_CODE (addend) == REG)
5222 return 0;
5223
5224 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5225 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5226 }
5227
5228 /* After reload constants split into minipools will have addresses
5229 from a LABEL_REF. */
5230 else if (reload_completed
5231 && (code == LABEL_REF
5232 || (code == CONST
5233 && GET_CODE (XEXP (x, 0)) == PLUS
5234 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5235 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5236 return 1;
5237
5238 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5239 return 0;
5240
5241 else if (code == PLUS)
5242 {
5243 rtx xop0 = XEXP (x, 0);
5244 rtx xop1 = XEXP (x, 1);
5245
5246 return ((arm_address_register_rtx_p (xop0, strict_p)
5247 && ((GET_CODE(xop1) == CONST_INT
5248 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5249 || (!strict_p && will_be_in_index_register (xop1))))
5250 || (arm_address_register_rtx_p (xop1, strict_p)
5251 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5252 }
5253
5254 #if 0
5255 /* Reload currently can't handle MINUS, so disable this for now */
5256 else if (GET_CODE (x) == MINUS)
5257 {
5258 rtx xop0 = XEXP (x, 0);
5259 rtx xop1 = XEXP (x, 1);
5260
5261 return (arm_address_register_rtx_p (xop0, strict_p)
5262 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5263 }
5264 #endif
5265
5266 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5267 && code == SYMBOL_REF
5268 && CONSTANT_POOL_ADDRESS_P (x)
5269 && ! (flag_pic
5270 && symbol_mentioned_p (get_pool_constant (x))
5271 && ! pcrel_constant_p (get_pool_constant (x))))
5272 return 1;
5273
5274 return 0;
5275 }
5276
5277 /* Return nonzero if X is a valid Thumb-2 address operand. */
5278 static int
5279 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5280 {
5281 bool use_ldrd;
5282 enum rtx_code code = GET_CODE (x);
5283
5284 if (arm_address_register_rtx_p (x, strict_p))
5285 return 1;
5286
5287 use_ldrd = (TARGET_LDRD
5288 && (mode == DImode
5289 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5290
5291 if (code == POST_INC || code == PRE_DEC
5292 || ((code == PRE_INC || code == POST_DEC)
5293 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5294 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5295
5296 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5297 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5298 && GET_CODE (XEXP (x, 1)) == PLUS
5299 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5300 {
5301 /* Thumb-2 only has autoincrement by constant. */
5302 rtx addend = XEXP (XEXP (x, 1), 1);
5303 HOST_WIDE_INT offset;
5304
5305 if (GET_CODE (addend) != CONST_INT)
5306 return 0;
5307
5308 offset = INTVAL(addend);
5309 if (GET_MODE_SIZE (mode) <= 4)
5310 return (offset > -256 && offset < 256);
5311
5312 return (use_ldrd && offset > -1024 && offset < 1024
5313 && (offset & 3) == 0);
5314 }
5315
5316 /* After reload constants split into minipools will have addresses
5317 from a LABEL_REF. */
5318 else if (reload_completed
5319 && (code == LABEL_REF
5320 || (code == CONST
5321 && GET_CODE (XEXP (x, 0)) == PLUS
5322 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5323 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5324 return 1;
5325
5326 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5327 return 0;
5328
5329 else if (code == PLUS)
5330 {
5331 rtx xop0 = XEXP (x, 0);
5332 rtx xop1 = XEXP (x, 1);
5333
5334 return ((arm_address_register_rtx_p (xop0, strict_p)
5335 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5336 || (!strict_p && will_be_in_index_register (xop1))))
5337 || (arm_address_register_rtx_p (xop1, strict_p)
5338 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5339 }
5340
5341 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5342 && code == SYMBOL_REF
5343 && CONSTANT_POOL_ADDRESS_P (x)
5344 && ! (flag_pic
5345 && symbol_mentioned_p (get_pool_constant (x))
5346 && ! pcrel_constant_p (get_pool_constant (x))))
5347 return 1;
5348
5349 return 0;
5350 }
5351
5352 /* Return nonzero if INDEX is valid for an address index operand in
5353 ARM state. */
5354 static int
5355 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5356 int strict_p)
5357 {
5358 HOST_WIDE_INT range;
5359 enum rtx_code code = GET_CODE (index);
5360
5361 /* Standard coprocessor addressing modes. */
5362 if (TARGET_HARD_FLOAT
5363 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5364 && (mode == SFmode || mode == DFmode
5365 || (TARGET_MAVERICK && mode == DImode)))
5366 return (code == CONST_INT && INTVAL (index) < 1024
5367 && INTVAL (index) > -1024
5368 && (INTVAL (index) & 3) == 0);
5369
5370 /* For quad modes, we restrict the constant offset to be slightly less
5371 than what the instruction format permits. We do this because for
5372 quad mode moves, we will actually decompose them into two separate
5373 double-mode reads or writes. INDEX must therefore be a valid
5374 (double-mode) offset and so should INDEX+8. */
5375 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5376 return (code == CONST_INT
5377 && INTVAL (index) < 1016
5378 && INTVAL (index) > -1024
5379 && (INTVAL (index) & 3) == 0);
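/* For example, the largest word-aligned offset accepted here is 1012, so
   the second double-word access of a decomposed quad-word move lands at
   1012 + 8 = 1020, which still fits the (-1024, 1024) range allowed for
   double-mode accesses below.  */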
5380
5381 /* We have no such constraint on double mode offsets, so we permit the
5382 full range of the instruction format. */
5383 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5384 return (code == CONST_INT
5385 && INTVAL (index) < 1024
5386 && INTVAL (index) > -1024
5387 && (INTVAL (index) & 3) == 0);
5388
5389 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5390 return (code == CONST_INT
5391 && INTVAL (index) < 1024
5392 && INTVAL (index) > -1024
5393 && (INTVAL (index) & 3) == 0);
5394
5395 if (arm_address_register_rtx_p (index, strict_p)
5396 && (GET_MODE_SIZE (mode) <= 4))
5397 return 1;
5398
5399 if (mode == DImode || mode == DFmode)
5400 {
5401 if (code == CONST_INT)
5402 {
5403 HOST_WIDE_INT val = INTVAL (index);
5404
5405 if (TARGET_LDRD)
5406 return val > -256 && val < 256;
5407 else
5408 return val > -4096 && val < 4092;
5409 }
5410
5411 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5412 }
5413
5414 if (GET_MODE_SIZE (mode) <= 4
5415 && ! (arm_arch4
5416 && (mode == HImode
5417 || mode == HFmode
5418 || (mode == QImode && outer == SIGN_EXTEND))))
5419 {
5420 if (code == MULT)
5421 {
5422 rtx xiop0 = XEXP (index, 0);
5423 rtx xiop1 = XEXP (index, 1);
5424
5425 return ((arm_address_register_rtx_p (xiop0, strict_p)
5426 && power_of_two_operand (xiop1, SImode))
5427 || (arm_address_register_rtx_p (xiop1, strict_p)
5428 && power_of_two_operand (xiop0, SImode)));
5429 }
5430 else if (code == LSHIFTRT || code == ASHIFTRT
5431 || code == ASHIFT || code == ROTATERT)
5432 {
5433 rtx op = XEXP (index, 1);
5434
5435 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5436 && GET_CODE (op) == CONST_INT
5437 && INTVAL (op) > 0
5438 && INTVAL (op) <= 31);
5439 }
5440 }
5441
5442 /* For ARM v4 we may be doing a sign-extend operation during the
5443 load. */
5444 if (arm_arch4)
5445 {
5446 if (mode == HImode
5447 || mode == HFmode
5448 || (outer == SIGN_EXTEND && mode == QImode))
5449 range = 256;
5450 else
5451 range = 4096;
5452 }
5453 else
5454 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
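/* The check below therefore accepts offsets strictly inside (-range, range),
   e.g. -255..255 for an ARMv4 halfword load.  */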
5455
5456 return (code == CONST_INT
5457 && INTVAL (index) < range
5458 && INTVAL (index) > -range);
5459 }
5460
5461 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
5462 index operand, i.e. 1, 2, 4 or 8. */
5463 static bool
5464 thumb2_index_mul_operand (rtx op)
5465 {
5466 HOST_WIDE_INT val;
5467
5468 if (GET_CODE(op) != CONST_INT)
5469 return false;
5470
5471 val = INTVAL(op);
5472 return (val == 1 || val == 2 || val == 4 || val == 8);
5473 }
5474
5475 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5476 static int
5477 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5478 {
5479 enum rtx_code code = GET_CODE (index);
5480
5481 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5482 /* Standard coprocessor addressing modes. */
5483 if (TARGET_HARD_FLOAT
5484 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5485 && (mode == SFmode || mode == DFmode
5486 || (TARGET_MAVERICK && mode == DImode)))
5487 return (code == CONST_INT && INTVAL (index) < 1024
5488 /* Thumb-2 allows only a > -256 index range for its core register
5489 load/stores. Since we allow SF/DF in core registers, we have
5490 to use the intersection between -256~4096 (core) and -1024~1024
5491 (coprocessor). */
5492 && INTVAL (index) > -256
5493 && (INTVAL (index) & 3) == 0);
5494
5495 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5496 {
5497 /* For DImode assume values will usually live in core regs
5498 and only allow LDRD addressing modes. */
5499 if (!TARGET_LDRD || mode != DImode)
5500 return (code == CONST_INT
5501 && INTVAL (index) < 1024
5502 && INTVAL (index) > -1024
5503 && (INTVAL (index) & 3) == 0);
5504 }
5505
5506 /* For quad modes, we restrict the constant offset to be slightly less
5507 than what the instruction format permits. We do this because for
5508 quad mode moves, we will actually decompose them into two separate
5509 double-mode reads or writes. INDEX must therefore be a valid
5510 (double-mode) offset and so should INDEX+8. */
5511 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5512 return (code == CONST_INT
5513 && INTVAL (index) < 1016
5514 && INTVAL (index) > -1024
5515 && (INTVAL (index) & 3) == 0);
5516
5517 /* We have no such constraint on double mode offsets, so we permit the
5518 full range of the instruction format. */
5519 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5520 return (code == CONST_INT
5521 && INTVAL (index) < 1024
5522 && INTVAL (index) > -1024
5523 && (INTVAL (index) & 3) == 0);
5524
5525 if (arm_address_register_rtx_p (index, strict_p)
5526 && (GET_MODE_SIZE (mode) <= 4))
5527 return 1;
5528
5529 if (mode == DImode || mode == DFmode)
5530 {
5531 if (code == CONST_INT)
5532 {
5533 HOST_WIDE_INT val = INTVAL (index);
5534 /* ??? Can we assume ldrd for thumb2? */
5535 /* Thumb-2 ldrd only has reg+const addressing modes. */
5536 /* ldrd supports offsets of +-1020.
5537 However the ldr fallback does not. */
5538 return val > -256 && val < 256 && (val & 3) == 0;
5539 }
5540 else
5541 return 0;
5542 }
5543
5544 if (code == MULT)
5545 {
5546 rtx xiop0 = XEXP (index, 0);
5547 rtx xiop1 = XEXP (index, 1);
5548
5549 return ((arm_address_register_rtx_p (xiop0, strict_p)
5550 && thumb2_index_mul_operand (xiop1))
5551 || (arm_address_register_rtx_p (xiop1, strict_p)
5552 && thumb2_index_mul_operand (xiop0)));
5553 }
5554 else if (code == ASHIFT)
5555 {
5556 rtx op = XEXP (index, 1);
5557
5558 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5559 && GET_CODE (op) == CONST_INT
5560 && INTVAL (op) > 0
5561 && INTVAL (op) <= 3);
5562 }
5563
5564 return (code == CONST_INT
5565 && INTVAL (index) < 4096
5566 && INTVAL (index) > -256);
5567 }
5568
5569 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5570 static int
5571 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
5572 {
5573 int regno;
5574
5575 if (GET_CODE (x) != REG)
5576 return 0;
5577
5578 regno = REGNO (x);
5579
5580 if (strict_p)
5581 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
5582
5583 return (regno <= LAST_LO_REGNUM
5584 || regno > LAST_VIRTUAL_REGISTER
5585 || regno == FRAME_POINTER_REGNUM
5586 || (GET_MODE_SIZE (mode) >= 4
5587 && (regno == STACK_POINTER_REGNUM
5588 || regno >= FIRST_PSEUDO_REGISTER
5589 || x == hard_frame_pointer_rtx
5590 || x == arg_pointer_rtx)));
5591 }
5592
5593 /* Return nonzero if x is a legitimate index register. This is the case
5594 for any base register that can access a QImode object. */
5595 inline static int
5596 thumb1_index_register_rtx_p (rtx x, int strict_p)
5597 {
5598 return thumb1_base_register_rtx_p (x, QImode, strict_p);
5599 }
5600
5601 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5602
5603 The AP may be eliminated to either the SP or the FP, so we use the
5604 least common denominator, e.g. SImode, and offsets from 0 to 64.
5605
5606 ??? Verify whether the above is the right approach.
5607
5608 ??? Also, the FP may be eliminated to the SP, so perhaps that
5609 needs special handling also.
5610
5611 ??? Look at how the mips16 port solves this problem. It probably uses
5612 better ways to solve some of these problems.
5613
5614 Although it is not incorrect, we don't accept QImode and HImode
5615 addresses based on the frame pointer or arg pointer until the
5616 reload pass starts. This is so that eliminating such addresses
5617 into stack based ones won't produce impossible code. */
5618 int
5619 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5620 {
5621 /* ??? Not clear if this is right. Experiment. */
5622 if (GET_MODE_SIZE (mode) < 4
5623 && !(reload_in_progress || reload_completed)
5624 && (reg_mentioned_p (frame_pointer_rtx, x)
5625 || reg_mentioned_p (arg_pointer_rtx, x)
5626 || reg_mentioned_p (virtual_incoming_args_rtx, x)
5627 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
5628 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
5629 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
5630 return 0;
5631
5632 /* Accept any base register. SP only in SImode or larger. */
5633 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
5634 return 1;
5635
5636 /* This is PC relative data before arm_reorg runs. */
5637 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
5638 && GET_CODE (x) == SYMBOL_REF
5639 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
5640 return 1;
5641
5642 /* This is PC relative data after arm_reorg runs. */
5643 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
5644 && reload_completed
5645 && (GET_CODE (x) == LABEL_REF
5646 || (GET_CODE (x) == CONST
5647 && GET_CODE (XEXP (x, 0)) == PLUS
5648 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5649 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5650 return 1;
5651
5652 /* Post-inc indexing only supported for SImode and larger. */
5653 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
5654 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
5655 return 1;
5656
5657 else if (GET_CODE (x) == PLUS)
5658 {
5659 /* REG+REG address can be any two index registers. */
5660 /* We disallow FRAME+REG addressing since we know that FRAME
5661 will be replaced with STACK, and SP relative addressing only
5662 permits SP+OFFSET. */
5663 if (GET_MODE_SIZE (mode) <= 4
5664 && XEXP (x, 0) != frame_pointer_rtx
5665 && XEXP (x, 1) != frame_pointer_rtx
5666 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5667 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5668 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5669 return 1;
5670
5671 /* REG+const has 5-7 bit offset for non-SP registers. */
5672 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5673 || XEXP (x, 0) == arg_pointer_rtx)
5674 && GET_CODE (XEXP (x, 1)) == CONST_INT
5675 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
5676 return 1;
5677
5678 /* REG+const has a 10-bit offset for SP, but only SImode and
5679 larger are supported. */
5680 /* ??? Should probably check for DI/DFmode overflow here
5681 just like GO_IF_LEGITIMATE_OFFSET does. */
5682 else if (GET_CODE (XEXP (x, 0)) == REG
5683 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
5684 && GET_MODE_SIZE (mode) >= 4
5685 && GET_CODE (XEXP (x, 1)) == CONST_INT
5686 && INTVAL (XEXP (x, 1)) >= 0
5687 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
5688 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5689 return 1;
5690
5691 else if (GET_CODE (XEXP (x, 0)) == REG
5692 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5693 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5694 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5695 && REGNO (XEXP (x, 0))
5696 <= LAST_VIRTUAL_POINTER_REGISTER))
5697 && GET_MODE_SIZE (mode) >= 4
5698 && GET_CODE (XEXP (x, 1)) == CONST_INT
5699 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5700 return 1;
5701 }
5702
5703 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5704 && GET_MODE_SIZE (mode) == 4
5705 && GET_CODE (x) == SYMBOL_REF
5706 && CONSTANT_POOL_ADDRESS_P (x)
5707 && ! (flag_pic
5708 && symbol_mentioned_p (get_pool_constant (x))
5709 && ! pcrel_constant_p (get_pool_constant (x))))
5710 return 1;
5711
5712 return 0;
5713 }
5714
5715 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5716 instruction of mode MODE. */
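/* Concretely, the checks below work out to: byte accesses allow offsets
   0..31, halfword accesses 0..62 in steps of 2, and word or larger accesses
   0..(128 - size) in steps of 4 (e.g. 0..124 for SImode).  */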
5717 int
5718 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
5719 {
5720 switch (GET_MODE_SIZE (mode))
5721 {
5722 case 1:
5723 return val >= 0 && val < 32;
5724
5725 case 2:
5726 return val >= 0 && val < 64 && (val & 1) == 0;
5727
5728 default:
5729 return (val >= 0
5730 && (val + GET_MODE_SIZE (mode)) <= 128
5731 && (val & 3) == 0);
5732 }
5733 }
5734
5735 bool
5736 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
5737 {
5738 if (TARGET_ARM)
5739 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
5740 else if (TARGET_THUMB2)
5741 return thumb2_legitimate_address_p (mode, x, strict_p);
5742 else /* if (TARGET_THUMB1) */
5743 return thumb1_legitimate_address_p (mode, x, strict_p);
5744 }
5745
5746 /* Build the SYMBOL_REF for __tls_get_addr. */
5747
5748 static GTY(()) rtx tls_get_addr_libfunc;
5749
5750 static rtx
5751 get_tls_get_addr (void)
5752 {
5753 if (!tls_get_addr_libfunc)
5754 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
5755 return tls_get_addr_libfunc;
5756 }
5757
5758 static rtx
5759 arm_load_tp (rtx target)
5760 {
5761 if (!target)
5762 target = gen_reg_rtx (SImode);
5763
5764 if (TARGET_HARD_TP)
5765 {
5766 /* Can return in any reg. */
5767 emit_insn (gen_load_tp_hard (target));
5768 }
5769 else
5770 {
5771 /* Always returned in r0. Immediately copy the result into a pseudo,
5772 otherwise other uses of r0 (e.g. setting up function arguments) may
5773 clobber the value. */
5774
5775 rtx tmp;
5776
5777 emit_insn (gen_load_tp_soft ());
5778
5779 tmp = gen_rtx_REG (SImode, 0);
5780 emit_move_insn (target, tmp);
5781 }
5782 return target;
5783 }
5784
5785 static rtx
5786 load_tls_operand (rtx x, rtx reg)
5787 {
5788 rtx tmp;
5789
5790 if (reg == NULL_RTX)
5791 reg = gen_reg_rtx (SImode);
5792
5793 tmp = gen_rtx_CONST (SImode, x);
5794
5795 emit_move_insn (reg, tmp);
5796
5797 return reg;
5798 }
5799
5800 static rtx
5801 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
5802 {
5803 rtx insns, label, labelno, sum;
5804
5805 start_sequence ();
5806
5807 labelno = GEN_INT (pic_labelno++);
5808 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5809 label = gen_rtx_CONST (VOIDmode, label);
5810
5811 sum = gen_rtx_UNSPEC (Pmode,
5812 gen_rtvec (4, x, GEN_INT (reloc), label,
5813 GEN_INT (TARGET_ARM ? 8 : 4)),
5814 UNSPEC_TLS);
5815 reg = load_tls_operand (sum, reg);
5816
5817 if (TARGET_ARM)
5818 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5819 else if (TARGET_THUMB2)
5820 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5821 else /* TARGET_THUMB1 */
5822 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5823
5824 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5825 Pmode, 1, reg, Pmode);
5826
5827 insns = get_insns ();
5828 end_sequence ();
5829
5830 return insns;
5831 }
5832
5833 rtx
5834 legitimize_tls_address (rtx x, rtx reg)
5835 {
5836 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
5837 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
5838
5839 switch (model)
5840 {
5841 case TLS_MODEL_GLOBAL_DYNAMIC:
5842 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
5843 dest = gen_reg_rtx (Pmode);
5844 emit_libcall_block (insns, dest, ret, x);
5845 return dest;
5846
5847 case TLS_MODEL_LOCAL_DYNAMIC:
5848 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
5849
5850 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5851 share the LDM result with other LD model accesses. */
5852 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
5853 UNSPEC_TLS);
5854 dest = gen_reg_rtx (Pmode);
5855 emit_libcall_block (insns, dest, ret, eqv);
5856
5857 /* Load the addend. */
5858 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
5859 UNSPEC_TLS);
5860 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
5861 return gen_rtx_PLUS (Pmode, dest, addend);
5862
5863 case TLS_MODEL_INITIAL_EXEC:
5864 labelno = GEN_INT (pic_labelno++);
5865 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5866 label = gen_rtx_CONST (VOIDmode, label);
5867 sum = gen_rtx_UNSPEC (Pmode,
5868 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
5869 GEN_INT (TARGET_ARM ? 8 : 4)),
5870 UNSPEC_TLS);
5871 reg = load_tls_operand (sum, reg);
5872
5873 if (TARGET_ARM)
5874 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5875 else if (TARGET_THUMB2)
5876 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5877 else
5878 {
5879 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5880 emit_move_insn (reg, gen_const_mem (SImode, reg));
5881 }
5882
5883 tp = arm_load_tp (NULL_RTX);
5884
5885 return gen_rtx_PLUS (Pmode, tp, reg);
5886
5887 case TLS_MODEL_LOCAL_EXEC:
5888 tp = arm_load_tp (NULL_RTX);
5889
5890 reg = gen_rtx_UNSPEC (Pmode,
5891 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
5892 UNSPEC_TLS);
5893 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
5894
5895 return gen_rtx_PLUS (Pmode, tp, reg);
5896
5897 default:
5898 abort ();
5899 }
5900 }
5901
5902 /* Try machine-dependent ways of modifying an illegitimate address
5903 to be legitimate. If we find one, return the new, valid address. */
5904 rtx
5905 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
5906 {
5907 if (!TARGET_ARM)
5908 {
5909 /* TODO: legitimize_address for Thumb2. */
5910 if (TARGET_THUMB2)
5911 return x;
5912 return thumb_legitimize_address (x, orig_x, mode);
5913 }
5914
5915 if (arm_tls_symbol_p (x))
5916 return legitimize_tls_address (x, NULL_RTX);
5917
5918 if (GET_CODE (x) == PLUS)
5919 {
5920 rtx xop0 = XEXP (x, 0);
5921 rtx xop1 = XEXP (x, 1);
5922
5923 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
5924 xop0 = force_reg (SImode, xop0);
5925
5926 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
5927 xop1 = force_reg (SImode, xop1);
5928
5929 if (ARM_BASE_REGISTER_RTX_P (xop0)
5930 && GET_CODE (xop1) == CONST_INT)
5931 {
5932 HOST_WIDE_INT n, low_n;
5933 rtx base_reg, val;
5934 n = INTVAL (xop1);
5935
5936 /* VFP addressing modes actually allow greater offsets, but for
5937 now we just stick with the lowest common denominator. */
5938 if (mode == DImode
5939 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
5940 {
5941 low_n = n & 0x0f;
5942 n &= ~0x0f;
5943 if (low_n > 4)
5944 {
5945 n += 16;
5946 low_n -= 16;
5947 }
5948 }
5949 else
5950 {
5951 low_n = ((mode) == TImode ? 0
5952 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
5953 n -= low_n;
5954 }
5955
5956 base_reg = gen_reg_rtx (SImode);
5957 val = force_operand (plus_constant (xop0, n), NULL_RTX);
5958 emit_move_insn (base_reg, val);
5959 x = plus_constant (base_reg, low_n);
5960 }
5961 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5962 x = gen_rtx_PLUS (SImode, xop0, xop1);
5963 }
5964
5965 /* XXX We don't allow MINUS any more -- see comment in
5966 arm_legitimate_address_outer_p (). */
5967 else if (GET_CODE (x) == MINUS)
5968 {
5969 rtx xop0 = XEXP (x, 0);
5970 rtx xop1 = XEXP (x, 1);
5971
5972 if (CONSTANT_P (xop0))
5973 xop0 = force_reg (SImode, xop0);
5974
5975 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
5976 xop1 = force_reg (SImode, xop1);
5977
5978 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
5979 x = gen_rtx_MINUS (SImode, xop0, xop1);
5980 }
5981
5982 /* Make sure to take full advantage of the pre-indexed addressing mode
5983 with absolute addresses, which often allows the base register to
5984 be factorized for multiple adjacent memory references, and might
5985 even allow the mini-pool to be avoided entirely. */
5986 else if (GET_CODE (x) == CONST_INT && optimize > 0)
5987 {
5988 unsigned int bits;
5989 HOST_WIDE_INT mask, base, index;
5990 rtx base_reg;
5991
5992 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
5993 use an 8-bit index. So let's use a 12-bit index for SImode only and
5994 hope that arm_gen_constant will enable ldrb to use more bits. */
5995 bits = (mode == SImode) ? 12 : 8;
5996 mask = (1 << bits) - 1;
5997 base = INTVAL (x) & ~mask;
5998 index = INTVAL (x) & mask;
5999 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6000 {
6001 /* It'll most probably be more efficient to generate the base
6002 with more bits set and use a negative index instead. */
6003 base |= mask;
6004 index -= mask;
6005 }
6006 base_reg = force_reg (SImode, GEN_INT (base));
6007 x = plus_constant (base_reg, index);
6008 }
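/* For example, with mode == SImode and x == (const_int 4100): bits = 12,
   mask = 0xfff, base = 4096 and index = 4.  Since base has a single bit
   set, the negative-index adjustment is skipped, 4096 is forced into a
   register and the address becomes that register plus 4.  */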
6009
6010 if (flag_pic)
6011 {
6012 /* We need to find and carefully transform any SYMBOL and LABEL
6013 references; so go back to the original address expression. */
6014 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6015
6016 if (new_x != orig_x)
6017 x = new_x;
6018 }
6019
6020 return x;
6021 }
6022
6023
6024 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6025 to be legitimate. If we find one, return the new, valid address. */
6026 rtx
6027 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6028 {
6029 if (arm_tls_symbol_p (x))
6030 return legitimize_tls_address (x, NULL_RTX);
6031
6032 if (GET_CODE (x) == PLUS
6033 && GET_CODE (XEXP (x, 1)) == CONST_INT
6034 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6035 || INTVAL (XEXP (x, 1)) < 0))
6036 {
6037 rtx xop0 = XEXP (x, 0);
6038 rtx xop1 = XEXP (x, 1);
6039 HOST_WIDE_INT offset = INTVAL (xop1);
6040
6041 /* Try and fold the offset into a biasing of the base register and
6042 then offsetting that. Don't do this when optimizing for space
6043 since it can cause too many CSEs. */
6044 if (optimize_size && offset >= 0
6045 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6046 {
6047 HOST_WIDE_INT delta;
6048
6049 if (offset >= 256)
6050 delta = offset - (256 - GET_MODE_SIZE (mode));
6051 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6052 delta = 31 * GET_MODE_SIZE (mode);
6053 else
6054 delta = offset & (~31 * GET_MODE_SIZE (mode));
6055
6056 xop0 = force_operand (plus_constant (xop0, offset - delta),
6057 NULL_RTX);
6058 x = plus_constant (xop0, delta);
6059 }
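/* For example, with mode == SImode (size 4) and offset == 300: offset is
   >= 256, so delta = 300 - 252 = 48 and the address is rebuilt as
   (xop0 + 252) + 48, keeping both constants within reach of a Thumb add
   immediate and a word-load offset.  */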
6060 else if (offset < 0 && offset > -256)
6061 /* Small negative offsets are best done with a subtract before the
6062 dereference; forcing these into a register normally takes two
6063 instructions. */
6064 x = force_operand (x, NULL_RTX);
6065 else
6066 {
6067 /* For the remaining cases, force the constant into a register. */
6068 xop1 = force_reg (SImode, xop1);
6069 x = gen_rtx_PLUS (SImode, xop0, xop1);
6070 }
6071 }
6072 else if (GET_CODE (x) == PLUS
6073 && s_register_operand (XEXP (x, 1), SImode)
6074 && !s_register_operand (XEXP (x, 0), SImode))
6075 {
6076 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6077
6078 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6079 }
6080
6081 if (flag_pic)
6082 {
6083 /* We need to find and carefully transform any SYMBOL and LABEL
6084 references; so go back to the original address expression. */
6085 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6086
6087 if (new_x != orig_x)
6088 x = new_x;
6089 }
6090
6091 return x;
6092 }
6093
6094 bool
6095 arm_legitimize_reload_address (rtx *p,
6096 enum machine_mode mode,
6097 int opnum, int type,
6098 int ind_levels ATTRIBUTE_UNUSED)
6099 {
6100 if (GET_CODE (*p) == PLUS
6101 && GET_CODE (XEXP (*p, 0)) == REG
6102 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6103 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6104 {
6105 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6106 HOST_WIDE_INT low, high;
6107
6108 /* Detect coprocessor load/stores. */
6109 bool coproc_p = ((TARGET_HARD_FLOAT
6110 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6111 && (mode == SFmode || mode == DFmode
6112 || (mode == DImode && TARGET_MAVERICK)))
6113 || (TARGET_REALLY_IWMMXT
6114 && VALID_IWMMXT_REG_MODE (mode))
6115 || (TARGET_NEON
6116 && (VALID_NEON_DREG_MODE (mode)
6117 || VALID_NEON_QREG_MODE (mode))));
6118
6119 /* In some cases, bail out when the lower two bits of the offset are nonzero (unaligned). */
6120 if ((val & 0x3) != 0
6121 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6122 && (coproc_p
6123 /* For DI, and DF under soft-float: */
6124 || ((mode == DImode || mode == DFmode)
6125 /* Without ldrd, we use stm/ldm, which does not
6126 fare well with unaligned bits. */
6127 && (! TARGET_LDRD
6128 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6129 || TARGET_THUMB2))))
6130 return false;
6131
6132 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6133 where the (reg+high) part gets turned into a reload add insn,
6134 we try to decompose the index into high/low values, which can often
6135 also lead to better reload CSE.
6136 For example:
6137 ldr r0, [r2, #4100] // Offset too large
6138 ldr r1, [r2, #4104] // Offset too large
6139
6140 is best reloaded as:
6141 add t1, r2, #4096
6142 ldr r0, [t1, #4]
6143 add t2, r2, #4096
6144 ldr r1, [t2, #8]
6145
6146 which post-reload CSE can simplify in most cases to eliminate the
6147 second add instruction:
6148 add t1, r2, #4096
6149 ldr r0, [t1, #4]
6150 ldr r1, [t1, #8]
6151
6152 The idea here is that we want to split out the bits of the constant
6153 as a mask, rather than by subtracting the maximum offset that the
6154 respective type of load/store can handle.
6155
6156 We can still make use of a negative low offset even if the overall
6157 offset is positive; sometimes this may lead to an immediate that can
6158 be constructed with fewer instructions.
6159 For example:
6160 ldr r0, [r2, #0x3FFFFC]
6161
6162 This is best reloaded as:
6163 add t1, r2, #0x400000
6164 ldr r0, [t1, #-4]
6165
6166 The trick for spotting this for a load insn with N bits of offset
6167 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6168 negative offset that is going to make bit N and all the bits below
6169 it become zero in the remainder part.
6170
6171 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6172 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6173 used in most cases of ARM load/store instructions. */
6174
6175 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6176 (((VAL) & ((1 << (N)) - 1)) \
6177 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6178 : 0)
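/* Worked examples: SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12) sign-extends the
   low 13 bits and yields -4, so high becomes 0x400000 as in the comment
   above; SIGN_MAG_LOW_ADDR_BITS (4100, 12) yields -4092, so high becomes
   8192.  A value whose low N bits are already zero yields 0.  */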
6179
6180 if (coproc_p)
6181 {
6182 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6183
6184 /* NEON quad-word load/stores are made of two double-word accesses,
6185 so the valid index range is reduced by 8. Treat as 9-bit range if
6186 we go over it. */
6187 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6188 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6189 }
6190 else if (GET_MODE_SIZE (mode) == 8)
6191 {
6192 if (TARGET_LDRD)
6193 low = (TARGET_THUMB2
6194 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6195 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6196 else
6197 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6198 to access doublewords. The supported load/store offsets are
6199 -8, -4, and 4, which we try to produce here. */
6200 low = ((val & 0xf) ^ 0x8) - 0x8;
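/* The expression above sign-extends the low nibble from bit 3: for
   word-aligned offsets it produces 0, 4, -8 or -4 (e.g. val = 12 gives
   (12 ^ 8) - 8 = -4).  */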
6201 }
6202 else if (GET_MODE_SIZE (mode) < 8)
6203 {
6204 /* NEON element load/stores do not have an offset. */
6205 if (TARGET_NEON_FP16 && mode == HFmode)
6206 return false;
6207
6208 if (TARGET_THUMB2)
6209 {
6210 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6211 Try the wider 12-bit range first, and re-try if the result
6212 is out of range. */
6213 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6214 if (low < -255)
6215 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
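/* E.g. val = 4356 (0x1104): the 12-bit attempt yields -3836, which is
   below -255, so the 8-bit retry yields low = -252 and hence high = 4608.  */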
6216 }
6217 else
6218 {
6219 if (mode == HImode || mode == HFmode)
6220 {
6221 if (arm_arch4)
6222 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6223 else
6224 {
6225 /* The storehi/movhi_bytes fallbacks can use only
6226 [-4094,+4094] of the full ldrb/strb index range. */
6227 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6228 if (low == 4095 || low == -4095)
6229 return false;
6230 }
6231 }
6232 else
6233 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6234 }
6235 }
6236 else
6237 return false;
6238
6239 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6240 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6241 - (unsigned HOST_WIDE_INT) 0x80000000);
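/* The xor/subtract pair above sign-extends the masked 32-bit value of
   (val - low) back into a HOST_WIDE_INT, so any overflow shows up as a
   failure of the high + low == val check below.  */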
6242 /* Check for overflow or zero */
6243 if (low == 0 || high == 0 || (high + low != val))
6244 return false;
6245
6246 /* Reload the high part into a base reg; leave the low part
6247 in the mem. */
6248 *p = gen_rtx_PLUS (GET_MODE (*p),
6249 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6250 GEN_INT (high)),
6251 GEN_INT (low));
6252 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6253 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6254 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6255 return true;
6256 }
6257
6258 return false;
6259 }
6260
6261 rtx
6262 thumb_legitimize_reload_address (rtx *x_p,
6263 enum machine_mode mode,
6264 int opnum, int type,
6265 int ind_levels ATTRIBUTE_UNUSED)
6266 {
6267 rtx x = *x_p;
6268
6269 if (GET_CODE (x) == PLUS
6270 && GET_MODE_SIZE (mode) < 4
6271 && REG_P (XEXP (x, 0))
6272 && XEXP (x, 0) == stack_pointer_rtx
6273 && GET_CODE (XEXP (x, 1)) == CONST_INT
6274 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6275 {
6276 rtx orig_x = x;
6277
6278 x = copy_rtx (x);
6279 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6280 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6281 return x;
6282 }
6283
6284 /* If both registers are hi-regs, then it's better to reload the
6285 entire expression rather than each register individually. That
6286 only requires one reload register rather than two. */
6287 if (GET_CODE (x) == PLUS
6288 && REG_P (XEXP (x, 0))
6289 && REG_P (XEXP (x, 1))
6290 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6291 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6292 {
6293 rtx orig_x = x;
6294
6295 x = copy_rtx (x);
6296 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6297 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6298 return x;
6299 }
6300
6301 return NULL;
6302 }
6303
6304 /* Test for various thread-local symbols. */
6305
6306 /* Return TRUE if X is a thread-local symbol. */
6307
6308 static bool
6309 arm_tls_symbol_p (rtx x)
6310 {
6311 if (! TARGET_HAVE_TLS)
6312 return false;
6313
6314 if (GET_CODE (x) != SYMBOL_REF)
6315 return false;
6316
6317 return SYMBOL_REF_TLS_MODEL (x) != 0;
6318 }
6319
6320 /* Helper for arm_tls_referenced_p. */
6321
6322 static int
6323 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6324 {
6325 if (GET_CODE (*x) == SYMBOL_REF)
6326 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6327
6328 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6329 TLS offsets, not real symbol references. */
6330 if (GET_CODE (*x) == UNSPEC
6331 && XINT (*x, 1) == UNSPEC_TLS)
6332 return -1;
6333
6334 return 0;
6335 }
6336
6337 /* Return TRUE if X contains any TLS symbol references. */
6338
6339 bool
6340 arm_tls_referenced_p (rtx x)
6341 {
6342 if (! TARGET_HAVE_TLS)
6343 return false;
6344
6345 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6346 }
6347
6348 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6349
6350 On the ARM, allow any integer (invalid ones are removed later by insn
6351 patterns), nice doubles and symbol_refs which refer to the function's
6352 constant pool XXX.
6353
6354 When generating pic allow anything. */
6355
6356 static bool
6357 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6358 {
6359 /* At present, we have no support for Neon structure constants, so forbid
6360 them here. It might be possible to handle simple cases like 0 and -1
6361 in future. */
6362 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6363 return false;
6364
6365 return flag_pic || !label_mentioned_p (x);
6366 }
6367
6368 static bool
6369 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6370 {
6371 return (GET_CODE (x) == CONST_INT
6372 || GET_CODE (x) == CONST_DOUBLE
6373 || CONSTANT_ADDRESS_P (x)
6374 || flag_pic);
6375 }
6376
6377 static bool
6378 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6379 {
6380 return (!arm_cannot_force_const_mem (mode, x)
6381 && (TARGET_32BIT
6382 ? arm_legitimate_constant_p_1 (mode, x)
6383 : thumb_legitimate_constant_p (mode, x)));
6384 }
6385
6386 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6387
6388 static bool
6389 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6390 {
6391 rtx base, offset;
6392
6393 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6394 {
6395 split_const (x, &base, &offset);
6396 if (GET_CODE (base) == SYMBOL_REF
6397 && !offset_within_block_p (base, INTVAL (offset)))
6398 return true;
6399 }
6400 return arm_tls_referenced_p (x);
6401 }
6402 \f
6403 #define REG_OR_SUBREG_REG(X) \
6404 (GET_CODE (X) == REG \
6405 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6406
6407 #define REG_OR_SUBREG_RTX(X) \
6408 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6409
6410 static inline int
6411 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6412 {
6413 enum machine_mode mode = GET_MODE (x);
6414 int total;
6415
6416 switch (code)
6417 {
6418 case ASHIFT:
6419 case ASHIFTRT:
6420 case LSHIFTRT:
6421 case ROTATERT:
6422 case PLUS:
6423 case MINUS:
6424 case COMPARE:
6425 case NEG:
6426 case NOT:
6427 return COSTS_N_INSNS (1);
6428
6429 case MULT:
6430 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6431 {
6432 int cycles = 0;
6433 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6434
6435 while (i)
6436 {
6437 i >>= 2;
6438 cycles++;
6439 }
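/* The loop above consumes two bits of the constant per iteration to
   approximate an early-terminating multiply; e.g. a multiplier of 100
   gives cycles == 4, for a cost of COSTS_N_INSNS (2) + 4.  */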
6440 return COSTS_N_INSNS (2) + cycles;
6441 }
6442 return COSTS_N_INSNS (1) + 16;
6443
6444 case SET:
6445 return (COSTS_N_INSNS (1)
6446 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6447 + GET_CODE (SET_DEST (x)) == MEM));
6448
6449 case CONST_INT:
6450 if (outer == SET)
6451 {
6452 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6453 return 0;
6454 if (thumb_shiftable_const (INTVAL (x)))
6455 return COSTS_N_INSNS (2);
6456 return COSTS_N_INSNS (3);
6457 }
6458 else if ((outer == PLUS || outer == COMPARE)
6459 && INTVAL (x) < 256 && INTVAL (x) > -256)
6460 return 0;
6461 else if ((outer == IOR || outer == XOR || outer == AND)
6462 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6463 return COSTS_N_INSNS (1);
6464 else if (outer == AND)
6465 {
6466 int i;
6467 /* This duplicates the tests in the andsi3 expander. */
6468 for (i = 9; i <= 31; i++)
6469 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6470 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6471 return COSTS_N_INSNS (2);
6472 }
6473 else if (outer == ASHIFT || outer == ASHIFTRT
6474 || outer == LSHIFTRT)
6475 return 0;
6476 return COSTS_N_INSNS (2);
6477
6478 case CONST:
6479 case CONST_DOUBLE:
6480 case LABEL_REF:
6481 case SYMBOL_REF:
6482 return COSTS_N_INSNS (3);
6483
6484 case UDIV:
6485 case UMOD:
6486 case DIV:
6487 case MOD:
6488 return 100;
6489
6490 case TRUNCATE:
6491 return 99;
6492
6493 case AND:
6494 case XOR:
6495 case IOR:
6496 /* XXX guess. */
6497 return 8;
6498
6499 case MEM:
6500 /* XXX another guess. */
6501 /* Memory costs quite a lot for the first word, but subsequent words
6502 load at the equivalent of a single insn each. */
6503 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
6504 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
6505 ? 4 : 0));
6506
6507 case IF_THEN_ELSE:
6508 /* XXX a guess. */
6509 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6510 return 14;
6511 return 2;
6512
6513 case SIGN_EXTEND:
6514 case ZERO_EXTEND:
6515 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6516 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6517
6518 if (mode == SImode)
6519 return total;
6520
6521 if (arm_arch6)
6522 return total + COSTS_N_INSNS (1);
6523
6524 /* Assume a two-shift sequence. Increase the cost slightly so
6525 we prefer actual shifts over an extend operation. */
6526 return total + 1 + COSTS_N_INSNS (2);
6527
6528 default:
6529 return 99;
6530 }
6531 }
6532
6533 static inline bool
6534 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
6535 {
6536 enum machine_mode mode = GET_MODE (x);
6537 enum rtx_code subcode;
6538 rtx operand;
6539 enum rtx_code code = GET_CODE (x);
6540 *total = 0;
6541
6542 switch (code)
6543 {
6544 case MEM:
6545 /* Memory costs quite a lot for the first word, but subsequent words
6546 load at the equivalent of a single insn each. */
6547 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
6548 return true;
6549
6550 case DIV:
6551 case MOD:
6552 case UDIV:
6553 case UMOD:
6554 if (TARGET_HARD_FLOAT && mode == SFmode)
6555 *total = COSTS_N_INSNS (2);
6556 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
6557 *total = COSTS_N_INSNS (4);
6558 else
6559 *total = COSTS_N_INSNS (20);
6560 return false;
6561
6562 case ROTATE:
6563 if (GET_CODE (XEXP (x, 1)) == REG)
6564 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
6565 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6566 *total = rtx_cost (XEXP (x, 1), code, speed);
6567
6568 /* Fall through */
6569 case ROTATERT:
6570 if (mode != SImode)
6571 {
6572 *total += COSTS_N_INSNS (4);
6573 return true;
6574 }
6575
6576 /* Fall through */
6577 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
6578 *total += rtx_cost (XEXP (x, 0), code, speed);
6579 if (mode == DImode)
6580 {
6581 *total += COSTS_N_INSNS (3);
6582 return true;
6583 }
6584
6585 *total += COSTS_N_INSNS (1);
6586 /* Increase the cost of complex shifts because they aren't any faster,
6587 and reduce dual issue opportunities. */
6588 if (arm_tune_cortex_a9
6589 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
6590 ++*total;
6591
6592 return true;
6593
6594 case MINUS:
6595 if (mode == DImode)
6596 {
6597 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6598 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6599 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6600 {
6601 *total += rtx_cost (XEXP (x, 1), code, speed);
6602 return true;
6603 }
6604
6605 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6606 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
6607 {
6608 *total += rtx_cost (XEXP (x, 0), code, speed);
6609 return true;
6610 }
6611
6612 return false;
6613 }
6614
6615 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6616 {
6617 if (TARGET_HARD_FLOAT
6618 && (mode == SFmode
6619 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6620 {
6621 *total = COSTS_N_INSNS (1);
6622 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
6623 && arm_const_double_rtx (XEXP (x, 0)))
6624 {
6625 *total += rtx_cost (XEXP (x, 1), code, speed);
6626 return true;
6627 }
6628
6629 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6630 && arm_const_double_rtx (XEXP (x, 1)))
6631 {
6632 *total += rtx_cost (XEXP (x, 0), code, speed);
6633 return true;
6634 }
6635
6636 return false;
6637 }
6638 *total = COSTS_N_INSNS (20);
6639 return false;
6640 }
6641
6642 *total = COSTS_N_INSNS (1);
6643 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6644 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6645 {
6646 *total += rtx_cost (XEXP (x, 1), code, speed);
6647 return true;
6648 }
6649
6650 subcode = GET_CODE (XEXP (x, 1));
6651 if (subcode == ASHIFT || subcode == ASHIFTRT
6652 || subcode == LSHIFTRT
6653 || subcode == ROTATE || subcode == ROTATERT)
6654 {
6655 *total += rtx_cost (XEXP (x, 0), code, speed);
6656 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6657 return true;
6658 }
6659
6660 /* A shift as a part of RSB costs no more than RSB itself. */
6661 if (GET_CODE (XEXP (x, 0)) == MULT
6662 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6663 {
6664 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
6665 *total += rtx_cost (XEXP (x, 1), code, speed);
6666 return true;
6667 }
6668
6669 if (subcode == MULT
6670 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
6671 {
6672 *total += rtx_cost (XEXP (x, 0), code, speed);
6673 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
6674 return true;
6675 }
6676
6677 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
6678 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
6679 {
6680 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6681 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
6682 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
6683 *total += COSTS_N_INSNS (1);
6684
6685 return true;
6686 }
6687
6688 /* Fall through */
6689
6690 case PLUS:
6691 if (code == PLUS && arm_arch6 && mode == SImode
6692 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
6693 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
6694 {
6695 *total = COSTS_N_INSNS (1);
6696 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
6697 speed);
6698 *total += rtx_cost (XEXP (x, 1), code, speed);
6699 return true;
6700 }
6701
6702 /* MLA: All arguments must be registers. We filter out
6703 multiplication by a power of two, so that we fall through to
6704 the code below. */
6705 if (GET_CODE (XEXP (x, 0)) == MULT
6706 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6707 {
6708 /* The cost comes from the cost of the multiply. */
6709 return false;
6710 }
6711
6712 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6713 {
6714 if (TARGET_HARD_FLOAT
6715 && (mode == SFmode
6716 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6717 {
6718 *total = COSTS_N_INSNS (1);
6719 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
6720 && arm_const_double_rtx (XEXP (x, 1)))
6721 {
6722 *total += rtx_cost (XEXP (x, 0), code, speed);
6723 return true;
6724 }
6725
6726 return false;
6727 }
6728
6729 *total = COSTS_N_INSNS (20);
6730 return false;
6731 }
6732
6733 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
6734 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
6735 {
6736 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
6737 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
6738 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
6739 *total += COSTS_N_INSNS (1);
6740 return true;
6741 }
6742
6743 /* Fall through */
6744
6745 case AND: case XOR: case IOR:
6746
6747 /* Normally the frame registers will be split into reg+const during
6748 reload, so it is a bad idea to combine them with other instructions,
6749 since then they might not be moved outside of loops. As a compromise
6750 we allow integration with ops that have a constant as their second
6751 operand. */
6752 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6753 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6754 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6755 *total = COSTS_N_INSNS (1);
6756
6757 if (mode == DImode)
6758 {
6759 *total += COSTS_N_INSNS (2);
6760 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6761 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6762 {
6763 *total += rtx_cost (XEXP (x, 0), code, speed);
6764 return true;
6765 }
6766
6767 return false;
6768 }
6769
6770 *total += COSTS_N_INSNS (1);
6771 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6772 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6773 {
6774 *total += rtx_cost (XEXP (x, 0), code, speed);
6775 return true;
6776 }
6777 subcode = GET_CODE (XEXP (x, 0));
6778 if (subcode == ASHIFT || subcode == ASHIFTRT
6779 || subcode == LSHIFTRT
6780 || subcode == ROTATE || subcode == ROTATERT)
6781 {
6782 *total += rtx_cost (XEXP (x, 1), code, speed);
6783 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6784 return true;
6785 }
6786
6787 if (subcode == MULT
6788 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6789 {
6790 *total += rtx_cost (XEXP (x, 1), code, speed);
6791 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6792 return true;
6793 }
6794
6795 if (subcode == UMIN || subcode == UMAX
6796 || subcode == SMIN || subcode == SMAX)
6797 {
6798 *total = COSTS_N_INSNS (3);
6799 return true;
6800 }
6801
6802 return false;
6803
6804 case MULT:
6805 /* This should have been handled by the CPU specific routines. */
6806 gcc_unreachable ();
6807
6808 case TRUNCATE:
6809 if (arm_arch3m && mode == SImode
6810 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6811 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6812 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
6813 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
6814 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6815 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
6816 {
6817 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
6818 return true;
6819 }
6820 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6821 return false;
6822
6823 case NEG:
6824 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6825 {
6826 if (TARGET_HARD_FLOAT
6827 && (mode == SFmode
6828 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6829 {
6830 *total = COSTS_N_INSNS (1);
6831 return false;
6832 }
6833 *total = COSTS_N_INSNS (2);
6834 return false;
6835 }
6836
6837 /* Fall through */
6838 case NOT:
6839 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
6840 if (mode == SImode && code == NOT)
6841 {
6842 subcode = GET_CODE (XEXP (x, 0));
6843 if (subcode == ASHIFT || subcode == ASHIFTRT
6844 || subcode == LSHIFTRT
6845 || subcode == ROTATE || subcode == ROTATERT
6846 || (subcode == MULT
6847 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
6848 {
6849 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6850 /* Register shifts cost an extra cycle. */
6851 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
6852 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
6853 subcode, speed);
6854 return true;
6855 }
6856 }
6857
6858 return false;
6859
6860 case IF_THEN_ELSE:
6861 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6862 {
6863 *total = COSTS_N_INSNS (4);
6864 return true;
6865 }
6866
6867 operand = XEXP (x, 0);
6868
6869 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
6870 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
6871 && GET_CODE (XEXP (operand, 0)) == REG
6872 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
6873 *total += COSTS_N_INSNS (1);
6874 *total += (rtx_cost (XEXP (x, 1), code, speed)
6875 + rtx_cost (XEXP (x, 2), code, speed));
6876 return true;
6877
6878 case NE:
6879 if (mode == SImode && XEXP (x, 1) == const0_rtx)
6880 {
6881 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6882 return true;
6883 }
6884 goto scc_insn;
6885
6886 case GE:
6887 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6888 && mode == SImode && XEXP (x, 1) == const0_rtx)
6889 {
6890 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6891 return true;
6892 }
6893 goto scc_insn;
6894
6895 case LT:
6896 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
6897 && mode == SImode && XEXP (x, 1) == const0_rtx)
6898 {
6899 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
6900 return true;
6901 }
6902 goto scc_insn;
6903
6904 case EQ:
6905 case GT:
6906 case LE:
6907 case GEU:
6908 case LTU:
6909 case GTU:
6910 case LEU:
6911 case UNORDERED:
6912 case ORDERED:
6913 case UNEQ:
6914 case UNGE:
6915 case UNLT:
6916 case UNGT:
6917 case UNLE:
6918 scc_insn:
6919 /* SCC insns. If the comparison has already been performed, they
6920 cost 2 instructions. Otherwise they need an additional comparison
6921 before them. */
6922 *total = COSTS_N_INSNS (2);
6923 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6924 {
6925 return true;
6926 }
6927
6928 /* Fall through */
6929 case COMPARE:
6930 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
6931 {
6932 *total = 0;
6933 return true;
6934 }
6935
6936 *total += COSTS_N_INSNS (1);
6937 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6938 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
6939 {
6940 *total += rtx_cost (XEXP (x, 0), code, speed);
6941 return true;
6942 }
6943
6944 subcode = GET_CODE (XEXP (x, 0));
6945 if (subcode == ASHIFT || subcode == ASHIFTRT
6946 || subcode == LSHIFTRT
6947 || subcode == ROTATE || subcode == ROTATERT)
6948 {
6949 *total += rtx_cost (XEXP (x, 1), code, speed);
6950 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6951 return true;
6952 }
6953
6954 if (subcode == MULT
6955 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
6956 {
6957 *total += rtx_cost (XEXP (x, 1), code, speed);
6958 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
6959 return true;
6960 }
6961
6962 return false;
6963
6964 case UMIN:
6965 case UMAX:
6966 case SMIN:
6967 case SMAX:
6968 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
6969 if (GET_CODE (XEXP (x, 1)) != CONST_INT
6970 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
6971 *total += rtx_cost (XEXP (x, 1), code, speed);
6972 return true;
6973
6974 case ABS:
6975 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6976 {
6977 if (TARGET_HARD_FLOAT
6978 && (mode == SFmode
6979 || (mode == DFmode && !TARGET_VFP_SINGLE)))
6980 {
6981 *total = COSTS_N_INSNS (1);
6982 return false;
6983 }
6984 *total = COSTS_N_INSNS (20);
6985 return false;
6986 }
6987 *total = COSTS_N_INSNS (1);
6988 if (mode == DImode)
6989 *total += COSTS_N_INSNS (3);
6990 return false;
6991
6992 case SIGN_EXTEND:
6993 case ZERO_EXTEND:
6994 *total = 0;
6995 if (GET_MODE_CLASS (mode) == MODE_INT)
6996 {
6997 rtx op = XEXP (x, 0);
6998 enum machine_mode opmode = GET_MODE (op);
6999
7000 if (mode == DImode)
7001 *total += COSTS_N_INSNS (1);
7002
7003 if (opmode != SImode)
7004 {
7005 if (MEM_P (op))
7006 {
7007 /* If !arm_arch4, we use one of the extendhisi2_mem
7008 or movhi_bytes patterns for HImode. For a QImode
7009 sign extension, we first zero-extend from memory
7010 and then perform a shift sequence. */
7011 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7012 *total += COSTS_N_INSNS (2);
7013 }
7014 else if (arm_arch6)
7015 *total += COSTS_N_INSNS (1);
7016
7017 /* We don't have the necessary insn, so we need to perform some
7018 other operation. */
7019 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7020 /* An and with constant 255. */
7021 *total += COSTS_N_INSNS (1);
7022 else
7023 /* A shift sequence. Increase costs slightly to avoid
7024 combining two shifts into an extend operation. */
7025 *total += COSTS_N_INSNS (2) + 1;
7026 }
7027
7028 return false;
7029 }
7030
7031 switch (GET_MODE (XEXP (x, 0)))
7032 {
7033 case V8QImode:
7034 case V4HImode:
7035 case V2SImode:
7036 case V4QImode:
7037 case V2HImode:
7038 *total = COSTS_N_INSNS (1);
7039 return false;
7040
7041 default:
7042 gcc_unreachable ();
7043 }
7044 gcc_unreachable ();
7045
7046 case ZERO_EXTRACT:
7047 case SIGN_EXTRACT:
7048 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
7049 return true;
7050
7051 case CONST_INT:
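/* E.g. 0xff is a valid ARM immediate and 0xffffff00 can be formed with a
   single MVN, so both cost one insn; other constants are costed by asking
   arm_gen_constant how many insns it would take.  */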
7052 if (const_ok_for_arm (INTVAL (x))
7053 || const_ok_for_arm (~INTVAL (x)))
7054 *total = COSTS_N_INSNS (1);
7055 else
7056 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7057 INTVAL (x), NULL_RTX,
7058 NULL_RTX, 0, 0));
7059 return true;
7060
7061 case CONST:
7062 case LABEL_REF:
7063 case SYMBOL_REF:
7064 *total = COSTS_N_INSNS (3);
7065 return true;
7066
7067 case HIGH:
7068 *total = COSTS_N_INSNS (1);
7069 return true;
7070
7071 case LO_SUM:
7072 *total = COSTS_N_INSNS (1);
7073 *total += rtx_cost (XEXP (x, 0), code, speed);
7074 return true;
7075
7076 case CONST_DOUBLE:
7077 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7078 && (mode == SFmode || !TARGET_VFP_SINGLE))
7079 *total = COSTS_N_INSNS (1);
7080 else
7081 *total = COSTS_N_INSNS (4);
7082 return true;
7083
7084 default:
7085 *total = COSTS_N_INSNS (4);
7086 return false;
7087 }
7088 }
7089
7090 /* Estimates the size cost of thumb1 instructions.
7091 For now most of the code is copied from thumb1_rtx_costs. We need more
7092 fine-grained tuning when we have more related test cases. */
7093 static inline int
7094 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7095 {
7096 enum machine_mode mode = GET_MODE (x);
7097
7098 switch (code)
7099 {
7100 case ASHIFT:
7101 case ASHIFTRT:
7102 case LSHIFTRT:
7103 case ROTATERT:
7104 case PLUS:
7105 case MINUS:
7106 case COMPARE:
7107 case NEG:
7108 case NOT:
7109 return COSTS_N_INSNS (1);
7110
7111 case MULT:
7112 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7113 {
7114 /* The Thumb-1 mul instruction cannot operate on a constant; we must
7115 load it into a register first. */
7116 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7117 return COSTS_N_INSNS (1) + const_size;
7118 }
7119 return COSTS_N_INSNS (1);
7120
7121 case SET:
7122 return (COSTS_N_INSNS (1)
7123 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7124 + GET_CODE (SET_DEST (x)) == MEM));
7125
7126 case CONST_INT:
7127 if (outer == SET)
7128 {
7129 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7130 return COSTS_N_INSNS (1);
7131 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7132 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7133 return COSTS_N_INSNS (2);
7134 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7135 if (thumb_shiftable_const (INTVAL (x)))
7136 return COSTS_N_INSNS (2);
7137 return COSTS_N_INSNS (3);
7138 }
7139 else if ((outer == PLUS || outer == COMPARE)
7140 && INTVAL (x) < 256 && INTVAL (x) > -256)
7141 return 0;
7142 else if ((outer == IOR || outer == XOR || outer == AND)
7143 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7144 return COSTS_N_INSNS (1);
7145 else if (outer == AND)
7146 {
7147 int i;
7148 /* This duplicates the tests in the andsi3 expander. */
7149 for (i = 9; i <= 31; i++)
7150 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7151 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7152 return COSTS_N_INSNS (2);
7153 }
7154 else if (outer == ASHIFT || outer == ASHIFTRT
7155 || outer == LSHIFTRT)
7156 return 0;
7157 return COSTS_N_INSNS (2);
7158
7159 case CONST:
7160 case CONST_DOUBLE:
7161 case LABEL_REF:
7162 case SYMBOL_REF:
7163 return COSTS_N_INSNS (3);
7164
7165 case UDIV:
7166 case UMOD:
7167 case DIV:
7168 case MOD:
7169 return 100;
7170
7171 case TRUNCATE:
7172 return 99;
7173
7174 case AND:
7175 case XOR:
7176 case IOR:
7177 /* XXX guess. */
7178 return 8;
7179
7180 case MEM:
7181 /* XXX another guess. */
7182 /* Memory costs quite a lot for the first word, but subsequent words
7183 load at the equivalent of a single insn each. */
7184 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7185 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7186 ? 4 : 0));
7187
7188 case IF_THEN_ELSE:
7189 /* XXX a guess. */
7190 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7191 return 14;
7192 return 2;
7193
7194 case ZERO_EXTEND:
7195 /* XXX still guessing. */
7196 switch (GET_MODE (XEXP (x, 0)))
7197 {
7198 case QImode:
7199 return (1 + (mode == DImode ? 4 : 0)
7200 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7201
7202 case HImode:
7203 return (4 + (mode == DImode ? 4 : 0)
7204 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7205
7206 case SImode:
7207 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7208
7209 default:
7210 return 99;
7211 }
7212
7213 default:
7214 return 99;
7215 }
7216 }
7217
7218 /* RTX costs when optimizing for size. */
7219 static bool
7220 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7221 int *total)
7222 {
7223 enum machine_mode mode = GET_MODE (x);
7224 if (TARGET_THUMB1)
7225 {
7226 *total = thumb1_size_rtx_costs (x, code, outer_code);
7227 return true;
7228 }
7229
7230 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7231 switch (code)
7232 {
7233 case MEM:
7234 /* A memory access costs 1 insn if the mode is small or the address is
7235 a single register; otherwise it costs one insn per word. */
7236 if (REG_P (XEXP (x, 0)))
7237 *total = COSTS_N_INSNS (1);
7238 else if (flag_pic
7239 && GET_CODE (XEXP (x, 0)) == PLUS
7240 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7241 /* This will be split into two instructions.
7242 See arm.md:calculate_pic_address. */
7243 *total = COSTS_N_INSNS (2);
7244 else
7245 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7246 return true;
7247
7248 case DIV:
7249 case MOD:
7250 case UDIV:
7251 case UMOD:
7252 /* Needs a libcall, so it costs about this. */
7253 *total = COSTS_N_INSNS (2);
7254 return false;
7255
7256 case ROTATE:
7257 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7258 {
7259 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
7260 return true;
7261 }
7262 /* Fall through */
7263 case ROTATERT:
7264 case ASHIFT:
7265 case LSHIFTRT:
7266 case ASHIFTRT:
7267 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7268 {
7269 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
7270 return true;
7271 }
7272 else if (mode == SImode)
7273 {
7274 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
7275 /* Slightly disparage register shifts, but not by much. */
7276 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7277 *total += 1 + rtx_cost (XEXP (x, 1), code, false);
7278 return true;
7279 }
7280
7281 /* Needs a libcall. */
7282 *total = COSTS_N_INSNS (2);
7283 return false;
7284
7285 case MINUS:
7286 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7287 && (mode == SFmode || !TARGET_VFP_SINGLE))
7288 {
7289 *total = COSTS_N_INSNS (1);
7290 return false;
7291 }
7292
7293 if (mode == SImode)
7294 {
7295 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7296 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7297
7298 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7299 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7300 || subcode1 == ROTATE || subcode1 == ROTATERT
7301 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7302 || subcode1 == ASHIFTRT)
7303 {
7304 /* It's just the cost of the two operands. */
7305 *total = 0;
7306 return false;
7307 }
7308
7309 *total = COSTS_N_INSNS (1);
7310 return false;
7311 }
7312
7313 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7314 return false;
7315
7316 case PLUS:
7317 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7318 && (mode == SFmode || !TARGET_VFP_SINGLE))
7319 {
7320 *total = COSTS_N_INSNS (1);
7321 return false;
7322 }
7323
7324 /* A shift as a part of ADD costs nothing. */
7325 if (GET_CODE (XEXP (x, 0)) == MULT
7326 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7327 {
7328 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7329 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
7330 *total += rtx_cost (XEXP (x, 1), code, false);
7331 return true;
7332 }
7333
7334 /* Fall through */
7335 case AND: case XOR: case IOR:
7336 if (mode == SImode)
7337 {
7338 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7339
7340 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7341 || subcode == LSHIFTRT || subcode == ASHIFTRT
7342 || (code == AND && subcode == NOT))
7343 {
7344 /* It's just the cost of the two operands. */
7345 *total = 0;
7346 return false;
7347 }
7348 }
7349
7350 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7351 return false;
7352
7353 case MULT:
7354 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7355 return false;
7356
7357 case NEG:
7358 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7359 && (mode == SFmode || !TARGET_VFP_SINGLE))
7360 {
7361 *total = COSTS_N_INSNS (1);
7362 return false;
7363 }
7364
7365 /* Fall through */
7366 case NOT:
7367 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7368
7369 return false;
7370
7371 case IF_THEN_ELSE:
7372 *total = 0;
7373 return false;
7374
7375 case COMPARE:
7376 if (cc_register (XEXP (x, 0), VOIDmode))
7377 *total = 0;
7378 else
7379 *total = COSTS_N_INSNS (1);
7380 return false;
7381
7382 case ABS:
7383 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7384 && (mode == SFmode || !TARGET_VFP_SINGLE))
7385 *total = COSTS_N_INSNS (1);
7386 else
7387 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7388 return false;
7389
7390 case SIGN_EXTEND:
7391 case ZERO_EXTEND:
7392 return arm_rtx_costs_1 (x, outer_code, total, 0);
7393
7394 case CONST_INT:
7395 if (const_ok_for_arm (INTVAL (x)))
7396 /* A SET needs one insn to move the constant into a register; so does
7397 a multiplication by a constant, since the constant must be loaded first. */
7398 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7399 ? 1 : 0);
7400 else if (const_ok_for_arm (~INTVAL (x)))
7401 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7402 else if (const_ok_for_arm (-INTVAL (x)))
7403 {
7404 if (outer_code == COMPARE || outer_code == PLUS
7405 || outer_code == MINUS)
7406 *total = 0;
7407 else
7408 *total = COSTS_N_INSNS (1);
7409 }
7410 else
7411 *total = COSTS_N_INSNS (2);
7412 return true;
7413
7414 case CONST:
7415 case LABEL_REF:
7416 case SYMBOL_REF:
7417 *total = COSTS_N_INSNS (2);
7418 return true;
7419
7420 case CONST_DOUBLE:
7421 *total = COSTS_N_INSNS (4);
7422 return true;
7423
7424 case HIGH:
7425 case LO_SUM:
7426 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7427 cost of these slightly. */
7428 *total = COSTS_N_INSNS (1) + 1;
7429 return true;
7430
7431 default:
7432 if (mode != VOIDmode)
7433 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7434 else
7435 *total = COSTS_N_INSNS (4); /* Who knows? */
7436 return false;
7437 }
7438 }
7439
7440 /* RTX costs: dispatch to the size costs or the per-core speed costs. */
7441 static bool
7442 arm_rtx_costs (rtx x, int code, int outer_code, int *total,
7443 bool speed)
7444 {
7445 if (!speed)
7446 return arm_size_rtx_costs (x, (enum rtx_code) code,
7447 (enum rtx_code) outer_code, total);
7448 else
7449 return current_tune->rtx_costs (x, (enum rtx_code) code,
7450 (enum rtx_code) outer_code,
7451 total, speed);
7452 }
7453
7454 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7455 supported on any "slowmul" cores, so it can be ignored. */
7456
7457 static bool
7458 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7459 int *total, bool speed)
7460 {
7461 enum machine_mode mode = GET_MODE (x);
7462
7463 if (TARGET_THUMB)
7464 {
7465 *total = thumb1_rtx_costs (x, code, outer_code);
7466 return true;
7467 }
7468
7469 switch (code)
7470 {
7471 case MULT:
7472 if (GET_MODE_CLASS (mode) == MODE_FLOAT
7473 || mode == DImode)
7474 {
7475 *total = COSTS_N_INSNS (20);
7476 return false;
7477 }
7478
7479 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7480 {
7481 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7482 & (unsigned HOST_WIDE_INT) 0xffffffff);
7483 int cost, const_ok = const_ok_for_arm (i);
7484 int j, booth_unit_size;
7485
7486 /* Tune as appropriate. */
7487 cost = const_ok ? 4 : 8;
7488 booth_unit_size = 2;
7489 for (j = 0; i && j < 32; j += booth_unit_size)
7490 {
7491 i >>= booth_unit_size;
7492 cost++;
7493 }
7494
7495 *total = COSTS_N_INSNS (cost);
7496 *total += rtx_cost (XEXP (x, 0), code, speed);
7497 return true;
7498 }
7499
7500 *total = COSTS_N_INSNS (20);
7501 return false;
7502
7503 default:
7504 return arm_rtx_costs_1 (x, outer_code, total, speed);
7505 }
7506 }
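/* Worked example: with booth_unit_size == 2 above, a multiply by 0x55
   starts from the const_ok base cost of 4 and needs four 2-bit steps to
   consume the constant, giving COSTS_N_INSNS (8); a constant that must be
   loaded separately starts from 8 instead of 4.  */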
7507
7508
7509 /* RTX cost for cores with a fast multiply unit (M variants). */
7510
7511 static bool
7512 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7513 int *total, bool speed)
7514 {
7515 enum machine_mode mode = GET_MODE (x);
7516
7517 if (TARGET_THUMB1)
7518 {
7519 *total = thumb1_rtx_costs (x, code, outer_code);
7520 return true;
7521 }
7522
7523 /* ??? Should Thumb-2 use different costs? */
7524 switch (code)
7525 {
7526 case MULT:
7527 /* There is no point basing this on the tuning, since it is always the
7528 fast variant if it exists at all. */
7529 if (mode == DImode
7530 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7531 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7532 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7533 {
7534 *total = COSTS_N_INSNS (2);
7535 return false;
7536 }
7537
7538
7539 if (mode == DImode)
7540 {
7541 *total = COSTS_N_INSNS (5);
7542 return false;
7543 }
7544
7545 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7546 {
7547 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
7548 & (unsigned HOST_WIDE_INT) 0xffffffff);
7549 int cost, const_ok = const_ok_for_arm (i);
7550 int j, booth_unit_size;
7551
7552 /* Tune as appropriate. */
7553 cost = const_ok ? 4 : 8;
7554 booth_unit_size = 8;
7555 for (j = 0; i && j < 32; j += booth_unit_size)
7556 {
7557 i >>= booth_unit_size;
7558 cost++;
7559 }
7560
7561 *total = COSTS_N_INSNS (cost);
7562 return false;
7563 }
7564
7565 if (mode == SImode)
7566 {
7567 *total = COSTS_N_INSNS (4);
7568 return false;
7569 }
7570
7571 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7572 {
7573 if (TARGET_HARD_FLOAT
7574 && (mode == SFmode
7575 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7576 {
7577 *total = COSTS_N_INSNS (1);
7578 return false;
7579 }
7580 }
7581
7582 /* Requires a libcall. */
7583 *total = COSTS_N_INSNS (20);
7584 return false;
7585
7586 default:
7587 return arm_rtx_costs_1 (x, outer_code, total, speed);
7588 }
7589 }
7590
7591
7592 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7593 so it can be ignored. */
7594
7595 static bool
7596 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7597 int *total, bool speed)
7598 {
7599 enum machine_mode mode = GET_MODE (x);
7600
7601 if (TARGET_THUMB)
7602 {
7603 *total = thumb1_rtx_costs (x, code, outer_code);
7604 return true;
7605 }
7606
7607 switch (code)
7608 {
7609 case COMPARE:
7610 if (GET_CODE (XEXP (x, 0)) != MULT)
7611 return arm_rtx_costs_1 (x, outer_code, total, speed);
7612
7613 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7614 will stall until the multiplication is complete. */
7615 *total = COSTS_N_INSNS (3);
7616 return false;
7617
7618 case MULT:
7619 /* There is no point basing this on the tuning, since it is always the
7620 fast variant if it exists at all. */
7621 if (mode == DImode
7622 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7623 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7624 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7625 {
7626 *total = COSTS_N_INSNS (2);
7627 return false;
7628 }
7629
7630
7631 if (mode == DImode)
7632 {
7633 *total = COSTS_N_INSNS (5);
7634 return false;
7635 }
7636
7637 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7638 {
7639 /* If operand 1 is a constant we can more accurately
7640 calculate the cost of the multiply. The multiplier can
7641 retire 15 bits on the first cycle and a further 12 on the
7642 second. We do, of course, have to load the constant into
7643 a register first. */
7644 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7645 /* There's a general overhead of one cycle. */
7646 int cost = 1;
7647 unsigned HOST_WIDE_INT masked_const;
7648
7649 if (i & 0x80000000)
7650 i = ~i;
7651
7652 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
7653
7654 masked_const = i & 0xffff8000;
7655 if (masked_const != 0)
7656 {
7657 cost++;
7658 masked_const = i & 0xf8000000;
7659 if (masked_const != 0)
7660 cost++;
7661 }
7662 *total = COSTS_N_INSNS (cost);
7663 return false;
7664 }
7665
7666 if (mode == SImode)
7667 {
7668 *total = COSTS_N_INSNS (3);
7669 return false;
7670 }
7671
7672 /* Requires a libcall. */
7673 *total = COSTS_N_INSNS (20);
7674 return false;
7675
7676 default:
7677 return arm_rtx_costs_1 (x, outer_code, total, speed);
7678 }
7679 }
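/* Worked example: in the constant-multiply model above, a multiplier of
   0x64 sets no bits at or above bit 15, so only the general one-cycle
   overhead applies (COSTS_N_INSNS (1)); 0x12345 sets a bit in the 15..30
   range but none in 27..31, giving COSTS_N_INSNS (2).  */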
7680
7681
7682 /* RTX costs for 9e (and later) cores. */
7683
7684 static bool
7685 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7686 int *total, bool speed)
7687 {
7688 enum machine_mode mode = GET_MODE (x);
7689
7690 if (TARGET_THUMB1)
7691 {
7692 switch (code)
7693 {
7694 case MULT:
7695 *total = COSTS_N_INSNS (3);
7696 return true;
7697
7698 default:
7699 *total = thumb1_rtx_costs (x, code, outer_code);
7700 return true;
7701 }
7702 }
7703
7704 switch (code)
7705 {
7706 case MULT:
7707 /* There is no point basing this on the tuning, since it is always the
7708 fast variant if it exists at all. */
7709 if (mode == DImode
7710 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
7711 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7712 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7713 {
7714 *total = COSTS_N_INSNS (2);
7715 return false;
7716 }
7717
7718
7719 if (mode == DImode)
7720 {
7721 *total = COSTS_N_INSNS (5);
7722 return false;
7723 }
7724
7725 if (mode == SImode)
7726 {
7727 *total = COSTS_N_INSNS (2);
7728 return false;
7729 }
7730
7731 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7732 {
7733 if (TARGET_HARD_FLOAT
7734 && (mode == SFmode
7735 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7736 {
7737 *total = COSTS_N_INSNS (1);
7738 return false;
7739 }
7740 }
7741
7742 *total = COSTS_N_INSNS (20);
7743 return false;
7744
7745 default:
7746 return arm_rtx_costs_1 (x, outer_code, total, speed);
7747 }
7748 }
7749 /* All address computations that can be done are free, but rtx cost returns
7750 the same for practically all of them. So we weight the different types
7751 of address here in the order (most preferred first):
7752 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7753 static inline int
7754 arm_arm_address_cost (rtx x)
7755 {
7756 enum rtx_code c = GET_CODE (x);
7757
7758 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
7759 return 0;
7760 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
7761 return 10;
7762
7763 if (c == PLUS)
7764 {
7765 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7766 return 2;
7767
7768 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
7769 return 3;
7770
7771 return 4;
7772 }
7773
7774 return 6;
7775 }
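/* For example: a post-increment address such as [r1], #4 costs 0;
   [r1, #8] costs 2; [r1, r2, lsl #2] costs 3 because the index is an
   arithmetic term; a plain [r1, r2] costs 4; and a literal-pool label or
   symbol address costs 10.  */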
7776
7777 static inline int
7778 arm_thumb_address_cost (rtx x)
7779 {
7780 enum rtx_code c = GET_CODE (x);
7781
7782 if (c == REG)
7783 return 1;
7784 if (c == PLUS
7785 && GET_CODE (XEXP (x, 0)) == REG
7786 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7787 return 1;
7788
7789 return 2;
7790 }
7791
7792 static int
7793 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7794 {
7795 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7796 }
7797
7798 /* Adjust cost hook for XScale. */
7799 static bool
7800 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7801 {
7802 /* Some true dependencies can have a higher cost depending
7803 on precisely how certain input operands are used. */
7804 if (REG_NOTE_KIND (link) == 0
7805 && recog_memoized (insn) >= 0
7806 && recog_memoized (dep) >= 0)
7807 {
7808 int shift_opnum = get_attr_shift (insn);
7809 enum attr_type attr_type = get_attr_type (dep);
7810
7811 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7812 operand for INSN. If we have a shifted input operand and the
7813 instruction we depend on is another ALU instruction, then we may
7814 have to account for an additional stall. */
7815 if (shift_opnum != 0
7816 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
7817 {
7818 rtx shifted_operand;
7819 int opno;
7820
7821 /* Get the shifted operand. */
7822 extract_insn (insn);
7823 shifted_operand = recog_data.operand[shift_opnum];
7824
7825 /* Iterate over all the operands in DEP. If we write an operand
7826 that overlaps with SHIFTED_OPERAND, then we have to increase the
7827 cost of this dependency. */
7828 extract_insn (dep);
7829 preprocess_constraints ();
7830 for (opno = 0; opno < recog_data.n_operands; opno++)
7831 {
7832 /* We can ignore strict inputs. */
7833 if (recog_data.operand_type[opno] == OP_IN)
7834 continue;
7835
7836 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7837 shifted_operand))
7838 {
7839 *cost = 2;
7840 return false;
7841 }
7842 }
7843 }
7844 }
7845 return true;
7846 }
7847
7848 /* Adjust cost hook for Cortex A9. */
7849 static bool
7850 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7851 {
7852 switch (REG_NOTE_KIND (link))
7853 {
7854 case REG_DEP_ANTI:
7855 *cost = 0;
7856 return false;
7857
7858 case REG_DEP_TRUE:
7859 case REG_DEP_OUTPUT:
7860 if (recog_memoized (insn) >= 0
7861 && recog_memoized (dep) >= 0)
7862 {
7863 if (GET_CODE (PATTERN (insn)) == SET)
7864 {
7865 if (GET_MODE_CLASS
7866 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
7867 || GET_MODE_CLASS
7868 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
7869 {
7870 enum attr_type attr_type_insn = get_attr_type (insn);
7871 enum attr_type attr_type_dep = get_attr_type (dep);
7872
7873 /* By default all dependencies of the form
7874 s0 = s0 <op> s1
7875 s0 = s0 <op> s2
7876 have an extra latency of 1 cycle because
7877 of the input and output dependency in this
7878 case. However this gets modeled as a true
7879 dependency, hence all these checks. */
7880 if (REG_P (SET_DEST (PATTERN (insn)))
7881 && REG_P (SET_DEST (PATTERN (dep)))
7882 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
7883 SET_DEST (PATTERN (dep))))
7884 {
7885 /* FMACS is a special case where the dependent
7886 instruction can be issued 3 cycles earlier than
7887 its normal latency would allow in the case of an
7888 output dependency. */
7889 if ((attr_type_insn == TYPE_FMACS
7890 || attr_type_insn == TYPE_FMACD)
7891 && (attr_type_dep == TYPE_FMACS
7892 || attr_type_dep == TYPE_FMACD))
7893 {
7894 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7895 *cost = insn_default_latency (dep) - 3;
7896 else
7897 *cost = insn_default_latency (dep);
7898 return false;
7899 }
7900 else
7901 {
7902 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7903 *cost = insn_default_latency (dep) + 1;
7904 else
7905 *cost = insn_default_latency (dep);
7906 }
7907 return false;
7908 }
7909 }
7910 }
7911 }
7912 break;
7913
7914 default:
7915 gcc_unreachable ();
7916 }
7917
7918 return true;
7919 }
7920
7921 /* Adjust cost hook for FA726TE. */
7922 static bool
7923 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7924 {
7925 /* For the FA726TE, a true dependency on the CPSR (i.e. a flag-setting insn
7926 followed by a predicated one) has a penalty of 3 cycles. */
7927 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
7928 && recog_memoized (insn) >= 0
7929 && recog_memoized (dep) >= 0
7930 && get_attr_conds (dep) == CONDS_SET)
7931 {
7932 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
7933 if (get_attr_conds (insn) == CONDS_USE
7934 && get_attr_type (insn) != TYPE_BRANCH)
7935 {
7936 *cost = 3;
7937 return false;
7938 }
7939
7940 if (GET_CODE (PATTERN (insn)) == COND_EXEC
7941 || get_attr_conds (insn) == CONDS_USE)
7942 {
7943 *cost = 0;
7944 return false;
7945 }
7946 }
7947
7948 return true;
7949 }
7950
7951 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
7952 It corrects the value of COST based on the relationship between
7953 INSN and DEP through the dependence LINK. It returns the new
7954 value. There is a per-core adjust_cost hook to adjust scheduler costs
7955 and the per-core hook can choose to completely override the generic
7956 adjust_cost function. Only put bits of code into arm_adjust_cost that
7957 are common across all cores. */
7958 static int
7959 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
7960 {
7961 rtx i_pat, d_pat;
7962
7963 /* When generating Thumb-1 code, we want to place flag-setting operations
7964 close to a conditional branch which depends on them, so that we can
7965 omit the comparison. */
7966 if (TARGET_THUMB1
7967 && REG_NOTE_KIND (link) == 0
7968 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
7969 && recog_memoized (dep) >= 0
7970 && get_attr_conds (dep) == CONDS_SET)
7971 return 0;
7972
7973 if (current_tune->sched_adjust_cost != NULL)
7974 {
7975 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
7976 return cost;
7977 }
7978
7979 /* XXX This is not strictly true for the FPA. */
7980 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7981 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7982 return 0;
7983
7984 /* Call insns don't incur a stall, even if they follow a load. */
7985 if (REG_NOTE_KIND (link) == 0
7986 && GET_CODE (insn) == CALL_INSN)
7987 return 1;
7988
7989 if ((i_pat = single_set (insn)) != NULL
7990 && GET_CODE (SET_SRC (i_pat)) == MEM
7991 && (d_pat = single_set (dep)) != NULL
7992 && GET_CODE (SET_DEST (d_pat)) == MEM)
7993 {
7994 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
7995 /* This is a load after a store; there is no conflict if the load reads
7996 from a cached area. Assume that loads from the stack and from the
7997 constant pool are cached, and that others will miss. This is a
7998 hack. */
7999
8000 if ((GET_CODE (src_mem) == SYMBOL_REF
8001 && CONSTANT_POOL_ADDRESS_P (src_mem))
8002 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8003 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8004 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8005 return 1;
8006 }
8007
8008 return cost;
8009 }
8010
8011 static int fp_consts_inited = 0;
8012
8013 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8014 static const char * const strings_fp[8] =
8015 {
8016 "0", "1", "2", "3",
8017 "4", "5", "0.5", "10"
8018 };
8019
8020 static REAL_VALUE_TYPE values_fp[8];
8021
8022 static void
8023 init_fp_table (void)
8024 {
8025 int i;
8026 REAL_VALUE_TYPE r;
8027
8028 if (TARGET_VFP)
8029 fp_consts_inited = 1;
8030 else
8031 fp_consts_inited = 8;
8032
8033 for (i = 0; i < fp_consts_inited; i++)
8034 {
8035 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8036 values_fp[i] = r;
8037 }
8038 }
8039
8040 /* Return TRUE if rtx X is a valid immediate FP constant. */
8041 int
8042 arm_const_double_rtx (rtx x)
8043 {
8044 REAL_VALUE_TYPE r;
8045 int i;
8046
8047 if (!fp_consts_inited)
8048 init_fp_table ();
8049
8050 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8051 if (REAL_VALUE_MINUS_ZERO (r))
8052 return 0;
8053
8054 for (i = 0; i < fp_consts_inited; i++)
8055 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8056 return 1;
8057
8058 return 0;
8059 }
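/* For example, (const_double 10.0) is accepted here when compiling for
   FPA, since it is the last entry of strings_fp above, but for VFP only
   0.0 passes because fp_consts_inited is limited to the first table entry
   in that case.  */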
8060
8061 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8062 int
8063 neg_const_double_rtx_ok_for_fpa (rtx x)
8064 {
8065 REAL_VALUE_TYPE r;
8066 int i;
8067
8068 if (!fp_consts_inited)
8069 init_fp_table ();
8070
8071 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8072 r = real_value_negate (&r);
8073 if (REAL_VALUE_MINUS_ZERO (r))
8074 return 0;
8075
8076 for (i = 0; i < 8; i++)
8077 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8078 return 1;
8079
8080 return 0;
8081 }
8082
8083
8084 /* VFPv3 has a fairly wide range of representable immediates, formed from
8085 "quarter-precision" floating-point values. These can be evaluated using this
8086 formula (with ^ for exponentiation):
8087
8088 -1^s * n * 2^-r
8089
8090 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8091 16 <= n <= 31 and 0 <= r <= 7.
8092
8093 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8094
8095 - A (most-significant) is the sign bit.
8096 - BCD are the exponent (encoded as r XOR 3).
8097 - EFGH are the mantissa (encoded as n - 16).
8098 */
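/* Worked example: +1.0 = 16 * 2^-4, so s = 0, n = 16 and r = 4. That
   encodes as A = 0, BCD = (4 XOR 3) = 0b111 and EFGH = 16 - 16 = 0b0000,
   i.e. the 8-bit index 0x70 used by the fconst[sd] instructions below.  */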
8099
8100 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8101 fconst[sd] instruction, or -1 if X isn't suitable. */
8102 static int
8103 vfp3_const_double_index (rtx x)
8104 {
8105 REAL_VALUE_TYPE r, m;
8106 int sign, exponent;
8107 unsigned HOST_WIDE_INT mantissa, mant_hi;
8108 unsigned HOST_WIDE_INT mask;
8109 HOST_WIDE_INT m1, m2;
8110 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8111
8112 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8113 return -1;
8114
8115 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8116
8117 /* We can't represent these things, so detect them first. */
8118 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8119 return -1;
8120
8121 /* Extract sign, exponent and mantissa. */
8122 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8123 r = real_value_abs (&r);
8124 exponent = REAL_EXP (&r);
8125 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8126 highest (sign) bit, with a fixed binary point at bit point_pos.
8127 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8128 bits for the mantissa, this may fail (low bits would be lost). */
8129 real_ldexp (&m, &r, point_pos - exponent);
8130 REAL_VALUE_TO_INT (&m1, &m2, m);
8131 mantissa = m1;
8132 mant_hi = m2;
8133
8134 /* If there are bits set in the low part of the mantissa, we can't
8135 represent this value. */
8136 if (mantissa != 0)
8137 return -1;
8138
8139 /* Now make it so that mantissa contains the most-significant bits, and move
8140 the point_pos to indicate that the least-significant bits have been
8141 discarded. */
8142 point_pos -= HOST_BITS_PER_WIDE_INT;
8143 mantissa = mant_hi;
8144
8145 /* We can permit four significant bits of mantissa only, plus a high bit
8146 which is always 1. */
8147 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8148 if ((mantissa & mask) != 0)
8149 return -1;
8150
8151 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8152 mantissa >>= point_pos - 5;
8153
8154 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8155 floating-point immediate zero with Neon using an integer-zero load, but
8156 that case is handled elsewhere.) */
8157 if (mantissa == 0)
8158 return -1;
8159
8160 gcc_assert (mantissa >= 16 && mantissa <= 31);
8161
8162 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8163 normalized significands are in the range [1, 2). (Our mantissa is shifted
8164 left 4 places at this point relative to normalized IEEE754 values). GCC
8165 internally uses [0.5, 1) (see real.c), so the exponent returned from
8166 REAL_EXP must be altered. */
8167 exponent = 5 - exponent;
8168
8169 if (exponent < 0 || exponent > 7)
8170 return -1;
8171
8172 /* Sign, mantissa and exponent are now in the correct form to plug into the
8173 formula described in the comment above. */
8174 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8175 }
8176
8177 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8178 int
8179 vfp3_const_double_rtx (rtx x)
8180 {
8181 if (!TARGET_VFP3)
8182 return 0;
8183
8184 return vfp3_const_double_index (x) != -1;
8185 }
8186
8187 /* Recognize immediates which can be used in various Neon instructions. Legal
8188 immediates are described by the following table (for VMVN variants, the
8189 bitwise inverse of the constant shown is recognized. In either case, VMOV
8190 is output and the correct instruction to use for a given constant is chosen
8191 by the assembler). The constant shown is replicated across all elements of
8192 the destination vector.
8193
8194 insn elems variant constant (binary)
8195 ---- ----- ------- -----------------
8196 vmov i32 0 00000000 00000000 00000000 abcdefgh
8197 vmov i32 1 00000000 00000000 abcdefgh 00000000
8198 vmov i32 2 00000000 abcdefgh 00000000 00000000
8199 vmov i32 3 abcdefgh 00000000 00000000 00000000
8200 vmov i16 4 00000000 abcdefgh
8201 vmov i16 5 abcdefgh 00000000
8202 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8203 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8204 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8205 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8206 vmvn i16 10 00000000 abcdefgh
8207 vmvn i16 11 abcdefgh 00000000
8208 vmov i32 12 00000000 00000000 abcdefgh 11111111
8209 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8210 vmov i32 14 00000000 abcdefgh 11111111 11111111
8211 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8212 vmov i8 16 abcdefgh
8213 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8214 eeeeeeee ffffffff gggggggg hhhhhhhh
8215 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8216
8217 For case 18, B = !b. Representable values are exactly those accepted by
8218 vfp3_const_double_index, but are output as floating-point numbers rather
8219 than indices.
8220
8221 Variants 0-5 (inclusive) may also be used as immediates for the second
8222 operand of VORR/VBIC instructions.
8223
8224 The INVERSE argument causes the bitwise inverse of the given operand to be
8225 recognized instead (used for recognizing legal immediates for the VAND/VORN
8226 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8227 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8228 output, rather than the real insns vbic/vorr).
8229
8230 INVERSE makes no difference to the recognition of float vectors.
8231
8232 The return value is the variant of immediate as shown in the above table, or
8233 -1 if the given value doesn't match any of the listed patterns.
8234 */
8235 static int
8236 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8237 rtx *modconst, int *elementwidth)
8238 {
8239 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8240 matches = 1; \
8241 for (i = 0; i < idx; i += (STRIDE)) \
8242 if (!(TEST)) \
8243 matches = 0; \
8244 if (matches) \
8245 { \
8246 immtype = (CLASS); \
8247 elsize = (ELSIZE); \
8248 break; \
8249 }
8250
8251 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8252 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8253 unsigned char bytes[16];
8254 int immtype = -1, matches;
8255 unsigned int invmask = inverse ? 0xff : 0;
8256
8257 /* Vectors of float constants. */
8258 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8259 {
8260 rtx el0 = CONST_VECTOR_ELT (op, 0);
8261 REAL_VALUE_TYPE r0;
8262
8263 if (!vfp3_const_double_rtx (el0))
8264 return -1;
8265
8266 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8267
8268 for (i = 1; i < n_elts; i++)
8269 {
8270 rtx elt = CONST_VECTOR_ELT (op, i);
8271 REAL_VALUE_TYPE re;
8272
8273 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8274
8275 if (!REAL_VALUES_EQUAL (r0, re))
8276 return -1;
8277 }
8278
8279 if (modconst)
8280 *modconst = CONST_VECTOR_ELT (op, 0);
8281
8282 if (elementwidth)
8283 *elementwidth = 0;
8284
8285 return 18;
8286 }
8287
8288 /* Splat vector constant out into a byte vector. */
8289 for (i = 0; i < n_elts; i++)
8290 {
8291 rtx el = CONST_VECTOR_ELT (op, i);
8292 unsigned HOST_WIDE_INT elpart;
8293 unsigned int part, parts;
8294
8295 if (GET_CODE (el) == CONST_INT)
8296 {
8297 elpart = INTVAL (el);
8298 parts = 1;
8299 }
8300 else if (GET_CODE (el) == CONST_DOUBLE)
8301 {
8302 elpart = CONST_DOUBLE_LOW (el);
8303 parts = 2;
8304 }
8305 else
8306 gcc_unreachable ();
8307
8308 for (part = 0; part < parts; part++)
8309 {
8310 unsigned int byte;
8311 for (byte = 0; byte < innersize; byte++)
8312 {
8313 bytes[idx++] = (elpart & 0xff) ^ invmask;
8314 elpart >>= BITS_PER_UNIT;
8315 }
8316 if (GET_CODE (el) == CONST_DOUBLE)
8317 elpart = CONST_DOUBLE_HIGH (el);
8318 }
8319 }
8320
8321 /* Sanity check. */
8322 gcc_assert (idx == GET_MODE_SIZE (mode));
8323
8324 do
8325 {
8326 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8327 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8328
8329 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8330 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8331
8332 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8333 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8334
8335 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8336 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8337
8338 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8339
8340 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8341
8342 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8343 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8344
8345 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8346 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8347
8348 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8349 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8350
8351 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8352 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8353
8354 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8355
8356 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8357
8358 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8359 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8360
8361 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8362 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8363
8364 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8365 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8366
8367 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8368 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8369
8370 CHECK (1, 8, 16, bytes[i] == bytes[0]);
8371
8372 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8373 && bytes[i] == bytes[(i + 8) % idx]);
8374 }
8375 while (0);
8376
8377 if (immtype == -1)
8378 return -1;
8379
8380 if (elementwidth)
8381 *elementwidth = elsize;
8382
8383 if (modconst)
8384 {
8385 unsigned HOST_WIDE_INT imm = 0;
8386
8387 /* Un-invert bytes of recognized vector, if necessary. */
8388 if (invmask != 0)
8389 for (i = 0; i < idx; i++)
8390 bytes[i] ^= invmask;
8391
8392 if (immtype == 17)
8393 {
8394 /* FIXME: Broken on 32-bit H_W_I hosts. */
8395 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8396
8397 for (i = 0; i < 8; i++)
8398 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8399 << (i * BITS_PER_UNIT);
8400
8401 *modconst = GEN_INT (imm);
8402 }
8403 else
8404 {
8405 unsigned HOST_WIDE_INT imm = 0;
8406
8407 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8408 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8409
8410 *modconst = GEN_INT (imm);
8411 }
8412 }
8413
8414 return immtype;
8415 #undef CHECK
8416 }
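/* For example, a V4SImode constant whose elements are all 0x000000ab
   matches variant 0 above with an element width of 32, while elements of
   0xffffffab match variant 6, the VMVN form of the same byte pattern.  */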
8417
8418 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8419 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8420 float elements), and a modified constant (whatever should be output for a
8421 VMOV) in *MODCONST. */
8422
8423 int
8424 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
8425 rtx *modconst, int *elementwidth)
8426 {
8427 rtx tmpconst;
8428 int tmpwidth;
8429 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
8430
8431 if (retval == -1)
8432 return 0;
8433
8434 if (modconst)
8435 *modconst = tmpconst;
8436
8437 if (elementwidth)
8438 *elementwidth = tmpwidth;
8439
8440 return 1;
8441 }
8442
8443 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8444 the immediate is valid, write a constant suitable for using as an operand
8445 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8446 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8447
8448 int
8449 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
8450 rtx *modconst, int *elementwidth)
8451 {
8452 rtx tmpconst;
8453 int tmpwidth;
8454 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
8455
8456 if (retval < 0 || retval > 5)
8457 return 0;
8458
8459 if (modconst)
8460 *modconst = tmpconst;
8461
8462 if (elementwidth)
8463 *elementwidth = tmpwidth;
8464
8465 return 1;
8466 }
8467
8468 /* Return a string suitable for output of Neon immediate logic operation
8469 MNEM. */
8470
8471 char *
8472 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
8473 int inverse, int quad)
8474 {
8475 int width, is_valid;
8476 static char templ[40];
8477
8478 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
8479
8480 gcc_assert (is_valid != 0);
8481
8482 if (quad)
8483 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
8484 else
8485 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
8486
8487 return templ;
8488 }
8489
8490 /* Output a sequence of pairwise operations to implement a reduction.
8491 NOTE: We do "too much work" here, because pairwise operations work on two
8492 registers-worth of operands in one go. Unfortunately I don't think we can
8493 exploit those extra calculations to do the full operation in fewer steps.
8494 Although all vector elements of the result but the first are ignored, we
8495 actually calculate the same result in each of the elements. An alternative
8496 such as initially loading a vector with zero to use as each of the second
8497 operands would use up an additional register and take an extra instruction,
8498 for no particular gain. */
8499
8500 void
8501 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
8502 rtx (*reduc) (rtx, rtx, rtx))
8503 {
8504 enum machine_mode inner = GET_MODE_INNER (mode);
8505 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
8506 rtx tmpsum = op1;
8507
8508 for (i = parts / 2; i >= 1; i /= 2)
8509 {
8510 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
8511 emit_insn (reduc (dest, tmpsum, tmpsum));
8512 tmpsum = dest;
8513 }
8514 }
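/* For example, a V4SFmode reduction has parts == 4, so the loop above
   emits two pairwise operations: the first writes a fresh scratch
   register and the second writes OP0, leaving the reduced value
   duplicated in every element of the result.  */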
8515
8516 /* If VALS is a vector constant that can be loaded into a register
8517 using VDUP, generate instructions to do so and return an RTX to
8518 assign to the register. Otherwise return NULL_RTX. */
8519
8520 static rtx
8521 neon_vdup_constant (rtx vals)
8522 {
8523 enum machine_mode mode = GET_MODE (vals);
8524 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8525 int n_elts = GET_MODE_NUNITS (mode);
8526 bool all_same = true;
8527 rtx x;
8528 int i;
8529
8530 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
8531 return NULL_RTX;
8532
8533 for (i = 0; i < n_elts; ++i)
8534 {
8535 x = XVECEXP (vals, 0, i);
8536 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8537 all_same = false;
8538 }
8539
8540 if (!all_same)
8541 /* The elements are not all the same. We could handle repeating
8542 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8543 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8544 vdup.i16). */
8545 return NULL_RTX;
8546
8547 /* We can load this constant by using VDUP and a constant in a
8548 single ARM register. This will be cheaper than a vector
8549 load. */
8550
8551 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8552 return gen_rtx_VEC_DUPLICATE (mode, x);
8553 }
8554
8555 /* Generate code to load VALS, which is a PARALLEL containing only
8556 constants (for vec_init) or CONST_VECTOR, efficiently into a
8557 register. Returns an RTX to copy into the register, or NULL_RTX
8558 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8559
8560 rtx
8561 neon_make_constant (rtx vals)
8562 {
8563 enum machine_mode mode = GET_MODE (vals);
8564 rtx target;
8565 rtx const_vec = NULL_RTX;
8566 int n_elts = GET_MODE_NUNITS (mode);
8567 int n_const = 0;
8568 int i;
8569
8570 if (GET_CODE (vals) == CONST_VECTOR)
8571 const_vec = vals;
8572 else if (GET_CODE (vals) == PARALLEL)
8573 {
8574 /* A CONST_VECTOR must contain only CONST_INTs and
8575 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8576 Only store valid constants in a CONST_VECTOR. */
8577 for (i = 0; i < n_elts; ++i)
8578 {
8579 rtx x = XVECEXP (vals, 0, i);
8580 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8581 n_const++;
8582 }
8583 if (n_const == n_elts)
8584 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8585 }
8586 else
8587 gcc_unreachable ();
8588
8589 if (const_vec != NULL
8590 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
8591 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8592 return const_vec;
8593 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
8594 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8595 pipeline cycle; creating the constant takes one or two ARM
8596 pipeline cycles. */
8597 return target;
8598 else if (const_vec != NULL_RTX)
8599 /* Load from constant pool. On Cortex-A8 this takes two cycles
8600 (for either double or quad vectors). We can not take advantage
8601 of single-cycle VLD1 because we need a PC-relative addressing
8602 mode. */
8603 return const_vec;
8604 else
8605 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8606 We can not construct an initializer. */
8607 return NULL_RTX;
8608 }
8609
8610 /* Initialize vector TARGET to VALS. */
8611
8612 void
8613 neon_expand_vector_init (rtx target, rtx vals)
8614 {
8615 enum machine_mode mode = GET_MODE (target);
8616 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8617 int n_elts = GET_MODE_NUNITS (mode);
8618 int n_var = 0, one_var = -1;
8619 bool all_same = true;
8620 rtx x, mem;
8621 int i;
8622
8623 for (i = 0; i < n_elts; ++i)
8624 {
8625 x = XVECEXP (vals, 0, i);
8626 if (!CONSTANT_P (x))
8627 ++n_var, one_var = i;
8628
8629 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8630 all_same = false;
8631 }
8632
8633 if (n_var == 0)
8634 {
8635 rtx constant = neon_make_constant (vals);
8636 if (constant != NULL_RTX)
8637 {
8638 emit_move_insn (target, constant);
8639 return;
8640 }
8641 }
8642
8643 /* Splat a single non-constant element if we can. */
8644 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8645 {
8646 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8647 emit_insn (gen_rtx_SET (VOIDmode, target,
8648 gen_rtx_VEC_DUPLICATE (mode, x)));
8649 return;
8650 }
8651
8652 /* One field is non-constant. Load constant then overwrite varying
8653 field. This is more efficient than using the stack. */
8654 if (n_var == 1)
8655 {
8656 rtx copy = copy_rtx (vals);
8657 rtx index = GEN_INT (one_var);
8658
8659 /* Load constant part of vector, substitute neighboring value for
8660 varying element. */
8661 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8662 neon_expand_vector_init (target, copy);
8663
8664 /* Insert variable. */
8665 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8666 switch (mode)
8667 {
8668 case V8QImode:
8669 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8670 break;
8671 case V16QImode:
8672 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8673 break;
8674 case V4HImode:
8675 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8676 break;
8677 case V8HImode:
8678 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8679 break;
8680 case V2SImode:
8681 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8682 break;
8683 case V4SImode:
8684 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8685 break;
8686 case V2SFmode:
8687 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8688 break;
8689 case V4SFmode:
8690 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8691 break;
8692 case V2DImode:
8693 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8694 break;
8695 default:
8696 gcc_unreachable ();
8697 }
8698 return;
8699 }
8700
8701 /* Construct the vector in memory one field at a time
8702 and load the whole vector. */
8703 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
8704 for (i = 0; i < n_elts; i++)
8705 emit_move_insn (adjust_address_nv (mem, inner_mode,
8706 i * GET_MODE_SIZE (inner_mode)),
8707 XVECEXP (vals, 0, i));
8708 emit_move_insn (target, mem);
8709 }
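/* For example, initializing a V4SImode vector with {x, 1, 2, 3}, where
   only X is non-constant, first loads the constant vector {1, 1, 2, 3}
   (the varying slot borrows its neighbour's value) and then inserts X
   into lane 0 with a vset_lane pattern, avoiding the stack-based fallback
   at the end of the function.  */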
8710
8711 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8712 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8713 reported source locations are bogus. */
8714
8715 static void
8716 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8717 const char *err)
8718 {
8719 HOST_WIDE_INT lane;
8720
8721 gcc_assert (GET_CODE (operand) == CONST_INT);
8722
8723 lane = INTVAL (operand);
8724
8725 if (lane < low || lane >= high)
8726 error (err);
8727 }
8728
8729 /* Bounds-check lanes. */
8730
8731 void
8732 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8733 {
8734 bounds_check (operand, low, high, "lane out of range");
8735 }
8736
8737 /* Bounds-check constants. */
8738
8739 void
8740 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8741 {
8742 bounds_check (operand, low, high, "constant out of range");
8743 }
8744
8745 HOST_WIDE_INT
8746 neon_element_bits (enum machine_mode mode)
8747 {
8748 if (mode == DImode)
8749 return GET_MODE_BITSIZE (mode);
8750 else
8751 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
8752 }
8753
8754 \f
8755 /* Predicates for `match_operand' and `match_operator'. */
8756
8757 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8758 int
8759 cirrus_memory_offset (rtx op)
8760 {
8761 /* Reject eliminable registers. */
8762 if (! (reload_in_progress || reload_completed)
8763 && ( reg_mentioned_p (frame_pointer_rtx, op)
8764 || reg_mentioned_p (arg_pointer_rtx, op)
8765 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8766 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8767 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8768 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8769 return 0;
8770
8771 if (GET_CODE (op) == MEM)
8772 {
8773 rtx ind;
8774
8775 ind = XEXP (op, 0);
8776
8777 /* Match: (mem (reg)). */
8778 if (GET_CODE (ind) == REG)
8779 return 1;
8780
8781 /* Match:
8782 (mem (plus (reg)
8783 (const))). */
8784 if (GET_CODE (ind) == PLUS
8785 && GET_CODE (XEXP (ind, 0)) == REG
8786 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8787 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
8788 return 1;
8789 }
8790
8791 return 0;
8792 }
8793
8794 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8795 WB is true if full writeback address modes are allowed and is false
8796 if limited writeback address modes (POST_INC and PRE_DEC) are
8797 allowed. */
8798
8799 int
8800 arm_coproc_mem_operand (rtx op, bool wb)
8801 {
8802 rtx ind;
8803
8804 /* Reject eliminable registers. */
8805 if (! (reload_in_progress || reload_completed)
8806 && ( reg_mentioned_p (frame_pointer_rtx, op)
8807 || reg_mentioned_p (arg_pointer_rtx, op)
8808 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8809 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8810 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8811 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8812 return FALSE;
8813
8814 /* Constants are converted into offsets from labels. */
8815 if (GET_CODE (op) != MEM)
8816 return FALSE;
8817
8818 ind = XEXP (op, 0);
8819
8820 if (reload_completed
8821 && (GET_CODE (ind) == LABEL_REF
8822 || (GET_CODE (ind) == CONST
8823 && GET_CODE (XEXP (ind, 0)) == PLUS
8824 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8825 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8826 return TRUE;
8827
8828 /* Match: (mem (reg)). */
8829 if (GET_CODE (ind) == REG)
8830 return arm_address_register_rtx_p (ind, 0);
8831
8832 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
8833 acceptable in any case (subject to verification by
8834 arm_address_register_rtx_p). We need WB to be true to accept
8835 PRE_INC and POST_DEC. */
8836 if (GET_CODE (ind) == POST_INC
8837 || GET_CODE (ind) == PRE_DEC
8838 || (wb
8839 && (GET_CODE (ind) == PRE_INC
8840 || GET_CODE (ind) == POST_DEC)))
8841 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8842
8843 if (wb
8844 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
8845 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
8846 && GET_CODE (XEXP (ind, 1)) == PLUS
8847 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
8848 ind = XEXP (ind, 1);
8849
8850 /* Match:
8851 (plus (reg)
8852 (const)). */
8853 if (GET_CODE (ind) == PLUS
8854 && GET_CODE (XEXP (ind, 0)) == REG
8855 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8856 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8857 && INTVAL (XEXP (ind, 1)) > -1024
8858 && INTVAL (XEXP (ind, 1)) < 1024
8859 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8860 return TRUE;
8861
8862 return FALSE;
8863 }
8864
8865 /* Return TRUE if OP is a memory operand from/to which we can load or store
8866 a vector. TYPE is one of the following values:
8867 0 - Vector load/store (vldr)
8868 1 - Core registers (ldm)
8869 2 - Element/structure loads (vld1)
8870 */
8871 int
8872 neon_vector_mem_operand (rtx op, int type)
8873 {
8874 rtx ind;
8875
8876 /* Reject eliminable registers. */
8877 if (! (reload_in_progress || reload_completed)
8878 && ( reg_mentioned_p (frame_pointer_rtx, op)
8879 || reg_mentioned_p (arg_pointer_rtx, op)
8880 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8881 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8882 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8883 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8884 return FALSE;
8885
8886 /* Constants are converted into offsets from labels. */
8887 if (GET_CODE (op) != MEM)
8888 return FALSE;
8889
8890 ind = XEXP (op, 0);
8891
8892 if (reload_completed
8893 && (GET_CODE (ind) == LABEL_REF
8894 || (GET_CODE (ind) == CONST
8895 && GET_CODE (XEXP (ind, 0)) == PLUS
8896 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8897 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8898 return TRUE;
8899
8900 /* Match: (mem (reg)). */
8901 if (GET_CODE (ind) == REG)
8902 return arm_address_register_rtx_p (ind, 0);
8903
8904 /* Allow post-increment with Neon registers. */
8905 if ((type != 1 && GET_CODE (ind) == POST_INC)
8906 || (type == 0 && GET_CODE (ind) == PRE_DEC))
8907 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8908
8909 /* FIXME: vld1 allows register post-modify. */
8910
8911 /* Match:
8912 (plus (reg)
8913 (const)). */
8914 if (type == 0
8915 && GET_CODE (ind) == PLUS
8916 && GET_CODE (XEXP (ind, 0)) == REG
8917 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
8918 && GET_CODE (XEXP (ind, 1)) == CONST_INT
8919 && INTVAL (XEXP (ind, 1)) > -1024
8920 && INTVAL (XEXP (ind, 1)) < 1016
8921 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
8922 return TRUE;
8923
8924 return FALSE;
8925 }
8926
8927 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8928 type. */
8929 int
8930 neon_struct_mem_operand (rtx op)
8931 {
8932 rtx ind;
8933
8934 /* Reject eliminable registers. */
8935 if (! (reload_in_progress || reload_completed)
8936 && ( reg_mentioned_p (frame_pointer_rtx, op)
8937 || reg_mentioned_p (arg_pointer_rtx, op)
8938 || reg_mentioned_p (virtual_incoming_args_rtx, op)
8939 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
8940 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
8941 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
8942 return FALSE;
8943
8944 /* Constants are converted into offsets from labels. */
8945 if (GET_CODE (op) != MEM)
8946 return FALSE;
8947
8948 ind = XEXP (op, 0);
8949
8950 if (reload_completed
8951 && (GET_CODE (ind) == LABEL_REF
8952 || (GET_CODE (ind) == CONST
8953 && GET_CODE (XEXP (ind, 0)) == PLUS
8954 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
8955 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
8956 return TRUE;
8957
8958 /* Match: (mem (reg)). */
8959 if (GET_CODE (ind) == REG)
8960 return arm_address_register_rtx_p (ind, 0);
8961
8962 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
8963 if (GET_CODE (ind) == POST_INC
8964 || GET_CODE (ind) == PRE_DEC)
8965 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8966
8967 return FALSE;
8968 }
8969
8970 /* Return true if X is a register that will be eliminated later on. */
8971 int
8972 arm_eliminable_register (rtx x)
8973 {
8974 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
8975 || REGNO (x) == ARG_POINTER_REGNUM
8976 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
8977 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
8978 }
8979
8980 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
8981 coprocessor registers. Otherwise return NO_REGS. */
8982
8983 enum reg_class
8984 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
8985 {
8986 if (mode == HFmode)
8987 {
8988 if (!TARGET_NEON_FP16)
8989 return GENERAL_REGS;
8990 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
8991 return NO_REGS;
8992 return GENERAL_REGS;
8993 }
8994
8995 /* The neon move patterns handle all legitimate vector and struct
8996 addresses. */
8997 if (TARGET_NEON
8998 && MEM_P (x)
8999 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9000 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9001 || VALID_NEON_STRUCT_MODE (mode)))
9002 return NO_REGS;
9003
9004 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9005 return NO_REGS;
9006
9007 return GENERAL_REGS;
9008 }
9009
9010 /* Values which must be returned in the most-significant end of the return
9011 register. */
9012
9013 static bool
9014 arm_return_in_msb (const_tree valtype)
9015 {
9016 return (TARGET_AAPCS_BASED
9017 && BYTES_BIG_ENDIAN
9018 && (AGGREGATE_TYPE_P (valtype)
9019 || TREE_CODE (valtype) == COMPLEX_TYPE));
9020 }
9021
9022 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9023 Used by the Cirrus Maverick code, which has to work around
9024 a hardware bug triggered by such instructions. */
9025 static bool
9026 arm_memory_load_p (rtx insn)
9027 {
9028 rtx body, lhs, rhs;
9029
9030 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9031 return false;
9032
9033 body = PATTERN (insn);
9034
9035 if (GET_CODE (body) != SET)
9036 return false;
9037
9038 lhs = XEXP (body, 0);
9039 rhs = XEXP (body, 1);
9040
9041 lhs = REG_OR_SUBREG_RTX (lhs);
9042
9043 /* If the destination is not a general purpose
9044 register we do not have to worry. */
9045 if (GET_CODE (lhs) != REG
9046 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9047 return false;
9048
9049 /* As well as loads from memory we also have to react
9050 to loads of invalid constants which will be turned
9051 into loads from the minipool. */
9052 return (GET_CODE (rhs) == MEM
9053 || GET_CODE (rhs) == SYMBOL_REF
9054 || note_invalid_constants (insn, -1, false));
9055 }
9056
9057 /* Return TRUE if INSN is a Cirrus instruction. */
9058 static bool
9059 arm_cirrus_insn_p (rtx insn)
9060 {
9061 enum attr_cirrus attr;
9062
9063 /* get_attr cannot accept USE or CLOBBER. */
9064 if (!insn
9065 || GET_CODE (insn) != INSN
9066 || GET_CODE (PATTERN (insn)) == USE
9067 || GET_CODE (PATTERN (insn)) == CLOBBER)
9068 return 0;
9069
9070 attr = get_attr_cirrus (insn);
9071
9072 return attr != CIRRUS_NOT;
9073 }
9074
9075 /* Cirrus reorg for invalid instruction combinations. */
9076 static void
9077 cirrus_reorg (rtx first)
9078 {
9079 enum attr_cirrus attr;
9080 rtx body = PATTERN (first);
9081 rtx t;
9082 int nops;
9083
9084 /* Any branch must be followed by 2 non Cirrus instructions. */
9085 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9086 {
9087 nops = 0;
9088 t = next_nonnote_insn (first);
9089
9090 if (arm_cirrus_insn_p (t))
9091 ++ nops;
9092
9093 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9094 ++ nops;
9095
9096 while (nops --)
9097 emit_insn_after (gen_nop (), first);
9098
9099 return;
9100 }
9101
9102 /* (float (blah)) is in parallel with a clobber. */
9103 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9104 body = XVECEXP (body, 0, 0);
9105
9106 if (GET_CODE (body) == SET)
9107 {
9108 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9109
9110 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9111 be followed by a non Cirrus insn. */
9112 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9113 {
9114 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9115 emit_insn_after (gen_nop (), first);
9116
9117 return;
9118 }
9119 else if (arm_memory_load_p (first))
9120 {
9121 unsigned int arm_regno;
9122
9123 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9124 ldr/cfmv64hr combination where the Rd field is the same
9125 in both instructions must be split with a non Cirrus
9126 insn. Example:
9127
9128 ldr r0, blah
9129 nop
9130 cfmvsr mvf0, r0. */
9131
9132 /* Get Arm register number for ldr insn. */
9133 if (GET_CODE (lhs) == REG)
9134 arm_regno = REGNO (lhs);
9135 else
9136 {
9137 gcc_assert (GET_CODE (rhs) == REG);
9138 arm_regno = REGNO (rhs);
9139 }
9140
9141 /* Next insn. */
9142 first = next_nonnote_insn (first);
9143
9144 if (! arm_cirrus_insn_p (first))
9145 return;
9146
9147 body = PATTERN (first);
9148
9149 /* (float (blah)) is in parallel with a clobber. */
9150 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9151 body = XVECEXP (body, 0, 0);
9152
9153 if (GET_CODE (body) == FLOAT)
9154 body = XEXP (body, 0);
9155
9156 if (get_attr_cirrus (first) == CIRRUS_MOVE
9157 && GET_CODE (XEXP (body, 1)) == REG
9158 && arm_regno == REGNO (XEXP (body, 1)))
9159 emit_insn_after (gen_nop (), first);
9160
9161 return;
9162 }
9163 }
9164
9165 /* get_attr cannot accept USE or CLOBBER. */
9166 if (!first
9167 || GET_CODE (first) != INSN
9168 || GET_CODE (PATTERN (first)) == USE
9169 || GET_CODE (PATTERN (first)) == CLOBBER)
9170 return;
9171
9172 attr = get_attr_cirrus (first);
9173
9174 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9175 must be followed by a non-coprocessor instruction. */
9176 if (attr == CIRRUS_COMPARE)
9177 {
9178 nops = 0;
9179
9180 t = next_nonnote_insn (first);
9181
9182 if (arm_cirrus_insn_p (t))
9183 ++ nops;
9184
9185 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9186 ++ nops;
9187
9188 while (nops --)
9189 emit_insn_after (gen_nop (), first);
9190
9191 return;
9192 }
9193 }
9194
9195 /* Return TRUE if X references a SYMBOL_REF. */
9196 int
9197 symbol_mentioned_p (rtx x)
9198 {
9199 const char * fmt;
9200 int i;
9201
9202 if (GET_CODE (x) == SYMBOL_REF)
9203 return 1;
9204
9205 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9206 are constant offsets, not symbols. */
9207 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9208 return 0;
9209
9210 fmt = GET_RTX_FORMAT (GET_CODE (x));
9211
9212 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9213 {
9214 if (fmt[i] == 'E')
9215 {
9216 int j;
9217
9218 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9219 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9220 return 1;
9221 }
9222 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9223 return 1;
9224 }
9225
9226 return 0;
9227 }
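/* For example, (plus (symbol_ref "foo") (const_int 4)) mentions a
   SYMBOL_REF and so returns 1, whereas a TLS reference such as
   (unspec [(symbol_ref "foo") ...] UNSPEC_TLS) returns 0 because of
   the check above.  The symbol name here is purely illustrative.  */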
9228
9229 /* Return TRUE if X references a LABEL_REF. */
9230 int
9231 label_mentioned_p (rtx x)
9232 {
9233 const char * fmt;
9234 int i;
9235
9236 if (GET_CODE (x) == LABEL_REF)
9237 return 1;
9238
9239 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9240 instruction, but they are constant offsets, not symbols. */
9241 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9242 return 0;
9243
9244 fmt = GET_RTX_FORMAT (GET_CODE (x));
9245 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9246 {
9247 if (fmt[i] == 'E')
9248 {
9249 int j;
9250
9251 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9252 if (label_mentioned_p (XVECEXP (x, i, j)))
9253 return 1;
9254 }
9255 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9256 return 1;
9257 }
9258
9259 return 0;
9260 }
9261
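/* Return TRUE if X is an UNSPEC_TLS expression, or a CONST wrapping
   one.  */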
9262 int
9263 tls_mentioned_p (rtx x)
9264 {
9265 switch (GET_CODE (x))
9266 {
9267 case CONST:
9268 return tls_mentioned_p (XEXP (x, 0));
9269
9270 case UNSPEC:
9271 if (XINT (x, 1) == UNSPEC_TLS)
9272 return 1;
9273
9274 default:
9275 return 0;
9276 }
9277 }
9278
9279 /* Must not copy any rtx that uses a pc-relative address. */
9280
9281 static int
9282 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9283 {
9284 if (GET_CODE (*x) == UNSPEC
9285 && XINT (*x, 1) == UNSPEC_PIC_BASE)
9286 return 1;
9287 return 0;
9288 }
9289
9290 static bool
9291 arm_cannot_copy_insn_p (rtx insn)
9292 {
9293 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9294 }
9295
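/* Return the comparison code under which the first operand of the
   min/max expression X is the one to select, e.g. GE for SMAX and
   LEU for UMIN.  */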
9296 enum rtx_code
9297 minmax_code (rtx x)
9298 {
9299 enum rtx_code code = GET_CODE (x);
9300
9301 switch (code)
9302 {
9303 case SMAX:
9304 return GE;
9305 case SMIN:
9306 return LE;
9307 case UMIN:
9308 return LEU;
9309 case UMAX:
9310 return GEU;
9311 default:
9312 gcc_unreachable ();
9313 }
9314 }
9315
9316 /* Return 1 if memory locations are adjacent. */
9317 int
9318 adjacent_mem_locations (rtx a, rtx b)
9319 {
9320 /* We don't guarantee to preserve the order of these memory refs. */
9321 if (volatile_refs_p (a) || volatile_refs_p (b))
9322 return 0;
9323
9324 if ((GET_CODE (XEXP (a, 0)) == REG
9325 || (GET_CODE (XEXP (a, 0)) == PLUS
9326 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9327 && (GET_CODE (XEXP (b, 0)) == REG
9328 || (GET_CODE (XEXP (b, 0)) == PLUS
9329 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9330 {
9331 HOST_WIDE_INT val0 = 0, val1 = 0;
9332 rtx reg0, reg1;
9333 int val_diff;
9334
9335 if (GET_CODE (XEXP (a, 0)) == PLUS)
9336 {
9337 reg0 = XEXP (XEXP (a, 0), 0);
9338 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9339 }
9340 else
9341 reg0 = XEXP (a, 0);
9342
9343 if (GET_CODE (XEXP (b, 0)) == PLUS)
9344 {
9345 reg1 = XEXP (XEXP (b, 0), 0);
9346 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9347 }
9348 else
9349 reg1 = XEXP (b, 0);
9350
9351 /* Don't accept any offset that will require multiple
9352 instructions to handle, since this would cause the
9353 arith_adjacentmem pattern to output an overlong sequence. */
9354 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9355 return 0;
9356
9357 /* Don't allow an eliminable register: register elimination can make
9358 the offset too large. */
9359 if (arm_eliminable_register (reg0))
9360 return 0;
9361
9362 val_diff = val1 - val0;
9363
9364 if (arm_ld_sched)
9365 {
9366 /* If the target has load delay slots, then there's no benefit
9367 to using an ldm instruction unless the offset is zero and
9368 we are optimizing for size. */
9369 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9370 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9371 && (val_diff == 4 || val_diff == -4));
9372 }
9373
9374 return ((REGNO (reg0) == REGNO (reg1))
9375 && (val_diff == 4 || val_diff == -4));
9376 }
9377
9378 return 0;
9379 }
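/* For instance (register numbers purely illustrative), [r4, #4] and
   [r4, #8] are adjacent -- same base register, offsets differing by
   exactly 4 -- whereas [r4] and [r4, #8], or [r4, #4] and [r5, #8],
   are not.  On cores with load delay slots the stricter test above
   additionally requires optimize_size and a lowest offset of 0 or 4.  */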
9380
9381 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9382 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9383 instruction. ADD_OFFSET is nonzero if the base address register needs
9384 to be modified with an add instruction before we can use it. */
9385
9386 static bool
9387 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9388 int nops, HOST_WIDE_INT add_offset)
9389 {
9390 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9391 if the offset isn't small enough. The reason 2 ldrs are faster
9392 is because these ARMs are able to do more than one cache access
9393 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9394 whilst the ARM8 has a double bandwidth cache. This means that
9395 these cores can do both an instruction fetch and a data fetch in
9396 a single cycle, so the trick of calculating the address into a
9397 scratch register (one of the result regs) and then doing a load
9398 multiple actually becomes slower (and no smaller in code size).
9399 That is the transformation
9400
9401 ldr rd1, [rbase + offset]
9402 ldr rd2, [rbase + offset + 4]
9403
9404 to
9405
9406 add rd1, rbase, offset
9407 ldmia rd1, {rd1, rd2}
9408
9409 produces worse code -- '3 cycles + any stalls on rd2' instead of
9410 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9411 access per cycle, the first sequence could never complete in less
9412 than 6 cycles, whereas the ldm sequence would only take 5 and
9413 would make better use of sequential accesses if not hitting the
9414 cache.
9415
9416 We cheat here and test 'arm_ld_sched' which we currently know to
9417 only be true for the ARM8, ARM9 and StrongARM. If this ever
9418 changes, then the test below needs to be reworked. */
9419 if (nops == 2 && arm_ld_sched && add_offset != 0)
9420 return false;
9421
9422 /* XScale has load-store double instructions, but they have stricter
9423 alignment requirements than load-store multiple, so we cannot
9424 use them.
9425
9426 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9427 the pipeline until completion.
9428
9429 NREGS CYCLES
9430 1 3
9431 2 4
9432 3 5
9433 4 6
9434
9435 An ldr instruction takes 1-3 cycles, but does not block the
9436 pipeline.
9437
9438 NREGS CYCLES
9439 1 1-3
9440 2 2-6
9441 3 3-9
9442 4 4-12
9443
9444 Best case ldr will always win. However, the more ldr instructions
9445 we issue, the less likely we are to be able to schedule them well.
9446 Using ldr instructions also increases code size.
9447
9448 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9449 for counts of 3 or 4 regs. */
9450 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9451 return false;
9452 return true;
9453 }
9454
9455 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9456 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9457 an array ORDER describing the sequence in which to access the
9458 offsets so that they are visited in ascending order. In this sequence, each
9459 offset must be larger by exactly 4 than the previous one. ORDER[0]
9460 must have been filled in with the lowest offset by the caller.
9461 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9462 we use to verify that ORDER produces an ascending order of registers.
9463 Return true if it was possible to construct such an order, false if
9464 not. */
9465
9466 static bool
9467 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9468 int *unsorted_regs)
9469 {
9470 int i;
9471 for (i = 1; i < nops; i++)
9472 {
9473 int j;
9474
9475 order[i] = order[i - 1];
9476 for (j = 0; j < nops; j++)
9477 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9478 {
9479 /* We must find exactly one offset that is higher than the
9480 previous one by 4. */
9481 if (order[i] != order[i - 1])
9482 return false;
9483 order[i] = j;
9484 }
9485 if (order[i] == order[i - 1])
9486 return false;
9487 /* The register numbers must be ascending. */
9488 if (unsorted_regs != NULL
9489 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9490 return false;
9491 }
9492 return true;
9493 }
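/* A worked example of the loop above (values illustrative): given
   unsorted_offsets = {8, 0, 4, 12} and order[0] = 1 (the index of the
   lowest offset), the result is order = {1, 2, 0, 3}, i.e. the
   accesses at offsets 0, 4, 8 and 12 in ascending sequence.  Any
   missing or duplicated offset makes the function return false.  */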
9494
9495 /* Used to determine in a peephole whether a sequence of load
9496 instructions can be changed into a load-multiple instruction.
9497 NOPS is the number of separate load instructions we are examining. The
9498 first NOPS entries in OPERANDS are the destination registers, the
9499 next NOPS entries are memory operands. If this function is
9500 successful, *BASE is set to the common base register of the memory
9501 accesses; *LOAD_OFFSET is set to the first memory location's offset
9502 from that base register.
9503 REGS is an array filled in with the destination register numbers.
9504 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9505 insn numbers to an ascending order of loads. If CHECK_REGS is true,
9506 the sequence of registers in REGS matches the loads from ascending memory
9507 locations, and the function verifies that the register numbers are
9508 themselves ascending. If CHECK_REGS is false, the register numbers
9509 are stored in the order they are found in the operands. */
9510 static int
9511 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9512 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9513 {
9514 int unsorted_regs[MAX_LDM_STM_OPS];
9515 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9516 int order[MAX_LDM_STM_OPS];
9517 rtx base_reg_rtx = NULL;
9518 int base_reg = -1;
9519 int i, ldm_case;
9520
9521 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9522 easily extended if required. */
9523 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9524
9525 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9526
9527 /* Loop over the operands and check that the memory references are
9528 suitable (i.e. immediate offsets from the same base register). At
9529 the same time, extract the target register, and the memory
9530 offsets. */
9531 for (i = 0; i < nops; i++)
9532 {
9533 rtx reg;
9534 rtx offset;
9535
9536 /* Convert a subreg of a mem into the mem itself. */
9537 if (GET_CODE (operands[nops + i]) == SUBREG)
9538 operands[nops + i] = alter_subreg (operands + (nops + i));
9539
9540 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9541
9542 /* Don't reorder volatile memory references; it doesn't seem worth
9543 looking for the case where the order is ok anyway. */
9544 if (MEM_VOLATILE_P (operands[nops + i]))
9545 return 0;
9546
9547 offset = const0_rtx;
9548
9549 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9550 || (GET_CODE (reg) == SUBREG
9551 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9552 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9553 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9554 == REG)
9555 || (GET_CODE (reg) == SUBREG
9556 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9557 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9558 == CONST_INT)))
9559 {
9560 if (i == 0)
9561 {
9562 base_reg = REGNO (reg);
9563 base_reg_rtx = reg;
9564 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9565 return 0;
9566 }
9567 else if (base_reg != (int) REGNO (reg))
9568 /* Not addressed from the same base register. */
9569 return 0;
9570
9571 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9572 ? REGNO (operands[i])
9573 : REGNO (SUBREG_REG (operands[i])));
9574
9575 /* If it isn't an integer register, or if it overwrites the
9576 base register but isn't the last insn in the list, then
9577 we can't do this. */
9578 if (unsorted_regs[i] < 0
9579 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9580 || unsorted_regs[i] > 14
9581 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9582 return 0;
9583
9584 unsorted_offsets[i] = INTVAL (offset);
9585 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9586 order[0] = i;
9587 }
9588 else
9589 /* Not a suitable memory address. */
9590 return 0;
9591 }
9592
9593 /* All the useful information has now been extracted from the
9594 operands into unsorted_regs and unsorted_offsets; additionally,
9595 order[0] has been set to the lowest offset in the list. Sort
9596 the offsets into order, verifying that they are adjacent, and
9597 check that the register numbers are ascending. */
9598 if (!compute_offset_order (nops, unsorted_offsets, order,
9599 check_regs ? unsorted_regs : NULL))
9600 return 0;
9601
9602 if (saved_order)
9603 memcpy (saved_order, order, sizeof order);
9604
9605 if (base)
9606 {
9607 *base = base_reg;
9608
9609 for (i = 0; i < nops; i++)
9610 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9611
9612 *load_offset = unsorted_offsets[order[0]];
9613 }
9614
9615 if (TARGET_THUMB1
9616 && !peep2_reg_dead_p (nops, base_reg_rtx))
9617 return 0;
9618
9619 if (unsorted_offsets[order[0]] == 0)
9620 ldm_case = 1; /* ldmia */
9621 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9622 ldm_case = 2; /* ldmib */
9623 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9624 ldm_case = 3; /* ldmda */
9625 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9626 ldm_case = 4; /* ldmdb */
9627 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9628 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9629 ldm_case = 5;
9630 else
9631 return 0;
9632
9633 if (!multiple_operation_profitable_p (false, nops,
9634 ldm_case == 5
9635 ? unsorted_offsets[order[0]] : 0))
9636 return 0;
9637
9638 return ldm_case;
9639 }
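/* For instance (register numbers illustrative), the pair of loads

	ldr	r4, [r2]
	ldr	r5, [r2, #4]

   gives base register r2, regs = {4, 5}, a load offset of 0 and hence
   ldm_case 1, so the peephole can emit a single "ldmia r2, {r4, r5}"
   (subject to the profitability check above).  The same pair with a
   lowest offset of 4 would give ldm_case 2 (ldmib) on ARM.  */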
9640
9641 /* Used to determine in a peephole whether a sequence of store instructions can
9642 be changed into a store-multiple instruction.
9643 NOPS is the number of separate store instructions we are examining.
9644 NOPS_TOTAL is the total number of instructions recognized by the peephole
9645 pattern.
9646 The first NOPS entries in OPERANDS are the source registers, the next
9647 NOPS entries are memory operands. If this function is successful, *BASE is
9648 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9649 to the first memory location's offset from that base register. REGS is an
9650 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9651 likewise filled with the corresponding rtx's.
9652 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9653 numbers to an ascending order of stores.
9654 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9655 from ascending memory locations, and the function verifies that the register
9656 numbers are themselves ascending. If CHECK_REGS is false, the register
9657 numbers are stored in the order they are found in the operands. */
9658 static int
9659 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9660 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9661 HOST_WIDE_INT *load_offset, bool check_regs)
9662 {
9663 int unsorted_regs[MAX_LDM_STM_OPS];
9664 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9665 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9666 int order[MAX_LDM_STM_OPS];
9667 int base_reg = -1;
9668 rtx base_reg_rtx = NULL;
9669 int i, stm_case;
9670
9671 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9672 easily extended if required. */
9673 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9674
9675 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9676
9677 /* Loop over the operands and check that the memory references are
9678 suitable (i.e. immediate offsets from the same base register). At
9679 the same time, extract the target register, and the memory
9680 offsets. */
9681 for (i = 0; i < nops; i++)
9682 {
9683 rtx reg;
9684 rtx offset;
9685
9686 /* Convert a subreg of a mem into the mem itself. */
9687 if (GET_CODE (operands[nops + i]) == SUBREG)
9688 operands[nops + i] = alter_subreg (operands + (nops + i));
9689
9690 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
9691
9692 /* Don't reorder volatile memory references; it doesn't seem worth
9693 looking for the case where the order is ok anyway. */
9694 if (MEM_VOLATILE_P (operands[nops + i]))
9695 return 0;
9696
9697 offset = const0_rtx;
9698
9699 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
9700 || (GET_CODE (reg) == SUBREG
9701 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9702 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
9703 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
9704 == REG)
9705 || (GET_CODE (reg) == SUBREG
9706 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9707 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9708 == CONST_INT)))
9709 {
9710 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9711 ? operands[i] : SUBREG_REG (operands[i]));
9712 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9713
9714 if (i == 0)
9715 {
9716 base_reg = REGNO (reg);
9717 base_reg_rtx = reg;
9718 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9719 return 0;
9720 }
9721 else if (base_reg != (int) REGNO (reg))
9722 /* Not addressed from the same base register. */
9723 return 0;
9724
9725 /* If it isn't an integer register, then we can't do this. */
9726 if (unsorted_regs[i] < 0
9727 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9728 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9729 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9730 || unsorted_regs[i] > 14)
9731 return 0;
9732
9733 unsorted_offsets[i] = INTVAL (offset);
9734 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9735 order[0] = i;
9736 }
9737 else
9738 /* Not a suitable memory address. */
9739 return 0;
9740 }
9741
9742 /* All the useful information has now been extracted from the
9743 operands into unsorted_regs and unsorted_offsets; additionally,
9744 order[0] has been set to the lowest offset in the list. Sort
9745 the offsets into order, verifying that they are adjacent, and
9746 check that the register numbers are ascending. */
9747 if (!compute_offset_order (nops, unsorted_offsets, order,
9748 check_regs ? unsorted_regs : NULL))
9749 return 0;
9750
9751 if (saved_order)
9752 memcpy (saved_order, order, sizeof order);
9753
9754 if (base)
9755 {
9756 *base = base_reg;
9757
9758 for (i = 0; i < nops; i++)
9759 {
9760 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9761 if (reg_rtxs)
9762 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9763 }
9764
9765 *load_offset = unsorted_offsets[order[0]];
9766 }
9767
9768 if (TARGET_THUMB1
9769 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9770 return 0;
9771
9772 if (unsorted_offsets[order[0]] == 0)
9773 stm_case = 1; /* stmia */
9774 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9775 stm_case = 2; /* stmib */
9776 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9777 stm_case = 3; /* stmda */
9778 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9779 stm_case = 4; /* stmdb */
9780 else
9781 return 0;
9782
9783 if (!multiple_operation_profitable_p (false, nops, 0))
9784 return 0;
9785
9786 return stm_case;
9787 }
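/* Analogously (register numbers illustrative), the stores

	str	r4, [r2]
	str	r5, [r2, #4]

   give stm_case 1 and can be combined into "stmia r2, {r4, r5}",
   again subject to the profitability check.  */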
9788 \f
9789 /* Routines for use in generating RTL. */
9790
9791 /* Generate a load-multiple instruction. COUNT is the number of loads in
9792 the instruction; REGS and MEMS are arrays containing the operands.
9793 BASEREG is the base register to be used in addressing the memory operands.
9794 WBACK_OFFSET is nonzero if the instruction should update the base
9795 register. */
9796
9797 static rtx
9798 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9799 HOST_WIDE_INT wback_offset)
9800 {
9801 int i = 0, j;
9802 rtx result;
9803
9804 if (!multiple_operation_profitable_p (false, count, 0))
9805 {
9806 rtx seq;
9807
9808 start_sequence ();
9809
9810 for (i = 0; i < count; i++)
9811 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9812
9813 if (wback_offset != 0)
9814 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9815
9816 seq = get_insns ();
9817 end_sequence ();
9818
9819 return seq;
9820 }
9821
9822 result = gen_rtx_PARALLEL (VOIDmode,
9823 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9824 if (wback_offset != 0)
9825 {
9826 XVECEXP (result, 0, 0)
9827 = gen_rtx_SET (VOIDmode, basereg,
9828 plus_constant (basereg, wback_offset));
9829 i = 1;
9830 count++;
9831 }
9832
9833 for (j = 0; i < count; i++, j++)
9834 XVECEXP (result, 0, i)
9835 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9836
9837 return result;
9838 }
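/* A rough sketch of the RTL built above: for count == 2,
   regs == {4, 5} and a write-back offset of 8, the result is

     (parallel [(set basereg (plus basereg (const_int 8)))
		(set (reg:SI 4) mems[0])
		(set (reg:SI 5) mems[1])])

   with the caller-supplied MEMS standing in for the memory
   references.  */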
9839
9840 /* Generate a store-multiple instruction. COUNT is the number of stores in
9841 the instruction; REGS and MEMS are arrays containing the operands.
9842 BASEREG is the base register to be used in addressing the memory operands.
9843 WBACK_OFFSET is nonzero if the instruction should update the base
9844 register. */
9845
9846 static rtx
9847 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9848 HOST_WIDE_INT wback_offset)
9849 {
9850 int i = 0, j;
9851 rtx result;
9852
9853 if (GET_CODE (basereg) == PLUS)
9854 basereg = XEXP (basereg, 0);
9855
9856 if (!multiple_operation_profitable_p (false, count, 0))
9857 {
9858 rtx seq;
9859
9860 start_sequence ();
9861
9862 for (i = 0; i < count; i++)
9863 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
9864
9865 if (wback_offset != 0)
9866 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9867
9868 seq = get_insns ();
9869 end_sequence ();
9870
9871 return seq;
9872 }
9873
9874 result = gen_rtx_PARALLEL (VOIDmode,
9875 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9876 if (wback_offset != 0)
9877 {
9878 XVECEXP (result, 0, 0)
9879 = gen_rtx_SET (VOIDmode, basereg,
9880 plus_constant (basereg, wback_offset));
9881 i = 1;
9882 count++;
9883 }
9884
9885 for (j = 0; i < count; i++, j++)
9886 XVECEXP (result, 0, i)
9887 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
9888
9889 return result;
9890 }
9891
9892 /* Generate either a load-multiple or a store-multiple instruction. This
9893 function can be used in situations where we can start with a single MEM
9894 rtx and adjust its address upwards.
9895 COUNT is the number of operations in the instruction, not counting a
9896 possible update of the base register. REGS is an array containing the
9897 register operands.
9898 BASEREG is the base register to be used in addressing the memory operands,
9899 which are constructed from BASEMEM.
9900 WRITE_BACK specifies whether the generated instruction should include an
9901 update of the base register.
9902 OFFSETP is used to pass an offset to and from this function; this offset
9903 is not used when constructing the address (instead BASEMEM should have an
9904 appropriate offset in its address); it is used only for setting
9905 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
9906
9907 static rtx
9908 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
9909 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
9910 {
9911 rtx mems[MAX_LDM_STM_OPS];
9912 HOST_WIDE_INT offset = *offsetp;
9913 int i;
9914
9915 gcc_assert (count <= MAX_LDM_STM_OPS);
9916
9917 if (GET_CODE (basereg) == PLUS)
9918 basereg = XEXP (basereg, 0);
9919
9920 for (i = 0; i < count; i++)
9921 {
9922 rtx addr = plus_constant (basereg, i * 4);
9923 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
9924 offset += 4;
9925 }
9926
9927 if (write_back)
9928 *offsetp = offset;
9929
9930 if (is_load)
9931 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
9932 write_back ? 4 * count : 0);
9933 else
9934 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
9935 write_back ? 4 * count : 0);
9936 }
9937
9938 rtx
9939 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
9940 rtx basemem, HOST_WIDE_INT *offsetp)
9941 {
9942 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
9943 offsetp);
9944 }
9945
9946 rtx
9947 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
9948 rtx basemem, HOST_WIDE_INT *offsetp)
9949 {
9950 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
9951 offsetp);
9952 }
9953
9954 /* Called from a peephole2 expander to turn a sequence of loads into an
9955 LDM instruction. OPERANDS are the operands found by the peephole matcher;
9956 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
9957 is true if we can reorder the registers because they are used commutatively
9958 subsequently.
9959 Returns true iff we could generate a new instruction. */
9960
9961 bool
9962 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
9963 {
9964 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9965 rtx mems[MAX_LDM_STM_OPS];
9966 int i, j, base_reg;
9967 rtx base_reg_rtx;
9968 HOST_WIDE_INT offset;
9969 int write_back = FALSE;
9970 int ldm_case;
9971 rtx addr;
9972
9973 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
9974 &base_reg, &offset, !sort_regs);
9975
9976 if (ldm_case == 0)
9977 return false;
9978
9979 if (sort_regs)
9980 for (i = 0; i < nops - 1; i++)
9981 for (j = i + 1; j < nops; j++)
9982 if (regs[i] > regs[j])
9983 {
9984 int t = regs[i];
9985 regs[i] = regs[j];
9986 regs[j] = t;
9987 }
9988 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
9989
9990 if (TARGET_THUMB1)
9991 {
9992 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
9993 gcc_assert (ldm_case == 1 || ldm_case == 5);
9994 write_back = TRUE;
9995 }
9996
9997 if (ldm_case == 5)
9998 {
9999 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10000 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10001 offset = 0;
10002 if (!TARGET_THUMB1)
10003 {
10004 base_reg = regs[0];
10005 base_reg_rtx = newbase;
10006 }
10007 }
10008
10009 for (i = 0; i < nops; i++)
10010 {
10011 addr = plus_constant (base_reg_rtx, offset + i * 4);
10012 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10013 SImode, addr, 0);
10014 }
10015 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10016 write_back ? offset + i * 4 : 0));
10017 return true;
10018 }
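/* In the ldm_case 5 path above the nonzero base offset is folded into
   an add.  On ARM (register numbers illustrative) the loads

	ldr	r4, [r3, #64]
	ldr	r5, [r3, #68]
	ldr	r6, [r3, #72]

   become

	add	r4, r3, #64
	ldmia	r4, {r4, r5, r6}

   reusing the first destination register as the new base -- the
   transformation discussed in multiple_operation_profitable_p.  */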
10019
10020 /* Called from a peephole2 expander to turn a sequence of stores into an
10021 STM instruction. OPERANDS are the operands found by the peephole matcher;
10022 NOPS indicates how many separate stores we are trying to combine.
10023 Returns true iff we could generate a new instruction. */
10024
10025 bool
10026 gen_stm_seq (rtx *operands, int nops)
10027 {
10028 int i;
10029 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10030 rtx mems[MAX_LDM_STM_OPS];
10031 int base_reg;
10032 rtx base_reg_rtx;
10033 HOST_WIDE_INT offset;
10034 int write_back = FALSE;
10035 int stm_case;
10036 rtx addr;
10037 bool base_reg_dies;
10038
10039 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10040 mem_order, &base_reg, &offset, true);
10041
10042 if (stm_case == 0)
10043 return false;
10044
10045 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10046
10047 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10048 if (TARGET_THUMB1)
10049 {
10050 gcc_assert (base_reg_dies);
10051 write_back = TRUE;
10052 }
10053
10054 if (stm_case == 5)
10055 {
10056 gcc_assert (base_reg_dies);
10057 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10058 offset = 0;
10059 }
10060
10061 addr = plus_constant (base_reg_rtx, offset);
10062
10063 for (i = 0; i < nops; i++)
10064 {
10065 addr = plus_constant (base_reg_rtx, offset + i * 4);
10066 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10067 SImode, addr, 0);
10068 }
10069 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10070 write_back ? offset + i * 4 : 0));
10071 return true;
10072 }
10073
10074 /* Called from a peephole2 expander to turn a sequence of stores that are
10075 preceded by constant loads into an STM instruction. OPERANDS are the
10076 operands found by the peephole matcher; NOPS indicates how many
10077 separate stores we are trying to combine; there are 2 * NOPS
10078 instructions in the peephole.
10079 Returns true iff we could generate a new instruction. */
10080
10081 bool
10082 gen_const_stm_seq (rtx *operands, int nops)
10083 {
10084 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10085 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10086 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10087 rtx mems[MAX_LDM_STM_OPS];
10088 int base_reg;
10089 rtx base_reg_rtx;
10090 HOST_WIDE_INT offset;
10091 int write_back = FALSE;
10092 int stm_case;
10093 rtx addr;
10094 bool base_reg_dies;
10095 int i, j;
10096 HARD_REG_SET allocated;
10097
10098 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10099 mem_order, &base_reg, &offset, false);
10100
10101 if (stm_case == 0)
10102 return false;
10103
10104 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10105
10106 /* If the same register is used more than once, try to find a free
10107 register. */
10108 CLEAR_HARD_REG_SET (allocated);
10109 for (i = 0; i < nops; i++)
10110 {
10111 for (j = i + 1; j < nops; j++)
10112 if (regs[i] == regs[j])
10113 {
10114 rtx t = peep2_find_free_register (0, nops * 2,
10115 TARGET_THUMB1 ? "l" : "r",
10116 SImode, &allocated);
10117 if (t == NULL_RTX)
10118 return false;
10119 reg_rtxs[i] = t;
10120 regs[i] = REGNO (t);
10121 }
10122 }
10123
10124 /* Compute an ordering that maps the register numbers to an ascending
10125 sequence. */
10126 reg_order[0] = 0;
10127 for (i = 0; i < nops; i++)
10128 if (regs[i] < regs[reg_order[0]])
10129 reg_order[0] = i;
10130
10131 for (i = 1; i < nops; i++)
10132 {
10133 int this_order = reg_order[i - 1];
10134 for (j = 0; j < nops; j++)
10135 if (regs[j] > regs[reg_order[i - 1]]
10136 && (this_order == reg_order[i - 1]
10137 || regs[j] < regs[this_order]))
10138 this_order = j;
10139 reg_order[i] = this_order;
10140 }
10141
10142 /* Ensure that registers that must be live after the instruction end
10143 up with the correct value. */
10144 for (i = 0; i < nops; i++)
10145 {
10146 int this_order = reg_order[i];
10147 if ((this_order != mem_order[i]
10148 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10149 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10150 return false;
10151 }
10152
10153 /* Load the constants. */
10154 for (i = 0; i < nops; i++)
10155 {
10156 rtx op = operands[2 * nops + mem_order[i]];
10157 sorted_regs[i] = regs[reg_order[i]];
10158 emit_move_insn (reg_rtxs[reg_order[i]], op);
10159 }
10160
10161 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10162
10163 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10164 if (TARGET_THUMB1)
10165 {
10166 gcc_assert (base_reg_dies);
10167 write_back = TRUE;
10168 }
10169
10170 if (stm_case == 5)
10171 {
10172 gcc_assert (base_reg_dies);
10173 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10174 offset = 0;
10175 }
10176
10177 addr = plus_constant (base_reg_rtx, offset);
10178
10179 for (i = 0; i < nops; i++)
10180 {
10181 addr = plus_constant (base_reg_rtx, offset + i * 4);
10182 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10183 SImode, addr, 0);
10184 }
10185 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10186 write_back ? offset + i * 4 : 0));
10187 return true;
10188 }
10189
10190 int
10191 arm_gen_movmemqi (rtx *operands)
10192 {
10193 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
10194 HOST_WIDE_INT srcoffset, dstoffset;
10195 int i;
10196 rtx src, dst, srcbase, dstbase;
10197 rtx part_bytes_reg = NULL;
10198 rtx mem;
10199
10200 if (GET_CODE (operands[2]) != CONST_INT
10201 || GET_CODE (operands[3]) != CONST_INT
10202 || INTVAL (operands[2]) > 64
10203 || INTVAL (operands[3]) & 3)
10204 return 0;
10205
10206 dstbase = operands[0];
10207 srcbase = operands[1];
10208
10209 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
10210 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
10211
10212 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
10213 out_words_to_go = INTVAL (operands[2]) / 4;
10214 last_bytes = INTVAL (operands[2]) & 3;
10215 dstoffset = srcoffset = 0;
10216
10217 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
10218 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
10219
10220 for (i = 0; in_words_to_go >= 2; i+=4)
10221 {
10222 if (in_words_to_go > 4)
10223 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
10224 TRUE, srcbase, &srcoffset));
10225 else
10226 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
10227 src, FALSE, srcbase,
10228 &srcoffset));
10229
10230 if (out_words_to_go)
10231 {
10232 if (out_words_to_go > 4)
10233 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
10234 TRUE, dstbase, &dstoffset));
10235 else if (out_words_to_go != 1)
10236 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
10237 out_words_to_go, dst,
10238 (last_bytes == 0
10239 ? FALSE : TRUE),
10240 dstbase, &dstoffset));
10241 else
10242 {
10243 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10244 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
10245 if (last_bytes != 0)
10246 {
10247 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
10248 dstoffset += 4;
10249 }
10250 }
10251 }
10252
10253 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
10254 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
10255 }
10256
10257 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10258 if (out_words_to_go)
10259 {
10260 rtx sreg;
10261
10262 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10263 sreg = copy_to_reg (mem);
10264
10265 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
10266 emit_move_insn (mem, sreg);
10267 in_words_to_go--;
10268
10269 gcc_assert (!in_words_to_go); /* Sanity check */
10270 }
10271
10272 if (in_words_to_go)
10273 {
10274 gcc_assert (in_words_to_go > 0);
10275
10276 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
10277 part_bytes_reg = copy_to_mode_reg (SImode, mem);
10278 }
10279
10280 gcc_assert (!last_bytes || part_bytes_reg);
10281
10282 if (BYTES_BIG_ENDIAN && last_bytes)
10283 {
10284 rtx tmp = gen_reg_rtx (SImode);
10285
10286 /* The bytes we want are in the top end of the word. */
10287 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
10288 GEN_INT (8 * (4 - last_bytes))));
10289 part_bytes_reg = tmp;
10290
10291 while (last_bytes)
10292 {
10293 mem = adjust_automodify_address (dstbase, QImode,
10294 plus_constant (dst, last_bytes - 1),
10295 dstoffset + last_bytes - 1);
10296 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10297
10298 if (--last_bytes)
10299 {
10300 tmp = gen_reg_rtx (SImode);
10301 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
10302 part_bytes_reg = tmp;
10303 }
10304 }
10305
10306 }
10307 else
10308 {
10309 if (last_bytes > 1)
10310 {
10311 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
10312 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
10313 last_bytes -= 2;
10314 if (last_bytes)
10315 {
10316 rtx tmp = gen_reg_rtx (SImode);
10317 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
10318 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
10319 part_bytes_reg = tmp;
10320 dstoffset += 2;
10321 }
10322 }
10323
10324 if (last_bytes)
10325 {
10326 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
10327 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
10328 }
10329 }
10330
10331 return 1;
10332 }
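/* As an illustration (using the registers arm_regs_in_sequence
   provides), a 16-byte word-aligned copy expands to roughly

	ldmia	src, {r0, r1, r2, r3}
	stmia	dst, {r0, r1, r2, r3}

   while a 10-byte copy loads three words, stores the two complete
   ones, and finishes with a halfword store of the trailing bytes on a
   little-endian target.  */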
10333
10334 /* Select a dominance comparison mode if possible for a test of the general
10335 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10336 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10337 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10338 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10339 In all cases OP will be either EQ or NE, but we don't need to know which
10340 here. If we are unable to support a dominance comparison we return
10341 CC mode. This will then fail to match for the RTL expressions that
10342 generate this call. */
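/* For example (assuming each individual comparison is simple enough to
   select CCmode on its own): X = (lt a b), Y = (le c d) with
   COND_OR == DOM_CC_X_OR_Y gives CC_DLEmode, since LT dominates LE;
   two EQ comparisons combined with DOM_CC_X_AND_Y give CC_DEQmode.  */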
10343 enum machine_mode
10344 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
10345 {
10346 enum rtx_code cond1, cond2;
10347 int swapped = 0;
10348
10349 /* Currently we will probably get the wrong result if the individual
10350 comparisons are not simple. This also ensures that it is safe to
10351 reverse a comparison if necessary. */
10352 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
10353 != CCmode)
10354 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
10355 != CCmode))
10356 return CCmode;
10357
10358 /* The if_then_else variant of this tests the second condition if the
10359 first passes, but is true if the first fails. Reverse the first
10360 condition to get a true "inclusive-or" expression. */
10361 if (cond_or == DOM_CC_NX_OR_Y)
10362 cond1 = reverse_condition (cond1);
10363
10364 /* If the comparisons are not equal, and one doesn't dominate the other,
10365 then we can't do this. */
10366 if (cond1 != cond2
10367 && !comparison_dominates_p (cond1, cond2)
10368 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
10369 return CCmode;
10370
10371 if (swapped)
10372 {
10373 enum rtx_code temp = cond1;
10374 cond1 = cond2;
10375 cond2 = temp;
10376 }
10377
10378 switch (cond1)
10379 {
10380 case EQ:
10381 if (cond_or == DOM_CC_X_AND_Y)
10382 return CC_DEQmode;
10383
10384 switch (cond2)
10385 {
10386 case EQ: return CC_DEQmode;
10387 case LE: return CC_DLEmode;
10388 case LEU: return CC_DLEUmode;
10389 case GE: return CC_DGEmode;
10390 case GEU: return CC_DGEUmode;
10391 default: gcc_unreachable ();
10392 }
10393
10394 case LT:
10395 if (cond_or == DOM_CC_X_AND_Y)
10396 return CC_DLTmode;
10397
10398 switch (cond2)
10399 {
10400 case LT:
10401 return CC_DLTmode;
10402 case LE:
10403 return CC_DLEmode;
10404 case NE:
10405 return CC_DNEmode;
10406 default:
10407 gcc_unreachable ();
10408 }
10409
10410 case GT:
10411 if (cond_or == DOM_CC_X_AND_Y)
10412 return CC_DGTmode;
10413
10414 switch (cond2)
10415 {
10416 case GT:
10417 return CC_DGTmode;
10418 case GE:
10419 return CC_DGEmode;
10420 case NE:
10421 return CC_DNEmode;
10422 default:
10423 gcc_unreachable ();
10424 }
10425
10426 case LTU:
10427 if (cond_or == DOM_CC_X_AND_Y)
10428 return CC_DLTUmode;
10429
10430 switch (cond2)
10431 {
10432 case LTU:
10433 return CC_DLTUmode;
10434 case LEU:
10435 return CC_DLEUmode;
10436 case NE:
10437 return CC_DNEmode;
10438 default:
10439 gcc_unreachable ();
10440 }
10441
10442 case GTU:
10443 if (cond_or == DOM_CC_X_AND_Y)
10444 return CC_DGTUmode;
10445
10446 switch (cond2)
10447 {
10448 case GTU:
10449 return CC_DGTUmode;
10450 case GEU:
10451 return CC_DGEUmode;
10452 case NE:
10453 return CC_DNEmode;
10454 default:
10455 gcc_unreachable ();
10456 }
10457
10458 /* The remaining cases only occur when both comparisons are the
10459 same. */
10460 case NE:
10461 gcc_assert (cond1 == cond2);
10462 return CC_DNEmode;
10463
10464 case LE:
10465 gcc_assert (cond1 == cond2);
10466 return CC_DLEmode;
10467
10468 case GE:
10469 gcc_assert (cond1 == cond2);
10470 return CC_DGEmode;
10471
10472 case LEU:
10473 gcc_assert (cond1 == cond2);
10474 return CC_DLEUmode;
10475
10476 case GEU:
10477 gcc_assert (cond1 == cond2);
10478 return CC_DGEUmode;
10479
10480 default:
10481 gcc_unreachable ();
10482 }
10483 }
10484
10485 enum machine_mode
10486 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
10487 {
10488 /* All floating point compares return CCFP if it is an equality
10489 comparison, and CCFPE otherwise. */
10490 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
10491 {
10492 switch (op)
10493 {
10494 case EQ:
10495 case NE:
10496 case UNORDERED:
10497 case ORDERED:
10498 case UNLT:
10499 case UNLE:
10500 case UNGT:
10501 case UNGE:
10502 case UNEQ:
10503 case LTGT:
10504 return CCFPmode;
10505
10506 case LT:
10507 case LE:
10508 case GT:
10509 case GE:
10510 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
10511 return CCFPmode;
10512 return CCFPEmode;
10513
10514 default:
10515 gcc_unreachable ();
10516 }
10517 }
10518
10519 /* A compare with a shifted operand. Because of canonicalization, the
10520 comparison will have to be swapped when we emit the assembler. */
10521 if (GET_MODE (y) == SImode
10522 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10523 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10524 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
10525 || GET_CODE (x) == ROTATERT))
10526 return CC_SWPmode;
10527
10528 /* This operation is performed swapped, but since we only rely on the Z
10529 flag we don't need an additional mode. */
10530 if (GET_MODE (y) == SImode
10531 && (REG_P (y) || (GET_CODE (y) == SUBREG))
10532 && GET_CODE (x) == NEG
10533 && (op == EQ || op == NE))
10534 return CC_Zmode;
10535
10536 /* This is a special case that is used by combine to allow a
10537 comparison of a shifted byte load to be split into a zero-extend
10538 followed by a comparison of the shifted integer (only valid for
10539 equalities and unsigned inequalities). */
10540 if (GET_MODE (x) == SImode
10541 && GET_CODE (x) == ASHIFT
10542 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
10543 && GET_CODE (XEXP (x, 0)) == SUBREG
10544 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
10545 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
10546 && (op == EQ || op == NE
10547 || op == GEU || op == GTU || op == LTU || op == LEU)
10548 && GET_CODE (y) == CONST_INT)
10549 return CC_Zmode;
10550
10551 /* A construct for a conditional compare: if the false arm contains
10552 0, then both conditions must be true; otherwise either condition
10553 must be true. Not all conditions are possible, so CCmode is
10554 returned if it can't be done. */
10555 if (GET_CODE (x) == IF_THEN_ELSE
10556 && (XEXP (x, 2) == const0_rtx
10557 || XEXP (x, 2) == const1_rtx)
10558 && COMPARISON_P (XEXP (x, 0))
10559 && COMPARISON_P (XEXP (x, 1)))
10560 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10561 INTVAL (XEXP (x, 2)));
10562
10563 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10564 if (GET_CODE (x) == AND
10565 && (op == EQ || op == NE)
10566 && COMPARISON_P (XEXP (x, 0))
10567 && COMPARISON_P (XEXP (x, 1)))
10568 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10569 DOM_CC_X_AND_Y);
10570
10571 if (GET_CODE (x) == IOR
10572 && (op == EQ || op == NE)
10573 && COMPARISON_P (XEXP (x, 0))
10574 && COMPARISON_P (XEXP (x, 1)))
10575 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10576 DOM_CC_X_OR_Y);
10577
10578 /* An operation (on Thumb) where we want to test for a single bit.
10579 This is done by shifting that bit up into the top bit of a
10580 scratch register; we can then branch on the sign bit. */
10581 if (TARGET_THUMB1
10582 && GET_MODE (x) == SImode
10583 && (op == EQ || op == NE)
10584 && GET_CODE (x) == ZERO_EXTRACT
10585 && XEXP (x, 1) == const1_rtx)
10586 return CC_Nmode;
10587
10588 /* An operation that sets the condition codes as a side-effect; the
10589 V flag is not set correctly, so we can only use comparisons where
10590 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10591 instead.) */
10592 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10593 if (GET_MODE (x) == SImode
10594 && y == const0_rtx
10595 && (op == EQ || op == NE || op == LT || op == GE)
10596 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
10597 || GET_CODE (x) == AND || GET_CODE (x) == IOR
10598 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
10599 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
10600 || GET_CODE (x) == LSHIFTRT
10601 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
10602 || GET_CODE (x) == ROTATERT
10603 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
10604 return CC_NOOVmode;
10605
10606 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
10607 return CC_Zmode;
10608
10609 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10610 && GET_CODE (x) == PLUS
10611 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10612 return CC_Cmode;
10613
10614 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10615 {
10616 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10617 available. */
10618 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10619 return CCmode;
10620
10621 switch (op)
10622 {
10623 case EQ:
10624 case NE:
10625 /* A DImode comparison against zero can be implemented by
10626 or'ing the two halves together. */
10627 if (y == const0_rtx)
10628 return CC_Zmode;
10629
10630 /* We can do an equality test in three Thumb instructions. */
10631 if (!TARGET_ARM)
10632 return CC_Zmode;
10633
10634 /* FALLTHROUGH */
10635
10636 case LTU:
10637 case LEU:
10638 case GTU:
10639 case GEU:
10640 /* DImode unsigned comparisons can be implemented by cmp +
10641 cmpeq without a scratch register. Not worth doing in
10642 Thumb-2. */
10643 if (TARGET_ARM)
10644 return CC_CZmode;
10645
10646 /* FALLTHROUGH */
10647
10648 case LT:
10649 case LE:
10650 case GT:
10651 case GE:
10652 /* DImode signed and unsigned comparisons can be implemented
10653 by cmp + sbcs with a scratch register, but that does not
10654 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10655 gcc_assert (op != EQ && op != NE);
10656 return CC_NCVmode;
10657
10658 default:
10659 gcc_unreachable ();
10660 }
10661 }
10662
10663 return CCmode;
10664 }
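/* Two concrete instances of the selection above (operands
   illustrative): comparing (ashift r1 (const_int 2)) against an SImode
   register yields CC_SWPmode, and an EQ test of (plus r1 r2) against
   zero yields CC_NOOVmode.  */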
10665
10666 /* X and Y are two things to compare using CODE. Emit the compare insn and
10667 return the rtx for the condition code register in the proper mode. */
10669 rtx
10670 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10671 {
10672 enum machine_mode mode;
10673 rtx cc_reg;
10674 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10675
10676 /* We might have X as a constant, Y as a register because of the predicates
10677 used for cmpdi. If so, force X to a register here. */
10678 if (dimode_comparison && !REG_P (x))
10679 x = force_reg (DImode, x);
10680
10681 mode = SELECT_CC_MODE (code, x, y);
10682 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10683
10684 if (dimode_comparison
10685 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10686 && mode != CC_CZmode)
10687 {
10688 rtx clobber, set;
10689
10690 /* To compare two non-zero values for equality, XOR them and
10691 then compare against zero. Not used for ARM mode; there
10692 CC_CZmode is cheaper. */
10693 if (mode == CC_Zmode && y != const0_rtx)
10694 {
10695 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10696 y = const0_rtx;
10697 }
10698 /* A scratch register is required. */
10699 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10700 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10701 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10702 }
10703 else
10704 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10705
10706 return cc_reg;
10707 }
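/* For instance, a DImode equality test of two registers on Thumb-2
   (with no Maverick FPU) takes the XOR path above: the operands are
   XORed and the DImode result is compared against zero in CC_Zmode,
   which can then be done by ORing its two halves together, as noted
   in arm_select_cc_mode.  */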
10708
10709 /* Generate a sequence of insns that will generate the correct return
10710 address mask depending on the physical architecture that the program
10711 is running on. */
10712 rtx
10713 arm_gen_return_addr_mask (void)
10714 {
10715 rtx reg = gen_reg_rtx (Pmode);
10716
10717 emit_insn (gen_return_addr_mask (reg));
10718 return reg;
10719 }
10720
10721 void
10722 arm_reload_in_hi (rtx *operands)
10723 {
10724 rtx ref = operands[1];
10725 rtx base, scratch;
10726 HOST_WIDE_INT offset = 0;
10727
10728 if (GET_CODE (ref) == SUBREG)
10729 {
10730 offset = SUBREG_BYTE (ref);
10731 ref = SUBREG_REG (ref);
10732 }
10733
10734 if (GET_CODE (ref) == REG)
10735 {
10736 /* We have a pseudo which has been spilt onto the stack; there
10737 are two cases here: the first where there is a simple
10738 stack-slot replacement and a second where the stack-slot is
10739 out of range, or is used as a subreg. */
10740 if (reg_equiv_mem (REGNO (ref)))
10741 {
10742 ref = reg_equiv_mem (REGNO (ref));
10743 base = find_replacement (&XEXP (ref, 0));
10744 }
10745 else
10746 /* The slot is out of range, or was dressed up in a SUBREG. */
10747 base = reg_equiv_address (REGNO (ref));
10748 }
10749 else
10750 base = find_replacement (&XEXP (ref, 0));
10751
10752 /* Handle the case where the address is too complex to be offset by 1. */
10753 if (GET_CODE (base) == MINUS
10754 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10755 {
10756 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10757
10758 emit_set_insn (base_plus, base);
10759 base = base_plus;
10760 }
10761 else if (GET_CODE (base) == PLUS)
10762 {
10763 /* The addend must be CONST_INT, or we would have dealt with it above. */
10764 HOST_WIDE_INT hi, lo;
10765
10766 offset += INTVAL (XEXP (base, 1));
10767 base = XEXP (base, 0);
10768
10769 /* Rework the address into a legal sequence of insns. */
10770 /* Valid range for lo is -4095 -> 4095 */
10771 lo = (offset >= 0
10772 ? (offset & 0xfff)
10773 : -((-offset) & 0xfff));
10774
10775 /* Corner case: if lo is the max offset then we would be out of range
10776 once we have added the additional 1 below, so bump the msb into the
10777 pre-loading insn(s). */
10778 if (lo == 4095)
10779 lo &= 0x7ff;
10780
10781 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10782 ^ (HOST_WIDE_INT) 0x80000000)
10783 - (HOST_WIDE_INT) 0x80000000);
10784
10785 gcc_assert (hi + lo == offset);
10786
10787 if (hi != 0)
10788 {
10789 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10790
10791 /* Get the base address; addsi3 knows how to handle constants
10792 that require more than one insn. */
10793 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10794 base = base_plus;
10795 offset = lo;
10796 }
10797 }
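  /* To make the split above concrete: an offset of 0x2345 gives
     lo = 0x345 and hi = 0x2000, while the corner case of 4095 gives
     lo = 0x7ff and hi = 0x800, keeping both lo and lo + 1 within the
     4095-byte range of an ldrb offset.  */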
10798
10799 /* Operands[2] may overlap operands[0] (though it won't overlap
10800 operands[1]); that's why we asked for a DImode reg -- so we can
10801 use the half that does not overlap. */
10802 if (REGNO (operands[2]) == REGNO (operands[0]))
10803 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10804 else
10805 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10806
10807 emit_insn (gen_zero_extendqisi2 (scratch,
10808 gen_rtx_MEM (QImode,
10809 plus_constant (base,
10810 offset))));
10811 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
10812 gen_rtx_MEM (QImode,
10813 plus_constant (base,
10814 offset + 1))));
10815 if (!BYTES_BIG_ENDIAN)
10816 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10817 gen_rtx_IOR (SImode,
10818 gen_rtx_ASHIFT
10819 (SImode,
10820 gen_rtx_SUBREG (SImode, operands[0], 0),
10821 GEN_INT (8)),
10822 scratch));
10823 else
10824 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
10825 gen_rtx_IOR (SImode,
10826 gen_rtx_ASHIFT (SImode, scratch,
10827 GEN_INT (8)),
10828 gen_rtx_SUBREG (SImode, operands[0], 0)));
10829 }
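/* On a little-endian target the insns emitted above amount to roughly
   (register names illustrative)

	ldrb	scratch, [base, #offset]
	ldrb	out, [base, #offset + 1]
	orr	out, scratch, out, lsl #8

   reassembling the halfword from two byte loads; on big-endian the
   byte at the lower address supplies the high half instead.  */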
10830
10831 /* Handle storing a half-word to memory during reload by synthesizing it as two
10832 byte stores. Take care not to clobber the input values until after we
10833 have moved them somewhere safe. This code assumes that if the DImode
10834 scratch in operands[2] overlaps either the input value or output address
10835 in some way, then that value must die in this insn (we absolutely need
10836 two scratch registers for some corner cases). */
10837 void
10838 arm_reload_out_hi (rtx *operands)
10839 {
10840 rtx ref = operands[0];
10841 rtx outval = operands[1];
10842 rtx base, scratch;
10843 HOST_WIDE_INT offset = 0;
10844
10845 if (GET_CODE (ref) == SUBREG)
10846 {
10847 offset = SUBREG_BYTE (ref);
10848 ref = SUBREG_REG (ref);
10849 }
10850
10851 if (GET_CODE (ref) == REG)
10852 {
10853 /* We have a pseudo which has been spilt onto the stack; there
10854 are two cases here: the first where there is a simple
10855 stack-slot replacement and a second where the stack-slot is
10856 out of range, or is used as a subreg. */
10857 if (reg_equiv_mem (REGNO (ref)))
10858 {
10859 ref = reg_equiv_mem (REGNO (ref));
10860 base = find_replacement (&XEXP (ref, 0));
10861 }
10862 else
10863 /* The slot is out of range, or was dressed up in a SUBREG. */
10864 base = reg_equiv_address (REGNO (ref));
10865 }
10866 else
10867 base = find_replacement (&XEXP (ref, 0));
10868
10869 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
10870
10871 /* Handle the case where the address is too complex to be offset by 1. */
10872 if (GET_CODE (base) == MINUS
10873 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
10874 {
10875 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10876
10877 /* Be careful not to destroy OUTVAL. */
10878 if (reg_overlap_mentioned_p (base_plus, outval))
10879 {
10880 /* Updating base_plus might destroy outval, see if we can
10881 swap the scratch and base_plus. */
10882 if (!reg_overlap_mentioned_p (scratch, outval))
10883 {
10884 rtx tmp = scratch;
10885 scratch = base_plus;
10886 base_plus = tmp;
10887 }
10888 else
10889 {
10890 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10891
10892 /* Be conservative and copy OUTVAL into the scratch now;
10893 this should only be necessary if outval is a subreg
10894 of something larger than a word. */
10895 /* XXX Might this clobber base? I can't see how it can,
10896 since scratch is known to overlap with OUTVAL, and
10897 must be wider than a word. */
10898 emit_insn (gen_movhi (scratch_hi, outval));
10899 outval = scratch_hi;
10900 }
10901 }
10902
10903 emit_set_insn (base_plus, base);
10904 base = base_plus;
10905 }
10906 else if (GET_CODE (base) == PLUS)
10907 {
10908 /* The addend must be CONST_INT, or we would have dealt with it above. */
10909 HOST_WIDE_INT hi, lo;
10910
10911 offset += INTVAL (XEXP (base, 1));
10912 base = XEXP (base, 0);
10913
10914 /* Rework the address into a legal sequence of insns. */
10915 /* Valid range for lo is -4095 -> 4095 */
10916 lo = (offset >= 0
10917 ? (offset & 0xfff)
10918 : -((-offset) & 0xfff));
10919
10920 /* Corner case: if lo is the max offset then we would be out of range
10921 once we have added the additional 1 below, so bump the msb into the
10922 pre-loading insn(s). */
10923 if (lo == 4095)
10924 lo &= 0x7ff;
10925
10926 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
10927 ^ (HOST_WIDE_INT) 0x80000000)
10928 - (HOST_WIDE_INT) 0x80000000);
10929
10930 gcc_assert (hi + lo == offset);
10931
10932 if (hi != 0)
10933 {
10934 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
10935
10936 /* Be careful not to destroy OUTVAL. */
10937 if (reg_overlap_mentioned_p (base_plus, outval))
10938 {
10939 /* Updating base_plus might destroy outval, see if we
10940 can swap the scratch and base_plus. */
10941 if (!reg_overlap_mentioned_p (scratch, outval))
10942 {
10943 rtx tmp = scratch;
10944 scratch = base_plus;
10945 base_plus = tmp;
10946 }
10947 else
10948 {
10949 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
10950
10951 /* Be conservative and copy outval into scratch now;
10952 this should only be necessary if outval is a
10953 subreg of something larger than a word. */
10954 /* XXX Might this clobber base? I can't see how it
10955 can, since scratch is known to overlap with
10956 outval. */
10957 emit_insn (gen_movhi (scratch_hi, outval));
10958 outval = scratch_hi;
10959 }
10960 }
10961
10962 /* Get the base address; addsi3 knows how to handle constants
10963 that require more than one insn. */
10964 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
10965 base = base_plus;
10966 offset = lo;
10967 }
10968 }
10969
10970 if (BYTES_BIG_ENDIAN)
10971 {
10972 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10973 plus_constant (base, offset + 1)),
10974 gen_lowpart (QImode, outval)));
10975 emit_insn (gen_lshrsi3 (scratch,
10976 gen_rtx_SUBREG (SImode, outval, 0),
10977 GEN_INT (8)));
10978 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10979 gen_lowpart (QImode, scratch)));
10980 }
10981 else
10982 {
10983 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
10984 gen_lowpart (QImode, outval)));
10985 emit_insn (gen_lshrsi3 (scratch,
10986 gen_rtx_SUBREG (SImode, outval, 0),
10987 GEN_INT (8)));
10988 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
10989 plus_constant (base, offset + 1)),
10990 gen_lowpart (QImode, scratch)));
10991 }
10992 }
10993
10994 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10995 (padded to the size of a word) should be passed in a register. */
10996
10997 static bool
10998 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
10999 {
11000 if (TARGET_AAPCS_BASED)
11001 return must_pass_in_stack_var_size (mode, type);
11002 else
11003 return must_pass_in_stack_var_size_or_pad (mode, type);
11004 }
11005
11006
11007 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11008 Return true if an argument passed on the stack should be padded upwards,
11009 i.e. if the least-significant byte has useful data.
11010 For legacy APCS ABIs we use the default. For AAPCS-based ABIs small
11011 aggregate types are placed at the lowest memory address. */
11012
11013 bool
11014 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
11015 {
11016 if (!TARGET_AAPCS_BASED)
11017 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
11018
11019 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
11020 return false;
11021
11022 return true;
11023 }
11024
11025
11026 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11027 Return true if the register should be padded upwards, i.e. if the
11028 least significant byte holds useful data; for non-AAPCS ABIs this is
11029 simply !BYTES_BIG_ENDIAN.
11030 For AAPCS, small aggregates and small complex types are always padded
11031 upwards. */
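/* For example (illustrative only): on a big-endian AAPCS target a
   3-byte structure passed in a register is padded upwards, so its
   bytes occupy the least significant end of the register; in every
   other case the result is simply !BYTES_BIG_ENDIAN.  */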
11032
11033 bool
11034 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
11035 tree type, int first ATTRIBUTE_UNUSED)
11036 {
11037 if (TARGET_AAPCS_BASED
11038 && BYTES_BIG_ENDIAN
11039 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
11040 && int_size_in_bytes (type) <= 4)
11041 return true;
11042
11043 /* Otherwise, use default padding. */
11044 return !BYTES_BIG_ENDIAN;
11045 }
11046
11047 \f
11048 /* Print a symbolic form of X to the debug file, F. */
11049 static void
11050 arm_print_value (FILE *f, rtx x)
11051 {
11052 switch (GET_CODE (x))
11053 {
11054 case CONST_INT:
11055 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
11056 return;
11057
11058 case CONST_DOUBLE:
11059 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
11060 return;
11061
11062 case CONST_VECTOR:
11063 {
11064 int i;
11065
11066 fprintf (f, "<");
11067 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
11068 {
11069 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
11070 if (i < (CONST_VECTOR_NUNITS (x) - 1))
11071 fputc (',', f);
11072 }
11073 fprintf (f, ">");
11074 }
11075 return;
11076
11077 case CONST_STRING:
11078 fprintf (f, "\"%s\"", XSTR (x, 0));
11079 return;
11080
11081 case SYMBOL_REF:
11082 fprintf (f, "`%s'", XSTR (x, 0));
11083 return;
11084
11085 case LABEL_REF:
11086 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
11087 return;
11088
11089 case CONST:
11090 arm_print_value (f, XEXP (x, 0));
11091 return;
11092
11093 case PLUS:
11094 arm_print_value (f, XEXP (x, 0));
11095 fprintf (f, "+");
11096 arm_print_value (f, XEXP (x, 1));
11097 return;
11098
11099 case PC:
11100 fprintf (f, "pc");
11101 return;
11102
11103 default:
11104 fprintf (f, "????");
11105 return;
11106 }
11107 }
11108 \f
11109 /* Routines for manipulation of the constant pool. */
11110
11111 /* ARM instructions cannot load a large constant directly into a
11112 register; it has to come from a pc-relative load. The constant
11113 must therefore be placed in the addressable range of the pc-relative
11114 load. Depending on the precise pc-relative load
11115 instruction the range is somewhere between 256 bytes and 4k. This
11116 means that we often have to dump a constant inside a function, and
11117 generate code to branch around it.
11118
11119 It is important to minimize this, since the branches will slow
11120 things down and make the code larger.
11121
11122 Normally we can hide the table after an existing unconditional
11123 branch so that there is no interruption of the flow, but in the
11124 worst case the code looks like this:
11125
11126 ldr rn, L1
11127 ...
11128 b L2
11129 align
11130 L1: .long value
11131 L2:
11132 ...
11133
11134 ldr rn, L3
11135 ...
11136 b L4
11137 align
11138 L3: .long value
11139 L4:
11140 ...
11141
11142 We fix this by performing a scan after scheduling, which notices
11143 which instructions need to have their operands fetched from the
11144 constant table and builds the table.
11145
11146 The algorithm starts by building a table of all the constants that
11147 need fixing up and all the natural barriers in the function (places
11148 where a constant table can be dropped without breaking the flow).
11149 For each fixup we note how far the pc-relative replacement will be
11150 able to reach and the offset of the instruction into the function.
11151
11152 Having built the table we then group the fixes together to form
11153 tables that are as large as possible (subject to addressing
11154 constraints) and emit each table of constants after the last
11155 barrier that is within range of all the instructions in the group.
11156 If a group does not contain a barrier, then we forcibly create one
11157 by inserting a jump instruction into the flow. Once the table has
11158 been inserted, the insns are then modified to reference the
11159 relevant entry in the pool.
11160
11161 Possible enhancements to the algorithm (not implemented) are:
11162
11163 1) For some processors and object formats, there may be benefit in
11164 aligning the pools to the start of cache lines; this alignment
11165 would need to be taken into account when calculating addressability
11166 of a pool. */
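/* A rough sketch of how the pass below fits together (illustrative,
   see arm_reorg for the real driver):

     scan all insns, recording natural barriers (push_minipool_barrier)
       and constants needing pool entries (push_minipool_fix);
     while fixes remain:
       greedily attach forward references to the current pool
         (add_minipool_forward_ref);
       drop back to the last barrier in range, or synthesize one with
         create_fix_barrier;
       attach any backward references that still fit
         (add_minipool_backward_ref);
       emit the pool after the barrier (dump_minipool) and rewrite the
         fixed insns to load from it.  */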
11167
11168 /* These typedefs are located at the start of this file, so that
11169 they can be used in the prototypes there. This comment is to
11170 remind readers of that fact so that the following structures
11171 can be understood more easily.
11172
11173 typedef struct minipool_node Mnode;
11174 typedef struct minipool_fixup Mfix; */
11175
11176 struct minipool_node
11177 {
11178 /* Doubly linked chain of entries. */
11179 Mnode * next;
11180 Mnode * prev;
11181 /* The maximum offset into the code at which this entry can be placed. While
11182 pushing fixes for forward references, all entries are sorted in order
11183 of increasing max_address. */
11184 HOST_WIDE_INT max_address;
11185 /* Similarly for an entry inserted for a backwards ref. */
11186 HOST_WIDE_INT min_address;
11187 /* The number of fixes referencing this entry. This can become zero
11188 if we "unpush" an entry. In this case we ignore the entry when we
11189 come to emit the code. */
11190 int refcount;
11191 /* The offset from the start of the minipool. */
11192 HOST_WIDE_INT offset;
11193 /* The value in the table. */
11194 rtx value;
11195 /* The mode of value. */
11196 enum machine_mode mode;
11197 /* The size of the value. With iWMMXt enabled
11198 sizes > 4 also imply an alignment of 8 bytes. */
11199 int fix_size;
11200 };
11201
11202 struct minipool_fixup
11203 {
11204 Mfix * next;
11205 rtx insn;
11206 HOST_WIDE_INT address;
11207 rtx * loc;
11208 enum machine_mode mode;
11209 int fix_size;
11210 rtx value;
11211 Mnode * minipool;
11212 HOST_WIDE_INT forwards;
11213 HOST_WIDE_INT backwards;
11214 };
11215
11216 /* Fixes less than a word need padding out to a word boundary. */
11217 #define MINIPOOL_FIX_SIZE(mode) \
11218 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
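/* For example, a QImode or HImode fix is padded out to 4 bytes by this
   macro, while SImode, DImode and larger fixes keep their natural size.  */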
11219
11220 static Mnode * minipool_vector_head;
11221 static Mnode * minipool_vector_tail;
11222 static rtx minipool_vector_label;
11223 static int minipool_pad;
11224
11225 /* The linked list of all minipool fixes required for this function. */
11226 Mfix * minipool_fix_head;
11227 Mfix * minipool_fix_tail;
11228 /* The fix entry for the current minipool, once it has been placed. */
11229 Mfix * minipool_barrier;
11230
11231 /* Determines if INSN is the start of a jump table. Returns the end
11232 of the TABLE or NULL_RTX. */
11233 static rtx
11234 is_jump_table (rtx insn)
11235 {
11236 rtx table;
11237
11238 if (GET_CODE (insn) == JUMP_INSN
11239 && JUMP_LABEL (insn) != NULL
11240 && ((table = next_real_insn (JUMP_LABEL (insn)))
11241 == next_real_insn (insn))
11242 && table != NULL
11243 && GET_CODE (table) == JUMP_INSN
11244 && (GET_CODE (PATTERN (table)) == ADDR_VEC
11245 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
11246 return table;
11247
11248 return NULL_RTX;
11249 }
11250
11251 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11252 #define JUMP_TABLES_IN_TEXT_SECTION 0
11253 #endif
11254
11255 static HOST_WIDE_INT
11256 get_jump_table_size (rtx insn)
11257 {
11258 /* ADDR_VECs only take room if read-only data goes into the text
11259 section. */
11260 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
11261 {
11262 rtx body = PATTERN (insn);
11263 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
11264 HOST_WIDE_INT size;
11265 HOST_WIDE_INT modesize;
11266
11267 modesize = GET_MODE_SIZE (GET_MODE (body));
11268 size = modesize * XVECLEN (body, elt);
11269 switch (modesize)
11270 {
11271 case 1:
11272 /* Round up size of TBB table to a halfword boundary. */
11273 size = (size + 1) & ~(HOST_WIDE_INT)1;
11274 break;
11275 case 2:
11276 /* No padding necessary for TBH. */
11277 break;
11278 case 4:
11279 /* Add two bytes for alignment on Thumb. */
11280 if (TARGET_THUMB)
11281 size += 2;
11282 break;
11283 default:
11284 gcc_unreachable ();
11285 }
11286 return size;
11287 }
11288
11289 return 0;
11290 }
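/* As a rough example: a QImode (TBB-style) table of 5 entries occupies
   5 bytes, rounded up to 6; an SImode table of 5 entries on Thumb
   occupies 5 * 4 + 2 = 22 bytes because of the alignment padding.  */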
11291
11292 /* Move a minipool fix MP from its current location to before MAX_MP.
11293 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11294 constraints may need updating. */
11295 static Mnode *
11296 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
11297 HOST_WIDE_INT max_address)
11298 {
11299 /* The code below assumes these are different. */
11300 gcc_assert (mp != max_mp);
11301
11302 if (max_mp == NULL)
11303 {
11304 if (max_address < mp->max_address)
11305 mp->max_address = max_address;
11306 }
11307 else
11308 {
11309 if (max_address > max_mp->max_address - mp->fix_size)
11310 mp->max_address = max_mp->max_address - mp->fix_size;
11311 else
11312 mp->max_address = max_address;
11313
11314 /* Unlink MP from its current position. Since max_mp is non-null,
11315 mp->prev must be non-null. */
11316 mp->prev->next = mp->next;
11317 if (mp->next != NULL)
11318 mp->next->prev = mp->prev;
11319 else
11320 minipool_vector_tail = mp->prev;
11321
11322 /* Re-insert it before MAX_MP. */
11323 mp->next = max_mp;
11324 mp->prev = max_mp->prev;
11325 max_mp->prev = mp;
11326
11327 if (mp->prev != NULL)
11328 mp->prev->next = mp;
11329 else
11330 minipool_vector_head = mp;
11331 }
11332
11333 /* Save the new entry. */
11334 max_mp = mp;
11335
11336 /* Scan over the preceding entries and adjust their addresses as
11337 required. */
11338 while (mp->prev != NULL
11339 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11340 {
11341 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11342 mp = mp->prev;
11343 }
11344
11345 return max_mp;
11346 }
11347
11348 /* Add a constant to the minipool for a forward reference. Returns the
11349 node added or NULL if the constant will not fit in this pool. */
11350 static Mnode *
11351 add_minipool_forward_ref (Mfix *fix)
11352 {
11353 /* If set, max_mp is the first pool_entry that has a lower
11354 constraint than the one we are trying to add. */
11355 Mnode * max_mp = NULL;
11356 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
11357 Mnode * mp;
11358
11359 /* If the minipool starts before the end of FIX->INSN then this FIX
11360 cannot be placed into the current pool. Furthermore, adding the
11361 new constant pool entry may cause the pool to start FIX_SIZE bytes
11362 earlier. */
11363 if (minipool_vector_head &&
11364 (fix->address + get_attr_length (fix->insn)
11365 >= minipool_vector_head->max_address - fix->fix_size))
11366 return NULL;
11367
11368 /* Scan the pool to see if a constant with the same value has
11369 already been added. While we are doing this, also note the
11370 location where we must insert the constant if it doesn't already
11371 exist. */
11372 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11373 {
11374 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11375 && fix->mode == mp->mode
11376 && (GET_CODE (fix->value) != CODE_LABEL
11377 || (CODE_LABEL_NUMBER (fix->value)
11378 == CODE_LABEL_NUMBER (mp->value)))
11379 && rtx_equal_p (fix->value, mp->value))
11380 {
11381 /* More than one fix references this entry. */
11382 mp->refcount++;
11383 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
11384 }
11385
11386 /* Note the insertion point if necessary. */
11387 if (max_mp == NULL
11388 && mp->max_address > max_address)
11389 max_mp = mp;
11390
11391 /* If we are inserting an 8-byte aligned quantity and
11392 we have not already found an insertion point, then
11393 make sure that all such 8-byte aligned quantities are
11394 placed at the start of the pool. */
11395 if (ARM_DOUBLEWORD_ALIGN
11396 && max_mp == NULL
11397 && fix->fix_size >= 8
11398 && mp->fix_size < 8)
11399 {
11400 max_mp = mp;
11401 max_address = mp->max_address;
11402 }
11403 }
11404
11405 /* The value is not currently in the minipool, so we need to create
11406 a new entry for it. If MAX_MP is NULL, the entry will be put on
11407 the end of the list since the placement is less constrained than
11408 any existing entry. Otherwise, we insert the new fix before
11409 MAX_MP and, if necessary, adjust the constraints on the other
11410 entries. */
11411 mp = XNEW (Mnode);
11412 mp->fix_size = fix->fix_size;
11413 mp->mode = fix->mode;
11414 mp->value = fix->value;
11415 mp->refcount = 1;
11416 /* Not yet required for a backwards ref. */
11417 mp->min_address = -65536;
11418
11419 if (max_mp == NULL)
11420 {
11421 mp->max_address = max_address;
11422 mp->next = NULL;
11423 mp->prev = minipool_vector_tail;
11424
11425 if (mp->prev == NULL)
11426 {
11427 minipool_vector_head = mp;
11428 minipool_vector_label = gen_label_rtx ();
11429 }
11430 else
11431 mp->prev->next = mp;
11432
11433 minipool_vector_tail = mp;
11434 }
11435 else
11436 {
11437 if (max_address > max_mp->max_address - mp->fix_size)
11438 mp->max_address = max_mp->max_address - mp->fix_size;
11439 else
11440 mp->max_address = max_address;
11441
11442 mp->next = max_mp;
11443 mp->prev = max_mp->prev;
11444 max_mp->prev = mp;
11445 if (mp->prev != NULL)
11446 mp->prev->next = mp;
11447 else
11448 minipool_vector_head = mp;
11449 }
11450
11451 /* Save the new entry. */
11452 max_mp = mp;
11453
11454 /* Scan over the preceding entries and adjust their addresses as
11455 required. */
11456 while (mp->prev != NULL
11457 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
11458 {
11459 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
11460 mp = mp->prev;
11461 }
11462
11463 return max_mp;
11464 }
11465
11466 static Mnode *
11467 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
11468 HOST_WIDE_INT min_address)
11469 {
11470 HOST_WIDE_INT offset;
11471
11472 /* The code below assumes these are different. */
11473 gcc_assert (mp != min_mp);
11474
11475 if (min_mp == NULL)
11476 {
11477 if (min_address > mp->min_address)
11478 mp->min_address = min_address;
11479 }
11480 else
11481 {
11482 /* We will adjust this below if it is too loose. */
11483 mp->min_address = min_address;
11484
11485 /* Unlink MP from its current position. Since min_mp is non-null,
11486 mp->next must be non-null. */
11487 mp->next->prev = mp->prev;
11488 if (mp->prev != NULL)
11489 mp->prev->next = mp->next;
11490 else
11491 minipool_vector_head = mp->next;
11492
11493 /* Reinsert it after MIN_MP. */
11494 mp->prev = min_mp;
11495 mp->next = min_mp->next;
11496 min_mp->next = mp;
11497 if (mp->next != NULL)
11498 mp->next->prev = mp;
11499 else
11500 minipool_vector_tail = mp;
11501 }
11502
11503 min_mp = mp;
11504
11505 offset = 0;
11506 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11507 {
11508 mp->offset = offset;
11509 if (mp->refcount > 0)
11510 offset += mp->fix_size;
11511
11512 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
11513 mp->next->min_address = mp->min_address + mp->fix_size;
11514 }
11515
11516 return min_mp;
11517 }
11518
11519 /* Add a constant to the minipool for a backward reference. Returns the
11520 node added or NULL if the constant will not fit in this pool.
11521
11522 Note that the code for insertion of a backwards reference can be
11523 somewhat confusing because the calculated offsets for each fix do
11524 not take into account the size of the pool (which is still under
11525 construction). */
11526 static Mnode *
11527 add_minipool_backward_ref (Mfix *fix)
11528 {
11529 /* If set, min_mp is the last pool_entry that has a lower constraint
11530 than the one we are trying to add. */
11531 Mnode *min_mp = NULL;
11532 /* This can be negative, since it is only a constraint. */
11533 HOST_WIDE_INT min_address = fix->address - fix->backwards;
11534 Mnode *mp;
11535
11536 /* If we can't reach the current pool from this insn, or if we can't
11537 insert this entry at the end of the pool without pushing other
11538 fixes out of range, then we don't try. This ensures that we
11539 can't fail later on. */
11540 if (min_address >= minipool_barrier->address
11541 || (minipool_vector_tail->min_address + fix->fix_size
11542 >= minipool_barrier->address))
11543 return NULL;
11544
11545 /* Scan the pool to see if a constant with the same value has
11546 already been added. While we are doing this, also note the
11547 location where we must insert the constant if it doesn't already
11548 exist. */
11549 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
11550 {
11551 if (GET_CODE (fix->value) == GET_CODE (mp->value)
11552 && fix->mode == mp->mode
11553 && (GET_CODE (fix->value) != CODE_LABEL
11554 || (CODE_LABEL_NUMBER (fix->value)
11555 == CODE_LABEL_NUMBER (mp->value)))
11556 && rtx_equal_p (fix->value, mp->value)
11557 /* Check that there is enough slack to move this entry to the
11558 end of the table (this is conservative). */
11559 && (mp->max_address
11560 > (minipool_barrier->address
11561 + minipool_vector_tail->offset
11562 + minipool_vector_tail->fix_size)))
11563 {
11564 mp->refcount++;
11565 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
11566 }
11567
11568 if (min_mp != NULL)
11569 mp->min_address += fix->fix_size;
11570 else
11571 {
11572 /* Note the insertion point if necessary. */
11573 if (mp->min_address < min_address)
11574 {
11575 /* For now, we do not allow the insertion of nodes requiring 8-byte
11576 alignment anywhere but at the start of the pool. */
11577 if (ARM_DOUBLEWORD_ALIGN
11578 && fix->fix_size >= 8 && mp->fix_size < 8)
11579 return NULL;
11580 else
11581 min_mp = mp;
11582 }
11583 else if (mp->max_address
11584 < minipool_barrier->address + mp->offset + fix->fix_size)
11585 {
11586 /* Inserting before this entry would push the fix beyond
11587 its maximum address (which can happen if we have
11588 re-located a forwards fix); force the new fix to come
11589 after it. */
11590 if (ARM_DOUBLEWORD_ALIGN
11591 && fix->fix_size >= 8 && mp->fix_size < 8)
11592 return NULL;
11593 else
11594 {
11595 min_mp = mp;
11596 min_address = mp->min_address + fix->fix_size;
11597 }
11598 }
11599 /* Do not insert a non-8-byte aligned quantity before 8-byte
11600 aligned quantities. */
11601 else if (ARM_DOUBLEWORD_ALIGN
11602 && fix->fix_size < 8
11603 && mp->fix_size >= 8)
11604 {
11605 min_mp = mp;
11606 min_address = mp->min_address + fix->fix_size;
11607 }
11608 }
11609 }
11610
11611 /* We need to create a new entry. */
11612 mp = XNEW (Mnode);
11613 mp->fix_size = fix->fix_size;
11614 mp->mode = fix->mode;
11615 mp->value = fix->value;
11616 mp->refcount = 1;
11617 mp->max_address = minipool_barrier->address + 65536;
11618
11619 mp->min_address = min_address;
11620
11621 if (min_mp == NULL)
11622 {
11623 mp->prev = NULL;
11624 mp->next = minipool_vector_head;
11625
11626 if (mp->next == NULL)
11627 {
11628 minipool_vector_tail = mp;
11629 minipool_vector_label = gen_label_rtx ();
11630 }
11631 else
11632 mp->next->prev = mp;
11633
11634 minipool_vector_head = mp;
11635 }
11636 else
11637 {
11638 mp->next = min_mp->next;
11639 mp->prev = min_mp;
11640 min_mp->next = mp;
11641
11642 if (mp->next != NULL)
11643 mp->next->prev = mp;
11644 else
11645 minipool_vector_tail = mp;
11646 }
11647
11648 /* Save the new entry. */
11649 min_mp = mp;
11650
11651 if (mp->prev)
11652 mp = mp->prev;
11653 else
11654 mp->offset = 0;
11655
11656 /* Scan over the following entries and adjust their offsets. */
11657 while (mp->next != NULL)
11658 {
11659 if (mp->next->min_address < mp->min_address + mp->fix_size)
11660 mp->next->min_address = mp->min_address + mp->fix_size;
11661
11662 if (mp->refcount)
11663 mp->next->offset = mp->offset + mp->fix_size;
11664 else
11665 mp->next->offset = mp->offset;
11666
11667 mp = mp->next;
11668 }
11669
11670 return min_mp;
11671 }
11672
11673 static void
11674 assign_minipool_offsets (Mfix *barrier)
11675 {
11676 HOST_WIDE_INT offset = 0;
11677 Mnode *mp;
11678
11679 minipool_barrier = barrier;
11680
11681 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11682 {
11683 mp->offset = offset;
11684
11685 if (mp->refcount > 0)
11686 offset += mp->fix_size;
11687 }
11688 }
11689
11690 /* Output the literal table. */
11691 static void
11692 dump_minipool (rtx scan)
11693 {
11694 Mnode * mp;
11695 Mnode * nmp;
11696 int align64 = 0;
11697
11698 if (ARM_DOUBLEWORD_ALIGN)
11699 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
11700 if (mp->refcount > 0 && mp->fix_size >= 8)
11701 {
11702 align64 = 1;
11703 break;
11704 }
11705
11706 if (dump_file)
11707 fprintf (dump_file,
11708 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11709 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
11710
11711 scan = emit_label_after (gen_label_rtx (), scan);
11712 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
11713 scan = emit_label_after (minipool_vector_label, scan);
11714
11715 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
11716 {
11717 if (mp->refcount > 0)
11718 {
11719 if (dump_file)
11720 {
11721 fprintf (dump_file,
11722 ";; Offset %u, min %ld, max %ld ",
11723 (unsigned) mp->offset, (unsigned long) mp->min_address,
11724 (unsigned long) mp->max_address);
11725 arm_print_value (dump_file, mp->value);
11726 fputc ('\n', dump_file);
11727 }
11728
11729 switch (mp->fix_size)
11730 {
11731 #ifdef HAVE_consttable_1
11732 case 1:
11733 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
11734 break;
11735
11736 #endif
11737 #ifdef HAVE_consttable_2
11738 case 2:
11739 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
11740 break;
11741
11742 #endif
11743 #ifdef HAVE_consttable_4
11744 case 4:
11745 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
11746 break;
11747
11748 #endif
11749 #ifdef HAVE_consttable_8
11750 case 8:
11751 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
11752 break;
11753
11754 #endif
11755 #ifdef HAVE_consttable_16
11756 case 16:
11757 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
11758 break;
11759
11760 #endif
11761 default:
11762 gcc_unreachable ();
11763 }
11764 }
11765
11766 nmp = mp->next;
11767 free (mp);
11768 }
11769
11770 minipool_vector_head = minipool_vector_tail = NULL;
11771 scan = emit_insn_after (gen_consttable_end (), scan);
11772 scan = emit_barrier_after (scan);
11773 }
11774
11775 /* Return the cost of forcibly inserting a barrier after INSN. */
11776 static int
11777 arm_barrier_cost (rtx insn)
11778 {
11779 /* Basing the location of the pool on the loop depth is preferable,
11780 but at the moment, the basic block information seems to be
11781 corrupted by this stage of the compilation. */
11782 int base_cost = 50;
11783 rtx next = next_nonnote_insn (insn);
11784
11785 if (next != NULL && GET_CODE (next) == CODE_LABEL)
11786 base_cost -= 20;
11787
11788 switch (GET_CODE (insn))
11789 {
11790 case CODE_LABEL:
11791 /* It will always be better to place the table before the label, rather
11792 than after it. */
11793 return 50;
11794
11795 case INSN:
11796 case CALL_INSN:
11797 return base_cost;
11798
11799 case JUMP_INSN:
11800 return base_cost - 10;
11801
11802 default:
11803 return base_cost + 10;
11804 }
11805 }
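/* By way of example: a plain INSN costs 50 and a JUMP_INSN 40, and
   either is 20 cheaper again when the next non-note insn is a
   CODE_LABEL, so a jump that is immediately followed by a label is the
   cheapest place to force a barrier.  */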
11806
11807 /* Find the best place in the insn stream in the range
11808 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11809 Create the barrier by inserting a jump and add a new fix entry for
11810 it. */
11811 static Mfix *
11812 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
11813 {
11814 HOST_WIDE_INT count = 0;
11815 rtx barrier;
11816 rtx from = fix->insn;
11817 /* The instruction after which we will insert the jump. */
11818 rtx selected = NULL;
11819 int selected_cost;
11820 /* The address at which the jump instruction will be placed. */
11821 HOST_WIDE_INT selected_address;
11822 Mfix * new_fix;
11823 HOST_WIDE_INT max_count = max_address - fix->address;
11824 rtx label = gen_label_rtx ();
11825
11826 selected_cost = arm_barrier_cost (from);
11827 selected_address = fix->address;
11828
11829 while (from && count < max_count)
11830 {
11831 rtx tmp;
11832 int new_cost;
11833
11834 /* This code shouldn't have been called if there was a natural barrier
11835 within range. */
11836 gcc_assert (GET_CODE (from) != BARRIER);
11837
11838 /* Count the length of this insn. */
11839 count += get_attr_length (from);
11840
11841 /* If there is a jump table, add its length. */
11842 tmp = is_jump_table (from);
11843 if (tmp != NULL)
11844 {
11845 count += get_jump_table_size (tmp);
11846
11847 /* Jump tables aren't in a basic block, so base the cost on
11848 the dispatch insn. If we select this location, we will
11849 still put the pool after the table. */
11850 new_cost = arm_barrier_cost (from);
11851
11852 if (count < max_count
11853 && (!selected || new_cost <= selected_cost))
11854 {
11855 selected = tmp;
11856 selected_cost = new_cost;
11857 selected_address = fix->address + count;
11858 }
11859
11860 /* Continue after the dispatch table. */
11861 from = NEXT_INSN (tmp);
11862 continue;
11863 }
11864
11865 new_cost = arm_barrier_cost (from);
11866
11867 if (count < max_count
11868 && (!selected || new_cost <= selected_cost))
11869 {
11870 selected = from;
11871 selected_cost = new_cost;
11872 selected_address = fix->address + count;
11873 }
11874
11875 from = NEXT_INSN (from);
11876 }
11877
11878 /* Make sure that we found a place to insert the jump. */
11879 gcc_assert (selected);
11880
11881 /* Make sure we do not split a call and its corresponding
11882 CALL_ARG_LOCATION note. */
11883 if (CALL_P (selected))
11884 {
11885 rtx next = NEXT_INSN (selected);
11886 if (next && NOTE_P (next)
11887 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
11888 selected = next;
11889 }
11890
11891 /* Create a new JUMP_INSN that branches around a barrier. */
11892 from = emit_jump_insn_after (gen_jump (label), selected);
11893 JUMP_LABEL (from) = label;
11894 barrier = emit_barrier_after (from);
11895 emit_label_after (label, barrier);
11896
11897 /* Create a minipool barrier entry for the new barrier. */
11898 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
11899 new_fix->insn = barrier;
11900 new_fix->address = selected_address;
11901 new_fix->next = fix->next;
11902 fix->next = new_fix;
11903
11904 return new_fix;
11905 }
11906
11907 /* Record that there is a natural barrier in the insn stream at
11908 ADDRESS. */
11909 static void
11910 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
11911 {
11912 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11913
11914 fix->insn = insn;
11915 fix->address = address;
11916
11917 fix->next = NULL;
11918 if (minipool_fix_head != NULL)
11919 minipool_fix_tail->next = fix;
11920 else
11921 minipool_fix_head = fix;
11922
11923 minipool_fix_tail = fix;
11924 }
11925
11926 /* Record INSN, which will need fixing up to load a value from the
11927 minipool. ADDRESS is the offset of the insn since the start of the
11928 function; LOC is a pointer to the part of the insn which requires
11929 fixing; VALUE is the constant that must be loaded, which is of type
11930 MODE. */
11931 static void
11932 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
11933 enum machine_mode mode, rtx value)
11934 {
11935 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
11936
11937 fix->insn = insn;
11938 fix->address = address;
11939 fix->loc = loc;
11940 fix->mode = mode;
11941 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
11942 fix->value = value;
11943 fix->forwards = get_attr_pool_range (insn);
11944 fix->backwards = get_attr_neg_pool_range (insn);
11945 fix->minipool = NULL;
11946
11947 /* If an insn doesn't have a range defined for it, then it isn't
11948 expecting to be reworked by this code. Better to stop now than
11949 to generate duff assembly code. */
11950 gcc_assert (fix->forwards || fix->backwards);
11951
11952 /* If an entry requires 8-byte alignment then assume all constant pools
11953 require 4 bytes of padding. Trying to do this later on a per-pool
11954 basis is awkward because existing pool entries have to be modified. */
11955 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
11956 minipool_pad = 4;
11957
11958 if (dump_file)
11959 {
11960 fprintf (dump_file,
11961 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11962 GET_MODE_NAME (mode),
11963 INSN_UID (insn), (unsigned long) address,
11964 -1 * (long)fix->backwards, (long)fix->forwards);
11965 arm_print_value (dump_file, fix->value);
11966 fprintf (dump_file, "\n");
11967 }
11968
11969 /* Add it to the chain of fixes. */
11970 fix->next = NULL;
11971
11972 if (minipool_fix_head != NULL)
11973 minipool_fix_tail->next = fix;
11974 else
11975 minipool_fix_head = fix;
11976
11977 minipool_fix_tail = fix;
11978 }
11979
11980 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11981 Returns the number of insns needed, or 99 if we don't know how to
11982 do it. */
11983 int
11984 arm_const_double_inline_cost (rtx val)
11985 {
11986 rtx lowpart, highpart;
11987 enum machine_mode mode;
11988
11989 mode = GET_MODE (val);
11990
11991 if (mode == VOIDmode)
11992 mode = DImode;
11993
11994 gcc_assert (GET_MODE_SIZE (mode) == 8);
11995
11996 lowpart = gen_lowpart (SImode, val);
11997 highpart = gen_highpart_mode (SImode, mode, val);
11998
11999 gcc_assert (GET_CODE (lowpart) == CONST_INT);
12000 gcc_assert (GET_CODE (highpart) == CONST_INT);
12001
12002 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
12003 NULL_RTX, NULL_RTX, 0, 0)
12004 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
12005 NULL_RTX, NULL_RTX, 0, 0));
12006 }
12007
12008 /* Return true if it is worthwhile to split a 64-bit constant into two
12009 32-bit operations. This is the case if optimizing for size, or
12010 if we have load delay slots, or if one 32-bit part can be done with
12011 a single data operation. */
12012 bool
12013 arm_const_double_by_parts (rtx val)
12014 {
12015 enum machine_mode mode = GET_MODE (val);
12016 rtx part;
12017
12018 if (optimize_size || arm_ld_sched)
12019 return true;
12020
12021 if (mode == VOIDmode)
12022 mode = DImode;
12023
12024 part = gen_highpart_mode (SImode, mode, val);
12025
12026 gcc_assert (GET_CODE (part) == CONST_INT);
12027
12028 if (const_ok_for_arm (INTVAL (part))
12029 || const_ok_for_arm (~INTVAL (part)))
12030 return true;
12031
12032 part = gen_lowpart (SImode, val);
12033
12034 gcc_assert (GET_CODE (part) == CONST_INT);
12035
12036 if (const_ok_for_arm (INTVAL (part))
12037 || const_ok_for_arm (~INTVAL (part)))
12038 return true;
12039
12040 return false;
12041 }
12042
12043 /* Return true if it is possible to inline both the high and low parts
12044 of a 64-bit constant into 32-bit data processing instructions. */
12045 bool
12046 arm_const_double_by_immediates (rtx val)
12047 {
12048 enum machine_mode mode = GET_MODE (val);
12049 rtx part;
12050
12051 if (mode == VOIDmode)
12052 mode = DImode;
12053
12054 part = gen_highpart_mode (SImode, mode, val);
12055
12056 gcc_assert (GET_CODE (part) == CONST_INT);
12057
12058 if (!const_ok_for_arm (INTVAL (part)))
12059 return false;
12060
12061 part = gen_lowpart (SImode, val);
12062
12063 gcc_assert (GET_CODE (part) == CONST_INT);
12064
12065 if (!const_ok_for_arm (INTVAL (part)))
12066 return false;
12067
12068 return true;
12069 }
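/* For instance (illustrative): a DImode value such as 0x000000ff000000ff
   passes this test, since both 32-bit halves (0xff) are valid ARM
   immediates, whereas 0x0000000112345678 fails because 0x12345678
   cannot be encoded as a single immediate.  */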
12070
12071 /* Scan INSN and note any of its operands that need fixing.
12072 If DO_PUSHES is false we do not actually push any of the fixups
12073 needed. The function returns TRUE if any fixups were needed/pushed.
12074 This is used by arm_memory_load_p() which needs to know about loads
12075 of constants that will be converted into minipool loads. */
12076 static bool
12077 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
12078 {
12079 bool result = false;
12080 int opno;
12081
12082 extract_insn (insn);
12083
12084 if (!constrain_operands (1))
12085 fatal_insn_not_found (insn);
12086
12087 if (recog_data.n_alternatives == 0)
12088 return false;
12089
12090 /* Fill in recog_op_alt with information about the constraints of
12091 this insn. */
12092 preprocess_constraints ();
12093
12094 for (opno = 0; opno < recog_data.n_operands; opno++)
12095 {
12096 /* Things we need to fix can only occur in inputs. */
12097 if (recog_data.operand_type[opno] != OP_IN)
12098 continue;
12099
12100 /* If this alternative is a memory reference, then any mention
12101 of constants in this alternative is really to fool reload
12102 into allowing us to accept one there. We need to fix them up
12103 now so that we output the right code. */
12104 if (recog_op_alt[opno][which_alternative].memory_ok)
12105 {
12106 rtx op = recog_data.operand[opno];
12107
12108 if (CONSTANT_P (op))
12109 {
12110 if (do_pushes)
12111 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
12112 recog_data.operand_mode[opno], op);
12113 result = true;
12114 }
12115 else if (GET_CODE (op) == MEM
12116 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
12117 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
12118 {
12119 if (do_pushes)
12120 {
12121 rtx cop = avoid_constant_pool_reference (op);
12122
12123 /* Casting the address of something to a mode narrower
12124 than a word can cause avoid_constant_pool_reference()
12125 to return the pool reference itself. That's no good to
12126 us here. Let's just hope that we can use the
12127 constant pool value directly. */
12128 if (op == cop)
12129 cop = get_pool_constant (XEXP (op, 0));
12130
12131 push_minipool_fix (insn, address,
12132 recog_data.operand_loc[opno],
12133 recog_data.operand_mode[opno], cop);
12134 }
12135
12136 result = true;
12137 }
12138 }
12139 }
12140
12141 return result;
12142 }
12143
12144 /* Convert instructions to their cc-clobbering variant if possible, since
12145 that allows us to use smaller encodings. */
12146
12147 static void
12148 thumb2_reorg (void)
12149 {
12150 basic_block bb;
12151 regset_head live;
12152
12153 INIT_REG_SET (&live);
12154
12155 /* We are freeing block_for_insn in the toplev to keep compatibility
12156 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12157 compute_bb_for_insn ();
12158 df_analyze ();
12159
12160 FOR_EACH_BB (bb)
12161 {
12162 rtx insn;
12163
12164 COPY_REG_SET (&live, DF_LR_OUT (bb));
12165 df_simulate_initialize_backwards (bb, &live);
12166 FOR_BB_INSNS_REVERSE (bb, insn)
12167 {
12168 if (NONJUMP_INSN_P (insn)
12169 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12170 {
12171 rtx pat = PATTERN (insn);
12172 if (GET_CODE (pat) == SET
12173 && low_register_operand (XEXP (pat, 0), SImode)
12174 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12175 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12176 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12177 {
12178 rtx dst = XEXP (pat, 0);
12179 rtx src = XEXP (pat, 1);
12180 rtx op0 = XEXP (src, 0);
12181 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12182 ? XEXP (src, 1) : NULL);
12183
12184 if (rtx_equal_p (dst, op0)
12185 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12186 {
12187 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12188 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12189 rtvec vec = gen_rtvec (2, pat, clobber);
12190
12191 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12192 INSN_CODE (insn) = -1;
12193 }
12194 /* We can also handle a commutative operation where the
12195 second operand matches the destination. */
12196 else if (op1 && rtx_equal_p (dst, op1))
12197 {
12198 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12199 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12200 rtvec vec;
12201
12202 src = copy_rtx (src);
12203 XEXP (src, 0) = op1;
12204 XEXP (src, 1) = op0;
12205 pat = gen_rtx_SET (VOIDmode, dst, src);
12206 vec = gen_rtvec (2, pat, clobber);
12207 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12208 INSN_CODE (insn) = -1;
12209 }
12210 }
12211 }
12212
12213 if (NONDEBUG_INSN_P (insn))
12214 df_simulate_one_insn_backwards (bb, insn, &live);
12215 }
12216 }
12217
12218 CLEAR_REG_SET (&live);
12219 }
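/* For example (a sketch of the effect): a plain "add r0, r0, r1" whose
   condition codes are dead at that point gains an explicit CC clobber,
   which allows the shorter 16-bit flag-setting encoding
   "adds r0, r0, r1" to be used.  */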
12220
12221 /* GCC puts the pool in the wrong place for ARM, since we can only
12222 load addresses a limited distance around the pc. We do some
12223 special munging to move the constant pool values to the correct
12224 point in the code. */
12225 static void
12226 arm_reorg (void)
12227 {
12228 rtx insn;
12229 HOST_WIDE_INT address = 0;
12230 Mfix * fix;
12231
12232 if (TARGET_THUMB2)
12233 thumb2_reorg ();
12234
12235 minipool_fix_head = minipool_fix_tail = NULL;
12236
12237 /* The first insn must always be a note, or the code below won't
12238 scan it properly. */
12239 insn = get_insns ();
12240 gcc_assert (GET_CODE (insn) == NOTE);
12241 minipool_pad = 0;
12242
12243 /* Scan all the insns and record the operands that will need fixing. */
12244 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
12245 {
12246 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12247 && (arm_cirrus_insn_p (insn)
12248 || GET_CODE (insn) == JUMP_INSN
12249 || arm_memory_load_p (insn)))
12250 cirrus_reorg (insn);
12251
12252 if (GET_CODE (insn) == BARRIER)
12253 push_minipool_barrier (insn, address);
12254 else if (INSN_P (insn))
12255 {
12256 rtx table;
12257
12258 note_invalid_constants (insn, address, true);
12259 address += get_attr_length (insn);
12260
12261 /* If the insn is a vector jump, add the size of the table
12262 and skip the table. */
12263 if ((table = is_jump_table (insn)) != NULL)
12264 {
12265 address += get_jump_table_size (table);
12266 insn = table;
12267 }
12268 }
12269 }
12270
12271 fix = minipool_fix_head;
12272
12273 /* Now scan the fixups and perform the required changes. */
12274 while (fix)
12275 {
12276 Mfix * ftmp;
12277 Mfix * fdel;
12278 Mfix * last_added_fix;
12279 Mfix * last_barrier = NULL;
12280 Mfix * this_fix;
12281
12282 /* Skip any further barriers before the next fix. */
12283 while (fix && GET_CODE (fix->insn) == BARRIER)
12284 fix = fix->next;
12285
12286 /* No more fixes. */
12287 if (fix == NULL)
12288 break;
12289
12290 last_added_fix = NULL;
12291
12292 for (ftmp = fix; ftmp; ftmp = ftmp->next)
12293 {
12294 if (GET_CODE (ftmp->insn) == BARRIER)
12295 {
12296 if (ftmp->address >= minipool_vector_head->max_address)
12297 break;
12298
12299 last_barrier = ftmp;
12300 }
12301 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
12302 break;
12303
12304 last_added_fix = ftmp; /* Keep track of the last fix added. */
12305 }
12306
12307 /* If we found a barrier, drop back to that; any fixes that we
12308 could have reached but come after the barrier will now go in
12309 the next mini-pool. */
12310 if (last_barrier != NULL)
12311 {
12312 /* Reduce the refcount for those fixes that won't go into this
12313 pool after all. */
12314 for (fdel = last_barrier->next;
12315 fdel && fdel != ftmp;
12316 fdel = fdel->next)
12317 {
12318 fdel->minipool->refcount--;
12319 fdel->minipool = NULL;
12320 }
12321
12322 ftmp = last_barrier;
12323 }
12324 else
12325 {
12326 /* ftmp is the first fix that we can't fit into this pool and
12327 there are no natural barriers that we could use. Insert a
12328 new barrier in the code somewhere between the previous
12329 fix and this one, and arrange to jump around it. */
12330 HOST_WIDE_INT max_address;
12331
12332 /* The last item on the list of fixes must be a barrier, so
12333 we can never run off the end of the list of fixes without
12334 last_barrier being set. */
12335 gcc_assert (ftmp);
12336
12337 max_address = minipool_vector_head->max_address;
12338 /* Check that there isn't another fix that is in range that
12339 we couldn't fit into this pool because the pool was
12340 already too large: we need to put the pool before such an
12341 instruction. The pool itself may come just after the
12342 fix because create_fix_barrier also allows space for a
12343 jump instruction. */
12344 if (ftmp->address < max_address)
12345 max_address = ftmp->address + 1;
12346
12347 last_barrier = create_fix_barrier (last_added_fix, max_address);
12348 }
12349
12350 assign_minipool_offsets (last_barrier);
12351
12352 while (ftmp)
12353 {
12354 if (GET_CODE (ftmp->insn) != BARRIER
12355 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
12356 == NULL))
12357 break;
12358
12359 ftmp = ftmp->next;
12360 }
12361
12362 /* Scan over the fixes we have identified for this pool, fixing them
12363 up and adding the constants to the pool itself. */
12364 for (this_fix = fix; this_fix && ftmp != this_fix;
12365 this_fix = this_fix->next)
12366 if (GET_CODE (this_fix->insn) != BARRIER)
12367 {
12368 rtx addr
12369 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
12370 minipool_vector_label),
12371 this_fix->minipool->offset);
12372 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
12373 }
12374
12375 dump_minipool (last_barrier->insn);
12376 fix = ftmp;
12377 }
12378
12379 /* From now on we must synthesize any constants that we can't handle
12380 directly. This can happen if the RTL gets split during final
12381 instruction generation. */
12382 after_arm_reorg = 1;
12383
12384 /* Free the minipool memory. */
12385 obstack_free (&minipool_obstack, minipool_startobj);
12386 }
12387 \f
12388 /* Routines to output assembly language. */
12389
12390 /* If the rtx is one of the valid FPA immediate constants, return the
12391 string of the number. In this way we can ensure that valid double
12392 constants are generated even when cross-compiling. */
12393 const char *
12394 fp_immediate_constant (rtx x)
12395 {
12396 REAL_VALUE_TYPE r;
12397 int i;
12398
12399 if (!fp_consts_inited)
12400 init_fp_table ();
12401
12402 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12403 for (i = 0; i < 8; i++)
12404 if (REAL_VALUES_EQUAL (r, values_fp[i]))
12405 return strings_fp[i];
12406
12407 gcc_unreachable ();
12408 }
12409
12410 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12411 static const char *
12412 fp_const_from_val (REAL_VALUE_TYPE *r)
12413 {
12414 int i;
12415
12416 if (!fp_consts_inited)
12417 init_fp_table ();
12418
12419 for (i = 0; i < 8; i++)
12420 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
12421 return strings_fp[i];
12422
12423 gcc_unreachable ();
12424 }
12425
12426 /* Output the operands of a LDM/STM instruction to STREAM.
12427 MASK is the ARM register set mask of which only bits 0-15 are important.
12428 REG is the base register, either the frame pointer or the stack pointer,
12429 INSTR is the possibly suffixed load or store instruction.
12430 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12431
12432 static void
12433 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
12434 unsigned long mask, int rfe)
12435 {
12436 unsigned i;
12437 bool not_first = FALSE;
12438
12439 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
12440 fputc ('\t', stream);
12441 asm_fprintf (stream, instr, reg);
12442 fputc ('{', stream);
12443
12444 for (i = 0; i <= LAST_ARM_REGNUM; i++)
12445 if (mask & (1 << i))
12446 {
12447 if (not_first)
12448 fprintf (stream, ", ");
12449
12450 asm_fprintf (stream, "%r", i);
12451 not_first = TRUE;
12452 }
12453
12454 if (rfe)
12455 fprintf (stream, "}^\n");
12456 else
12457 fprintf (stream, "}\n");
12458 }
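/* For illustration, a typical call might print something like
	ldmfd	sp!, {r4, r5, pc}^
   when RFE is set, or the same list without the trailing "^" otherwise;
   the exact mnemonic comes from INSTR.  */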
12459
12460
12461 /* Output a FLDMD instruction to STREAM.
12462 BASE is the register containing the address.
12463 REG and COUNT specify the register range.
12464 Extra registers may be added to avoid hardware bugs.
12465
12466 We output FLDMD even for ARMv5 VFP implementations. Although
12467 FLDMD is technically not supported until ARMv6, it is believed
12468 that all VFP implementations support its use in this context. */
12469
12470 static void
12471 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
12472 {
12473 int i;
12474
12475 /* Work around the ARM10 VFPr1 bug. */
12476 if (count == 2 && !arm_arch6)
12477 {
12478 if (reg == 15)
12479 reg--;
12480 count++;
12481 }
12482
12483 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12484 load into multiple parts if we have to handle more than 16 registers. */
12485 if (count > 16)
12486 {
12487 vfp_output_fldmd (stream, base, reg, 16);
12488 vfp_output_fldmd (stream, base, reg + 16, count - 16);
12489 return;
12490 }
12491
12492 fputc ('\t', stream);
12493 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
12494
12495 for (i = reg; i < reg + count; i++)
12496 {
12497 if (i > reg)
12498 fputs (", ", stream);
12499 asm_fprintf (stream, "d%d", i);
12500 }
12501 fputs ("}\n", stream);
12502
12503 }
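/* For example, with BASE naming sp, REG == 8 and COUNT == 3 this emits
   roughly "fldmfdd	sp!, {d8, d9, d10}" (illustrative only).  */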
12504
12505
12506 /* Output the assembly for a store multiple. */
12507
12508 const char *
12509 vfp_output_fstmd (rtx * operands)
12510 {
12511 char pattern[100];
12512 int p;
12513 int base;
12514 int i;
12515
12516 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
12517 p = strlen (pattern);
12518
12519 gcc_assert (GET_CODE (operands[1]) == REG);
12520
12521 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
12522 for (i = 1; i < XVECLEN (operands[2], 0); i++)
12523 {
12524 p += sprintf (&pattern[p], ", d%d", base + i);
12525 }
12526 strcpy (&pattern[p], "}");
12527
12528 output_asm_insn (pattern, operands);
12529 return "";
12530 }
12531
12532
12533 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12534 number of bytes pushed. */
12535
12536 static int
12537 vfp_emit_fstmd (int base_reg, int count)
12538 {
12539 rtx par;
12540 rtx dwarf;
12541 rtx tmp, reg;
12542 int i;
12543
12544 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
12545 register pairs are stored by a store multiple insn. We avoid this
12546 by pushing an extra pair. */
12547 if (count == 2 && !arm_arch6)
12548 {
12549 if (base_reg == LAST_VFP_REGNUM - 3)
12550 base_reg -= 2;
12551 count++;
12552 }
12553
12554 /* FSTMD may not store more than 16 doubleword registers at once. Split
12555 larger stores into multiple parts (up to a maximum of two, in
12556 practice). */
12557 if (count > 16)
12558 {
12559 int saved;
12560 /* NOTE: base_reg is an internal register number, so each D register
12561 counts as 2. */
12562 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
12563 saved += vfp_emit_fstmd (base_reg, 16);
12564 return saved;
12565 }
12566
12567 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
12568 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
12569
12570 reg = gen_rtx_REG (DFmode, base_reg);
12571 base_reg += 2;
12572
12573 XVECEXP (par, 0, 0)
12574 = gen_rtx_SET (VOIDmode,
12575 gen_frame_mem
12576 (BLKmode,
12577 gen_rtx_PRE_MODIFY (Pmode,
12578 stack_pointer_rtx,
12579 plus_constant
12580 (stack_pointer_rtx,
12581 - (count * 8)))
12582 ),
12583 gen_rtx_UNSPEC (BLKmode,
12584 gen_rtvec (1, reg),
12585 UNSPEC_PUSH_MULT));
12586
12587 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12588 plus_constant (stack_pointer_rtx, -(count * 8)));
12589 RTX_FRAME_RELATED_P (tmp) = 1;
12590 XVECEXP (dwarf, 0, 0) = tmp;
12591
12592 tmp = gen_rtx_SET (VOIDmode,
12593 gen_frame_mem (DFmode, stack_pointer_rtx),
12594 reg);
12595 RTX_FRAME_RELATED_P (tmp) = 1;
12596 XVECEXP (dwarf, 0, 1) = tmp;
12597
12598 for (i = 1; i < count; i++)
12599 {
12600 reg = gen_rtx_REG (DFmode, base_reg);
12601 base_reg += 2;
12602 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
12603
12604 tmp = gen_rtx_SET (VOIDmode,
12605 gen_frame_mem (DFmode,
12606 plus_constant (stack_pointer_rtx,
12607 i * 8)),
12608 reg);
12609 RTX_FRAME_RELATED_P (tmp) = 1;
12610 XVECEXP (dwarf, 0, i + 1) = tmp;
12611 }
12612
12613 par = emit_insn (par);
12614 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
12615 RTX_FRAME_RELATED_P (par) = 1;
12616
12617 return count * 8;
12618 }
12619
12620 /* Emit a call instruction with pattern PAT. ADDR is the address of
12621 the call target. */
12622
12623 void
12624 arm_emit_call_insn (rtx pat, rtx addr)
12625 {
12626 rtx insn;
12627
12628 insn = emit_call_insn (pat);
12629
12630 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12631 If the call might use such an entry, add a use of the PIC register
12632 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12633 if (TARGET_VXWORKS_RTP
12634 && flag_pic
12635 && GET_CODE (addr) == SYMBOL_REF
12636 && (SYMBOL_REF_DECL (addr)
12637 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
12638 : !SYMBOL_REF_LOCAL_P (addr)))
12639 {
12640 require_pic_register ();
12641 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
12642 }
12643 }
12644
12645 /* Output a 'call' insn. */
12646 const char *
12647 output_call (rtx *operands)
12648 {
12649 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
12650
12651 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12652 if (REGNO (operands[0]) == LR_REGNUM)
12653 {
12654 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
12655 output_asm_insn ("mov%?\t%0, %|lr", operands);
12656 }
12657
12658 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12659
12660 if (TARGET_INTERWORK || arm_arch4t)
12661 output_asm_insn ("bx%?\t%0", operands);
12662 else
12663 output_asm_insn ("mov%?\t%|pc, %0", operands);
12664
12665 return "";
12666 }
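/* As an illustration, for a pre-ARMv5 call through r3 this emits roughly
	mov	lr, pc
	bx	r3
   on interworking/ARMv4T targets, or "mov pc, r3" in place of the bx
   on older cores.  */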
12667
12668 /* Output a 'call' insn that is a reference in memory. This is
12669 disabled for ARMv5; we prefer a blx instead, because otherwise
12670 there's a significant performance overhead. */
12671 const char *
12672 output_call_mem (rtx *operands)
12673 {
12674 gcc_assert (!arm_arch5);
12675 if (TARGET_INTERWORK)
12676 {
12677 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12678 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12679 output_asm_insn ("bx%?\t%|ip", operands);
12680 }
12681 else if (regno_use_in (LR_REGNUM, operands[0]))
12682 {
12683 /* LR is used in the memory address. We load the address in the
12684 first instruction. It's safe to use IP as the target of the
12685 load since the call will kill it anyway. */
12686 output_asm_insn ("ldr%?\t%|ip, %0", operands);
12687 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12688 if (arm_arch4t)
12689 output_asm_insn ("bx%?\t%|ip", operands);
12690 else
12691 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
12692 }
12693 else
12694 {
12695 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
12696 output_asm_insn ("ldr%?\t%|pc, %0", operands);
12697 }
12698
12699 return "";
12700 }
12701
12702
12703 /* Output a move from arm registers to an fpa register.
12704 OPERANDS[0] is an fpa register.
12705 OPERANDS[1] is the first of the three arm registers holding the value. */
12706 const char *
12707 output_mov_long_double_fpa_from_arm (rtx *operands)
12708 {
12709 int arm_reg0 = REGNO (operands[1]);
12710 rtx ops[3];
12711
12712 gcc_assert (arm_reg0 != IP_REGNUM);
12713
12714 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12715 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12716 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12717
12718 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12719 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
12720
12721 return "";
12722 }
12723
12724 /* Output a move from an fpa register to arm registers.
12725 OPERANDS[0] is the first of the three destination arm registers.
12726 OPERANDS[1] is an fpa register. */
12727 const char *
12728 output_mov_long_double_arm_from_fpa (rtx *operands)
12729 {
12730 int arm_reg0 = REGNO (operands[0]);
12731 rtx ops[3];
12732
12733 gcc_assert (arm_reg0 != IP_REGNUM);
12734
12735 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12736 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12737 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
12738
12739 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
12740 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
12741 return "";
12742 }
12743
12744 /* Output a move from arm registers to arm registers of a long double.
12745 OPERANDS[0] is the destination.
12746 OPERANDS[1] is the source. */
12747 const char *
12748 output_mov_long_double_arm_from_arm (rtx *operands)
12749 {
12750 /* We have to be careful here because the two might overlap. */
12751 int dest_start = REGNO (operands[0]);
12752 int src_start = REGNO (operands[1]);
12753 rtx ops[2];
12754 int i;
12755
12756 if (dest_start < src_start)
12757 {
12758 for (i = 0; i < 3; i++)
12759 {
12760 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12761 ops[1] = gen_rtx_REG (SImode, src_start + i);
12762 output_asm_insn ("mov%?\t%0, %1", ops);
12763 }
12764 }
12765 else
12766 {
12767 for (i = 2; i >= 0; i--)
12768 {
12769 ops[0] = gen_rtx_REG (SImode, dest_start + i);
12770 ops[1] = gen_rtx_REG (SImode, src_start + i);
12771 output_asm_insn ("mov%?\t%0, %1", ops);
12772 }
12773 }
12774
12775 return "";
12776 }
12777
12778 void
12779 arm_emit_movpair (rtx dest, rtx src)
12780 {
12781 /* If the src is an immediate, simplify it. */
12782 if (CONST_INT_P (src))
12783 {
12784 HOST_WIDE_INT val = INTVAL (src);
12785 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
12786 if ((val >> 16) & 0x0000ffff)
12787 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
12788 GEN_INT (16)),
12789 GEN_INT ((val >> 16) & 0x0000ffff));
12790 return;
12791 }
12792 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
12793 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
12794 }
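/* A worked example (illustrative): for the constant 0x12345678 this
   emits a set of the low half (0x5678) followed by a ZERO_EXTRACT set
   of the high half (0x1234) -- typically a movw/movt pair on targets
   that support those instructions.  */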
12795
12796 /* Output a move from arm registers to an fpa register.
12797 OPERANDS[0] is an fpa register.
12798 OPERANDS[1] is the first register of an arm register pair. */
12799 const char *
12800 output_mov_double_fpa_from_arm (rtx *operands)
12801 {
12802 int arm_reg0 = REGNO (operands[1]);
12803 rtx ops[2];
12804
12805 gcc_assert (arm_reg0 != IP_REGNUM);
12806
12807 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12808 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12809 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
12810 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
12811 return "";
12812 }
12813
12814 /* Output a move from an fpa register to arm registers.
12815 OPERANDS[0] is the first register of an arm register pair.
12816 OPERANDS[1] is an fpa register. */
12817 const char *
12818 output_mov_double_arm_from_fpa (rtx *operands)
12819 {
12820 int arm_reg0 = REGNO (operands[0]);
12821 rtx ops[2];
12822
12823 gcc_assert (arm_reg0 != IP_REGNUM);
12824
12825 ops[0] = gen_rtx_REG (SImode, arm_reg0);
12826 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
12827 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12828 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12829 return "";
12830 }
12831
12832 /* Output a move between double words. It must be REG<-MEM
12833 or MEM<-REG. */
12834 const char *
12835 output_move_double (rtx *operands)
12836 {
12837 enum rtx_code code0 = GET_CODE (operands[0]);
12838 enum rtx_code code1 = GET_CODE (operands[1]);
12839 rtx otherops[3];
12840
12841 if (code0 == REG)
12842 {
12843 unsigned int reg0 = REGNO (operands[0]);
12844
12845 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
12846
12847 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
12848
12849 switch (GET_CODE (XEXP (operands[1], 0)))
12850 {
12851 case REG:
12852 if (TARGET_LDRD
12853 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
12854 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
12855 else
12856 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
12857 break;
12858
12859 case PRE_INC:
12860 gcc_assert (TARGET_LDRD);
12861 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
12862 break;
12863
12864 case PRE_DEC:
12865 if (TARGET_LDRD)
12866 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
12867 else
12868 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
12869 break;
12870
12871 case POST_INC:
12872 if (TARGET_LDRD)
12873 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
12874 else
12875 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
12876 break;
12877
12878 case POST_DEC:
12879 gcc_assert (TARGET_LDRD);
12880 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
12881 break;
12882
12883 case PRE_MODIFY:
12884 case POST_MODIFY:
12885 /* Autoincrement addressing modes should never have overlapping
12886 base and destination registers, and overlapping index registers
12887 are already prohibited, so this doesn't need to worry about
12888 fix_cm3_ldrd. */
12889 otherops[0] = operands[0];
12890 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
12891 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
12892
12893 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
12894 {
12895 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
12896 {
12897 /* Registers overlap so split out the increment. */
12898 output_asm_insn ("add%?\t%1, %1, %2", otherops);
12899 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
12900 }
12901 else
12902 {
12903 /* Use a single insn if we can.
12904 FIXME: IWMMXT allows offsets larger than ldrd can
12905 handle, fix these up with a pair of ldr. */
12906 if (TARGET_THUMB2
12907 || GET_CODE (otherops[2]) != CONST_INT
12908 || (INTVAL (otherops[2]) > -256
12909 && INTVAL (otherops[2]) < 256))
12910 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
12911 else
12912 {
12913 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
12914 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12915 }
12916 }
12917 }
12918 else
12919 {
12920 /* Use a single insn if we can.
12921 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12922 fix these up with a pair of ldr. */
12923 if (TARGET_THUMB2
12924 || GET_CODE (otherops[2]) != CONST_INT
12925 || (INTVAL (otherops[2]) > -256
12926 && INTVAL (otherops[2]) < 256))
12927 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
12928 else
12929 {
12930 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
12931 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
12932 }
12933 }
12934 break;
12935
12936 case LABEL_REF:
12937 case CONST:
12938 /* We might be able to use ldrd %0, %1 here. However the range is
12939 different to ldr/adr, and it is broken on some ARMv7-M
12940 implementations. */
12941 /* Use the second register of the pair to avoid problematic
12942 overlap. */
12943 otherops[1] = operands[1];
12944 output_asm_insn ("adr%?\t%0, %1", otherops);
12945 operands[1] = otherops[0];
12946 if (TARGET_LDRD)
12947 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
12948 else
12949 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
12950 break;
12951
12952 /* ??? This needs checking for thumb2. */
12953 default:
12954 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
12955 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
12956 {
12957 otherops[0] = operands[0];
12958 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
12959 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
12960
12961 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
12962 {
12963 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
12964 {
12965 switch ((int) INTVAL (otherops[2]))
12966 {
12967 case -8:
12968 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
12969 return "";
12970 case -4:
12971 if (TARGET_THUMB2)
12972 break;
12973 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
12974 return "";
12975 case 4:
12976 if (TARGET_THUMB2)
12977 break;
12978 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
12979 return "";
12980 }
12981 }
12982 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
12983 operands[1] = otherops[0];
12984 if (TARGET_LDRD
12985 && (GET_CODE (otherops[2]) == REG
12986 || TARGET_THUMB2
12987 || (GET_CODE (otherops[2]) == CONST_INT
12988 && INTVAL (otherops[2]) > -256
12989 && INTVAL (otherops[2]) < 256)))
12990 {
12991 if (reg_overlap_mentioned_p (operands[0],
12992 otherops[2]))
12993 {
12994 rtx tmp;
12995 /* Swap base and index registers over to
12996 avoid a conflict. */
12997 tmp = otherops[1];
12998 otherops[1] = otherops[2];
12999 otherops[2] = tmp;
13000 }
13001 /* If both registers conflict, it will usually
13002 have been fixed by a splitter. */
13003 if (reg_overlap_mentioned_p (operands[0], otherops[2])
13004 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
13005 {
13006 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13007 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
13008 }
13009 else
13010 {
13011 otherops[0] = operands[0];
13012 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
13013 }
13014 return "";
13015 }
13016
13017 if (GET_CODE (otherops[2]) == CONST_INT)
13018 {
13019 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
13020 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
13021 else
13022 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13023 }
13024 else
13025 output_asm_insn ("add%?\t%0, %1, %2", otherops);
13026 }
13027 else
13028 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
13029
13030 if (TARGET_LDRD)
13031 return "ldr%(d%)\t%0, [%1]";
13032
13033 return "ldm%(ia%)\t%1, %M0";
13034 }
13035 else
13036 {
13037 otherops[1] = adjust_address (operands[1], SImode, 4);
13038 /* Take care of overlapping base/data reg. */
13039 if (reg_mentioned_p (operands[0], operands[1]))
13040 {
13041 output_asm_insn ("ldr%?\t%0, %1", otherops);
13042 output_asm_insn ("ldr%?\t%0, %1", operands);
13043 }
13044 else
13045 {
13046 output_asm_insn ("ldr%?\t%0, %1", operands);
13047 output_asm_insn ("ldr%?\t%0, %1", otherops);
13048 }
13049 }
13050 }
13051 }
13052 else
13053 {
13054 /* Constraints should ensure this. */
13055 gcc_assert (code0 == MEM && code1 == REG);
13056 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
13057
13058 switch (GET_CODE (XEXP (operands[0], 0)))
13059 {
13060 case REG:
13061 if (TARGET_LDRD)
13062 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
13063 else
13064 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13065 break;
13066
13067 case PRE_INC:
13068 gcc_assert (TARGET_LDRD);
13069 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
13070 break;
13071
13072 case PRE_DEC:
13073 if (TARGET_LDRD)
13074 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
13075 else
13076 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
13077 break;
13078
13079 case POST_INC:
13080 if (TARGET_LDRD)
13081 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
13082 else
13083 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
13084 break;
13085
13086 case POST_DEC:
13087 gcc_assert (TARGET_LDRD);
13088 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
13089 break;
13090
13091 case PRE_MODIFY:
13092 case POST_MODIFY:
13093 otherops[0] = operands[1];
13094 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
13095 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
13096
13097 /* IWMMXT allows offsets larger than strd can handle,
13098 fix these up with a pair of str. */
13099 if (!TARGET_THUMB2
13100 && GET_CODE (otherops[2]) == CONST_INT
13101 && (INTVAL(otherops[2]) <= -256
13102 || INTVAL(otherops[2]) >= 256))
13103 {
13104 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13105 {
13106 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
13107 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13108 }
13109 else
13110 {
13111 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
13112 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
13113 }
13114 }
13115 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
13116 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
13117 else
13118 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
13119 break;
13120
13121 case PLUS:
13122 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
13123 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
13124 {
13125 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
13126 {
13127 case -8:
13128 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
13129 return "";
13130
13131 case -4:
13132 if (TARGET_THUMB2)
13133 break;
13134 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
13135 return "";
13136
13137 case 4:
13138 if (TARGET_THUMB2)
13139 break;
13140 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
13141 return "";
13142 }
13143 }
13144 if (TARGET_LDRD
13145 && (GET_CODE (otherops[2]) == REG
13146 || TARGET_THUMB2
13147 || (GET_CODE (otherops[2]) == CONST_INT
13148 && INTVAL (otherops[2]) > -256
13149 && INTVAL (otherops[2]) < 256)))
13150 {
13151 otherops[0] = operands[1];
13152 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
13153 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
13154 return "";
13155 }
13156 /* Fall through */
13157
13158 default:
13159 otherops[0] = adjust_address (operands[0], SImode, 4);
13160 otherops[1] = operands[1];
13161 output_asm_insn ("str%?\t%1, %0", operands);
13162 output_asm_insn ("str%?\t%H1, %0", otherops);
13163 }
13164 }
13165
13166 return "";
13167 }
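
/* Illustrative note (assumed example, not exhaustive): for a DImode load
   of {r0, r1} from a plain register address in r2, the REG case above
   emits "ldrd r0, [r2]" when TARGET_LDRD is available and
   "ldmia r2, {r0, r1}" otherwise; the ldm form is also forced when the
   Cortex-M3 ldrd erratum workaround (fix_cm3_ldrd) applies and the base
   register equals the first destination register.  The indexed cases use
   the corresponding writeback forms.  */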
13168
13169 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13170 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13171
13172 const char *
13173 output_move_quad (rtx *operands)
13174 {
13175 if (REG_P (operands[0]))
13176 {
13177 /* Load, or reg->reg move. */
13178
13179 if (MEM_P (operands[1]))
13180 {
13181 switch (GET_CODE (XEXP (operands[1], 0)))
13182 {
13183 case REG:
13184 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
13185 break;
13186
13187 case LABEL_REF:
13188 case CONST:
13189 output_asm_insn ("adr%?\t%0, %1", operands);
13190 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
13191 break;
13192
13193 default:
13194 gcc_unreachable ();
13195 }
13196 }
13197 else
13198 {
13199 rtx ops[2];
13200 int dest, src, i;
13201
13202 gcc_assert (REG_P (operands[1]));
13203
13204 dest = REGNO (operands[0]);
13205 src = REGNO (operands[1]);
13206
13207 /* This seems pretty dumb, but hopefully GCC won't try to do it
13208 very often. */
13209 if (dest < src)
13210 for (i = 0; i < 4; i++)
13211 {
13212 ops[0] = gen_rtx_REG (SImode, dest + i);
13213 ops[1] = gen_rtx_REG (SImode, src + i);
13214 output_asm_insn ("mov%?\t%0, %1", ops);
13215 }
13216 else
13217 for (i = 3; i >= 0; i--)
13218 {
13219 ops[0] = gen_rtx_REG (SImode, dest + i);
13220 ops[1] = gen_rtx_REG (SImode, src + i);
13221 output_asm_insn ("mov%?\t%0, %1", ops);
13222 }
13223 }
13224 }
13225 else
13226 {
13227 gcc_assert (MEM_P (operands[0]));
13228 gcc_assert (REG_P (operands[1]));
13229 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
13230
13231 switch (GET_CODE (XEXP (operands[0], 0)))
13232 {
13233 case REG:
13234 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
13235 break;
13236
13237 default:
13238 gcc_unreachable ();
13239 }
13240 }
13241
13242 return "";
13243 }
13244
13245 /* Output a VFP load or store instruction. */
13246
13247 const char *
13248 output_move_vfp (rtx *operands)
13249 {
13250 rtx reg, mem, addr, ops[2];
13251 int load = REG_P (operands[0]);
13252 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
13253 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
13254 const char *templ;
13255 char buff[50];
13256 enum machine_mode mode;
13257
13258 reg = operands[!load];
13259 mem = operands[load];
13260
13261 mode = GET_MODE (reg);
13262
13263 gcc_assert (REG_P (reg));
13264 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
13265 gcc_assert (mode == SFmode
13266 || mode == DFmode
13267 || mode == SImode
13268 || mode == DImode
13269 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
13270 gcc_assert (MEM_P (mem));
13271
13272 addr = XEXP (mem, 0);
13273
13274 switch (GET_CODE (addr))
13275 {
13276 case PRE_DEC:
13277 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13278 ops[0] = XEXP (addr, 0);
13279 ops[1] = reg;
13280 break;
13281
13282 case POST_INC:
13283 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
13284 ops[0] = XEXP (addr, 0);
13285 ops[1] = reg;
13286 break;
13287
13288 default:
13289 templ = "f%s%c%%?\t%%%s0, %%1%s";
13290 ops[0] = reg;
13291 ops[1] = mem;
13292 break;
13293 }
13294
13295 sprintf (buff, templ,
13296 load ? "ld" : "st",
13297 dp ? 'd' : 's',
13298 dp ? "P" : "",
13299 integer_p ? "\t%@ int" : "");
13300 output_asm_insn (buff, ops);
13301
13302 return "";
13303 }
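
/* For illustration: with a DFmode operand and a plain address the
   template above expands to "fldd%?\t%P0, %1" for a load, while a
   post-increment DFmode load becomes "fldmiad%?\t%0!, {%P1}".  The
   "%@ int" suffix is appended only for integer modes, so such moves can
   be spotted in the assembly listing.  */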
13304
13305 /* Output a Neon quad-word load or store, or a load or store for
13306 larger structure modes.
13307
13308 WARNING: The ordering of elements is weird in big-endian mode,
13309 because we use VSTM, as required by the EABI. GCC RTL defines
13310 element ordering based on in-memory order. This can differ
13311 from the architectural ordering of elements within a NEON register.
13312 The intrinsics defined in arm_neon.h use the NEON register element
13313 ordering, not the GCC RTL element ordering.
13314
13315 For example, the in-memory ordering of a big-endian quadword
13316 vector with 16-bit elements when stored from register pair {d0,d1}
13317 will be (lowest address first, d0[N] is NEON register element N):
13318
13319 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13320
13321 When necessary, quadword registers (dN, dN+1) are moved to ARM
13322 registers starting at rN in the order:
13323
13324 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13325
13326 So that STM/LDM can be used on vectors in ARM registers, and the
13327 same memory layout will result as if VSTM/VLDM were used. */
13328
13329 const char *
13330 output_move_neon (rtx *operands)
13331 {
13332 rtx reg, mem, addr, ops[2];
13333 int regno, load = REG_P (operands[0]);
13334 const char *templ;
13335 char buff[50];
13336 enum machine_mode mode;
13337
13338 reg = operands[!load];
13339 mem = operands[load];
13340
13341 mode = GET_MODE (reg);
13342
13343 gcc_assert (REG_P (reg));
13344 regno = REGNO (reg);
13345 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
13346 || NEON_REGNO_OK_FOR_QUAD (regno));
13347 gcc_assert (VALID_NEON_DREG_MODE (mode)
13348 || VALID_NEON_QREG_MODE (mode)
13349 || VALID_NEON_STRUCT_MODE (mode));
13350 gcc_assert (MEM_P (mem));
13351
13352 addr = XEXP (mem, 0);
13353
13354 /* Strip off const from addresses like (const (plus (...))). */
13355 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13356 addr = XEXP (addr, 0);
13357
13358 switch (GET_CODE (addr))
13359 {
13360 case POST_INC:
13361 templ = "v%smia%%?\t%%0!, %%h1";
13362 ops[0] = XEXP (addr, 0);
13363 ops[1] = reg;
13364 break;
13365
13366 case PRE_DEC:
13367 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13368 templ = "v%smdb%%?\t%%0!, %%h1";
13369 ops[0] = XEXP (addr, 0);
13370 ops[1] = reg;
13371 break;
13372
13373 case POST_MODIFY:
13374 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13375 gcc_unreachable ();
13376
13377 case LABEL_REF:
13378 case PLUS:
13379 {
13380 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13381 int i;
13382 int overlap = -1;
13383 for (i = 0; i < nregs; i++)
13384 {
13385 /* We're only using DImode here because it's a convenient size. */
13386 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
13387 ops[1] = adjust_address (mem, DImode, 8 * i);
13388 if (reg_overlap_mentioned_p (ops[0], mem))
13389 {
13390 gcc_assert (overlap == -1);
13391 overlap = i;
13392 }
13393 else
13394 {
13395 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13396 output_asm_insn (buff, ops);
13397 }
13398 }
13399 if (overlap != -1)
13400 {
13401 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
13402 ops[1] = adjust_address (mem, SImode, 8 * overlap);
13403 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
13404 output_asm_insn (buff, ops);
13405 }
13406
13407 return "";
13408 }
13409
13410 default:
13411 templ = "v%smia%%?\t%%m0, %%h1";
13412 ops[0] = mem;
13413 ops[1] = reg;
13414 }
13415
13416 sprintf (buff, templ, load ? "ld" : "st");
13417 output_asm_insn (buff, ops);
13418
13419 return "";
13420 }
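
/* Illustrative note: in the LABEL_REF/PLUS case above, if the base
   address register happens to overlap one of the destination D
   registers, that doubleword is loaded last so the base is not
   clobbered before the remaining vldr/vstr instructions have used it.  */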
13421
13422 /* Compute and return the length of neon_mov<mode>, where <mode> is
13423 one of VSTRUCT modes: EI, OI, CI or XI. */
13424 int
13425 arm_attr_length_move_neon (rtx insn)
13426 {
13427 rtx reg, mem, addr;
13428 int load;
13429 enum machine_mode mode;
13430
13431 extract_insn_cached (insn);
13432
13433 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13434 {
13435 mode = GET_MODE (recog_data.operand[0]);
13436 switch (mode)
13437 {
13438 case EImode:
13439 case OImode:
13440 return 8;
13441 case CImode:
13442 return 12;
13443 case XImode:
13444 return 16;
13445 default:
13446 gcc_unreachable ();
13447 }
13448 }
13449
13450 load = REG_P (recog_data.operand[0]);
13451 reg = recog_data.operand[!load];
13452 mem = recog_data.operand[load];
13453
13454 gcc_assert (MEM_P (mem));
13455
13456 mode = GET_MODE (reg);
13457 addr = XEXP (mem, 0);
13458
13459 /* Strip off const from addresses like (const (plus (...))). */
13460 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13461 addr = XEXP (addr, 0);
13462
13463 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13464 {
13465 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13466 return insns * 4;
13467 }
13468 else
13469 return 4;
13470 }
13471
13472 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13473 return zero. */
13474
13475 int
13476 arm_address_offset_is_imm (rtx insn)
13477 {
13478 rtx mem, addr;
13479
13480 extract_insn_cached (insn);
13481
13482 if (REG_P (recog_data.operand[0]))
13483 return 0;
13484
13485 mem = recog_data.operand[0];
13486
13487 gcc_assert (MEM_P (mem));
13488
13489 addr = XEXP (mem, 0);
13490
13491 if (GET_CODE (addr) == REG
13492 || (GET_CODE (addr) == PLUS
13493 && GET_CODE (XEXP (addr, 0)) == REG
13494 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13495 return 1;
13496 else
13497 return 0;
13498 }
13499
13500 /* Output an ADD r, s, #n where n may be too big for one instruction.
13501 If adding zero and the destination equals the source register, output nothing. */
13502 const char *
13503 output_add_immediate (rtx *operands)
13504 {
13505 HOST_WIDE_INT n = INTVAL (operands[2]);
13506
13507 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
13508 {
13509 if (n < 0)
13510 output_multi_immediate (operands,
13511 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13512 -n);
13513 else
13514 output_multi_immediate (operands,
13515 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13516 n);
13517 }
13518
13519 return "";
13520 }
13521
13522 /* Output a multiple immediate operation.
13523 OPERANDS is the vector of operands referred to in the output patterns.
13524 INSTR1 is the output pattern to use for the first constant.
13525 INSTR2 is the output pattern to use for subsequent constants.
13526 IMMED_OP is the index of the constant slot in OPERANDS.
13527 N is the constant value. */
13528 static const char *
13529 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
13530 int immed_op, HOST_WIDE_INT n)
13531 {
13532 #if HOST_BITS_PER_WIDE_INT > 32
13533 n &= 0xffffffff;
13534 #endif
13535
13536 if (n == 0)
13537 {
13538 /* Quick and easy output. */
13539 operands[immed_op] = const0_rtx;
13540 output_asm_insn (instr1, operands);
13541 }
13542 else
13543 {
13544 int i;
13545 const char * instr = instr1;
13546
13547 /* Note that n is never zero here (which would give no output). */
13548 for (i = 0; i < 32; i += 2)
13549 {
13550 if (n & (3 << i))
13551 {
13552 operands[immed_op] = GEN_INT (n & (255 << i));
13553 output_asm_insn (instr, operands);
13554 instr = instr2;
13555 i += 6;
13556 }
13557 }
13558 }
13559
13560 return "";
13561 }
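
/* Worked example (illustrative): output_add_immediate with n = 0x10203
   is split here into 8-bit chunks at even bit positions, producing

       add     rD, rS, #3
       add     rD, rD, #512            @ 0x200
       add     rD, rD, #65536          @ 0x10000

   each immediate being representable as a rotated 8-bit ARM constant.  */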
13562
13563 /* Return the name of a shifter operation. */
13564 static const char *
13565 arm_shift_nmem (enum rtx_code code)
13566 {
13567 switch (code)
13568 {
13569 case ASHIFT:
13570 return ARM_LSL_NAME;
13571
13572 case ASHIFTRT:
13573 return "asr";
13574
13575 case LSHIFTRT:
13576 return "lsr";
13577
13578 case ROTATERT:
13579 return "ror";
13580
13581 default:
13582 abort ();
13583 }
13584 }
13585
13586 /* Return the appropriate ARM instruction for the operation code.
13587 The returned result should not be overwritten. OP is the rtx of the
13588 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13589 was shifted. */
13590 const char *
13591 arithmetic_instr (rtx op, int shift_first_arg)
13592 {
13593 switch (GET_CODE (op))
13594 {
13595 case PLUS:
13596 return "add";
13597
13598 case MINUS:
13599 return shift_first_arg ? "rsb" : "sub";
13600
13601 case IOR:
13602 return "orr";
13603
13604 case XOR:
13605 return "eor";
13606
13607 case AND:
13608 return "and";
13609
13610 case ASHIFT:
13611 case ASHIFTRT:
13612 case LSHIFTRT:
13613 case ROTATERT:
13614 return arm_shift_nmem (GET_CODE (op));
13615
13616 default:
13617 gcc_unreachable ();
13618 }
13619 }
13620
13621 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13622 for the operation code. The returned result should not be overwritten.
13623 OP is the rtx of the shift.
13624 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
13625 constant shift amount otherwise. */
13626 static const char *
13627 shift_op (rtx op, HOST_WIDE_INT *amountp)
13628 {
13629 const char * mnem;
13630 enum rtx_code code = GET_CODE (op);
13631
13632 switch (GET_CODE (XEXP (op, 1)))
13633 {
13634 case REG:
13635 case SUBREG:
13636 *amountp = -1;
13637 break;
13638
13639 case CONST_INT:
13640 *amountp = INTVAL (XEXP (op, 1));
13641 break;
13642
13643 default:
13644 gcc_unreachable ();
13645 }
13646
13647 switch (code)
13648 {
13649 case ROTATE:
13650 gcc_assert (*amountp != -1);
13651 *amountp = 32 - *amountp;
13652 code = ROTATERT;
13653
13654 /* Fall through. */
13655
13656 case ASHIFT:
13657 case ASHIFTRT:
13658 case LSHIFTRT:
13659 case ROTATERT:
13660 mnem = arm_shift_nmem (code);
13661 break;
13662
13663 case MULT:
13664 /* We never have to worry about the amount being other than a
13665 power of 2, since this case can never be reloaded from a reg. */
13666 gcc_assert (*amountp != -1);
13667 *amountp = int_log2 (*amountp);
13668 return ARM_LSL_NAME;
13669
13670 default:
13671 gcc_unreachable ();
13672 }
13673
13674 if (*amountp != -1)
13675 {
13676 /* This is not 100% correct, but follows from the desire to merge
13677 multiplication by a power of 2 with the recognizer for a
13678 shift. >=32 is not a valid shift for "lsl", so we must try and
13679 output a shift that produces the correct arithmetical result.
13680 Using lsr #32 is identical except for the fact that the carry bit
13681 is not set correctly if we set the flags; but we never use the
13682 carry bit from such an operation, so we can ignore that. */
13683 if (code == ROTATERT)
13684 /* Rotate is just modulo 32. */
13685 *amountp &= 31;
13686 else if (*amountp != (*amountp & 31))
13687 {
13688 if (code == ASHIFT)
13689 mnem = "lsr";
13690 *amountp = 32;
13691 }
13692
13693 /* Shifts of 0 are no-ops. */
13694 if (*amountp == 0)
13695 return NULL;
13696 }
13697
13698 return mnem;
13699 }
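
/* Illustration only: (mult x 8) is returned as "lsl" with *AMOUNTP = 3
   via int_log2, (rotate x 8) becomes "ror" with *AMOUNTP = 32 - 8 = 24,
   and an ASHIFT by 32 or more is rewritten as "lsr" #32 so the
   arithmetical result (zero) stays correct even though the amount is
   out of range for lsl.  */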
13700
13701 /* Obtain the shift count corresponding to the power of two, POWER. */
13702
13703 static HOST_WIDE_INT
13704 int_log2 (HOST_WIDE_INT power)
13705 {
13706 HOST_WIDE_INT shift = 0;
13707
13708 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
13709 {
13710 gcc_assert (shift <= 31);
13711 shift++;
13712 }
13713
13714 return shift;
13715 }
13716
13717 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13718 because /bin/as is horribly restrictive. The judgement about
13719 whether or not each character is 'printable' (and can be output as
13720 is) or not (and must be printed with an octal escape) must be made
13721 with reference to the *host* character set -- the situation is
13722 similar to that discussed in the comments above pp_c_char in
13723 c-pretty-print.c. */
13724
13725 #define MAX_ASCII_LEN 51
13726
13727 void
13728 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
13729 {
13730 int i;
13731 int len_so_far = 0;
13732
13733 fputs ("\t.ascii\t\"", stream);
13734
13735 for (i = 0; i < len; i++)
13736 {
13737 int c = p[i];
13738
13739 if (len_so_far >= MAX_ASCII_LEN)
13740 {
13741 fputs ("\"\n\t.ascii\t\"", stream);
13742 len_so_far = 0;
13743 }
13744
13745 if (ISPRINT (c))
13746 {
13747 if (c == '\\' || c == '\"')
13748 {
13749 putc ('\\', stream);
13750 len_so_far++;
13751 }
13752 putc (c, stream);
13753 len_so_far++;
13754 }
13755 else
13756 {
13757 fprintf (stream, "\\%03o", c);
13758 len_so_far += 4;
13759 }
13760 }
13761
13762 fputs ("\"\n", stream);
13763 }
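
/* Example of the emitted form (illustrative): the three input bytes
   'a', '"' and '\n' are written as

       .ascii  "a\"\012"

   i.e. quotes and backslashes are backslash-escaped, non-printable
   characters become octal escapes, and long strings are split across
   several .ascii directives once roughly MAX_ASCII_LEN output
   characters have been emitted.  */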
13764 \f
13765 /* Compute the register save mask for registers 0 through 12
13766 inclusive. This code is used by arm_compute_save_reg_mask. */
13767
13768 static unsigned long
13769 arm_compute_save_reg0_reg12_mask (void)
13770 {
13771 unsigned long func_type = arm_current_func_type ();
13772 unsigned long save_reg_mask = 0;
13773 unsigned int reg;
13774
13775 if (IS_INTERRUPT (func_type))
13776 {
13777 unsigned int max_reg;
13778 /* Interrupt functions must not corrupt any registers,
13779 even call clobbered ones. If this is a leaf function
13780 we can just examine the registers used by the RTL, but
13781 otherwise we have to assume that whatever function is
13782 called might clobber anything, and so we have to save
13783 all the call-clobbered registers as well. */
13784 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
13785 /* FIQ handlers have registers r8 - r12 banked, so
13786 we only need to check r0 - r7.  Normal ISRs only
13787 bank r14 and r15, so we must check up to r12.
13788 r13 is the stack pointer, which is always preserved,
13789 so we do not need to consider it here. */
13790 max_reg = 7;
13791 else
13792 max_reg = 12;
13793
13794 for (reg = 0; reg <= max_reg; reg++)
13795 if (df_regs_ever_live_p (reg)
13796 || (! current_function_is_leaf && call_used_regs[reg]))
13797 save_reg_mask |= (1 << reg);
13798
13799 /* Also save the pic base register if necessary. */
13800 if (flag_pic
13801 && !TARGET_SINGLE_PIC_BASE
13802 && arm_pic_register != INVALID_REGNUM
13803 && crtl->uses_pic_offset_table)
13804 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13805 }
13806 else if (IS_VOLATILE(func_type))
13807 {
13808 /* For noreturn functions we historically omitted register saves
13809 altogether. However this really messes up debugging. As a
13810 compromise save just the frame pointers. Combined with the link
13811 register saved elsewhere this should be sufficient to get
13812 a backtrace. */
13813 if (frame_pointer_needed)
13814 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13815 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
13816 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13817 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
13818 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
13819 }
13820 else
13821 {
13822 /* In the normal case we only need to save those registers
13823 which are call saved and which are used by this function. */
13824 for (reg = 0; reg <= 11; reg++)
13825 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
13826 save_reg_mask |= (1 << reg);
13827
13828 /* Handle the frame pointer as a special case. */
13829 if (frame_pointer_needed)
13830 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
13831
13832 /* If we aren't loading the PIC register,
13833 don't stack it even though it may be live. */
13834 if (flag_pic
13835 && !TARGET_SINGLE_PIC_BASE
13836 && arm_pic_register != INVALID_REGNUM
13837 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
13838 || crtl->uses_pic_offset_table))
13839 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13840
13841 /* The prologue will copy SP into R0, so save it. */
13842 if (IS_STACKALIGN (func_type))
13843 save_reg_mask |= 1;
13844 }
13845
13846 /* Save registers so the exception handler can modify them. */
13847 if (crtl->calls_eh_return)
13848 {
13849 unsigned int i;
13850
13851 for (i = 0; ; i++)
13852 {
13853 reg = EH_RETURN_DATA_REGNO (i);
13854 if (reg == INVALID_REGNUM)
13855 break;
13856 save_reg_mask |= 1 << reg;
13857 }
13858 }
13859
13860 return save_reg_mask;
13861 }
13862
13863
13864 /* Compute the number of bytes used to store the static chain register on the
13865 stack, above the stack frame. We need to know this accurately to get the
13866 alignment of the rest of the stack frame correct. */
13867
13868 static int arm_compute_static_chain_stack_bytes (void)
13869 {
13870 unsigned long func_type = arm_current_func_type ();
13871 int static_chain_stack_bytes = 0;
13872
13873 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
13874 IS_NESTED (func_type) &&
13875 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
13876 static_chain_stack_bytes = 4;
13877
13878 return static_chain_stack_bytes;
13879 }
13880
13881
13882 /* Compute a bit mask of which registers need to be
13883 saved on the stack for the current function.
13884 This is used by arm_get_frame_offsets, which may add extra registers. */
13885
13886 static unsigned long
13887 arm_compute_save_reg_mask (void)
13888 {
13889 unsigned int save_reg_mask = 0;
13890 unsigned long func_type = arm_current_func_type ();
13891 unsigned int reg;
13892
13893 if (IS_NAKED (func_type))
13894 /* This should never really happen. */
13895 return 0;
13896
13897 /* If we are creating a stack frame, then we must save the frame pointer,
13898 IP (which will hold the old stack pointer), LR and the PC. */
13899 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
13900 save_reg_mask |=
13901 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
13902 | (1 << IP_REGNUM)
13903 | (1 << LR_REGNUM)
13904 | (1 << PC_REGNUM);
13905
13906 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
13907
13908 /* Decide if we need to save the link register.
13909 Interrupt routines have their own banked link register,
13910 so they never need to save it.
13911 Otherwise if we do not use the link register we do not need to save
13912 it. If we are pushing other registers onto the stack however, we
13913 can save an instruction in the epilogue by pushing the link register
13914 now and then popping it back into the PC. This incurs extra memory
13915 accesses though, so we only do it when optimizing for size, and only
13916 if we know that we will not need a fancy return sequence. */
13917 if (df_regs_ever_live_p (LR_REGNUM)
13918 || (save_reg_mask
13919 && optimize_size
13920 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
13921 && !crtl->calls_eh_return))
13922 save_reg_mask |= 1 << LR_REGNUM;
13923
13924 if (cfun->machine->lr_save_eliminated)
13925 save_reg_mask &= ~ (1 << LR_REGNUM);
13926
13927 if (TARGET_REALLY_IWMMXT
13928 && ((bit_count (save_reg_mask)
13929 + ARM_NUM_INTS (crtl->args.pretend_args_size +
13930 arm_compute_static_chain_stack_bytes())
13931 ) % 2) != 0)
13932 {
13933 /* The total number of registers that are going to be pushed
13934 onto the stack is odd. We need to ensure that the stack
13935 is 64-bit aligned before we start to save iWMMXt registers,
13936 and also before we start to create locals. (A local variable
13937 might be a double or long long which we will load/store using
13938 an iWMMXt instruction). Therefore we need to push another
13939 ARM register, so that the stack will be 64-bit aligned. We
13940 try to avoid using the arg registers (r0 - r3) as they might be
13941 used to pass values in a tail call. */
13942 for (reg = 4; reg <= 12; reg++)
13943 if ((save_reg_mask & (1 << reg)) == 0)
13944 break;
13945
13946 if (reg <= 12)
13947 save_reg_mask |= (1 << reg);
13948 else
13949 {
13950 cfun->machine->sibcall_blocked = 1;
13951 save_reg_mask |= (1 << 3);
13952 }
13953 }
13954
13955 /* We may need to push an additional register for use initializing the
13956 PIC base register. */
13957 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
13958 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
13959 {
13960 reg = thumb_find_work_register (1 << 4);
13961 if (!call_used_regs[reg])
13962 save_reg_mask |= (1 << reg);
13963 }
13964
13965 return save_reg_mask;
13966 }
13967
13968
13969 /* Compute a bit mask of which registers need to be
13970 saved on the stack for the current function. */
13971 static unsigned long
13972 thumb1_compute_save_reg_mask (void)
13973 {
13974 unsigned long mask;
13975 unsigned reg;
13976
13977 mask = 0;
13978 for (reg = 0; reg < 12; reg ++)
13979 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13980 mask |= 1 << reg;
13981
13982 if (flag_pic
13983 && !TARGET_SINGLE_PIC_BASE
13984 && arm_pic_register != INVALID_REGNUM
13985 && crtl->uses_pic_offset_table)
13986 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
13987
13988 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13989 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
13990 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
13991
13992 /* LR will also be pushed if any lo regs are pushed. */
13993 if (mask & 0xff || thumb_force_lr_save ())
13994 mask |= (1 << LR_REGNUM);
13995
13996 /* Make sure we have a low work register if we need one.
13997 We will need one if we are going to push a high register,
13998 but we are not currently intending to push a low register. */
13999 if ((mask & 0xff) == 0
14000 && ((mask & 0x0f00) || TARGET_BACKTRACE))
14001 {
14002 /* Use thumb_find_work_register to choose which register
14003 we will use. If the register is live then we will
14004 have to push it. Use LAST_LO_REGNUM as our fallback
14005 choice for the register to select. */
14006 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
14007 /* Make sure the register returned by thumb_find_work_register is
14008 not part of the return value. */
14009 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
14010 reg = LAST_LO_REGNUM;
14011
14012 if (! call_used_regs[reg])
14013 mask |= 1 << reg;
14014 }
14015
14016 /* The 504 below is 8 bytes less than 512 because there are two possible
14017 alignment words. We can't tell here if they will be present or not so we
14018 have to play it safe and assume that they are. */
14019 if ((CALLER_INTERWORKING_SLOT_SIZE +
14020 ROUND_UP_WORD (get_frame_size ()) +
14021 crtl->outgoing_args_size) >= 504)
14022 {
14023 /* This is the same as the code in thumb1_expand_prologue() which
14024 determines which register to use for stack decrement. */
14025 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
14026 if (mask & (1 << reg))
14027 break;
14028
14029 if (reg > LAST_LO_REGNUM)
14030 {
14031 /* Make sure we have a register available for stack decrement. */
14032 mask |= 1 << LAST_LO_REGNUM;
14033 }
14034 }
14035
14036 return mask;
14037 }
14038
14039
14040 /* Return the number of bytes required to save VFP registers. */
14041 static int
14042 arm_get_vfp_saved_size (void)
14043 {
14044 unsigned int regno;
14045 int count;
14046 int saved;
14047
14048 saved = 0;
14049 /* Space for saved VFP registers. */
14050 if (TARGET_HARD_FLOAT && TARGET_VFP)
14051 {
14052 count = 0;
14053 for (regno = FIRST_VFP_REGNUM;
14054 regno < LAST_VFP_REGNUM;
14055 regno += 2)
14056 {
14057 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
14058 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
14059 {
14060 if (count > 0)
14061 {
14062 /* Workaround ARM10 VFPr1 bug. */
14063 if (count == 2 && !arm_arch6)
14064 count++;
14065 saved += count * 8;
14066 }
14067 count = 0;
14068 }
14069 else
14070 count++;
14071 }
14072 if (count > 0)
14073 {
14074 if (count == 2 && !arm_arch6)
14075 count++;
14076 saved += count * 8;
14077 }
14078 }
14079 return saved;
14080 }
14081
14082
14083 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14084 everything bar the final return instruction. */
14085 const char *
14086 output_return_instruction (rtx operand, int really_return, int reverse)
14087 {
14088 char conditional[10];
14089 char instr[100];
14090 unsigned reg;
14091 unsigned long live_regs_mask;
14092 unsigned long func_type;
14093 arm_stack_offsets *offsets;
14094
14095 func_type = arm_current_func_type ();
14096
14097 if (IS_NAKED (func_type))
14098 return "";
14099
14100 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14101 {
14102 /* If this function was declared non-returning, and we have
14103 found a tail call, then we have to trust that the called
14104 function won't return. */
14105 if (really_return)
14106 {
14107 rtx ops[2];
14108
14109 /* Otherwise, trap an attempted return by aborting. */
14110 ops[0] = operand;
14111 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
14112 : "abort");
14113 assemble_external_libcall (ops[1]);
14114 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
14115 }
14116
14117 return "";
14118 }
14119
14120 gcc_assert (!cfun->calls_alloca || really_return);
14121
14122 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
14123
14124 cfun->machine->return_used_this_function = 1;
14125
14126 offsets = arm_get_frame_offsets ();
14127 live_regs_mask = offsets->saved_regs_mask;
14128
14129 if (live_regs_mask)
14130 {
14131 const char * return_reg;
14132
14133 /* If we do not have any special requirements for function exit
14134 (e.g. interworking) then we can load the return address
14135 directly into the PC. Otherwise we must load it into LR. */
14136 if (really_return
14137 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
14138 return_reg = reg_names[PC_REGNUM];
14139 else
14140 return_reg = reg_names[LR_REGNUM];
14141
14142 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
14143 {
14144 /* There are three possible reasons for the IP register
14145 being saved: 1) a stack frame was created, in which case
14146 IP contains the old stack pointer, or 2) an ISR routine
14147 corrupted it, or 3) it was saved to align the stack on
14148 iWMMXt. In case 1, restore IP into SP, otherwise just
14149 restore IP. */
14150 if (frame_pointer_needed)
14151 {
14152 live_regs_mask &= ~ (1 << IP_REGNUM);
14153 live_regs_mask |= (1 << SP_REGNUM);
14154 }
14155 else
14156 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
14157 }
14158
14159 /* On some ARM architectures it is faster to use LDR rather than
14160 LDM to load a single register. On other architectures, the
14161 cost is the same. In 26 bit mode, or for exception handlers,
14162 we have to use LDM to load the PC so that the CPSR is also
14163 restored. */
14164 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14165 if (live_regs_mask == (1U << reg))
14166 break;
14167
14168 if (reg <= LAST_ARM_REGNUM
14169 && (reg != LR_REGNUM
14170 || ! really_return
14171 || ! IS_INTERRUPT (func_type)))
14172 {
14173 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
14174 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
14175 }
14176 else
14177 {
14178 char *p;
14179 int first = 1;
14180
14181 /* Generate the load multiple instruction to restore the
14182 registers. Note we can get here, even if
14183 frame_pointer_needed is true, but only if sp already
14184 points to the base of the saved core registers. */
14185 if (live_regs_mask & (1 << SP_REGNUM))
14186 {
14187 unsigned HOST_WIDE_INT stack_adjust;
14188
14189 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
14190 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
14191
14192 if (stack_adjust && arm_arch5 && TARGET_ARM)
14193 if (TARGET_UNIFIED_ASM)
14194 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
14195 else
14196 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
14197 else
14198 {
14199 /* If we can't use ldmib (SA110 bug),
14200 then try to pop r3 instead. */
14201 if (stack_adjust)
14202 live_regs_mask |= 1 << 3;
14203
14204 if (TARGET_UNIFIED_ASM)
14205 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
14206 else
14207 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
14208 }
14209 }
14210 else
14211 if (TARGET_UNIFIED_ASM)
14212 sprintf (instr, "pop%s\t{", conditional);
14213 else
14214 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
14215
14216 p = instr + strlen (instr);
14217
14218 for (reg = 0; reg <= SP_REGNUM; reg++)
14219 if (live_regs_mask & (1 << reg))
14220 {
14221 int l = strlen (reg_names[reg]);
14222
14223 if (first)
14224 first = 0;
14225 else
14226 {
14227 memcpy (p, ", ", 2);
14228 p += 2;
14229 }
14230
14231 memcpy (p, "%|", 2);
14232 memcpy (p + 2, reg_names[reg], l);
14233 p += l + 2;
14234 }
14235
14236 if (live_regs_mask & (1 << LR_REGNUM))
14237 {
14238 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
14239 /* If returning from an interrupt, restore the CPSR. */
14240 if (IS_INTERRUPT (func_type))
14241 strcat (p, "^");
14242 }
14243 else
14244 strcpy (p, "}");
14245 }
14246
14247 output_asm_insn (instr, & operand);
14248
14249 /* See if we need to generate an extra instruction to
14250 perform the actual function return. */
14251 if (really_return
14252 && func_type != ARM_FT_INTERWORKED
14253 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
14254 {
14255 /* The return has already been handled
14256 by loading the LR into the PC. */
14257 really_return = 0;
14258 }
14259 }
14260
14261 if (really_return)
14262 {
14263 switch ((int) ARM_FUNC_TYPE (func_type))
14264 {
14265 case ARM_FT_ISR:
14266 case ARM_FT_FIQ:
14267 /* ??? This is wrong for unified assembly syntax. */
14268 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
14269 break;
14270
14271 case ARM_FT_INTERWORKED:
14272 sprintf (instr, "bx%s\t%%|lr", conditional);
14273 break;
14274
14275 case ARM_FT_EXCEPTION:
14276 /* ??? This is wrong for unified assembly syntax. */
14277 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
14278 break;
14279
14280 default:
14281 /* Use bx if it's available. */
14282 if (arm_arch5 || arm_arch4t)
14283 sprintf (instr, "bx%s\t%%|lr", conditional);
14284 else
14285 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
14286 break;
14287 }
14288
14289 output_asm_insn (instr, & operand);
14290 }
14291
14292 return "";
14293 }
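
/* Illustrative examples only: a function that saved {r4, fp, lr} and
   needs no special exit sequence returns with a single
   "ldmfd sp!, {r4, fp, pc}" (or "pop {r4, fp, pc}" under unified
   syntax), while a function with no saved registers falls back to
   "bx lr" on architectures that have BX and "mov pc, lr" otherwise.  */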
14294
14295 /* Write the function name into the code section, directly preceding
14296 the function prologue.
14297
14298 Code will be output similar to this:
14299 t0
14300 .ascii "arm_poke_function_name", 0
14301 .align
14302 t1
14303 .word 0xff000000 + (t1 - t0)
14304 arm_poke_function_name
14305 mov ip, sp
14306 stmfd sp!, {fp, ip, lr, pc}
14307 sub fp, ip, #4
14308
14309 When performing a stack backtrace, code can inspect the value
14310 of 'pc' stored at 'fp' + 0. If the trace function then looks
14311 at location pc - 12 and the top 8 bits are set, then we know
14312 that there is a function name embedded immediately preceding this
14313 location, whose length is ((pc[-3]) & ~0xff000000).
14314
14315 We assume that pc is declared as a pointer to an unsigned long.
14316
14317 It is of no benefit to output the function name if we are assembling
14318 a leaf function. These function types will not contain a stack
14319 backtrace structure, so it is not possible to determine the
14320 function name. */
14321 void
14322 arm_poke_function_name (FILE *stream, const char *name)
14323 {
14324 unsigned long alignlength;
14325 unsigned long length;
14326 rtx x;
14327
14328 length = strlen (name) + 1;
14329 alignlength = ROUND_UP_WORD (length);
14330
14331 ASM_OUTPUT_ASCII (stream, name, length);
14332 ASM_OUTPUT_ALIGN (stream, 2);
14333 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
14334 assemble_aligned_integer (UNITS_PER_WORD, x);
14335 }
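
/* A hedged sketch (not used by the compiler) of how a backtrace routine
   could consume the layout documented above, assuming PC is declared as
   "unsigned long *pc" and target words are 32 bits.  The masked length
   is the padded value t1 - t0 pushed above:

     unsigned long marker = pc[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
         unsigned long len = marker & ~0xff000000;
         const char *name = (const char *) (pc - 3) - len;
       }
*/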
14336
14337 /* Place some comments into the assembler stream
14338 describing the current function. */
14339 static void
14340 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
14341 {
14342 unsigned long func_type;
14343
14344 if (TARGET_THUMB1)
14345 {
14346 thumb1_output_function_prologue (f, frame_size);
14347 return;
14348 }
14349
14350 /* Sanity check. */
14351 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
14352
14353 func_type = arm_current_func_type ();
14354
14355 switch ((int) ARM_FUNC_TYPE (func_type))
14356 {
14357 default:
14358 case ARM_FT_NORMAL:
14359 break;
14360 case ARM_FT_INTERWORKED:
14361 asm_fprintf (f, "\t%@ Function supports interworking.\n");
14362 break;
14363 case ARM_FT_ISR:
14364 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
14365 break;
14366 case ARM_FT_FIQ:
14367 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
14368 break;
14369 case ARM_FT_EXCEPTION:
14370 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
14371 break;
14372 }
14373
14374 if (IS_NAKED (func_type))
14375 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14376
14377 if (IS_VOLATILE (func_type))
14378 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
14379
14380 if (IS_NESTED (func_type))
14381 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
14382 if (IS_STACKALIGN (func_type))
14383 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14384
14385 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14386 crtl->args.size,
14387 crtl->args.pretend_args_size, frame_size);
14388
14389 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14390 frame_pointer_needed,
14391 cfun->machine->uses_anonymous_args);
14392
14393 if (cfun->machine->lr_save_eliminated)
14394 asm_fprintf (f, "\t%@ link register save eliminated.\n");
14395
14396 if (crtl->calls_eh_return)
14397 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
14398
14399 }
14400
14401 const char *
14402 arm_output_epilogue (rtx sibling)
14403 {
14404 int reg;
14405 unsigned long saved_regs_mask;
14406 unsigned long func_type;
14407 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14408 frame that is $fp + 4 for a non-variadic function. */
14409 int floats_offset = 0;
14410 rtx operands[3];
14411 FILE * f = asm_out_file;
14412 unsigned int lrm_count = 0;
14413 int really_return = (sibling == NULL);
14414 int start_reg;
14415 arm_stack_offsets *offsets;
14416
14417 /* If we have already generated the return instruction
14418 then it is futile to generate anything else. */
14419 if (use_return_insn (FALSE, sibling) &&
14420 (cfun->machine->return_used_this_function != 0))
14421 return "";
14422
14423 func_type = arm_current_func_type ();
14424
14425 if (IS_NAKED (func_type))
14426 /* Naked functions don't have epilogues. */
14427 return "";
14428
14429 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
14430 {
14431 rtx op;
14432
14433 /* A volatile function should never return. Call abort. */
14434 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
14435 assemble_external_libcall (op);
14436 output_asm_insn ("bl\t%a0", &op);
14437
14438 return "";
14439 }
14440
14441 /* If we are throwing an exception, then we really must be doing a
14442 return, so we can't tail-call. */
14443 gcc_assert (!crtl->calls_eh_return || really_return);
14444
14445 offsets = arm_get_frame_offsets ();
14446 saved_regs_mask = offsets->saved_regs_mask;
14447
14448 if (TARGET_IWMMXT)
14449 lrm_count = bit_count (saved_regs_mask);
14450
14451 floats_offset = offsets->saved_args;
14452 /* Compute how far away the floats will be. */
14453 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
14454 if (saved_regs_mask & (1 << reg))
14455 floats_offset += 4;
14456
14457 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
14458 {
14459 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14460 int vfp_offset = offsets->frame;
14461
14462 if (TARGET_FPA_EMU2)
14463 {
14464 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14465 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14466 {
14467 floats_offset += 12;
14468 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
14469 reg, FP_REGNUM, floats_offset - vfp_offset);
14470 }
14471 }
14472 else
14473 {
14474 start_reg = LAST_FPA_REGNUM;
14475
14476 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14477 {
14478 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14479 {
14480 floats_offset += 12;
14481
14482 /* We can't unstack more than four registers at once. */
14483 if (start_reg - reg == 3)
14484 {
14485 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
14486 reg, FP_REGNUM, floats_offset - vfp_offset);
14487 start_reg = reg - 1;
14488 }
14489 }
14490 else
14491 {
14492 if (reg != start_reg)
14493 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14494 reg + 1, start_reg - reg,
14495 FP_REGNUM, floats_offset - vfp_offset);
14496 start_reg = reg - 1;
14497 }
14498 }
14499
14500 /* Just in case the last register checked also needs unstacking. */
14501 if (reg != start_reg)
14502 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
14503 reg + 1, start_reg - reg,
14504 FP_REGNUM, floats_offset - vfp_offset);
14505 }
14506
14507 if (TARGET_HARD_FLOAT && TARGET_VFP)
14508 {
14509 int saved_size;
14510
14511 /* The fldmd insns do not have base+offset addressing
14512 modes, so we use IP to hold the address. */
14513 saved_size = arm_get_vfp_saved_size ();
14514
14515 if (saved_size > 0)
14516 {
14517 floats_offset += saved_size;
14518 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
14519 FP_REGNUM, floats_offset - vfp_offset);
14520 }
14521 start_reg = FIRST_VFP_REGNUM;
14522 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
14523 {
14524 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14525 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
14526 {
14527 if (start_reg != reg)
14528 vfp_output_fldmd (f, IP_REGNUM,
14529 (start_reg - FIRST_VFP_REGNUM) / 2,
14530 (reg - start_reg) / 2);
14531 start_reg = reg + 2;
14532 }
14533 }
14534 if (start_reg != reg)
14535 vfp_output_fldmd (f, IP_REGNUM,
14536 (start_reg - FIRST_VFP_REGNUM) / 2,
14537 (reg - start_reg) / 2);
14538 }
14539
14540 if (TARGET_IWMMXT)
14541 {
14542 /* The frame pointer is guaranteed to be non-double-word aligned.
14543 This is because it is set to (old_stack_pointer - 4) and the
14544 old_stack_pointer was double word aligned. Thus the offset to
14545 the iWMMXt registers to be loaded must also be non-double-word
14546 sized, so that the resultant address *is* double-word aligned.
14547 We can ignore floats_offset since that was already included in
14548 the live_regs_mask. */
14549 lrm_count += (lrm_count % 2 ? 2 : 1);
14550
14551 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14552 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14553 {
14554 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
14555 reg, FP_REGNUM, lrm_count * 4);
14556 lrm_count += 2;
14557 }
14558 }
14559
14560 /* saved_regs_mask should contain the IP, which at the time of stack
14561 frame generation actually contains the old stack pointer. So a
14562 quick way to unwind the stack is just pop the IP register directly
14563 into the stack pointer. */
14564 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
14565 saved_regs_mask &= ~ (1 << IP_REGNUM);
14566 saved_regs_mask |= (1 << SP_REGNUM);
14567
14568 /* There are two registers left in saved_regs_mask - LR and PC. We
14569 only need to restore the LR register (the return address), but to
14570 save time we can load it directly into the PC, unless we need a
14571 special function exit sequence, or we are not really returning. */
14572 if (really_return
14573 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
14574 && !crtl->calls_eh_return)
14575 /* Delete the LR from the register mask, so that the LR on
14576 the stack is loaded into the PC in the register mask. */
14577 saved_regs_mask &= ~ (1 << LR_REGNUM);
14578 else
14579 saved_regs_mask &= ~ (1 << PC_REGNUM);
14580
14581 /* We must use SP as the base register, because SP is one of the
14582 registers being restored. If an interrupt or page fault
14583 happens in the ldm instruction, the SP might or might not
14584 have been restored. That would be bad, as then SP will no
14585 longer indicate the safe area of stack, and we can get stack
14586 corruption. Using SP as the base register means that it will
14587 be reset correctly to the original value, should an interrupt
14588 occur. If the stack pointer already points at the right
14589 place, then omit the subtraction. */
14590 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
14591 || cfun->calls_alloca)
14592 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
14593 4 * bit_count (saved_regs_mask));
14594 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
14595
14596 if (IS_INTERRUPT (func_type))
14597 /* Interrupt handlers will have pushed the
14598 IP onto the stack, so restore it now. */
14599 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
14600 }
14601 else
14602 {
14603 /* This branch is executed for ARM mode (non-apcs frames) and
14604 Thumb-2 mode. Frame layout is essentially the same for those
14605 cases, except that in ARM mode frame pointer points to the
14606 first saved register, while in Thumb-2 mode the frame pointer points
14607 to the last saved register.
14608
14609 It is possible to make frame pointer point to last saved
14610 register in both cases, and remove some conditionals below.
14611 That means that fp setup in prologue would be just "mov fp, sp"
14612 and sp restore in epilogue would be just "mov sp, fp", whereas
14613 now we have to use add/sub in those cases. However, the value
14614 of that would be marginal, as both mov and add/sub are 32-bit
14615 in ARM mode, and it would require extra conditionals
14616 in arm_expand_prologue to distinguish the ARM-apcs-frame case
14617 (where the frame pointer is required to point at the first register)
14618 from the ARM-non-apcs-frame case.  Therefore, such a change is postponed
14619 until a real need arises. */
14620 unsigned HOST_WIDE_INT amount;
14621 int rfe;
14622 /* Restore stack pointer if necessary. */
14623 if (TARGET_ARM && frame_pointer_needed)
14624 {
14625 operands[0] = stack_pointer_rtx;
14626 operands[1] = hard_frame_pointer_rtx;
14627
14628 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
14629 output_add_immediate (operands);
14630 }
14631 else
14632 {
14633 if (frame_pointer_needed)
14634 {
14635 /* For Thumb-2 restore sp from the frame pointer.
14636 Operand restrictions mean we have to increment FP, then copy
14637 to SP. */
14638 amount = offsets->locals_base - offsets->saved_regs;
14639 operands[0] = hard_frame_pointer_rtx;
14640 }
14641 else
14642 {
14643 unsigned long count;
14644 operands[0] = stack_pointer_rtx;
14645 amount = offsets->outgoing_args - offsets->saved_regs;
14646 /* pop call clobbered registers if it avoids a
14647 separate stack adjustment. */
14648 count = offsets->saved_regs - offsets->saved_args;
14649 if (optimize_size
14650 && count != 0
14651 && !crtl->calls_eh_return
14652 && bit_count(saved_regs_mask) * 4 == count
14653 && !IS_INTERRUPT (func_type)
14654 && !crtl->tail_call_emit)
14655 {
14656 unsigned long mask;
14657 /* Preserve return values, of any size. */
14658 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14659 mask ^= 0xf;
14660 mask &= ~saved_regs_mask;
14661 reg = 0;
14662 while (bit_count (mask) * 4 > amount)
14663 {
14664 while ((mask & (1 << reg)) == 0)
14665 reg++;
14666 mask &= ~(1 << reg);
14667 }
14668 if (bit_count (mask) * 4 == amount) {
14669 amount = 0;
14670 saved_regs_mask |= mask;
14671 }
14672 }
14673 }
14674
14675 if (amount)
14676 {
14677 operands[1] = operands[0];
14678 operands[2] = GEN_INT (amount);
14679 output_add_immediate (operands);
14680 }
14681 if (frame_pointer_needed)
14682 asm_fprintf (f, "\tmov\t%r, %r\n",
14683 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
14684 }
14685
14686 if (TARGET_FPA_EMU2)
14687 {
14688 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14689 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14690 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
14691 reg, SP_REGNUM);
14692 }
14693 else
14694 {
14695 start_reg = FIRST_FPA_REGNUM;
14696
14697 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
14698 {
14699 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14700 {
14701 if (reg - start_reg == 3)
14702 {
14703 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
14704 start_reg, SP_REGNUM);
14705 start_reg = reg + 1;
14706 }
14707 }
14708 else
14709 {
14710 if (reg != start_reg)
14711 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14712 start_reg, reg - start_reg,
14713 SP_REGNUM);
14714
14715 start_reg = reg + 1;
14716 }
14717 }
14718
14719 /* Just in case the last register checked also needs unstacking. */
14720 if (reg != start_reg)
14721 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
14722 start_reg, reg - start_reg, SP_REGNUM);
14723 }
14724
14725 if (TARGET_HARD_FLOAT && TARGET_VFP)
14726 {
14727 int end_reg = LAST_VFP_REGNUM + 1;
14728
14729 /* Scan the registers in reverse order. We need to match
14730 any groupings made in the prologue and generate matching
14731 pop operations. */
14732 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
14733 {
14734 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
14735 && (!df_regs_ever_live_p (reg + 1)
14736 || call_used_regs[reg + 1]))
14737 {
14738 if (end_reg > reg + 2)
14739 vfp_output_fldmd (f, SP_REGNUM,
14740 (reg + 2 - FIRST_VFP_REGNUM) / 2,
14741 (end_reg - (reg + 2)) / 2);
14742 end_reg = reg;
14743 }
14744 }
14745 if (end_reg > reg + 2)
14746 vfp_output_fldmd (f, SP_REGNUM, 0,
14747 (end_reg - (reg + 2)) / 2);
14748 }
14749
14750 if (TARGET_IWMMXT)
14751 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
14752 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14753 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
14754
14755 /* If we can, restore the LR into the PC. */
14756 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
14757 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
14758 && !IS_STACKALIGN (func_type)
14759 && really_return
14760 && crtl->args.pretend_args_size == 0
14761 && saved_regs_mask & (1 << LR_REGNUM)
14762 && !crtl->calls_eh_return)
14763 {
14764 saved_regs_mask &= ~ (1 << LR_REGNUM);
14765 saved_regs_mask |= (1 << PC_REGNUM);
14766 rfe = IS_INTERRUPT (func_type);
14767 }
14768 else
14769 rfe = 0;
14770
14771 /* Load the registers off the stack. If we only have one register
14772 to load use the LDR instruction - it is faster. For Thumb-2
 14773     always use pop and the assembler will pick the best instruction. */
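  /* E.g. when only LR was saved, this emits "ldr lr, [sp], #4" rather
     than a one-register load-multiple (illustrative).  */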
14774 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
14775 && !IS_INTERRUPT(func_type))
14776 {
14777 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
14778 }
14779 else if (saved_regs_mask)
14780 {
14781 if (saved_regs_mask & (1 << SP_REGNUM))
14782 /* Note - write back to the stack register is not enabled
14783 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14784 in the list of registers and if we add writeback the
14785 instruction becomes UNPREDICTABLE. */
14786 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
14787 rfe);
14788 else if (TARGET_ARM)
14789 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
14790 rfe);
14791 else
14792 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
14793 }
14794
14795 if (crtl->args.pretend_args_size)
14796 {
14797 /* Unwind the pre-pushed regs. */
14798 operands[0] = operands[1] = stack_pointer_rtx;
14799 operands[2] = GEN_INT (crtl->args.pretend_args_size);
14800 output_add_immediate (operands);
14801 }
14802 }
14803
14804 /* We may have already restored PC directly from the stack. */
14805 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
14806 return "";
14807
14808 /* Stack adjustment for exception handler. */
14809 if (crtl->calls_eh_return)
14810 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
14811 ARM_EH_STACKADJ_REGNUM);
14812
14813 /* Generate the return instruction. */
14814 switch ((int) ARM_FUNC_TYPE (func_type))
14815 {
14816 case ARM_FT_ISR:
14817 case ARM_FT_FIQ:
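      /* The S-suffixed write to PC also copies SPSR back to CPSR, and the
	 #4 corrects for the offset left in LR by IRQ/FIQ exception entry.  */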
14818 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
14819 break;
14820
14821 case ARM_FT_EXCEPTION:
14822 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14823 break;
14824
14825 case ARM_FT_INTERWORKED:
14826 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14827 break;
14828
14829 default:
14830 if (IS_STACKALIGN (func_type))
14831 {
14832 /* See comment in arm_expand_prologue. */
14833 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
14834 }
14835 if (arm_arch5 || arm_arch4t)
14836 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
14837 else
14838 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
14839 break;
14840 }
14841
14842 return "";
14843 }
14844
14845 static void
14846 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
14847 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
14848 {
14849 arm_stack_offsets *offsets;
14850
14851 if (TARGET_THUMB1)
14852 {
14853 int regno;
14854
14855 /* Emit any call-via-reg trampolines that are needed for v4t support
14856 of call_reg and call_value_reg type insns. */
14857 for (regno = 0; regno < LR_REGNUM; regno++)
14858 {
14859 rtx label = cfun->machine->call_via[regno];
14860
14861 if (label != NULL)
14862 {
14863 switch_to_section (function_section (current_function_decl));
14864 targetm.asm_out.internal_label (asm_out_file, "L",
14865 CODE_LABEL_NUMBER (label));
14866 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
14867 }
14868 }
14869
14870 /* ??? Probably not safe to set this here, since it assumes that a
14871 function will be emitted as assembly immediately after we generate
14872 RTL for it. This does not happen for inline functions. */
14873 cfun->machine->return_used_this_function = 0;
14874 }
14875 else /* TARGET_32BIT */
14876 {
14877 /* We need to take into account any stack-frame rounding. */
14878 offsets = arm_get_frame_offsets ();
14879
14880 gcc_assert (!use_return_insn (FALSE, NULL)
14881 || (cfun->machine->return_used_this_function != 0)
14882 || offsets->saved_regs == offsets->outgoing_args
14883 || frame_pointer_needed);
14884
14885 /* Reset the ARM-specific per-function variables. */
14886 after_arm_reorg = 0;
14887 }
14888 }
14889
14890 /* Generate and emit an insn that we will recognize as a push_multi.
14891 Unfortunately, since this insn does not reflect very well the actual
14892 semantics of the operation, we need to annotate the insn for the benefit
14893 of DWARF2 frame unwind information. */
14894 static rtx
14895 emit_multi_reg_push (unsigned long mask)
14896 {
14897 int num_regs = 0;
14898 int num_dwarf_regs;
14899 int i, j;
14900 rtx par;
14901 rtx dwarf;
14902 int dwarf_par_index;
14903 rtx tmp, reg;
14904
14905 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14906 if (mask & (1 << i))
14907 num_regs++;
14908
14909 gcc_assert (num_regs && num_regs <= 16);
14910
14911 /* We don't record the PC in the dwarf frame information. */
14912 num_dwarf_regs = num_regs;
14913 if (mask & (1 << PC_REGNUM))
14914 num_dwarf_regs--;
14915
14916 /* For the body of the insn we are going to generate an UNSPEC in
14917 parallel with several USEs. This allows the insn to be recognized
14918 by the push_multi pattern in the arm.md file.
14919
14920 The body of the insn looks something like this:
14921
14922 (parallel [
14923 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14924 (const_int:SI <num>)))
14925 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14926 (use (reg:SI XX))
14927 (use (reg:SI YY))
14928 ...
14929 ])
14930
14931 For the frame note however, we try to be more explicit and actually
14932 show each register being stored into the stack frame, plus a (single)
14933 decrement of the stack pointer. We do it this way in order to be
14934 friendly to the stack unwinding code, which only wants to see a single
14935 stack decrement per instruction. The RTL we generate for the note looks
14936 something like this:
14937
14938 (sequence [
14939 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14940 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14941 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14942 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14943 ...
14944 ])
14945
 14946     FIXME: In an ideal world the PRE_MODIFY would not exist and
14947 instead we'd have a parallel expression detailing all
14948 the stores to the various memory addresses so that debug
14949 information is more up-to-date. Remember however while writing
14950 this to take care of the constraints with the push instruction.
14951
14952 Note also that this has to be taken care of for the VFP registers.
14953
14954 For more see PR43399. */
14955
14956 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
14957 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
14958 dwarf_par_index = 1;
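  /* The lowest-numbered saved register is handled specially below: it
     carries the SP pre-modify and the UNSPEC, while the remaining
     registers are added as plain USEs by the second loop.  */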
14959
14960 for (i = 0; i <= LAST_ARM_REGNUM; i++)
14961 {
14962 if (mask & (1 << i))
14963 {
14964 reg = gen_rtx_REG (SImode, i);
14965
14966 XVECEXP (par, 0, 0)
14967 = gen_rtx_SET (VOIDmode,
14968 gen_frame_mem
14969 (BLKmode,
14970 gen_rtx_PRE_MODIFY (Pmode,
14971 stack_pointer_rtx,
14972 plus_constant
14973 (stack_pointer_rtx,
14974 -4 * num_regs))
14975 ),
14976 gen_rtx_UNSPEC (BLKmode,
14977 gen_rtvec (1, reg),
14978 UNSPEC_PUSH_MULT));
14979
14980 if (i != PC_REGNUM)
14981 {
14982 tmp = gen_rtx_SET (VOIDmode,
14983 gen_frame_mem (SImode, stack_pointer_rtx),
14984 reg);
14985 RTX_FRAME_RELATED_P (tmp) = 1;
14986 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
14987 dwarf_par_index++;
14988 }
14989
14990 break;
14991 }
14992 }
14993
14994 for (j = 1, i++; j < num_regs; i++)
14995 {
14996 if (mask & (1 << i))
14997 {
14998 reg = gen_rtx_REG (SImode, i);
14999
15000 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15001
15002 if (i != PC_REGNUM)
15003 {
15004 tmp
15005 = gen_rtx_SET (VOIDmode,
15006 gen_frame_mem
15007 (SImode,
15008 plus_constant (stack_pointer_rtx,
15009 4 * j)),
15010 reg);
15011 RTX_FRAME_RELATED_P (tmp) = 1;
15012 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15013 }
15014
15015 j++;
15016 }
15017 }
15018
15019 par = emit_insn (par);
15020
15021 tmp = gen_rtx_SET (VOIDmode,
15022 stack_pointer_rtx,
15023 plus_constant (stack_pointer_rtx, -4 * num_regs));
15024 RTX_FRAME_RELATED_P (tmp) = 1;
15025 XVECEXP (dwarf, 0, 0) = tmp;
15026
15027 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15028
15029 return par;
15030 }
15031
15032 /* Calculate the size of the return value that is passed in registers. */
15033 static unsigned
15034 arm_size_return_regs (void)
15035 {
15036 enum machine_mode mode;
15037
15038 if (crtl->return_rtx != 0)
15039 mode = GET_MODE (crtl->return_rtx);
15040 else
15041 mode = DECL_MODE (DECL_RESULT (current_function_decl));
15042
15043 return GET_MODE_SIZE (mode);
15044 }
15045
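 /* Emit an insn pushing COUNT FPA registers, starting at BASE_REG, as a
    single store-multiple, and attach a DWARF frame note describing the
    individual stores and the stack decrement.  Returns the emitted insn.  */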
15046 static rtx
15047 emit_sfm (int base_reg, int count)
15048 {
15049 rtx par;
15050 rtx dwarf;
15051 rtx tmp, reg;
15052 int i;
15053
15054 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
15055 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
15056
15057 reg = gen_rtx_REG (XFmode, base_reg++);
15058
15059 XVECEXP (par, 0, 0)
15060 = gen_rtx_SET (VOIDmode,
15061 gen_frame_mem
15062 (BLKmode,
15063 gen_rtx_PRE_MODIFY (Pmode,
15064 stack_pointer_rtx,
15065 plus_constant
15066 (stack_pointer_rtx,
15067 -12 * count))
15068 ),
15069 gen_rtx_UNSPEC (BLKmode,
15070 gen_rtvec (1, reg),
15071 UNSPEC_PUSH_MULT));
15072 tmp = gen_rtx_SET (VOIDmode,
15073 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
15074 RTX_FRAME_RELATED_P (tmp) = 1;
15075 XVECEXP (dwarf, 0, 1) = tmp;
15076
15077 for (i = 1; i < count; i++)
15078 {
15079 reg = gen_rtx_REG (XFmode, base_reg++);
15080 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
15081
15082 tmp = gen_rtx_SET (VOIDmode,
15083 gen_frame_mem (XFmode,
15084 plus_constant (stack_pointer_rtx,
15085 i * 12)),
15086 reg);
15087 RTX_FRAME_RELATED_P (tmp) = 1;
15088 XVECEXP (dwarf, 0, i + 1) = tmp;
15089 }
15090
15091 tmp = gen_rtx_SET (VOIDmode,
15092 stack_pointer_rtx,
15093 plus_constant (stack_pointer_rtx, -12 * count));
15094
15095 RTX_FRAME_RELATED_P (tmp) = 1;
15096 XVECEXP (dwarf, 0, 0) = tmp;
15097
15098 par = emit_insn (par);
15099 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15100
15101 return par;
15102 }
15103
15104
15105 /* Return true if the current function needs to save/restore LR. */
15106
15107 static bool
15108 thumb_force_lr_save (void)
15109 {
15110 return !cfun->machine->lr_save_eliminated
15111 && (!leaf_function_p ()
15112 || thumb_far_jump_used_p ()
15113 || df_regs_ever_live_p (LR_REGNUM));
15114 }
15115
15116
15117 /* Return true if r3 is used by any of the tail call insns in the
15118 current function. */
15119
15120 static bool
15121 any_sibcall_uses_r3 (void)
15122 {
15123 edge_iterator ei;
15124 edge e;
15125
15126 if (!crtl->tail_call_emit)
15127 return false;
15128 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15129 if (e->flags & EDGE_SIBCALL)
15130 {
15131 rtx call = BB_END (e->src);
15132 if (!CALL_P (call))
15133 call = prev_nonnote_nondebug_insn (call);
15134 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15135 if (find_regno_fusage (call, USE, 3))
15136 return true;
15137 }
15138 return false;
15139 }
15140
15141
15142 /* Compute the distance from register FROM to register TO.
15143 These can be the arg pointer (26), the soft frame pointer (25),
15144 the stack pointer (13) or the hard frame pointer (11).
15145 In thumb mode r7 is used as the soft frame pointer, if needed.
15146 Typical stack layout looks like this:
15147
15148 old stack pointer -> | |
15149 ----
15150 | | \
15151 | | saved arguments for
15152 | | vararg functions
15153 | | /
15154 --
15155 hard FP & arg pointer -> | | \
15156 | | stack
15157 | | frame
15158 | | /
15159 --
15160 | | \
15161 | | call saved
15162 | | registers
15163 soft frame pointer -> | | /
15164 --
15165 | | \
15166 | | local
15167 | | variables
15168 locals base pointer -> | | /
15169 --
15170 | | \
15171 | | outgoing
15172 | | arguments
15173 current stack pointer -> | | /
15174 --
15175
15176 For a given function some or all of these stack components
15177 may not be needed, giving rise to the possibility of
15178 eliminating some of the registers.
15179
15180 The values returned by this function must reflect the behavior
15181 of arm_expand_prologue() and arm_compute_save_reg_mask().
15182
15183 The sign of the number returned reflects the direction of stack
15184 growth, so the values are positive for all eliminations except
15185 from the soft frame pointer to the hard frame pointer.
15186
15187 SFP may point just inside the local variables block to ensure correct
15188 alignment. */
15189
15190
15191 /* Calculate stack offsets. These are used to calculate register elimination
15192 offsets and in prologue/epilogue code. Also calculates which registers
15193 should be saved. */
15194
15195 static arm_stack_offsets *
15196 arm_get_frame_offsets (void)
15197 {
15198 struct arm_stack_offsets *offsets;
15199 unsigned long func_type;
15200 int leaf;
15201 int saved;
15202 int core_saved;
15203 HOST_WIDE_INT frame_size;
15204 int i;
15205
15206 offsets = &cfun->machine->stack_offsets;
15207
15208 /* We need to know if we are a leaf function. Unfortunately, it
15209 is possible to be called after start_sequence has been called,
15210 which causes get_insns to return the insns for the sequence,
15211 not the function, which will cause leaf_function_p to return
15212 the incorrect result.
15213
15214 to know about leaf functions once reload has completed, and the
15215 frame size cannot be changed after that time, so we can safely
15216 use the cached value. */
15217
15218 if (reload_completed)
15219 return offsets;
15220
 15221   /* Initially this is the size of the local variables. It will be translated
15222 into an offset once we have determined the size of preceding data. */
15223 frame_size = ROUND_UP_WORD (get_frame_size ());
15224
15225 leaf = leaf_function_p ();
15226
15227 /* Space for variadic functions. */
15228 offsets->saved_args = crtl->args.pretend_args_size;
15229
15230 /* In Thumb mode this is incorrect, but never used. */
15231 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
15232 arm_compute_static_chain_stack_bytes();
15233
15234 if (TARGET_32BIT)
15235 {
15236 unsigned int regno;
15237
15238 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
15239 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15240 saved = core_saved;
15241
15242 /* We know that SP will be doubleword aligned on entry, and we must
15243 preserve that condition at any subroutine call. We also require the
15244 soft frame pointer to be doubleword aligned. */
15245
15246 if (TARGET_REALLY_IWMMXT)
15247 {
15248 /* Check for the call-saved iWMMXt registers. */
15249 for (regno = FIRST_IWMMXT_REGNUM;
15250 regno <= LAST_IWMMXT_REGNUM;
15251 regno++)
15252 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15253 saved += 8;
15254 }
15255
15256 func_type = arm_current_func_type ();
15257 if (! IS_VOLATILE (func_type))
15258 {
15259 /* Space for saved FPA registers. */
15260 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
15261 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
15262 saved += 12;
15263
15264 /* Space for saved VFP registers. */
15265 if (TARGET_HARD_FLOAT && TARGET_VFP)
15266 saved += arm_get_vfp_saved_size ();
15267 }
15268 }
15269 else /* TARGET_THUMB1 */
15270 {
15271 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
15272 core_saved = bit_count (offsets->saved_regs_mask) * 4;
15273 saved = core_saved;
15274 if (TARGET_BACKTRACE)
15275 saved += 16;
15276 }
15277
15278 /* Saved registers include the stack frame. */
15279 offsets->saved_regs = offsets->saved_args + saved +
15280 arm_compute_static_chain_stack_bytes();
15281 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
15282 /* A leaf function does not need any stack alignment if it has nothing
15283 on the stack. */
15284 if (leaf && frame_size == 0
15285 /* However if it calls alloca(), we have a dynamically allocated
15286 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
15287 && ! cfun->calls_alloca)
15288 {
15289 offsets->outgoing_args = offsets->soft_frame;
15290 offsets->locals_base = offsets->soft_frame;
15291 return offsets;
15292 }
15293
15294 /* Ensure SFP has the correct alignment. */
15295 if (ARM_DOUBLEWORD_ALIGN
15296 && (offsets->soft_frame & 7))
15297 {
15298 offsets->soft_frame += 4;
15299 /* Try to align stack by pushing an extra reg. Don't bother doing this
15300 when there is a stack frame as the alignment will be rolled into
15301 the normal stack adjustment. */
15302 if (frame_size + crtl->outgoing_args_size == 0)
15303 {
15304 int reg = -1;
15305
15306 /* If it is safe to use r3, then do so. This sometimes
15307 generates better code on Thumb-2 by avoiding the need to
15308 use 32-bit push/pop instructions. */
15309 if (! any_sibcall_uses_r3 ()
15310 && arm_size_return_regs () <= 12
15311 && (offsets->saved_regs_mask & (1 << 3)) == 0)
15312 {
15313 reg = 3;
15314 }
15315 else
15316 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
15317 {
15318 if ((offsets->saved_regs_mask & (1 << i)) == 0)
15319 {
15320 reg = i;
15321 break;
15322 }
15323 }
15324
15325 if (reg != -1)
15326 {
15327 offsets->saved_regs += 4;
15328 offsets->saved_regs_mask |= (1 << reg);
15329 }
15330 }
15331 }
15332
15333 offsets->locals_base = offsets->soft_frame + frame_size;
15334 offsets->outgoing_args = (offsets->locals_base
15335 + crtl->outgoing_args_size);
15336
15337 if (ARM_DOUBLEWORD_ALIGN)
15338 {
15339 /* Ensure SP remains doubleword aligned. */
15340 if (offsets->outgoing_args & 7)
15341 offsets->outgoing_args += 4;
15342 gcc_assert (!(offsets->outgoing_args & 7));
15343 }
15344
15345 return offsets;
15346 }
15347
15348
15349 /* Calculate the relative offsets for the different stack pointers. Positive
15350 offsets are in the direction of stack growth. */
15351
15352 HOST_WIDE_INT
15353 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
15354 {
15355 arm_stack_offsets *offsets;
15356
15357 offsets = arm_get_frame_offsets ();
15358
15359 /* OK, now we have enough information to compute the distances.
15360 There must be an entry in these switch tables for each pair
15361 of registers in ELIMINABLE_REGS, even if some of the entries
15362 seem to be redundant or useless. */
15363 switch (from)
15364 {
15365 case ARG_POINTER_REGNUM:
15366 switch (to)
15367 {
15368 case THUMB_HARD_FRAME_POINTER_REGNUM:
15369 return 0;
15370
15371 case FRAME_POINTER_REGNUM:
15372 /* This is the reverse of the soft frame pointer
15373 to hard frame pointer elimination below. */
15374 return offsets->soft_frame - offsets->saved_args;
15375
15376 case ARM_HARD_FRAME_POINTER_REGNUM:
15377 /* This is only non-zero in the case where the static chain register
15378 is stored above the frame. */
15379 return offsets->frame - offsets->saved_args - 4;
15380
15381 case STACK_POINTER_REGNUM:
15382 /* If nothing has been pushed on the stack at all
15383 then this will return -4. This *is* correct! */
15384 return offsets->outgoing_args - (offsets->saved_args + 4);
15385
15386 default:
15387 gcc_unreachable ();
15388 }
15389 gcc_unreachable ();
15390
15391 case FRAME_POINTER_REGNUM:
15392 switch (to)
15393 {
15394 case THUMB_HARD_FRAME_POINTER_REGNUM:
15395 return 0;
15396
15397 case ARM_HARD_FRAME_POINTER_REGNUM:
15398 /* The hard frame pointer points to the top entry in the
 15399	 stack frame. The soft frame pointer points to the bottom entry
15400 in the stack frame. If there is no stack frame at all,
15401 then they are identical. */
15402
15403 return offsets->frame - offsets->soft_frame;
15404
15405 case STACK_POINTER_REGNUM:
15406 return offsets->outgoing_args - offsets->soft_frame;
15407
15408 default:
15409 gcc_unreachable ();
15410 }
15411 gcc_unreachable ();
15412
15413 default:
15414 /* You cannot eliminate from the stack pointer.
15415 In theory you could eliminate from the hard frame
15416 pointer to the stack pointer, but this will never
15417 happen, since if a stack frame is not needed the
15418 hard frame pointer will never be used. */
15419 gcc_unreachable ();
15420 }
15421 }
15422
15423 /* Given FROM and TO register numbers, say whether this elimination is
15424 allowed. Frame pointer elimination is automatically handled.
15425
15426 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15427 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15428 pointer, we must eliminate FRAME_POINTER_REGNUM into
15429 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15430 ARG_POINTER_REGNUM. */
15431
15432 bool
15433 arm_can_eliminate (const int from, const int to)
15434 {
15435 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
15436 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
15437 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
15438 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
15439 true);
15440 }
15441
15442 /* Emit RTL to save coprocessor registers on function entry. Returns the
15443 number of bytes pushed. */
15444
15445 static int
15446 arm_save_coproc_regs(void)
15447 {
15448 int saved_size = 0;
15449 unsigned reg;
15450 unsigned start_reg;
15451 rtx insn;
15452
15453 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15454 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15455 {
15456 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15457 insn = gen_rtx_MEM (V2SImode, insn);
15458 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
15459 RTX_FRAME_RELATED_P (insn) = 1;
15460 saved_size += 8;
15461 }
15462
15463 /* Save any floating point call-saved registers used by this
15464 function. */
15465 if (TARGET_FPA_EMU2)
15466 {
15467 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15468 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15469 {
15470 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
15471 insn = gen_rtx_MEM (XFmode, insn);
15472 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
15473 RTX_FRAME_RELATED_P (insn) = 1;
15474 saved_size += 12;
15475 }
15476 }
15477 else
15478 {
15479 start_reg = LAST_FPA_REGNUM;
15480
15481 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15482 {
15483 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15484 {
15485 if (start_reg - reg == 3)
15486 {
15487 insn = emit_sfm (reg, 4);
15488 RTX_FRAME_RELATED_P (insn) = 1;
15489 saved_size += 48;
15490 start_reg = reg - 1;
15491 }
15492 }
15493 else
15494 {
15495 if (start_reg != reg)
15496 {
15497 insn = emit_sfm (reg + 1, start_reg - reg);
15498 RTX_FRAME_RELATED_P (insn) = 1;
15499 saved_size += (start_reg - reg) * 12;
15500 }
15501 start_reg = reg - 1;
15502 }
15503 }
15504
15505 if (start_reg != reg)
15506 {
15507 insn = emit_sfm (reg + 1, start_reg - reg);
15508 saved_size += (start_reg - reg) * 12;
15509 RTX_FRAME_RELATED_P (insn) = 1;
15510 }
15511 }
15512 if (TARGET_HARD_FLOAT && TARGET_VFP)
15513 {
15514 start_reg = FIRST_VFP_REGNUM;
15515
15516 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15517 {
15518 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15519 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15520 {
15521 if (start_reg != reg)
15522 saved_size += vfp_emit_fstmd (start_reg,
15523 (reg - start_reg) / 2);
15524 start_reg = reg + 2;
15525 }
15526 }
15527 if (start_reg != reg)
15528 saved_size += vfp_emit_fstmd (start_reg,
15529 (reg - start_reg) / 2);
15530 }
15531 return saved_size;
15532 }
15533
15534
15535 /* Set the Thumb frame pointer from the stack pointer. */
15536
15537 static void
15538 thumb_set_frame_pointer (arm_stack_offsets *offsets)
15539 {
15540 HOST_WIDE_INT amount;
15541 rtx insn, dwarf;
15542
15543 amount = offsets->outgoing_args - offsets->locals_base;
15544 if (amount < 1024)
15545 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15546 stack_pointer_rtx, GEN_INT (amount)));
15547 else
15548 {
15549 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
15550 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15551 expects the first two operands to be the same. */
15552 if (TARGET_THUMB2)
15553 {
15554 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15555 stack_pointer_rtx,
15556 hard_frame_pointer_rtx));
15557 }
15558 else
15559 {
15560 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15561 hard_frame_pointer_rtx,
15562 stack_pointer_rtx));
15563 }
15564 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
15565 plus_constant (stack_pointer_rtx, amount));
15566 RTX_FRAME_RELATED_P (dwarf) = 1;
15567 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15568 }
15569
15570 RTX_FRAME_RELATED_P (insn) = 1;
15571 }
15572
15573 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15574 function. */
15575 void
15576 arm_expand_prologue (void)
15577 {
15578 rtx amount;
15579 rtx insn;
15580 rtx ip_rtx;
15581 unsigned long live_regs_mask;
15582 unsigned long func_type;
15583 int fp_offset = 0;
15584 int saved_pretend_args = 0;
15585 int saved_regs = 0;
15586 unsigned HOST_WIDE_INT args_to_push;
15587 arm_stack_offsets *offsets;
15588
15589 func_type = arm_current_func_type ();
15590
15591 /* Naked functions don't have prologues. */
15592 if (IS_NAKED (func_type))
15593 return;
15594
15595 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15596 args_to_push = crtl->args.pretend_args_size;
15597
 15598   /* Compute which registers we will have to save onto the stack. */
15599 offsets = arm_get_frame_offsets ();
15600 live_regs_mask = offsets->saved_regs_mask;
15601
15602 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
15603
15604 if (IS_STACKALIGN (func_type))
15605 {
15606 rtx dwarf;
15607 rtx r0;
15608 rtx r1;
15609 /* Handle a word-aligned stack pointer. We generate the following:
15610
15611 mov r0, sp
15612 bic r1, r0, #7
15613 mov sp, r1
15614 <save and restore r0 in normal prologue/epilogue>
15615 mov sp, r0
15616 bx lr
15617
15618 The unwinder doesn't need to know about the stack realignment.
15619 Just tell it we saved SP in r0. */
15620 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
15621
15622 r0 = gen_rtx_REG (SImode, 0);
15623 r1 = gen_rtx_REG (SImode, 1);
15624 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15625 compiler won't choke. */
15626 dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
15627 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
15628 insn = gen_movsi (r0, stack_pointer_rtx);
15629 RTX_FRAME_RELATED_P (insn) = 1;
15630 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15631 emit_insn (insn);
15632 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
15633 emit_insn (gen_movsi (stack_pointer_rtx, r1));
15634 }
15635
 15636   /* For APCS frames, if the IP register is clobbered
 15637      when creating the frame, save that register in a special
15638 way. */
15639 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15640 {
15641 if (IS_INTERRUPT (func_type))
15642 {
15643 /* Interrupt functions must not corrupt any registers.
15644 Creating a frame pointer however, corrupts the IP
15645 register, so we must push it first. */
15646 insn = emit_multi_reg_push (1 << IP_REGNUM);
15647
15648 /* Do not set RTX_FRAME_RELATED_P on this insn.
15649 The dwarf stack unwinding code only wants to see one
15650 stack decrement per function, and this is not it. If
15651 this instruction is labeled as being part of the frame
15652 creation sequence then dwarf2out_frame_debug_expr will
15653 die when it encounters the assignment of IP to FP
15654 later on, since the use of SP here establishes SP as
15655 the CFA register and not IP.
15656
15657 Anyway this instruction is not really part of the stack
15658 frame creation although it is part of the prologue. */
15659 }
15660 else if (IS_NESTED (func_type))
15661 {
 15662	  /* The static chain register is the same as the IP register,
 15663	     which is used as a scratch register during stack frame creation.
 15664	     To get around this we need to find somewhere to store IP
 15665	     whilst the frame is being created. We try the following
15666 places in order:
15667
15668 1. The last argument register.
15669 2. A slot on the stack above the frame. (This only
15670 works if the function is not a varargs function).
15671 3. Register r3, after pushing the argument registers
15672 onto the stack.
15673
15674 Note - we only need to tell the dwarf2 backend about the SP
15675 adjustment in the second variant; the static chain register
15676 doesn't need to be unwound, as it doesn't contain a value
15677 inherited from the caller. */
15678
15679 if (df_regs_ever_live_p (3) == false)
15680 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15681 else if (args_to_push == 0)
15682 {
15683 rtx dwarf;
15684
15685 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15686 saved_regs += 4;
15687
15688 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
15689 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
15690 fp_offset = 4;
15691
15692 /* Just tell the dwarf backend that we adjusted SP. */
15693 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15694 plus_constant (stack_pointer_rtx,
15695 -fp_offset));
15696 RTX_FRAME_RELATED_P (insn) = 1;
15697 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15698 }
15699 else
15700 {
15701 /* Store the args on the stack. */
15702 if (cfun->machine->uses_anonymous_args)
15703 insn = emit_multi_reg_push
15704 ((0xf0 >> (args_to_push / 4)) & 0xf);
15705 else
15706 insn = emit_insn
15707 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15708 GEN_INT (- args_to_push)));
15709
15710 RTX_FRAME_RELATED_P (insn) = 1;
15711
15712 saved_pretend_args = 1;
15713 fp_offset = args_to_push;
15714 args_to_push = 0;
15715
15716 /* Now reuse r3 to preserve IP. */
15717 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
15718 }
15719 }
15720
15721 insn = emit_set_insn (ip_rtx,
15722 plus_constant (stack_pointer_rtx, fp_offset));
15723 RTX_FRAME_RELATED_P (insn) = 1;
15724 }
15725
15726 if (args_to_push)
15727 {
15728 /* Push the argument registers, or reserve space for them. */
15729 if (cfun->machine->uses_anonymous_args)
15730 insn = emit_multi_reg_push
15731 ((0xf0 >> (args_to_push / 4)) & 0xf);
15732 else
15733 insn = emit_insn
15734 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15735 GEN_INT (- args_to_push)));
15736 RTX_FRAME_RELATED_P (insn) = 1;
15737 }
15738
15739 /* If this is an interrupt service routine, and the link register
 15740     is going to be pushed, and we're not generating the extra
 15741     push of IP (needed when a frame is needed and the frame layout is APCS),
 15742     then subtracting four from LR now means that the function return
15743 can be done with a single instruction. */
15744 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
15745 && (live_regs_mask & (1 << LR_REGNUM)) != 0
15746 && !(frame_pointer_needed && TARGET_APCS_FRAME)
15747 && TARGET_ARM)
15748 {
15749 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
15750
15751 emit_set_insn (lr, plus_constant (lr, -4));
15752 }
15753
15754 if (live_regs_mask)
15755 {
15756 saved_regs += bit_count (live_regs_mask) * 4;
15757 if (optimize_size && !frame_pointer_needed
15758 && saved_regs == offsets->saved_regs - offsets->saved_args)
15759 {
15760 /* If no coprocessor registers are being pushed and we don't have
15761 to worry about a frame pointer then push extra registers to
 15762	     create the stack frame. This is done in a way that does not
15763 alter the frame layout, so is independent of the epilogue. */
15764 int n;
15765 int frame;
15766 n = 0;
15767 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
15768 n++;
15769 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
15770 if (frame && n * 4 >= frame)
15771 {
15772 n = frame / 4;
15773 live_regs_mask |= (1 << n) - 1;
15774 saved_regs += frame;
15775 }
15776 }
15777 insn = emit_multi_reg_push (live_regs_mask);
15778 RTX_FRAME_RELATED_P (insn) = 1;
15779 }
15780
15781 if (! IS_VOLATILE (func_type))
15782 saved_regs += arm_save_coproc_regs ();
15783
15784 if (frame_pointer_needed && TARGET_ARM)
15785 {
15786 /* Create the new frame pointer. */
15787 if (TARGET_APCS_FRAME)
15788 {
15789 insn = GEN_INT (-(4 + args_to_push + fp_offset));
15790 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
15791 RTX_FRAME_RELATED_P (insn) = 1;
15792
15793 if (IS_NESTED (func_type))
15794 {
15795 /* Recover the static chain register. */
15796 if (!df_regs_ever_live_p (3)
15797 || saved_pretend_args)
15798 insn = gen_rtx_REG (SImode, 3);
15799 else /* if (crtl->args.pretend_args_size == 0) */
15800 {
15801 insn = plus_constant (hard_frame_pointer_rtx, 4);
15802 insn = gen_frame_mem (SImode, insn);
15803 }
15804 emit_set_insn (ip_rtx, insn);
15805 /* Add a USE to stop propagate_one_insn() from barfing. */
15806 emit_insn (gen_prologue_use (ip_rtx));
15807 }
15808 }
15809 else
15810 {
15811 insn = GEN_INT (saved_regs - 4);
15812 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
15813 stack_pointer_rtx, insn));
15814 RTX_FRAME_RELATED_P (insn) = 1;
15815 }
15816 }
15817
15818 if (flag_stack_usage_info)
15819 current_function_static_stack_size
15820 = offsets->outgoing_args - offsets->saved_args;
15821
15822 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
15823 {
15824 /* This add can produce multiple insns for a large constant, so we
15825 need to get tricky. */
15826 rtx last = get_last_insn ();
15827
15828 amount = GEN_INT (offsets->saved_args + saved_regs
15829 - offsets->outgoing_args);
15830
15831 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
15832 amount));
15833 do
15834 {
15835 last = last ? NEXT_INSN (last) : get_insns ();
15836 RTX_FRAME_RELATED_P (last) = 1;
15837 }
15838 while (last != insn);
15839
15840 /* If the frame pointer is needed, emit a special barrier that
15841 will prevent the scheduler from moving stores to the frame
15842 before the stack adjustment. */
15843 if (frame_pointer_needed)
15844 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
15845 hard_frame_pointer_rtx));
15846 }
15847
15848
15849 if (frame_pointer_needed && TARGET_THUMB2)
15850 thumb_set_frame_pointer (offsets);
15851
15852 if (flag_pic && arm_pic_register != INVALID_REGNUM)
15853 {
15854 unsigned long mask;
15855
15856 mask = live_regs_mask;
15857 mask &= THUMB2_WORK_REGS;
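      /* IP is only offered as an extra work register when the function is
	 not nested; for nested functions it carries the static chain (see
	 the prologue code above).  */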
15858 if (!IS_NESTED (func_type))
15859 mask |= (1 << IP_REGNUM);
15860 arm_load_pic_register (mask);
15861 }
15862
15863 /* If we are profiling, make sure no instructions are scheduled before
15864 the call to mcount. Similarly if the user has requested no
15865 scheduling in the prolog. Similarly if we want non-call exceptions
15866 using the EABI unwinder, to prevent faulting instructions from being
15867 swapped with a stack adjustment. */
15868 if (crtl->profile || !TARGET_SCHED_PROLOG
15869 || (arm_except_unwind_info (&global_options) == UI_TARGET
15870 && cfun->can_throw_non_call_exceptions))
15871 emit_insn (gen_blockage ());
15872
15873 /* If the link register is being kept alive, with the return address in it,
15874 then make sure that it does not get reused by the ce2 pass. */
15875 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15876 cfun->machine->lr_save_eliminated = 1;
15877 }
15878 \f
15879 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15880 static void
15881 arm_print_condition (FILE *stream)
15882 {
15883 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
15884 {
15885 /* Branch conversion is not implemented for Thumb-2. */
15886 if (TARGET_THUMB)
15887 {
15888 output_operand_lossage ("predicated Thumb instruction");
15889 return;
15890 }
15891 if (current_insn_predicate != NULL)
15892 {
15893 output_operand_lossage
15894 ("predicated instruction in conditional sequence");
15895 return;
15896 }
15897
15898 fputs (arm_condition_codes[arm_current_cc], stream);
15899 }
15900 else if (current_insn_predicate)
15901 {
15902 enum arm_cond_code code;
15903
15904 if (TARGET_THUMB1)
15905 {
15906 output_operand_lossage ("predicated Thumb instruction");
15907 return;
15908 }
15909
15910 code = get_arm_condition_code (current_insn_predicate);
15911 fputs (arm_condition_codes[code], stream);
15912 }
15913 }
15914
15915
15916 /* If CODE is 'd', then the X is a condition operand and the instruction
15917 should only be executed if the condition is true.
 15918    If CODE is 'D', then the X is a condition operand and the instruction
15919 should only be executed if the condition is false: however, if the mode
15920 of the comparison is CCFPEmode, then always execute the instruction -- we
15921 do this because in these circumstances !GE does not necessarily imply LT;
15922 in these cases the instruction pattern will take care to make sure that
15923 an instruction containing %d will follow, thereby undoing the effects of
15924 doing this instruction unconditionally.
15925 If CODE is 'N' then X is a floating point operand that must be negated
15926 before output.
15927 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15928 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15929 static void
15930 arm_print_operand (FILE *stream, rtx x, int code)
15931 {
15932 switch (code)
15933 {
15934 case '@':
15935 fputs (ASM_COMMENT_START, stream);
15936 return;
15937
15938 case '_':
15939 fputs (user_label_prefix, stream);
15940 return;
15941
15942 case '|':
15943 fputs (REGISTER_PREFIX, stream);
15944 return;
15945
15946 case '?':
15947 arm_print_condition (stream);
15948 return;
15949
15950 case '(':
15951 /* Nothing in unified syntax, otherwise the current condition code. */
15952 if (!TARGET_UNIFIED_ASM)
15953 arm_print_condition (stream);
15954 break;
15955
15956 case ')':
15957 /* The current condition code in unified syntax, otherwise nothing. */
15958 if (TARGET_UNIFIED_ASM)
15959 arm_print_condition (stream);
15960 break;
15961
15962 case '.':
15963 /* The current condition code for a condition code setting instruction.
15964 Preceded by 's' in unified syntax, otherwise followed by 's'. */
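      /* E.g. with the EQ condition active this prints "seq" in unified
	 syntax ("adds" becomes "addseq") and "eqs" otherwise, so divided
	 syntax reads "addeqs" (an illustrative reading of the code below).  */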
15965 if (TARGET_UNIFIED_ASM)
15966 {
15967 fputc('s', stream);
15968 arm_print_condition (stream);
15969 }
15970 else
15971 {
15972 arm_print_condition (stream);
15973 fputc('s', stream);
15974 }
15975 return;
15976
15977 case '!':
15978 /* If the instruction is conditionally executed then print
15979 the current condition code, otherwise print 's'. */
15980 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
15981 if (current_insn_predicate)
15982 arm_print_condition (stream);
15983 else
15984 fputc('s', stream);
15985 break;
15986
15987 /* %# is a "break" sequence. It doesn't output anything, but is used to
15988 separate e.g. operand numbers from following text, if that text consists
15989 of further digits which we don't want to be part of the operand
15990 number. */
15991 case '#':
15992 return;
15993
15994 case 'N':
15995 {
15996 REAL_VALUE_TYPE r;
15997 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15998 r = real_value_negate (&r);
15999 fprintf (stream, "%s", fp_const_from_val (&r));
16000 }
16001 return;
16002
16003 /* An integer or symbol address without a preceding # sign. */
16004 case 'c':
16005 switch (GET_CODE (x))
16006 {
16007 case CONST_INT:
16008 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16009 break;
16010
16011 case SYMBOL_REF:
16012 output_addr_const (stream, x);
16013 break;
16014
16015 default:
16016 gcc_unreachable ();
16017 }
16018 return;
16019
16020 case 'B':
16021 if (GET_CODE (x) == CONST_INT)
16022 {
16023 HOST_WIDE_INT val;
16024 val = ARM_SIGN_EXTEND (~INTVAL (x));
16025 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16026 }
16027 else
16028 {
16029 putc ('~', stream);
16030 output_addr_const (stream, x);
16031 }
16032 return;
16033
16034 case 'L':
16035 /* The low 16 bits of an immediate constant. */
16036 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16037 return;
16038
16039 case 'i':
16040 fprintf (stream, "%s", arithmetic_instr (x, 1));
16041 return;
16042
16043 /* Truncate Cirrus shift counts. */
16044 case 's':
16045 if (GET_CODE (x) == CONST_INT)
16046 {
16047 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
16048 return;
16049 }
16050 arm_print_operand (stream, x, 0);
16051 return;
16052
16053 case 'I':
16054 fprintf (stream, "%s", arithmetic_instr (x, 0));
16055 return;
16056
16057 case 'S':
16058 {
16059 HOST_WIDE_INT val;
16060 const char *shift;
16061
16062 if (!shift_operator (x, SImode))
16063 {
16064 output_operand_lossage ("invalid shift operand");
16065 break;
16066 }
16067
16068 shift = shift_op (x, &val);
16069
16070 if (shift)
16071 {
16072 fprintf (stream, ", %s ", shift);
16073 if (val == -1)
16074 arm_print_operand (stream, XEXP (x, 1), 0);
16075 else
16076 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16077 }
16078 }
16079 return;
16080
16081 /* An explanation of the 'Q', 'R' and 'H' register operands:
16082
16083 In a pair of registers containing a DI or DF value the 'Q'
16084 operand returns the register number of the register containing
16085 the least significant part of the value. The 'R' operand returns
16086 the register number of the register containing the most
16087 significant part of the value.
16088
16089 The 'H' operand returns the higher of the two register numbers.
 16090    On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
16091 same as the 'Q' operand, since the most significant part of the
16092 value is held in the lower number register. The reverse is true
16093 on systems where WORDS_BIG_ENDIAN is false.
16094
16095 The purpose of these operands is to distinguish between cases
16096 where the endian-ness of the values is important (for example
16097 when they are added together), and cases where the endian-ness
16098 is irrelevant, but the order of register operations is important.
16099 For example when loading a value from memory into a register
16100 pair, the endian-ness does not matter. Provided that the value
16101 from the lower memory address is put into the lower numbered
16102 register, and the value from the higher address is put into the
16103 higher numbered register, the load will work regardless of whether
16104 the value being loaded is big-wordian or little-wordian. The
16105 order of the two register loads can matter however, if the address
16106 of the memory location is actually held in one of the registers
16107 being overwritten by the load.
16108
16109 The 'Q' and 'R' constraints are also available for 64-bit
16110 constants. */
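    /* For example, a DImode value held in {r2, r3} on a little-endian
       target prints r2 for '%Q', r3 for '%R' and also r3 for '%H'
       (an illustrative case, not an exhaustive description).  */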
16111 case 'Q':
16112 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16113 {
16114 rtx part = gen_lowpart (SImode, x);
16115 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16116 return;
16117 }
16118
16119 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16120 {
16121 output_operand_lossage ("invalid operand for code '%c'", code);
16122 return;
16123 }
16124
16125 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
16126 return;
16127
16128 case 'R':
16129 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16130 {
16131 enum machine_mode mode = GET_MODE (x);
16132 rtx part;
16133
16134 if (mode == VOIDmode)
16135 mode = DImode;
16136 part = gen_highpart_mode (SImode, mode, x);
16137 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16138 return;
16139 }
16140
16141 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16142 {
16143 output_operand_lossage ("invalid operand for code '%c'", code);
16144 return;
16145 }
16146
16147 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
16148 return;
16149
16150 case 'H':
16151 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16152 {
16153 output_operand_lossage ("invalid operand for code '%c'", code);
16154 return;
16155 }
16156
16157 asm_fprintf (stream, "%r", REGNO (x) + 1);
16158 return;
16159
16160 case 'J':
16161 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16162 {
16163 output_operand_lossage ("invalid operand for code '%c'", code);
16164 return;
16165 }
16166
16167 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
16168 return;
16169
16170 case 'K':
16171 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
16172 {
16173 output_operand_lossage ("invalid operand for code '%c'", code);
16174 return;
16175 }
16176
16177 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
16178 return;
16179
16180 case 'm':
16181 asm_fprintf (stream, "%r",
16182 GET_CODE (XEXP (x, 0)) == REG
16183 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
16184 return;
16185
16186 case 'M':
16187 asm_fprintf (stream, "{%r-%r}",
16188 REGNO (x),
16189 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
16190 return;
16191
16192 /* Like 'M', but writing doubleword vector registers, for use by Neon
16193 insns. */
16194 case 'h':
16195 {
16196 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
16197 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
16198 if (numregs == 1)
16199 asm_fprintf (stream, "{d%d}", regno);
16200 else
16201 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
16202 }
16203 return;
16204
16205 case 'd':
16206 /* CONST_TRUE_RTX means always -- that's the default. */
16207 if (x == const_true_rtx)
16208 return;
16209
16210 if (!COMPARISON_P (x))
16211 {
16212 output_operand_lossage ("invalid operand for code '%c'", code);
16213 return;
16214 }
16215
16216 fputs (arm_condition_codes[get_arm_condition_code (x)],
16217 stream);
16218 return;
16219
16220 case 'D':
16221 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16222 want to do that. */
16223 if (x == const_true_rtx)
16224 {
16225 output_operand_lossage ("instruction never executed");
16226 return;
16227 }
16228 if (!COMPARISON_P (x))
16229 {
16230 output_operand_lossage ("invalid operand for code '%c'", code);
16231 return;
16232 }
16233
16234 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
16235 (get_arm_condition_code (x))],
16236 stream);
16237 return;
16238
16239 /* Cirrus registers can be accessed in a variety of ways:
16240 single floating point (f)
16241 double floating point (d)
16242 32bit integer (fx)
16243 64bit integer (dx). */
16244 case 'W': /* Cirrus register in F mode. */
16245 case 'X': /* Cirrus register in D mode. */
16246 case 'Y': /* Cirrus register in FX mode. */
16247 case 'Z': /* Cirrus register in DX mode. */
16248 gcc_assert (GET_CODE (x) == REG
16249 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
16250
16251 fprintf (stream, "mv%s%s",
16252 code == 'W' ? "f"
16253 : code == 'X' ? "d"
16254 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
16255
16256 return;
16257
16258 /* Print cirrus register in the mode specified by the register's mode. */
16259 case 'V':
16260 {
16261 int mode = GET_MODE (x);
16262
16263 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
16264 {
16265 output_operand_lossage ("invalid operand for code '%c'", code);
16266 return;
16267 }
16268
16269 fprintf (stream, "mv%s%s",
16270 mode == DFmode ? "d"
16271 : mode == SImode ? "fx"
16272 : mode == DImode ? "dx"
16273 : "f", reg_names[REGNO (x)] + 2);
16274
16275 return;
16276 }
16277
16278 case 'U':
16279 if (GET_CODE (x) != REG
16280 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
16281 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
16282 /* Bad value for wCG register number. */
16283 {
16284 output_operand_lossage ("invalid operand for code '%c'", code);
16285 return;
16286 }
16287
16288 else
16289 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
16290 return;
16291
16292 /* Print an iWMMXt control register name. */
16293 case 'w':
16294 if (GET_CODE (x) != CONST_INT
16295 || INTVAL (x) < 0
16296 || INTVAL (x) >= 16)
16297 /* Bad value for wC register number. */
16298 {
16299 output_operand_lossage ("invalid operand for code '%c'", code);
16300 return;
16301 }
16302
16303 else
16304 {
16305 static const char * wc_reg_names [16] =
16306 {
16307 "wCID", "wCon", "wCSSF", "wCASF",
16308 "wC4", "wC5", "wC6", "wC7",
16309 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16310 "wC12", "wC13", "wC14", "wC15"
16311 };
16312
16313 fprintf (stream, wc_reg_names [INTVAL (x)]);
16314 }
16315 return;
16316
16317 /* Print the high single-precision register of a VFP double-precision
16318 register. */
16319 case 'p':
16320 {
16321 int mode = GET_MODE (x);
16322 int regno;
16323
16324 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
16325 {
16326 output_operand_lossage ("invalid operand for code '%c'", code);
16327 return;
16328 }
16329
16330 regno = REGNO (x);
16331 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
16332 {
16333 output_operand_lossage ("invalid operand for code '%c'", code);
16334 return;
16335 }
16336
16337 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
16338 }
16339 return;
16340
16341 /* Print a VFP/Neon double precision or quad precision register name. */
16342 case 'P':
16343 case 'q':
16344 {
16345 int mode = GET_MODE (x);
16346 int is_quad = (code == 'q');
16347 int regno;
16348
16349 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
16350 {
16351 output_operand_lossage ("invalid operand for code '%c'", code);
16352 return;
16353 }
16354
16355 if (GET_CODE (x) != REG
16356 || !IS_VFP_REGNUM (REGNO (x)))
16357 {
16358 output_operand_lossage ("invalid operand for code '%c'", code);
16359 return;
16360 }
16361
16362 regno = REGNO (x);
16363 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
16364 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
16365 {
16366 output_operand_lossage ("invalid operand for code '%c'", code);
16367 return;
16368 }
16369
16370 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
16371 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
16372 }
16373 return;
16374
16375 /* These two codes print the low/high doubleword register of a Neon quad
16376 register, respectively. For pair-structure types, can also print
16377 low/high quadword registers. */
16378 case 'e':
16379 case 'f':
16380 {
16381 int mode = GET_MODE (x);
16382 int regno;
16383
16384 if ((GET_MODE_SIZE (mode) != 16
16385 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
16386 {
16387 output_operand_lossage ("invalid operand for code '%c'", code);
16388 return;
16389 }
16390
16391 regno = REGNO (x);
16392 if (!NEON_REGNO_OK_FOR_QUAD (regno))
16393 {
16394 output_operand_lossage ("invalid operand for code '%c'", code);
16395 return;
16396 }
16397
16398 if (GET_MODE_SIZE (mode) == 16)
16399 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
16400 + (code == 'f' ? 1 : 0));
16401 else
16402 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
16403 + (code == 'f' ? 1 : 0));
16404 }
16405 return;
16406
16407 /* Print a VFPv3 floating-point constant, represented as an integer
16408 index. */
16409 case 'G':
16410 {
16411 int index = vfp3_const_double_index (x);
16412 gcc_assert (index != -1);
16413 fprintf (stream, "%d", index);
16414 }
16415 return;
16416
16417 /* Print bits representing opcode features for Neon.
16418
16419 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16420 and polynomials as unsigned.
16421
16422 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16423
16424 Bit 2 is 1 for rounding functions, 0 otherwise. */
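 /* As an illustration of the encoding above, a bits value of 3 (signed,
    float) makes '%T' print 'f', while 2 (unsigned, polynomial) prints 'p'.  */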
16425
16426 /* Identify the type as 's', 'u', 'p' or 'f'. */
16427 case 'T':
16428 {
16429 HOST_WIDE_INT bits = INTVAL (x);
16430 fputc ("uspf"[bits & 3], stream);
16431 }
16432 return;
16433
16434 /* Likewise, but signed and unsigned integers are both 'i'. */
16435 case 'F':
16436 {
16437 HOST_WIDE_INT bits = INTVAL (x);
16438 fputc ("iipf"[bits & 3], stream);
16439 }
16440 return;
16441
16442 /* As for 'T', but emit 'u' instead of 'p'. */
16443 case 't':
16444 {
16445 HOST_WIDE_INT bits = INTVAL (x);
16446 fputc ("usuf"[bits & 3], stream);
16447 }
16448 return;
16449
16450 /* Bit 2: rounding (vs none). */
16451 case 'O':
16452 {
16453 HOST_WIDE_INT bits = INTVAL (x);
16454 fputs ((bits & 4) != 0 ? "r" : "", stream);
16455 }
16456 return;
16457
16458 /* Memory operand for vld1/vst1 instruction. */
16459 case 'A':
16460 {
16461 rtx addr;
16462 bool postinc = FALSE;
16463 unsigned align, memsize, align_bits;
16464
16465 gcc_assert (GET_CODE (x) == MEM);
16466 addr = XEXP (x, 0);
16467 if (GET_CODE (addr) == POST_INC)
16468 {
16469 postinc = 1;
16470 addr = XEXP (addr, 0);
16471 }
16472 asm_fprintf (stream, "[%r", REGNO (addr));
16473
16474 /* We know the alignment of this access, so we can emit a hint in the
16475 instruction (for some alignments) as an aid to the memory subsystem
16476 of the target. */
16477 align = MEM_ALIGN (x) >> 3;
16478 memsize = INTVAL (MEM_SIZE (x));
16479
16480 /* Only certain alignment specifiers are supported by the hardware. */
16481 if (memsize == 16 && (align % 32) == 0)
16482 align_bits = 256;
16483 else if ((memsize == 8 || memsize == 16) && (align % 16) == 0)
16484 align_bits = 128;
16485 else if ((align % 8) == 0)
16486 align_bits = 64;
16487 else
16488 align_bits = 0;
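	/* E.g. a 16-byte access known to be 32-byte aligned gets a ":256"
	   hint, printing as "[r0:256]" (an illustrative register choice).  */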
16489
16490 if (align_bits != 0)
16491 asm_fprintf (stream, ":%d", align_bits);
16492
16493 asm_fprintf (stream, "]");
16494
16495 if (postinc)
16496 fputs("!", stream);
16497 }
16498 return;
16499
16500 case 'C':
16501 {
16502 rtx addr;
16503
16504 gcc_assert (GET_CODE (x) == MEM);
16505 addr = XEXP (x, 0);
16506 gcc_assert (GET_CODE (addr) == REG);
16507 asm_fprintf (stream, "[%r]", REGNO (addr));
16508 }
16509 return;
16510
16511 /* Translate an S register number into a D register number and element index. */
16512 case 'y':
16513 {
16514 int mode = GET_MODE (x);
16515 int regno;
16516
16517 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
16518 {
16519 output_operand_lossage ("invalid operand for code '%c'", code);
16520 return;
16521 }
16522
16523 regno = REGNO (x);
16524 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16525 {
16526 output_operand_lossage ("invalid operand for code '%c'", code);
16527 return;
16528 }
16529
16530 regno = regno - FIRST_VFP_REGNUM;
16531 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
16532 }
16533 return;
16534
16535 /* Register specifier for vld1.16/vst1.16. Translate the S register
16536 number into a D register number and element index. */
16537 case 'z':
16538 {
16539 int mode = GET_MODE (x);
16540 int regno;
16541
16542 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
16543 {
16544 output_operand_lossage ("invalid operand for code '%c'", code);
16545 return;
16546 }
16547
16548 regno = REGNO (x);
16549 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
16550 {
16551 output_operand_lossage ("invalid operand for code '%c'", code);
16552 return;
16553 }
16554
16555 regno = regno - FIRST_VFP_REGNUM;
16556 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
16557 }
16558 return;
16559
16560 default:
16561 if (x == 0)
16562 {
16563 output_operand_lossage ("missing operand");
16564 return;
16565 }
16566
16567 switch (GET_CODE (x))
16568 {
16569 case REG:
16570 asm_fprintf (stream, "%r", REGNO (x));
16571 break;
16572
16573 case MEM:
16574 output_memory_reference_mode = GET_MODE (x);
16575 output_address (XEXP (x, 0));
16576 break;
16577
16578 case CONST_DOUBLE:
16579 if (TARGET_NEON)
16580 {
16581 char fpstr[20];
16582 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
16583 sizeof (fpstr), 0, 1);
16584 fprintf (stream, "#%s", fpstr);
16585 }
16586 else
16587 fprintf (stream, "#%s", fp_immediate_constant (x));
16588 break;
16589
16590 default:
16591 gcc_assert (GET_CODE (x) != NEG);
16592 fputc ('#', stream);
16593 if (GET_CODE (x) == HIGH)
16594 {
16595 fputs (":lower16:", stream);
16596 x = XEXP (x, 0);
16597 }
16598
16599 output_addr_const (stream, x);
16600 break;
16601 }
16602 }
16603 }
16604 \f
16605 /* Target hook for printing a memory address. */
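/* As a rough illustration of the formats produced below: a register plus
   constant address such as (plus (reg) (const_int 8)) is printed as
   "[r1, #8]", a register index as "[r1, r2]", and an SImode
   (post_inc (reg)) as "[r1], #4".  */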
16606 static void
16607 arm_print_operand_address (FILE *stream, rtx x)
16608 {
16609 if (TARGET_32BIT)
16610 {
16611 int is_minus = GET_CODE (x) == MINUS;
16612
16613 if (GET_CODE (x) == REG)
16614 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16615 else if (GET_CODE (x) == PLUS || is_minus)
16616 {
16617 rtx base = XEXP (x, 0);
16618 rtx index = XEXP (x, 1);
16619 HOST_WIDE_INT offset = 0;
16620 if (GET_CODE (base) != REG
16621 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16622 {
16623 /* Ensure that BASE is a register. */
16624 /* (One of them must be.) */
16625 /* Also ensure the SP is not used as an index register. */
16626 rtx temp = base;
16627 base = index;
16628 index = temp;
16629 }
16630 switch (GET_CODE (index))
16631 {
16632 case CONST_INT:
16633 offset = INTVAL (index);
16634 if (is_minus)
16635 offset = -offset;
16636 asm_fprintf (stream, "[%r, #%wd]",
16637 REGNO (base), offset);
16638 break;
16639
16640 case REG:
16641 asm_fprintf (stream, "[%r, %s%r]",
16642 REGNO (base), is_minus ? "-" : "",
16643 REGNO (index));
16644 break;
16645
16646 case MULT:
16647 case ASHIFTRT:
16648 case LSHIFTRT:
16649 case ASHIFT:
16650 case ROTATERT:
16651 {
16652 asm_fprintf (stream, "[%r, %s%r",
16653 REGNO (base), is_minus ? "-" : "",
16654 REGNO (XEXP (index, 0)));
16655 arm_print_operand (stream, index, 'S');
16656 fputs ("]", stream);
16657 break;
16658 }
16659
16660 default:
16661 gcc_unreachable ();
16662 }
16663 }
16664 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16665 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16666 {
16667 extern enum machine_mode output_memory_reference_mode;
16668
16669 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16670
16671 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16672 asm_fprintf (stream, "[%r, #%s%d]!",
16673 REGNO (XEXP (x, 0)),
16674 GET_CODE (x) == PRE_DEC ? "-" : "",
16675 GET_MODE_SIZE (output_memory_reference_mode));
16676 else
16677 asm_fprintf (stream, "[%r], #%s%d",
16678 REGNO (XEXP (x, 0)),
16679 GET_CODE (x) == POST_DEC ? "-" : "",
16680 GET_MODE_SIZE (output_memory_reference_mode));
16681 }
16682 else if (GET_CODE (x) == PRE_MODIFY)
16683 {
16684 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16685 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16686 asm_fprintf (stream, "#%wd]!",
16687 INTVAL (XEXP (XEXP (x, 1), 1)));
16688 else
16689 asm_fprintf (stream, "%r]!",
16690 REGNO (XEXP (XEXP (x, 1), 1)));
16691 }
16692 else if (GET_CODE (x) == POST_MODIFY)
16693 {
16694 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16695 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16696 asm_fprintf (stream, "#%wd",
16697 INTVAL (XEXP (XEXP (x, 1), 1)));
16698 else
16699 asm_fprintf (stream, "%r",
16700 REGNO (XEXP (XEXP (x, 1), 1)));
16701 }
16702 else output_addr_const (stream, x);
16703 }
16704 else
16705 {
16706 if (GET_CODE (x) == REG)
16707 asm_fprintf (stream, "[%r]", REGNO (x));
16708 else if (GET_CODE (x) == POST_INC)
16709 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16710 else if (GET_CODE (x) == PLUS)
16711 {
16712 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16713 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16714 asm_fprintf (stream, "[%r, #%wd]",
16715 REGNO (XEXP (x, 0)),
16716 INTVAL (XEXP (x, 1)));
16717 else
16718 asm_fprintf (stream, "[%r, %r]",
16719 REGNO (XEXP (x, 0)),
16720 REGNO (XEXP (x, 1)));
16721 }
16722 else
16723 output_addr_const (stream, x);
16724 }
16725 }
16726 \f
16727 /* Target hook for indicating whether a punctuation character for
16728 TARGET_PRINT_OPERAND is valid. */
16729 static bool
16730 arm_print_operand_punct_valid_p (unsigned char code)
16731 {
16732 return (code == '@' || code == '|' || code == '.'
16733 || code == '(' || code == ')' || code == '#'
16734 || (TARGET_32BIT && (code == '?'))
16735 || (TARGET_THUMB2 && (code == '!'))
16736 || (TARGET_THUMB && (code == '_')));
16737 }
16738 \f
16739 /* Target hook for assembling integer objects. The ARM version needs to
16740 handle word-sized values specially. */
16741 static bool
16742 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
16743 {
16744 enum machine_mode mode;
16745
16746 if (size == UNITS_PER_WORD && aligned_p)
16747 {
16748 fputs ("\t.word\t", asm_out_file);
16749 output_addr_const (asm_out_file, x);
16750
16751 /* Mark symbols as position independent. We only do this in the
16752 .text segment, not in the .data segment. */
16753 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
16754 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
16755 {
16756 /* See legitimize_pic_address for an explanation of the
16757 TARGET_VXWORKS_RTP check. */
16758 if (TARGET_VXWORKS_RTP
16759 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
16760 fputs ("(GOT)", asm_out_file);
16761 else
16762 fputs ("(GOTOFF)", asm_out_file);
16763 }
16764 fputc ('\n', asm_out_file);
16765 return true;
16766 }
16767
16768 mode = GET_MODE (x);
16769
16770 if (arm_vector_mode_supported_p (mode))
16771 {
16772 int i, units;
16773
16774 gcc_assert (GET_CODE (x) == CONST_VECTOR);
16775
16776 units = CONST_VECTOR_NUNITS (x);
16777 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16778
16779 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16780 for (i = 0; i < units; i++)
16781 {
16782 rtx elt = CONST_VECTOR_ELT (x, i);
16783 assemble_integer
16784 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
16785 }
16786 else
16787 for (i = 0; i < units; i++)
16788 {
16789 rtx elt = CONST_VECTOR_ELT (x, i);
16790 REAL_VALUE_TYPE rval;
16791
16792 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
16793
16794 assemble_real
16795 (rval, GET_MODE_INNER (mode),
16796 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
16797 }
16798
16799 return true;
16800 }
16801
16802 return default_assemble_integer (x, size, aligned_p);
16803 }
16804
16805 static void
16806 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
16807 {
16808 section *s;
16809
16810 if (!TARGET_AAPCS_BASED)
16811 {
16812 (is_ctor ?
16813 default_named_section_asm_out_constructor
16814 : default_named_section_asm_out_destructor) (symbol, priority);
16815 return;
16816 }
16817
16818 /* Put these in the .init_array section, using a special relocation. */
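/* For instance, a constructor with priority 123 should end up in a section
   named ".init_array.00123", emitted roughly as "\t.word\tsymbol(target1)"
   so that the linker resolves the entry via the TARGET1 relocation.  */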
16819 if (priority != DEFAULT_INIT_PRIORITY)
16820 {
16821 char buf[18];
16822 sprintf (buf, "%s.%.5u",
16823 is_ctor ? ".init_array" : ".fini_array",
16824 priority);
16825 s = get_section (buf, SECTION_WRITE, NULL_TREE);
16826 }
16827 else if (is_ctor)
16828 s = ctors_section;
16829 else
16830 s = dtors_section;
16831
16832 switch_to_section (s);
16833 assemble_align (POINTER_SIZE);
16834 fputs ("\t.word\t", asm_out_file);
16835 output_addr_const (asm_out_file, symbol);
16836 fputs ("(target1)\n", asm_out_file);
16837 }
16838
16839 /* Add a function to the list of static constructors. */
16840
16841 static void
16842 arm_elf_asm_constructor (rtx symbol, int priority)
16843 {
16844 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
16845 }
16846
16847 /* Add a function to the list of static destructors. */
16848
16849 static void
16850 arm_elf_asm_destructor (rtx symbol, int priority)
16851 {
16852 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
16853 }
16854 \f
16855 /* A finite state machine takes care of noticing whether or not instructions
16856 can be conditionally executed, and thus decrease execution time and code
16857 size by deleting branch instructions. The fsm is controlled by
16858 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16859
16860 /* The states of the fsm controlling condition codes are:
16861 0: normal, do nothing special
16862 1: make ASM_OUTPUT_OPCODE not output this instruction
16863 2: make ASM_OUTPUT_OPCODE not output this instruction
16864 3: make instructions conditional
16865 4: make instructions conditional
16866
16867 State transitions (state->state by whom under condition):
16868 0 -> 1 final_prescan_insn if the `target' is a label
16869 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16870 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16871 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16872 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16873 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16874 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16875 (the target insn is arm_target_insn).
16876
16877 If the jump clobbers the conditions then we use states 2 and 4.
16878
16879 A similar thing can be done with conditional return insns.
16880
16881 XXX In case the `target' is an unconditional branch, this conditionalising
16882 of the instructions always reduces code size, but not always execution
16883 time. But then, I want to reduce the code size to somewhere near what
16884 /bin/cc produces. */
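/* As a sketch of what the fsm achieves, a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can instead be emitted as

	cmp	r0, #0
	addne	r1, r1, #1

   provided all of the skipped instructions can be conditionalised.  */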
16885
16886 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16887 instructions. When a COND_EXEC instruction is seen the subsequent
16888 instructions are scanned so that multiple conditional instructions can be
16889 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16890 specify the length and true/false mask for the IT block. These will be
16891 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
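/* For example, two adjacent COND_EXEC insns predicated on EQ and NE
   respectively would typically be emitted as a single IT block:

	ite	eq
	moveq	r0, #1
	movne	r0, #0  */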
16892
16893 /* Returns the index of the ARM condition code string in
16894 `arm_condition_codes'. COMPARISON should be an rtx like
16895 `(eq (...) (...))'. */
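/* For instance, (eq (reg:CC CC_REGNUM) (const_int 0)) yields ARM_EQ, while
   in CC_SWPmode, where the comparison operands were swapped when the flags
   were set, (gt ...) yields ARM_LT.  */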
16896 static enum arm_cond_code
16897 get_arm_condition_code (rtx comparison)
16898 {
16899 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
16900 enum arm_cond_code code;
16901 enum rtx_code comp_code = GET_CODE (comparison);
16902
16903 if (GET_MODE_CLASS (mode) != MODE_CC)
16904 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
16905 XEXP (comparison, 1));
16906
16907 switch (mode)
16908 {
16909 case CC_DNEmode: code = ARM_NE; goto dominance;
16910 case CC_DEQmode: code = ARM_EQ; goto dominance;
16911 case CC_DGEmode: code = ARM_GE; goto dominance;
16912 case CC_DGTmode: code = ARM_GT; goto dominance;
16913 case CC_DLEmode: code = ARM_LE; goto dominance;
16914 case CC_DLTmode: code = ARM_LT; goto dominance;
16915 case CC_DGEUmode: code = ARM_CS; goto dominance;
16916 case CC_DGTUmode: code = ARM_HI; goto dominance;
16917 case CC_DLEUmode: code = ARM_LS; goto dominance;
16918 case CC_DLTUmode: code = ARM_CC;
16919
16920 dominance:
16921 gcc_assert (comp_code == EQ || comp_code == NE);
16922
16923 if (comp_code == EQ)
16924 return ARM_INVERSE_CONDITION_CODE (code);
16925 return code;
16926
16927 case CC_NOOVmode:
16928 switch (comp_code)
16929 {
16930 case NE: return ARM_NE;
16931 case EQ: return ARM_EQ;
16932 case GE: return ARM_PL;
16933 case LT: return ARM_MI;
16934 default: gcc_unreachable ();
16935 }
16936
16937 case CC_Zmode:
16938 switch (comp_code)
16939 {
16940 case NE: return ARM_NE;
16941 case EQ: return ARM_EQ;
16942 default: gcc_unreachable ();
16943 }
16944
16945 case CC_Nmode:
16946 switch (comp_code)
16947 {
16948 case NE: return ARM_MI;
16949 case EQ: return ARM_PL;
16950 default: gcc_unreachable ();
16951 }
16952
16953 case CCFPEmode:
16954 case CCFPmode:
16955 /* These encodings assume that AC=1 in the FPA system control
16956 byte. This allows us to handle all cases except UNEQ and
16957 LTGT. */
16958 switch (comp_code)
16959 {
16960 case GE: return ARM_GE;
16961 case GT: return ARM_GT;
16962 case LE: return ARM_LS;
16963 case LT: return ARM_MI;
16964 case NE: return ARM_NE;
16965 case EQ: return ARM_EQ;
16966 case ORDERED: return ARM_VC;
16967 case UNORDERED: return ARM_VS;
16968 case UNLT: return ARM_LT;
16969 case UNLE: return ARM_LE;
16970 case UNGT: return ARM_HI;
16971 case UNGE: return ARM_PL;
16972 /* UNEQ and LTGT do not have a representation. */
16973 case UNEQ: /* Fall through. */
16974 case LTGT: /* Fall through. */
16975 default: gcc_unreachable ();
16976 }
16977
16978 case CC_SWPmode:
16979 switch (comp_code)
16980 {
16981 case NE: return ARM_NE;
16982 case EQ: return ARM_EQ;
16983 case GE: return ARM_LE;
16984 case GT: return ARM_LT;
16985 case LE: return ARM_GE;
16986 case LT: return ARM_GT;
16987 case GEU: return ARM_LS;
16988 case GTU: return ARM_CC;
16989 case LEU: return ARM_CS;
16990 case LTU: return ARM_HI;
16991 default: gcc_unreachable ();
16992 }
16993
16994 case CC_Cmode:
16995 switch (comp_code)
16996 {
16997 case LTU: return ARM_CS;
16998 case GEU: return ARM_CC;
16999 default: gcc_unreachable ();
17000 }
17001
17002 case CC_CZmode:
17003 switch (comp_code)
17004 {
17005 case NE: return ARM_NE;
17006 case EQ: return ARM_EQ;
17007 case GEU: return ARM_CS;
17008 case GTU: return ARM_HI;
17009 case LEU: return ARM_LS;
17010 case LTU: return ARM_CC;
17011 default: gcc_unreachable ();
17012 }
17013
17014 case CC_NCVmode:
17015 switch (comp_code)
17016 {
17017 case GE: return ARM_GE;
17018 case LT: return ARM_LT;
17019 case GEU: return ARM_CS;
17020 case LTU: return ARM_CC;
17021 default: gcc_unreachable ();
17022 }
17023
17024 case CCmode:
17025 switch (comp_code)
17026 {
17027 case NE: return ARM_NE;
17028 case EQ: return ARM_EQ;
17029 case GE: return ARM_GE;
17030 case GT: return ARM_GT;
17031 case LE: return ARM_LE;
17032 case LT: return ARM_LT;
17033 case GEU: return ARM_CS;
17034 case GTU: return ARM_HI;
17035 case LEU: return ARM_LS;
17036 case LTU: return ARM_CC;
17037 default: gcc_unreachable ();
17038 }
17039
17040 default: gcc_unreachable ();
17041 }
17042 }
17043
17044 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17045 instructions. */
17046 void
17047 thumb2_final_prescan_insn (rtx insn)
17048 {
17049 rtx first_insn = insn;
17050 rtx body = PATTERN (insn);
17051 rtx predicate;
17052 enum arm_cond_code code;
17053 int n;
17054 int mask;
17055
17056 /* Remove the previous insn from the count of insns to be output. */
17057 if (arm_condexec_count)
17058 arm_condexec_count--;
17059
17060 /* Nothing to do if we are already inside a conditional block. */
17061 if (arm_condexec_count)
17062 return;
17063
17064 if (GET_CODE (body) != COND_EXEC)
17065 return;
17066
17067 /* Conditional jumps are implemented directly. */
17068 if (GET_CODE (insn) == JUMP_INSN)
17069 return;
17070
17071 predicate = COND_EXEC_TEST (body);
17072 arm_current_cc = get_arm_condition_code (predicate);
17073
17074 n = get_attr_ce_count (insn);
17075 arm_condexec_count = 1;
17076 arm_condexec_mask = (1 << n) - 1;
17077 arm_condexec_masklen = n;
17078 /* See if subsequent instructions can be combined into the same block. */
17079 for (;;)
17080 {
17081 insn = next_nonnote_insn (insn);
17082
17083 /* Jumping into the middle of an IT block is illegal, so a label or
17084 barrier terminates the block. */
17085 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17086 break;
17087
17088 body = PATTERN (insn);
17089 /* USE and CLOBBER aren't really insns, so just skip them. */
17090 if (GET_CODE (body) == USE
17091 || GET_CODE (body) == CLOBBER)
17092 continue;
17093
17094 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17095 if (GET_CODE (body) != COND_EXEC)
17096 break;
17097 /* Allow up to 4 conditionally executed instructions in a block. */
17098 n = get_attr_ce_count (insn);
17099 if (arm_condexec_masklen + n > 4)
17100 break;
17101
17102 predicate = COND_EXEC_TEST (body);
17103 code = get_arm_condition_code (predicate);
17104 mask = (1 << n) - 1;
17105 if (arm_current_cc == code)
17106 arm_condexec_mask |= (mask << arm_condexec_masklen);
17107 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17108 break;
17109
17110 arm_condexec_count++;
17111 arm_condexec_masklen += n;
17112
17113 /* A jump must be the last instruction in a conditional block. */
17114 if (GET_CODE(insn) == JUMP_INSN)
17115 break;
17116 }
17117 /* Restore recog_data (getting the attributes of other insns can
17118 destroy this array, but final.c assumes that it remains intact
17119 across this call). */
17120 extract_constrain_insn_cached (first_insn);
17121 }
17122
17123 void
17124 arm_final_prescan_insn (rtx insn)
17125 {
17126 /* BODY will hold the body of INSN. */
17127 rtx body = PATTERN (insn);
17128
17129 /* This will be 1 if trying to repeat the trick, and things need to be
17130 reversed if it appears to fail. */
17131 int reverse = 0;
17132
17133 /* If we start with a return insn, we only succeed if we find another one. */
17134 int seeking_return = 0;
17135
17136 /* START_INSN will hold the insn from where we start looking. This is the
17137 first insn after the following code_label if REVERSE is true. */
17138 rtx start_insn = insn;
17139
17140 /* If in state 4, check if the target branch is reached, in order to
17141 change back to state 0. */
17142 if (arm_ccfsm_state == 4)
17143 {
17144 if (insn == arm_target_insn)
17145 {
17146 arm_target_insn = NULL;
17147 arm_ccfsm_state = 0;
17148 }
17149 return;
17150 }
17151
17152 /* If in state 3, it is possible to repeat the trick, if this insn is an
17153 unconditional branch to a label, and immediately following this branch
17154 is the previous target label which is only used once, and the label this
17155 branch jumps to is not too far off. */
17156 if (arm_ccfsm_state == 3)
17157 {
17158 if (simplejump_p (insn))
17159 {
17160 start_insn = next_nonnote_insn (start_insn);
17161 if (GET_CODE (start_insn) == BARRIER)
17162 {
17163 /* XXX Isn't this always a barrier? */
17164 start_insn = next_nonnote_insn (start_insn);
17165 }
17166 if (GET_CODE (start_insn) == CODE_LABEL
17167 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17168 && LABEL_NUSES (start_insn) == 1)
17169 reverse = TRUE;
17170 else
17171 return;
17172 }
17173 else if (GET_CODE (body) == RETURN)
17174 {
17175 start_insn = next_nonnote_insn (start_insn);
17176 if (GET_CODE (start_insn) == BARRIER)
17177 start_insn = next_nonnote_insn (start_insn);
17178 if (GET_CODE (start_insn) == CODE_LABEL
17179 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
17180 && LABEL_NUSES (start_insn) == 1)
17181 {
17182 reverse = TRUE;
17183 seeking_return = 1;
17184 }
17185 else
17186 return;
17187 }
17188 else
17189 return;
17190 }
17191
17192 gcc_assert (!arm_ccfsm_state || reverse);
17193 if (GET_CODE (insn) != JUMP_INSN)
17194 return;
17195
17196 /* This jump might be paralleled with a clobber of the condition codes;
17197 the jump should always come first. */
17198 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
17199 body = XVECEXP (body, 0, 0);
17200
17201 if (reverse
17202 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
17203 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
17204 {
17205 int insns_skipped;
17206 int fail = FALSE, succeed = FALSE;
17207 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17208 int then_not_else = TRUE;
17209 rtx this_insn = start_insn, label = 0;
17210
17211 /* Register the insn jumped to. */
17212 if (reverse)
17213 {
17214 if (!seeking_return)
17215 label = XEXP (SET_SRC (body), 0);
17216 }
17217 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
17218 label = XEXP (XEXP (SET_SRC (body), 1), 0);
17219 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
17220 {
17221 label = XEXP (XEXP (SET_SRC (body), 2), 0);
17222 then_not_else = FALSE;
17223 }
17224 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
17225 seeking_return = 1;
17226 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
17227 {
17228 seeking_return = 1;
17229 then_not_else = FALSE;
17230 }
17231 else
17232 gcc_unreachable ();
17233
17234 /* See how many insns this branch skips, and what kind of insns. If all
17235 insns are okay, and the label or unconditional branch to the same
17236 label is not too far away, succeed. */
17237 for (insns_skipped = 0;
17238 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
17239 {
17240 rtx scanbody;
17241
17242 this_insn = next_nonnote_insn (this_insn);
17243 if (!this_insn)
17244 break;
17245
17246 switch (GET_CODE (this_insn))
17247 {
17248 case CODE_LABEL:
17249 /* Succeed if it is the target label, otherwise fail since
17250 control falls in from somewhere else. */
17251 if (this_insn == label)
17252 {
17253 arm_ccfsm_state = 1;
17254 succeed = TRUE;
17255 }
17256 else
17257 fail = TRUE;
17258 break;
17259
17260 case BARRIER:
17261 /* Succeed if the following insn is the target label.
17262 Otherwise fail.
17263 If return insns are used then the last insn in a function
17264 will be a barrier. */
17265 this_insn = next_nonnote_insn (this_insn);
17266 if (this_insn && this_insn == label)
17267 {
17268 arm_ccfsm_state = 1;
17269 succeed = TRUE;
17270 }
17271 else
17272 fail = TRUE;
17273 break;
17274
17275 case CALL_INSN:
17276 /* The AAPCS says that conditional calls should not be
17277 used since they make interworking inefficient (the
17278 linker can't transform BL<cond> into BLX). That's
17279 only a problem if the machine has BLX. */
17280 if (arm_arch5)
17281 {
17282 fail = TRUE;
17283 break;
17284 }
17285
17286 /* Succeed if the following insn is the target label, or
17287 if the following two insns are a barrier and the
17288 target label. */
17289 this_insn = next_nonnote_insn (this_insn);
17290 if (this_insn && GET_CODE (this_insn) == BARRIER)
17291 this_insn = next_nonnote_insn (this_insn);
17292
17293 if (this_insn && this_insn == label
17294 && insns_skipped < max_insns_skipped)
17295 {
17296 arm_ccfsm_state = 1;
17297 succeed = TRUE;
17298 }
17299 else
17300 fail = TRUE;
17301 break;
17302
17303 case JUMP_INSN:
17304 /* If this is an unconditional branch to the same label, succeed.
17305 If it is to another label, do nothing. If it is conditional,
17306 fail. */
17307 /* XXX Probably, the tests for SET and the PC are
17308 unnecessary. */
17309
17310 scanbody = PATTERN (this_insn);
17311 if (GET_CODE (scanbody) == SET
17312 && GET_CODE (SET_DEST (scanbody)) == PC)
17313 {
17314 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
17315 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
17316 {
17317 arm_ccfsm_state = 2;
17318 succeed = TRUE;
17319 }
17320 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
17321 fail = TRUE;
17322 }
17323 /* Fail if a conditional return is undesirable (e.g. on a
17324 StrongARM), but still allow this if optimizing for size. */
17325 else if (GET_CODE (scanbody) == RETURN
17326 && !use_return_insn (TRUE, NULL)
17327 && !optimize_size)
17328 fail = TRUE;
17329 else if (GET_CODE (scanbody) == RETURN
17330 && seeking_return)
17331 {
17332 arm_ccfsm_state = 2;
17333 succeed = TRUE;
17334 }
17335 else if (GET_CODE (scanbody) == PARALLEL)
17336 {
17337 switch (get_attr_conds (this_insn))
17338 {
17339 case CONDS_NOCOND:
17340 break;
17341 default:
17342 fail = TRUE;
17343 break;
17344 }
17345 }
17346 else
17347 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
17348
17349 break;
17350
17351 case INSN:
17352 /* Instructions using or affecting the condition codes make it
17353 fail. */
17354 scanbody = PATTERN (this_insn);
17355 if (!(GET_CODE (scanbody) == SET
17356 || GET_CODE (scanbody) == PARALLEL)
17357 || get_attr_conds (this_insn) != CONDS_NOCOND)
17358 fail = TRUE;
17359
17360 /* A conditional Cirrus instruction must be followed by
17361 a non-Cirrus instruction.  However, since we
17362 conditionalize instructions in this function, and
17363 since by the time we get here we can no longer add
17364 instructions (nops) because shorten_branches() has
17365 already been called, we disable conditionalizing
17366 Cirrus instructions to be safe. */
17367 if (GET_CODE (scanbody) != USE
17368 && GET_CODE (scanbody) != CLOBBER
17369 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
17370 fail = TRUE;
17371 break;
17372
17373 default:
17374 break;
17375 }
17376 }
17377 if (succeed)
17378 {
17379 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
17380 arm_target_label = CODE_LABEL_NUMBER (label);
17381 else
17382 {
17383 gcc_assert (seeking_return || arm_ccfsm_state == 2);
17384
17385 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
17386 {
17387 this_insn = next_nonnote_insn (this_insn);
17388 gcc_assert (!this_insn
17389 || (GET_CODE (this_insn) != BARRIER
17390 && GET_CODE (this_insn) != CODE_LABEL));
17391 }
17392 if (!this_insn)
17393 {
17394 /* Oh, dear!  We ran off the end... give up. */
17395 extract_constrain_insn_cached (insn);
17396 arm_ccfsm_state = 0;
17397 arm_target_insn = NULL;
17398 return;
17399 }
17400 arm_target_insn = this_insn;
17401 }
17402
17403 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17404 what it was. */
17405 if (!reverse)
17406 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
17407
17408 if (reverse || then_not_else)
17409 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
17410 }
17411
17412 /* Restore recog_data (getting the attributes of other insns can
17413 destroy this array, but final.c assumes that it remains intact
17414 across this call). */
17415 extract_constrain_insn_cached (insn);
17416 }
17417 }
17418
17419 /* Output IT instructions. */
17420 void
17421 thumb2_asm_output_opcode (FILE * stream)
17422 {
17423 char buff[5];
17424 int n;
17425
17426 if (arm_condexec_mask)
17427 {
17428 for (n = 0; n < arm_condexec_masklen; n++)
17429 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
17430 buff[n] = 0;
17431 asm_fprintf(stream, "i%s\t%s\n\t", buff,
17432 arm_condition_codes[arm_current_cc]);
17433 arm_condexec_mask = 0;
17434 }
17435 }
17436
17437 /* Returns true if REGNO is a valid register
17438 for holding a quantity of type MODE. */
17439 int
17440 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
17441 {
17442 if (GET_MODE_CLASS (mode) == MODE_CC)
17443 return (regno == CC_REGNUM
17444 || (TARGET_HARD_FLOAT && TARGET_VFP
17445 && regno == VFPCC_REGNUM));
17446
17447 if (TARGET_THUMB1)
17448 /* For the Thumb we only allow values bigger than SImode in
17449 registers 0 - 6, so that there is always a second low
17450 register available to hold the upper part of the value.
17451 We probably ought to ensure that the register is the
17452 start of an even numbered register pair. */
17453 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
17454
17455 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
17456 && IS_CIRRUS_REGNUM (regno))
17457 /* We have outlawed SI values in Cirrus registers because they
17458 reside in the lower 32 bits, but SF values reside in the
17459 upper 32 bits. This causes gcc all sorts of grief. We can't
17460 even split the registers into pairs because Cirrus SI values
17461 get sign extended to 64 bits -- aldyh. */
17462 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
17463
17464 if (TARGET_HARD_FLOAT && TARGET_VFP
17465 && IS_VFP_REGNUM (regno))
17466 {
17467 if (mode == SFmode || mode == SImode)
17468 return VFP_REGNO_OK_FOR_SINGLE (regno);
17469
17470 if (mode == DFmode)
17471 return VFP_REGNO_OK_FOR_DOUBLE (regno);
17472
17473 /* VFP registers can hold HFmode values, but there is no point in
17474 putting them there unless we have hardware conversion insns. */
17475 if (mode == HFmode)
17476 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
17477
17478 if (TARGET_NEON)
17479 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
17480 || (VALID_NEON_QREG_MODE (mode)
17481 && NEON_REGNO_OK_FOR_QUAD (regno))
17482 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
17483 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
17484 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
17485 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
17486 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
17487
17488 return FALSE;
17489 }
17490
17491 if (TARGET_REALLY_IWMMXT)
17492 {
17493 if (IS_IWMMXT_GR_REGNUM (regno))
17494 return mode == SImode;
17495
17496 if (IS_IWMMXT_REGNUM (regno))
17497 return VALID_IWMMXT_REG_MODE (mode);
17498 }
17499
17500 /* We allow almost any value to be stored in the general registers.
17501 Restrict doubleword quantities to even register pairs so that we can
17502 use ldrd. Do not allow very large Neon structure opaque modes in
17503 general registers; they would use too many. */
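/* For example, when ldrd is available a DImode value may live in r0/r1 or
   r2/r3, but not start at an odd register such as r1.  */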
17504 if (regno <= LAST_ARM_REGNUM)
17505 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
17506 && ARM_NUM_REGS (mode) <= 4;
17507
17508 if (regno == FRAME_POINTER_REGNUM
17509 || regno == ARG_POINTER_REGNUM)
17510 /* We only allow integers in the fake hard registers. */
17511 return GET_MODE_CLASS (mode) == MODE_INT;
17512
17513 /* The only registers left are the FPA registers
17514 which we only allow to hold FP values. */
17515 return (TARGET_HARD_FLOAT && TARGET_FPA
17516 && GET_MODE_CLASS (mode) == MODE_FLOAT
17517 && regno >= FIRST_FPA_REGNUM
17518 && regno <= LAST_FPA_REGNUM);
17519 }
17520
17521 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17522 not used in arm mode. */
17523
17524 enum reg_class
17525 arm_regno_class (int regno)
17526 {
17527 if (TARGET_THUMB1)
17528 {
17529 if (regno == STACK_POINTER_REGNUM)
17530 return STACK_REG;
17531 if (regno == CC_REGNUM)
17532 return CC_REG;
17533 if (regno < 8)
17534 return LO_REGS;
17535 return HI_REGS;
17536 }
17537
17538 if (TARGET_THUMB2 && regno < 8)
17539 return LO_REGS;
17540
17541 if ( regno <= LAST_ARM_REGNUM
17542 || regno == FRAME_POINTER_REGNUM
17543 || regno == ARG_POINTER_REGNUM)
17544 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
17545
17546 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
17547 return TARGET_THUMB2 ? CC_REG : NO_REGS;
17548
17549 if (IS_CIRRUS_REGNUM (regno))
17550 return CIRRUS_REGS;
17551
17552 if (IS_VFP_REGNUM (regno))
17553 {
17554 if (regno <= D7_VFP_REGNUM)
17555 return VFP_D0_D7_REGS;
17556 else if (regno <= LAST_LO_VFP_REGNUM)
17557 return VFP_LO_REGS;
17558 else
17559 return VFP_HI_REGS;
17560 }
17561
17562 if (IS_IWMMXT_REGNUM (regno))
17563 return IWMMXT_REGS;
17564
17565 if (IS_IWMMXT_GR_REGNUM (regno))
17566 return IWMMXT_GR_REGS;
17567
17568 return FPA_REGS;
17569 }
17570
17571 /* Handle a special case when computing the offset
17572 of an argument from the frame pointer. */
17573 int
17574 arm_debugger_arg_offset (int value, rtx addr)
17575 {
17576 rtx insn;
17577
17578 /* We are only interested if dbxout_parms() failed to compute the offset. */
17579 if (value != 0)
17580 return 0;
17581
17582 /* We can only cope with the case where the address is held in a register. */
17583 if (GET_CODE (addr) != REG)
17584 return 0;
17585
17586 /* If we are using the frame pointer to point at the argument, then
17587 an offset of 0 is correct. */
17588 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
17589 return 0;
17590
17591 /* If we are using the stack pointer to point at the
17592 argument, then an offset of 0 is correct. */
17593 /* ??? Check this is consistent with thumb2 frame layout. */
17594 if ((TARGET_THUMB || !frame_pointer_needed)
17595 && REGNO (addr) == SP_REGNUM)
17596 return 0;
17597
17598 /* Oh dear. The argument is pointed to by a register rather
17599 than being held in a register, or being stored at a known
17600 offset from the frame pointer. Since GDB only understands
17601 those two kinds of argument we must translate the address
17602 held in the register into an offset from the frame pointer.
17603 We do this by searching through the insns for the function
17604 looking to see where this register gets its value. If the
17605 register is initialized from the frame pointer plus an offset
17606 then we are in luck and we can continue, otherwise we give up.
17607
17608 This code is exercised by producing debugging information
17609 for a function with arguments like this:
17610
17611 double func (double a, double b, int c, double d) {return d;}
17612
17613 Without this code the stab for parameter 'd' will be set to
17614 an offset of 0 from the frame pointer, rather than 8. */
17615
17616 /* The if() statement says:
17617
17618 If the insn is a normal instruction
17619 and if the insn is setting the value in a register
17620 and if the register being set is the register holding the address of the argument
17621 and if the address is computed by an addition
17622 that involves adding to a register
17623 which is the frame pointer
17624 a constant integer
17625
17626 then... */
17627
17628 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17629 {
17630 if ( GET_CODE (insn) == INSN
17631 && GET_CODE (PATTERN (insn)) == SET
17632 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
17633 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
17634 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
17635 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17636 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
17637 )
17638 {
17639 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
17640
17641 break;
17642 }
17643 }
17644
17645 if (value == 0)
17646 {
17647 debug_rtx (addr);
17648 warning (0, "unable to compute real location of stacked parameter");
17649 value = 8; /* XXX magic hack */
17650 }
17651
17652 return value;
17653 }
17654 \f
17655 typedef enum {
17656 T_V8QI,
17657 T_V4HI,
17658 T_V2SI,
17659 T_V2SF,
17660 T_DI,
17661 T_V16QI,
17662 T_V8HI,
17663 T_V4SI,
17664 T_V4SF,
17665 T_V2DI,
17666 T_TI,
17667 T_EI,
17668 T_OI,
17669 T_MAX /* Size of enum. Keep last. */
17670 } neon_builtin_type_mode;
17671
17672 #define TYPE_MODE_BIT(X) (1 << (X))
17673
17674 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
17675 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
17676 | TYPE_MODE_BIT (T_DI))
17677 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
17678 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
17679 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
17680
17681 #define v8qi_UP T_V8QI
17682 #define v4hi_UP T_V4HI
17683 #define v2si_UP T_V2SI
17684 #define v2sf_UP T_V2SF
17685 #define di_UP T_DI
17686 #define v16qi_UP T_V16QI
17687 #define v8hi_UP T_V8HI
17688 #define v4si_UP T_V4SI
17689 #define v4sf_UP T_V4SF
17690 #define v2di_UP T_V2DI
17691 #define ti_UP T_TI
17692 #define ei_UP T_EI
17693 #define oi_UP T_OI
17694
17695 #define UP(X) X##_UP
17696
17697 typedef enum {
17698 NEON_BINOP,
17699 NEON_TERNOP,
17700 NEON_UNOP,
17701 NEON_GETLANE,
17702 NEON_SETLANE,
17703 NEON_CREATE,
17704 NEON_DUP,
17705 NEON_DUPLANE,
17706 NEON_COMBINE,
17707 NEON_SPLIT,
17708 NEON_LANEMUL,
17709 NEON_LANEMULL,
17710 NEON_LANEMULH,
17711 NEON_LANEMAC,
17712 NEON_SCALARMUL,
17713 NEON_SCALARMULL,
17714 NEON_SCALARMULH,
17715 NEON_SCALARMAC,
17716 NEON_CONVERT,
17717 NEON_FIXCONV,
17718 NEON_SELECT,
17719 NEON_RESULTPAIR,
17720 NEON_REINTERP,
17721 NEON_VTBL,
17722 NEON_VTBX,
17723 NEON_LOAD1,
17724 NEON_LOAD1LANE,
17725 NEON_STORE1,
17726 NEON_STORE1LANE,
17727 NEON_LOADSTRUCT,
17728 NEON_LOADSTRUCTLANE,
17729 NEON_STORESTRUCT,
17730 NEON_STORESTRUCTLANE,
17731 NEON_LOGICBINOP,
17732 NEON_SHIFTINSERT,
17733 NEON_SHIFTIMM,
17734 NEON_SHIFTACC
17735 } neon_itype;
17736
17737 typedef struct {
17738 const char *name;
17739 const neon_itype itype;
17740 const neon_builtin_type_mode mode;
17741 const enum insn_code code;
17742 unsigned int fcode;
17743 } neon_builtin_datum;
17744
17745 #define CF(N,X) CODE_FOR_neon_##N##X
17746
17747 #define VAR1(T, N, A) \
17748 {#N, NEON_##T, UP (A), CF (N, A), 0}
17749 #define VAR2(T, N, A, B) \
17750 VAR1 (T, N, A), \
17751 {#N, NEON_##T, UP (B), CF (N, B), 0}
17752 #define VAR3(T, N, A, B, C) \
17753 VAR2 (T, N, A, B), \
17754 {#N, NEON_##T, UP (C), CF (N, C), 0}
17755 #define VAR4(T, N, A, B, C, D) \
17756 VAR3 (T, N, A, B, C), \
17757 {#N, NEON_##T, UP (D), CF (N, D), 0}
17758 #define VAR5(T, N, A, B, C, D, E) \
17759 VAR4 (T, N, A, B, C, D), \
17760 {#N, NEON_##T, UP (E), CF (N, E), 0}
17761 #define VAR6(T, N, A, B, C, D, E, F) \
17762 VAR5 (T, N, A, B, C, D, E), \
17763 {#N, NEON_##T, UP (F), CF (N, F), 0}
17764 #define VAR7(T, N, A, B, C, D, E, F, G) \
17765 VAR6 (T, N, A, B, C, D, E, F), \
17766 {#N, NEON_##T, UP (G), CF (N, G), 0}
17767 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
17768 VAR7 (T, N, A, B, C, D, E, F, G), \
17769 {#N, NEON_##T, UP (H), CF (N, H), 0}
17770 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
17771 VAR8 (T, N, A, B, C, D, E, F, G, H), \
17772 {#N, NEON_##T, UP (I), CF (N, I), 0}
17773 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
17774 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
17775 {#N, NEON_##T, UP (J), CF (N, J), 0}
17776
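/* As an illustration, VAR2 (BINOP, vadd, v8qi, v4hi) expands to the two
   table entries
     {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
     {"vadd", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddv4hi, 0}
   with the trailing zero being the fcode field, which is assigned later
   when the builtins are initialized.  */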
17777 /* The mode entries in the following table correspond to the "key" type of the
17778 instruction variant, i.e. equivalent to that which would be specified after
17779 the assembler mnemonic, which usually refers to the last vector operand.
17780 (Signed/unsigned/polynomial types are not differentiated, though; they are
17781 all mapped onto the same mode for a given element size.) The modes
17782 listed per instruction should be the same as those defined for that
17783 instruction's pattern in neon.md. */
17784
17785 static neon_builtin_datum neon_builtin_data[] =
17786 {
17787 VAR10 (BINOP, vadd,
17788 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17789 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
17790 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
17791 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17792 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17793 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
17794 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17795 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17796 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
17797 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17798 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
17799 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
17800 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
17801 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
17802 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
17803 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
17804 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
17805 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
17806 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
17807 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
17808 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
17809 VAR2 (BINOP, vqdmull, v4hi, v2si),
17810 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17811 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17812 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17813 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
17814 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
17815 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
17816 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17817 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17818 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17819 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
17820 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17821 VAR10 (BINOP, vsub,
17822 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17823 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
17824 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
17825 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17826 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17827 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
17828 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17829 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17830 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17831 VAR2 (BINOP, vcage, v2sf, v4sf),
17832 VAR2 (BINOP, vcagt, v2sf, v4sf),
17833 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17834 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17835 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
17836 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17837 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
17838 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17839 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17840 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
17841 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17842 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17843 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
17844 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
17845 VAR2 (BINOP, vrecps, v2sf, v4sf),
17846 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
17847 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17848 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
17849 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17850 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17851 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17852 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17853 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17854 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17855 VAR2 (UNOP, vcnt, v8qi, v16qi),
17856 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
17857 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
17858 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
17859 /* FIXME: vget_lane supports more variants than this! */
17860 VAR10 (GETLANE, vget_lane,
17861 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17862 VAR10 (SETLANE, vset_lane,
17863 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17864 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
17865 VAR10 (DUP, vdup_n,
17866 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17867 VAR10 (DUPLANE, vdup_lane,
17868 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17869 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
17870 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
17871 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
17872 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
17873 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
17874 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
17875 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
17876 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17877 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17878 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
17879 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
17880 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17881 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
17882 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
17883 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17884 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17885 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
17886 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
17887 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17888 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
17889 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
17890 VAR10 (BINOP, vext,
17891 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17892 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17893 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
17894 VAR2 (UNOP, vrev16, v8qi, v16qi),
17895 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
17896 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
17897 VAR10 (SELECT, vbsl,
17898 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17899 VAR1 (VTBL, vtbl1, v8qi),
17900 VAR1 (VTBL, vtbl2, v8qi),
17901 VAR1 (VTBL, vtbl3, v8qi),
17902 VAR1 (VTBL, vtbl4, v8qi),
17903 VAR1 (VTBX, vtbx1, v8qi),
17904 VAR1 (VTBX, vtbx2, v8qi),
17905 VAR1 (VTBX, vtbx3, v8qi),
17906 VAR1 (VTBX, vtbx4, v8qi),
17907 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17908 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17909 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
17910 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
17911 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
17912 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
17913 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
17914 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
17915 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
17916 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
17917 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
17918 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
17919 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
17920 VAR10 (LOAD1, vld1,
17921 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17922 VAR10 (LOAD1LANE, vld1_lane,
17923 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17924 VAR10 (LOAD1, vld1_dup,
17925 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17926 VAR10 (STORE1, vst1,
17927 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17928 VAR10 (STORE1LANE, vst1_lane,
17929 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17930 VAR9 (LOADSTRUCT,
17931 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17932 VAR7 (LOADSTRUCTLANE, vld2_lane,
17933 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17934 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
17935 VAR9 (STORESTRUCT, vst2,
17936 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17937 VAR7 (STORESTRUCTLANE, vst2_lane,
17938 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17939 VAR9 (LOADSTRUCT,
17940 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17941 VAR7 (LOADSTRUCTLANE, vld3_lane,
17942 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17943 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
17944 VAR9 (STORESTRUCT, vst3,
17945 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17946 VAR7 (STORESTRUCTLANE, vst3_lane,
17947 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17948 VAR9 (LOADSTRUCT, vld4,
17949 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17950 VAR7 (LOADSTRUCTLANE, vld4_lane,
17951 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17952 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
17953 VAR9 (STORESTRUCT, vst4,
17954 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
17955 VAR7 (STORESTRUCTLANE, vst4_lane,
17956 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
17957 VAR10 (LOGICBINOP, vand,
17958 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17959 VAR10 (LOGICBINOP, vorr,
17960 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17961 VAR10 (BINOP, veor,
17962 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17963 VAR10 (LOGICBINOP, vbic,
17964 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
17965 VAR10 (LOGICBINOP, vorn,
17966 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
17967 };
17968
17969 #undef CF
17970 #undef VAR1
17971 #undef VAR2
17972 #undef VAR3
17973 #undef VAR4
17974 #undef VAR5
17975 #undef VAR6
17976 #undef VAR7
17977 #undef VAR8
17978 #undef VAR9
17979 #undef VAR10
17980
17981 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
17982 symbolic names defined here (which would require too much duplication).
17983 FIXME? */
17984 enum arm_builtins
17985 {
17986 ARM_BUILTIN_GETWCX,
17987 ARM_BUILTIN_SETWCX,
17988
17989 ARM_BUILTIN_WZERO,
17990
17991 ARM_BUILTIN_WAVG2BR,
17992 ARM_BUILTIN_WAVG2HR,
17993 ARM_BUILTIN_WAVG2B,
17994 ARM_BUILTIN_WAVG2H,
17995
17996 ARM_BUILTIN_WACCB,
17997 ARM_BUILTIN_WACCH,
17998 ARM_BUILTIN_WACCW,
17999
18000 ARM_BUILTIN_WMACS,
18001 ARM_BUILTIN_WMACSZ,
18002 ARM_BUILTIN_WMACU,
18003 ARM_BUILTIN_WMACUZ,
18004
18005 ARM_BUILTIN_WSADB,
18006 ARM_BUILTIN_WSADBZ,
18007 ARM_BUILTIN_WSADH,
18008 ARM_BUILTIN_WSADHZ,
18009
18010 ARM_BUILTIN_WALIGN,
18011
18012 ARM_BUILTIN_TMIA,
18013 ARM_BUILTIN_TMIAPH,
18014 ARM_BUILTIN_TMIABB,
18015 ARM_BUILTIN_TMIABT,
18016 ARM_BUILTIN_TMIATB,
18017 ARM_BUILTIN_TMIATT,
18018
18019 ARM_BUILTIN_TMOVMSKB,
18020 ARM_BUILTIN_TMOVMSKH,
18021 ARM_BUILTIN_TMOVMSKW,
18022
18023 ARM_BUILTIN_TBCSTB,
18024 ARM_BUILTIN_TBCSTH,
18025 ARM_BUILTIN_TBCSTW,
18026
18027 ARM_BUILTIN_WMADDS,
18028 ARM_BUILTIN_WMADDU,
18029
18030 ARM_BUILTIN_WPACKHSS,
18031 ARM_BUILTIN_WPACKWSS,
18032 ARM_BUILTIN_WPACKDSS,
18033 ARM_BUILTIN_WPACKHUS,
18034 ARM_BUILTIN_WPACKWUS,
18035 ARM_BUILTIN_WPACKDUS,
18036
18037 ARM_BUILTIN_WADDB,
18038 ARM_BUILTIN_WADDH,
18039 ARM_BUILTIN_WADDW,
18040 ARM_BUILTIN_WADDSSB,
18041 ARM_BUILTIN_WADDSSH,
18042 ARM_BUILTIN_WADDSSW,
18043 ARM_BUILTIN_WADDUSB,
18044 ARM_BUILTIN_WADDUSH,
18045 ARM_BUILTIN_WADDUSW,
18046 ARM_BUILTIN_WSUBB,
18047 ARM_BUILTIN_WSUBH,
18048 ARM_BUILTIN_WSUBW,
18049 ARM_BUILTIN_WSUBSSB,
18050 ARM_BUILTIN_WSUBSSH,
18051 ARM_BUILTIN_WSUBSSW,
18052 ARM_BUILTIN_WSUBUSB,
18053 ARM_BUILTIN_WSUBUSH,
18054 ARM_BUILTIN_WSUBUSW,
18055
18056 ARM_BUILTIN_WAND,
18057 ARM_BUILTIN_WANDN,
18058 ARM_BUILTIN_WOR,
18059 ARM_BUILTIN_WXOR,
18060
18061 ARM_BUILTIN_WCMPEQB,
18062 ARM_BUILTIN_WCMPEQH,
18063 ARM_BUILTIN_WCMPEQW,
18064 ARM_BUILTIN_WCMPGTUB,
18065 ARM_BUILTIN_WCMPGTUH,
18066 ARM_BUILTIN_WCMPGTUW,
18067 ARM_BUILTIN_WCMPGTSB,
18068 ARM_BUILTIN_WCMPGTSH,
18069 ARM_BUILTIN_WCMPGTSW,
18070
18071 ARM_BUILTIN_TEXTRMSB,
18072 ARM_BUILTIN_TEXTRMSH,
18073 ARM_BUILTIN_TEXTRMSW,
18074 ARM_BUILTIN_TEXTRMUB,
18075 ARM_BUILTIN_TEXTRMUH,
18076 ARM_BUILTIN_TEXTRMUW,
18077 ARM_BUILTIN_TINSRB,
18078 ARM_BUILTIN_TINSRH,
18079 ARM_BUILTIN_TINSRW,
18080
18081 ARM_BUILTIN_WMAXSW,
18082 ARM_BUILTIN_WMAXSH,
18083 ARM_BUILTIN_WMAXSB,
18084 ARM_BUILTIN_WMAXUW,
18085 ARM_BUILTIN_WMAXUH,
18086 ARM_BUILTIN_WMAXUB,
18087 ARM_BUILTIN_WMINSW,
18088 ARM_BUILTIN_WMINSH,
18089 ARM_BUILTIN_WMINSB,
18090 ARM_BUILTIN_WMINUW,
18091 ARM_BUILTIN_WMINUH,
18092 ARM_BUILTIN_WMINUB,
18093
18094 ARM_BUILTIN_WMULUM,
18095 ARM_BUILTIN_WMULSM,
18096 ARM_BUILTIN_WMULUL,
18097
18098 ARM_BUILTIN_PSADBH,
18099 ARM_BUILTIN_WSHUFH,
18100
18101 ARM_BUILTIN_WSLLH,
18102 ARM_BUILTIN_WSLLW,
18103 ARM_BUILTIN_WSLLD,
18104 ARM_BUILTIN_WSRAH,
18105 ARM_BUILTIN_WSRAW,
18106 ARM_BUILTIN_WSRAD,
18107 ARM_BUILTIN_WSRLH,
18108 ARM_BUILTIN_WSRLW,
18109 ARM_BUILTIN_WSRLD,
18110 ARM_BUILTIN_WRORH,
18111 ARM_BUILTIN_WRORW,
18112 ARM_BUILTIN_WRORD,
18113 ARM_BUILTIN_WSLLHI,
18114 ARM_BUILTIN_WSLLWI,
18115 ARM_BUILTIN_WSLLDI,
18116 ARM_BUILTIN_WSRAHI,
18117 ARM_BUILTIN_WSRAWI,
18118 ARM_BUILTIN_WSRADI,
18119 ARM_BUILTIN_WSRLHI,
18120 ARM_BUILTIN_WSRLWI,
18121 ARM_BUILTIN_WSRLDI,
18122 ARM_BUILTIN_WRORHI,
18123 ARM_BUILTIN_WRORWI,
18124 ARM_BUILTIN_WRORDI,
18125
18126 ARM_BUILTIN_WUNPCKIHB,
18127 ARM_BUILTIN_WUNPCKIHH,
18128 ARM_BUILTIN_WUNPCKIHW,
18129 ARM_BUILTIN_WUNPCKILB,
18130 ARM_BUILTIN_WUNPCKILH,
18131 ARM_BUILTIN_WUNPCKILW,
18132
18133 ARM_BUILTIN_WUNPCKEHSB,
18134 ARM_BUILTIN_WUNPCKEHSH,
18135 ARM_BUILTIN_WUNPCKEHSW,
18136 ARM_BUILTIN_WUNPCKEHUB,
18137 ARM_BUILTIN_WUNPCKEHUH,
18138 ARM_BUILTIN_WUNPCKEHUW,
18139 ARM_BUILTIN_WUNPCKELSB,
18140 ARM_BUILTIN_WUNPCKELSH,
18141 ARM_BUILTIN_WUNPCKELSW,
18142 ARM_BUILTIN_WUNPCKELUB,
18143 ARM_BUILTIN_WUNPCKELUH,
18144 ARM_BUILTIN_WUNPCKELUW,
18145
18146 ARM_BUILTIN_THREAD_POINTER,
18147
18148 ARM_BUILTIN_NEON_BASE,
18149
18150 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
18151 };
18152
18153 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
18154
18155 static void
18156 arm_init_neon_builtins (void)
18157 {
18158 unsigned int i, fcode;
18159 tree decl;
18160
18161 tree neon_intQI_type_node;
18162 tree neon_intHI_type_node;
18163 tree neon_polyQI_type_node;
18164 tree neon_polyHI_type_node;
18165 tree neon_intSI_type_node;
18166 tree neon_intDI_type_node;
18167 tree neon_float_type_node;
18168
18169 tree intQI_pointer_node;
18170 tree intHI_pointer_node;
18171 tree intSI_pointer_node;
18172 tree intDI_pointer_node;
18173 tree float_pointer_node;
18174
18175 tree const_intQI_node;
18176 tree const_intHI_node;
18177 tree const_intSI_node;
18178 tree const_intDI_node;
18179 tree const_float_node;
18180
18181 tree const_intQI_pointer_node;
18182 tree const_intHI_pointer_node;
18183 tree const_intSI_pointer_node;
18184 tree const_intDI_pointer_node;
18185 tree const_float_pointer_node;
18186
18187 tree V8QI_type_node;
18188 tree V4HI_type_node;
18189 tree V2SI_type_node;
18190 tree V2SF_type_node;
18191 tree V16QI_type_node;
18192 tree V8HI_type_node;
18193 tree V4SI_type_node;
18194 tree V4SF_type_node;
18195 tree V2DI_type_node;
18196
18197 tree intUQI_type_node;
18198 tree intUHI_type_node;
18199 tree intUSI_type_node;
18200 tree intUDI_type_node;
18201
18202 tree intEI_type_node;
18203 tree intOI_type_node;
18204 tree intCI_type_node;
18205 tree intXI_type_node;
18206
18207 tree V8QI_pointer_node;
18208 tree V4HI_pointer_node;
18209 tree V2SI_pointer_node;
18210 tree V2SF_pointer_node;
18211 tree V16QI_pointer_node;
18212 tree V8HI_pointer_node;
18213 tree V4SI_pointer_node;
18214 tree V4SF_pointer_node;
18215 tree V2DI_pointer_node;
18216
18217 tree void_ftype_pv8qi_v8qi_v8qi;
18218 tree void_ftype_pv4hi_v4hi_v4hi;
18219 tree void_ftype_pv2si_v2si_v2si;
18220 tree void_ftype_pv2sf_v2sf_v2sf;
18221 tree void_ftype_pdi_di_di;
18222 tree void_ftype_pv16qi_v16qi_v16qi;
18223 tree void_ftype_pv8hi_v8hi_v8hi;
18224 tree void_ftype_pv4si_v4si_v4si;
18225 tree void_ftype_pv4sf_v4sf_v4sf;
18226 tree void_ftype_pv2di_v2di_v2di;
18227
18228 tree reinterp_ftype_dreg[5][5];
18229 tree reinterp_ftype_qreg[5][5];
18230 tree dreg_types[5], qreg_types[5];
18231
18232 /* Create distinguished type nodes for NEON vector element types,
18233 and pointers to values of such types, so we can detect them later. */
18234 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18235 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18236 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
18237 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
18238 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
18239 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
18240 neon_float_type_node = make_node (REAL_TYPE);
18241 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
18242 layout_type (neon_float_type_node);
18243
18244 /* Define typedefs which exactly correspond to the modes we are basing vector
18245 types on. If you change these names you'll need to change
18246 the table used by arm_mangle_type too. */
18247 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
18248 "__builtin_neon_qi");
18249 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
18250 "__builtin_neon_hi");
18251 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
18252 "__builtin_neon_si");
18253 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
18254 "__builtin_neon_sf");
18255 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
18256 "__builtin_neon_di");
18257 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
18258 "__builtin_neon_poly8");
18259 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
18260 "__builtin_neon_poly16");
18261
18262 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
18263 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
18264 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
18265 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
18266 float_pointer_node = build_pointer_type (neon_float_type_node);
18267
18268 /* Next create constant-qualified versions of the above types. */
18269 const_intQI_node = build_qualified_type (neon_intQI_type_node,
18270 TYPE_QUAL_CONST);
18271 const_intHI_node = build_qualified_type (neon_intHI_type_node,
18272 TYPE_QUAL_CONST);
18273 const_intSI_node = build_qualified_type (neon_intSI_type_node,
18274 TYPE_QUAL_CONST);
18275 const_intDI_node = build_qualified_type (neon_intDI_type_node,
18276 TYPE_QUAL_CONST);
18277 const_float_node = build_qualified_type (neon_float_type_node,
18278 TYPE_QUAL_CONST);
18279
18280 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
18281 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
18282 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
18283 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
18284 const_float_pointer_node = build_pointer_type (const_float_node);
18285
18286 /* Now create vector types based on our NEON element types. */
18287 /* 64-bit vectors. */
18288 V8QI_type_node =
18289 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
18290 V4HI_type_node =
18291 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
18292 V2SI_type_node =
18293 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
18294 V2SF_type_node =
18295 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
18296 /* 128-bit vectors. */
18297 V16QI_type_node =
18298 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
18299 V8HI_type_node =
18300 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
18301 V4SI_type_node =
18302 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
18303 V4SF_type_node =
18304 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
18305 V2DI_type_node =
18306 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
18307
18308 /* Unsigned integer types for various mode sizes. */
18309 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
18310 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
18311 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
18312 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
18313
18314 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
18315 "__builtin_neon_uqi");
18316 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
18317 "__builtin_neon_uhi");
18318 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
18319 "__builtin_neon_usi");
18320 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
18321 "__builtin_neon_udi");
18322
18323 /* Opaque integer types for structures of vectors. */
18324 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
18325 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
18326 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
18327 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
18328
18329 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
18330 "__builtin_neon_ti");
18331 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
18332 "__builtin_neon_ei");
18333 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
18334 "__builtin_neon_oi");
18335 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
18336 "__builtin_neon_ci");
18337 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
18338 "__builtin_neon_xi");
18339
18340 /* Pointers to vector types. */
18341 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
18342 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
18343 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
18344 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
18345 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
18346 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
18347 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
18348 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
18349 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
18350
18351 /* Operations which return results as pairs. */
18352 void_ftype_pv8qi_v8qi_v8qi =
18353 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
18354 V8QI_type_node, NULL);
18355 void_ftype_pv4hi_v4hi_v4hi =
18356 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
18357 V4HI_type_node, NULL);
18358 void_ftype_pv2si_v2si_v2si =
18359 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
18360 V2SI_type_node, NULL);
18361 void_ftype_pv2sf_v2sf_v2sf =
18362 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
18363 V2SF_type_node, NULL);
18364 void_ftype_pdi_di_di =
18365 build_function_type_list (void_type_node, intDI_pointer_node,
18366 neon_intDI_type_node, neon_intDI_type_node, NULL);
18367 void_ftype_pv16qi_v16qi_v16qi =
18368 build_function_type_list (void_type_node, V16QI_pointer_node,
18369 V16QI_type_node, V16QI_type_node, NULL);
18370 void_ftype_pv8hi_v8hi_v8hi =
18371 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
18372 V8HI_type_node, NULL);
18373 void_ftype_pv4si_v4si_v4si =
18374 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
18375 V4SI_type_node, NULL);
18376 void_ftype_pv4sf_v4sf_v4sf =
18377 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
18378 V4SF_type_node, NULL);
18379 void_ftype_pv2di_v2di_v2di =
18380 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
18381 V2DI_type_node, NULL);
18382
18383 dreg_types[0] = V8QI_type_node;
18384 dreg_types[1] = V4HI_type_node;
18385 dreg_types[2] = V2SI_type_node;
18386 dreg_types[3] = V2SF_type_node;
18387 dreg_types[4] = neon_intDI_type_node;
18388
18389 qreg_types[0] = V16QI_type_node;
18390 qreg_types[1] = V8HI_type_node;
18391 qreg_types[2] = V4SI_type_node;
18392 qreg_types[3] = V4SF_type_node;
18393 qreg_types[4] = V2DI_type_node;
18394
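/* Build the full matrix of reinterpretation signatures: reinterp_ftype_dreg[i][j]
   is the type of a function taking dreg_types[j] and returning dreg_types[i],
   and reinterp_ftype_qreg is the analogous table for quadword vectors.  These
   are used below when creating the NEON_REINTERP builtins.  */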
18395 for (i = 0; i < 5; i++)
18396 {
18397 int j;
18398 for (j = 0; j < 5; j++)
18399 {
18400 reinterp_ftype_dreg[i][j]
18401 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
18402 reinterp_ftype_qreg[i][j]
18403 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
18404 }
18405 }
18406
18407 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
18408 i < ARRAY_SIZE (neon_builtin_data);
18409 i++, fcode++)
18410 {
18411 neon_builtin_datum *d = &neon_builtin_data[i];
18412
18413 const char* const modenames[] = {
18414 "v8qi", "v4hi", "v2si", "v2sf", "di",
18415 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
18416 "ti", "ei", "oi"
18417 };
18418 char namebuf[60];
18419 tree ftype = NULL;
18420 int is_load = 0, is_store = 0;
18421
18422 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
18423
18424 d->fcode = fcode;
18425
18426 switch (d->itype)
18427 {
18428 case NEON_LOAD1:
18429 case NEON_LOAD1LANE:
18430 case NEON_LOADSTRUCT:
18431 case NEON_LOADSTRUCTLANE:
18432 is_load = 1;
18433 /* Fall through. */
18434 case NEON_STORE1:
18435 case NEON_STORE1LANE:
18436 case NEON_STORESTRUCT:
18437 case NEON_STORESTRUCTLANE:
18438 if (!is_load)
18439 is_store = 1;
18440 /* Fall through. */
18441 case NEON_UNOP:
18442 case NEON_BINOP:
18443 case NEON_LOGICBINOP:
18444 case NEON_SHIFTINSERT:
18445 case NEON_TERNOP:
18446 case NEON_GETLANE:
18447 case NEON_SETLANE:
18448 case NEON_CREATE:
18449 case NEON_DUP:
18450 case NEON_DUPLANE:
18451 case NEON_SHIFTIMM:
18452 case NEON_SHIFTACC:
18453 case NEON_COMBINE:
18454 case NEON_SPLIT:
18455 case NEON_CONVERT:
18456 case NEON_FIXCONV:
18457 case NEON_LANEMUL:
18458 case NEON_LANEMULL:
18459 case NEON_LANEMULH:
18460 case NEON_LANEMAC:
18461 case NEON_SCALARMUL:
18462 case NEON_SCALARMULL:
18463 case NEON_SCALARMULH:
18464 case NEON_SCALARMAC:
18465 case NEON_SELECT:
18466 case NEON_VTBL:
18467 case NEON_VTBX:
18468 {
18469 int k;
18470 tree return_type = void_type_node, args = void_list_node;
18471
18472 /* Build a function type directly from the insn_data for
18473 this builtin. The build_function_type() function takes
18474 care of removing duplicates for us. */
18475 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
18476 {
18477 tree eltype;
18478
18479 if (is_load && k == 1)
18480 {
18481 /* Neon load patterns always have the memory
18482 operand in the operand 1 position. */
18483 gcc_assert (insn_data[d->code].operand[k].predicate
18484 == neon_struct_operand);
18485
18486 switch (d->mode)
18487 {
18488 case T_V8QI:
18489 case T_V16QI:
18490 eltype = const_intQI_pointer_node;
18491 break;
18492
18493 case T_V4HI:
18494 case T_V8HI:
18495 eltype = const_intHI_pointer_node;
18496 break;
18497
18498 case T_V2SI:
18499 case T_V4SI:
18500 eltype = const_intSI_pointer_node;
18501 break;
18502
18503 case T_V2SF:
18504 case T_V4SF:
18505 eltype = const_float_pointer_node;
18506 break;
18507
18508 case T_DI:
18509 case T_V2DI:
18510 eltype = const_intDI_pointer_node;
18511 break;
18512
18513 default: gcc_unreachable ();
18514 }
18515 }
18516 else if (is_store && k == 0)
18517 {
18518 /* Similarly, Neon store patterns use operand 0 as
18519 the memory location to store to. */
18520 gcc_assert (insn_data[d->code].operand[k].predicate
18521 == neon_struct_operand);
18522
18523 switch (d->mode)
18524 {
18525 case T_V8QI:
18526 case T_V16QI:
18527 eltype = intQI_pointer_node;
18528 break;
18529
18530 case T_V4HI:
18531 case T_V8HI:
18532 eltype = intHI_pointer_node;
18533 break;
18534
18535 case T_V2SI:
18536 case T_V4SI:
18537 eltype = intSI_pointer_node;
18538 break;
18539
18540 case T_V2SF:
18541 case T_V4SF:
18542 eltype = float_pointer_node;
18543 break;
18544
18545 case T_DI:
18546 case T_V2DI:
18547 eltype = intDI_pointer_node;
18548 break;
18549
18550 default: gcc_unreachable ();
18551 }
18552 }
18553 else
18554 {
18555 switch (insn_data[d->code].operand[k].mode)
18556 {
18557 case VOIDmode: eltype = void_type_node; break;
18558 /* Scalars. */
18559 case QImode: eltype = neon_intQI_type_node; break;
18560 case HImode: eltype = neon_intHI_type_node; break;
18561 case SImode: eltype = neon_intSI_type_node; break;
18562 case SFmode: eltype = neon_float_type_node; break;
18563 case DImode: eltype = neon_intDI_type_node; break;
18564 case TImode: eltype = intTI_type_node; break;
18565 case EImode: eltype = intEI_type_node; break;
18566 case OImode: eltype = intOI_type_node; break;
18567 case CImode: eltype = intCI_type_node; break;
18568 case XImode: eltype = intXI_type_node; break;
18569 /* 64-bit vectors. */
18570 case V8QImode: eltype = V8QI_type_node; break;
18571 case V4HImode: eltype = V4HI_type_node; break;
18572 case V2SImode: eltype = V2SI_type_node; break;
18573 case V2SFmode: eltype = V2SF_type_node; break;
18574 /* 128-bit vectors. */
18575 case V16QImode: eltype = V16QI_type_node; break;
18576 case V8HImode: eltype = V8HI_type_node; break;
18577 case V4SImode: eltype = V4SI_type_node; break;
18578 case V4SFmode: eltype = V4SF_type_node; break;
18579 case V2DImode: eltype = V2DI_type_node; break;
18580 default: gcc_unreachable ();
18581 }
18582 }
18583
18584 if (k == 0 && !is_store)
18585 return_type = eltype;
18586 else
18587 args = tree_cons (NULL_TREE, eltype, args);
18588 }
18589
18590 ftype = build_function_type (return_type, args);
18591 }
18592 break;
18593
18594 case NEON_RESULTPAIR:
18595 {
18596 switch (insn_data[d->code].operand[1].mode)
18597 {
18598 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
18599 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
18600 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
18601 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
18602 case DImode: ftype = void_ftype_pdi_di_di; break;
18603 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
18604 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
18605 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
18606 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
18607 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
18608 default: gcc_unreachable ();
18609 }
18610 }
18611 break;
18612
18613 case NEON_REINTERP:
18614 {
18615 /* We iterate over 5 doubleword types, then 5 quadword
18616 types. */
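/* The modulo-5 index selects the argument type in the order V8QI, V4HI,
   V2SI, V2SF, DI for doubleword modes (or V16QI, V8HI, V4SI, V4SF, V2DI
   for quadword modes), matching the dreg_types[] and qreg_types[] tables
   built above.  */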
18617 int rhs = d->mode % 5;
18618 switch (insn_data[d->code].operand[0].mode)
18619 {
18620 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
18621 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
18622 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
18623 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
18624 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
18625 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
18626 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
18627 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
18628 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
18629 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
18630 default: gcc_unreachable ();
18631 }
18632 }
18633 break;
18634
18635 default:
18636 gcc_unreachable ();
18637 }
18638
18639 gcc_assert (ftype != NULL);
18640
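/* Compose the builtin's name from the table entry and its mode suffix.  For
   example, assuming the usual contents of neon_builtin_data, the entry named
   "vadd" in V8QImode yields "__builtin_neon_vaddv8qi".  */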
18641 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
18642
18643 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
18644 NULL_TREE);
18645 arm_builtin_decls[fcode] = decl;
18646 }
18647 }
18648
18649 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
18650 do \
18651 { \
18652 if ((MASK) & insn_flags) \
18653 { \
18654 tree bdecl; \
18655 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
18656 BUILT_IN_MD, NULL, NULL_TREE); \
18657 arm_builtin_decls[CODE] = bdecl; \
18658 } \
18659 } \
18660 while (0)
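/* For instance, the loop in arm_init_iwmmxt_builtins below effectively invokes
   def_mbuiltin (FL_IWMMXT, "__builtin_arm_waddb", v8qi_ftype_v8qi_v8qi,
   ARM_BUILTIN_WADDB), registering the builtin and recording its decl in
   arm_builtin_decls, but only when FL_IWMMXT is present in insn_flags.  */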
18661
18662 struct builtin_description
18663 {
18664 const unsigned int mask;
18665 const enum insn_code icode;
18666 const char * const name;
18667 const enum arm_builtins code;
18668 const enum rtx_code comparison;
18669 const unsigned int flag;
18670 };
18671
18672 static const struct builtin_description bdesc_2arg[] =
18673 {
18674 #define IWMMXT_BUILTIN(code, string, builtin) \
18675 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
18676 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
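/* For example, IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) expands to
   { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
   ARM_BUILTIN_WADDB, UNKNOWN, 0 },  */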
18677
18678 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
18679 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
18680 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
18681 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
18682 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
18683 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
18684 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
18685 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
18686 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
18687 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
18688 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
18689 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
18690 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
18691 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
18692 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
18693 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
18694 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
18695 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
18696 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
18697 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
18698 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
18699 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
18700 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
18701 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
18702 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
18703 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
18704 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
18705 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
18706 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
18707 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
18708 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
18709 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
18710 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
18711 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
18712 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
18713 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
18714 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
18715 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
18716 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
18717 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
18718 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
18719 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
18720 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
18721 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
18722 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
18723 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
18724 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
18725 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
18726 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
18727 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
18728 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
18729 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
18730 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
18731 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
18732 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
18733 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
18734 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
18735 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
18736
18737 #define IWMMXT_BUILTIN2(code, builtin) \
18738 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
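/* Entries created with IWMMXT_BUILTIN2 have a NULL name, so the registration
   loop in arm_init_iwmmxt_builtins skips them (their builtins are registered
   there via iwmmx_mbuiltin with more elaborate signatures); the table entries
   still let arm_expand_builtin map a function code to its insn code.  */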
18739
18740 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
18741 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
18742 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
18743 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
18744 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
18745 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
18746 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
18747 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
18748 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
18749 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
18750 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
18751 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
18752 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
18753 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
18754 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
18755 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
18756 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
18757 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
18758 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
18759 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
18760 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
18761 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
18762 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
18763 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
18764 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
18765 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
18766 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
18767 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
18768 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
18769 IWMMXT_BUILTIN2 (rordi3, WRORDI)
18770 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
18771 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
18772 };
18773
18774 static const struct builtin_description bdesc_1arg[] =
18775 {
18776 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
18777 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
18778 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
18779 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
18780 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
18781 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
18782 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
18783 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
18784 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
18785 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
18786 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
18787 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
18788 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
18789 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
18790 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
18791 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
18792 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
18793 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
18794 };
18795
18796 /* Set up all the iWMMXt builtins. This is not called if
18797 TARGET_IWMMXT is zero. */
18798
18799 static void
18800 arm_init_iwmmxt_builtins (void)
18801 {
18802 const struct builtin_description * d;
18803 size_t i;
18804
18805 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18806 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18807 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
18808
18809 tree int_ftype_int
18810 = build_function_type_list (integer_type_node,
18811 integer_type_node, NULL_TREE);
18812 tree v8qi_ftype_v8qi_v8qi_int
18813 = build_function_type_list (V8QI_type_node,
18814 V8QI_type_node, V8QI_type_node,
18815 integer_type_node, NULL_TREE);
18816 tree v4hi_ftype_v4hi_int
18817 = build_function_type_list (V4HI_type_node,
18818 V4HI_type_node, integer_type_node, NULL_TREE);
18819 tree v2si_ftype_v2si_int
18820 = build_function_type_list (V2SI_type_node,
18821 V2SI_type_node, integer_type_node, NULL_TREE);
18822 tree v2si_ftype_di_di
18823 = build_function_type_list (V2SI_type_node,
18824 long_long_integer_type_node,
18825 long_long_integer_type_node,
18826 NULL_TREE);
18827 tree di_ftype_di_int
18828 = build_function_type_list (long_long_integer_type_node,
18829 long_long_integer_type_node,
18830 integer_type_node, NULL_TREE);
18831 tree di_ftype_di_int_int
18832 = build_function_type_list (long_long_integer_type_node,
18833 long_long_integer_type_node,
18834 integer_type_node,
18835 integer_type_node, NULL_TREE);
18836 tree int_ftype_v8qi
18837 = build_function_type_list (integer_type_node,
18838 V8QI_type_node, NULL_TREE);
18839 tree int_ftype_v4hi
18840 = build_function_type_list (integer_type_node,
18841 V4HI_type_node, NULL_TREE);
18842 tree int_ftype_v2si
18843 = build_function_type_list (integer_type_node,
18844 V2SI_type_node, NULL_TREE);
18845 tree int_ftype_v8qi_int
18846 = build_function_type_list (integer_type_node,
18847 V8QI_type_node, integer_type_node, NULL_TREE);
18848 tree int_ftype_v4hi_int
18849 = build_function_type_list (integer_type_node,
18850 V4HI_type_node, integer_type_node, NULL_TREE);
18851 tree int_ftype_v2si_int
18852 = build_function_type_list (integer_type_node,
18853 V2SI_type_node, integer_type_node, NULL_TREE);
18854 tree v8qi_ftype_v8qi_int_int
18855 = build_function_type_list (V8QI_type_node,
18856 V8QI_type_node, integer_type_node,
18857 integer_type_node, NULL_TREE);
18858 tree v4hi_ftype_v4hi_int_int
18859 = build_function_type_list (V4HI_type_node,
18860 V4HI_type_node, integer_type_node,
18861 integer_type_node, NULL_TREE);
18862 tree v2si_ftype_v2si_int_int
18863 = build_function_type_list (V2SI_type_node,
18864 V2SI_type_node, integer_type_node,
18865 integer_type_node, NULL_TREE);
18866 /* Miscellaneous. */
18867 tree v8qi_ftype_v4hi_v4hi
18868 = build_function_type_list (V8QI_type_node,
18869 V4HI_type_node, V4HI_type_node, NULL_TREE);
18870 tree v4hi_ftype_v2si_v2si
18871 = build_function_type_list (V4HI_type_node,
18872 V2SI_type_node, V2SI_type_node, NULL_TREE);
18873 tree v2si_ftype_v4hi_v4hi
18874 = build_function_type_list (V2SI_type_node,
18875 V4HI_type_node, V4HI_type_node, NULL_TREE);
18876 tree v2si_ftype_v8qi_v8qi
18877 = build_function_type_list (V2SI_type_node,
18878 V8QI_type_node, V8QI_type_node, NULL_TREE);
18879 tree v4hi_ftype_v4hi_di
18880 = build_function_type_list (V4HI_type_node,
18881 V4HI_type_node, long_long_integer_type_node,
18882 NULL_TREE);
18883 tree v2si_ftype_v2si_di
18884 = build_function_type_list (V2SI_type_node,
18885 V2SI_type_node, long_long_integer_type_node,
18886 NULL_TREE);
18887 tree void_ftype_int_int
18888 = build_function_type_list (void_type_node,
18889 integer_type_node, integer_type_node,
18890 NULL_TREE);
18891 tree di_ftype_void
18892 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
18893 tree di_ftype_v8qi
18894 = build_function_type_list (long_long_integer_type_node,
18895 V8QI_type_node, NULL_TREE);
18896 tree di_ftype_v4hi
18897 = build_function_type_list (long_long_integer_type_node,
18898 V4HI_type_node, NULL_TREE);
18899 tree di_ftype_v2si
18900 = build_function_type_list (long_long_integer_type_node,
18901 V2SI_type_node, NULL_TREE);
18902 tree v2si_ftype_v4hi
18903 = build_function_type_list (V2SI_type_node,
18904 V4HI_type_node, NULL_TREE);
18905 tree v4hi_ftype_v8qi
18906 = build_function_type_list (V4HI_type_node,
18907 V8QI_type_node, NULL_TREE);
18908
18909 tree di_ftype_di_v4hi_v4hi
18910 = build_function_type_list (long_long_unsigned_type_node,
18911 long_long_unsigned_type_node,
18912 V4HI_type_node, V4HI_type_node,
18913 NULL_TREE);
18914
18915 tree di_ftype_v4hi_v4hi
18916 = build_function_type_list (long_long_unsigned_type_node,
18917 V4HI_type_node, V4HI_type_node,
18918 NULL_TREE);
18919
18920 /* Normal vector binops. */
18921 tree v8qi_ftype_v8qi_v8qi
18922 = build_function_type_list (V8QI_type_node,
18923 V8QI_type_node, V8QI_type_node, NULL_TREE);
18924 tree v4hi_ftype_v4hi_v4hi
18925 = build_function_type_list (V4HI_type_node,
18926 V4HI_type_node, V4HI_type_node, NULL_TREE);
18927 tree v2si_ftype_v2si_v2si
18928 = build_function_type_list (V2SI_type_node,
18929 V2SI_type_node, V2SI_type_node, NULL_TREE);
18930 tree di_ftype_di_di
18931 = build_function_type_list (long_long_unsigned_type_node,
18932 long_long_unsigned_type_node,
18933 long_long_unsigned_type_node,
18934 NULL_TREE);
18935
18936 /* Add all builtins that are more or less simple operations on two
18937 operands. */
18938 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18939 {
18940 /* Use one of the operands; the target can have a different mode for
18941 mask-generating compares. */
18942 enum machine_mode mode;
18943 tree type;
18944
18945 if (d->name == 0)
18946 continue;
18947
18948 mode = insn_data[d->icode].operand[1].mode;
18949
18950 switch (mode)
18951 {
18952 case V8QImode:
18953 type = v8qi_ftype_v8qi_v8qi;
18954 break;
18955 case V4HImode:
18956 type = v4hi_ftype_v4hi_v4hi;
18957 break;
18958 case V2SImode:
18959 type = v2si_ftype_v2si_v2si;
18960 break;
18961 case DImode:
18962 type = di_ftype_di_di;
18963 break;
18964
18965 default:
18966 gcc_unreachable ();
18967 }
18968
18969 def_mbuiltin (d->mask, d->name, type, d->code);
18970 }
18971
18972 /* Add the remaining MMX insns with somewhat more complicated types. */
18973 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
18974 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
18975 ARM_BUILTIN_ ## CODE)
18976
18977 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
18978 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
18979 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
18980
18981 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
18982 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
18983 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
18984 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
18985 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
18986 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
18987
18988 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
18989 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
18990 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
18991 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
18992 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
18993 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
18994
18995 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
18996 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
18997 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
18998 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
18999 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
19000 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
19001
19002 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
19003 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
19004 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
19005 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
19006 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
19007 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
19008
19009 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
19010
19011 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
19012 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
19013 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
19014 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
19015
19016 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
19017 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
19018 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
19019 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
19020 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
19021 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
19022 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
19023 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
19024 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
19025
19026 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
19027 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
19028 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
19029
19030 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
19031 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
19032 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
19033
19034 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
19035 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
19036 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
19037 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
19038 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
19039 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
19040
19041 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
19042 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
19043 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
19044 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
19045 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
19046 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
19047 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
19048 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
19049 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
19050 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
19051 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
19052 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
19053
19054 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
19055 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
19056 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
19057 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
19058
19059 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
19060 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
19061 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
19062 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
19063 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
19064 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
19065 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
19066
19067 #undef iwmmx_mbuiltin
19068 }
19069
19070 static void
19071 arm_init_tls_builtins (void)
19072 {
19073 tree ftype, decl;
19074
19075 ftype = build_function_type (ptr_type_node, void_list_node);
19076 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
19077 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
19078 NULL, NULL_TREE);
19079 TREE_NOTHROW (decl) = 1;
19080 TREE_READONLY (decl) = 1;
19081 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
19082 }
19083
19084 static void
19085 arm_init_fp16_builtins (void)
19086 {
19087 tree fp16_type = make_node (REAL_TYPE);
19088 TYPE_PRECISION (fp16_type) = 16;
19089 layout_type (fp16_type);
19090 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
19091 }
19092
19093 static void
19094 arm_init_builtins (void)
19095 {
19096 arm_init_tls_builtins ();
19097
19098 if (TARGET_REALLY_IWMMXT)
19099 arm_init_iwmmxt_builtins ();
19100
19101 if (TARGET_NEON)
19102 arm_init_neon_builtins ();
19103
19104 if (arm_fp16_format)
19105 arm_init_fp16_builtins ();
19106 }
19107
19108 /* Return the ARM builtin for CODE. */
19109
19110 static tree
19111 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
19112 {
19113 if (code >= ARM_BUILTIN_MAX)
19114 return error_mark_node;
19115
19116 return arm_builtin_decls[code];
19117 }
19118
19119 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19120
19121 static const char *
19122 arm_invalid_parameter_type (const_tree t)
19123 {
19124 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19125 return N_("function parameters cannot have __fp16 type");
19126 return NULL;
19127 }
19128
19129 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19130
19131 static const char *
19132 arm_invalid_return_type (const_tree t)
19133 {
19134 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19135 return N_("functions cannot return __fp16 type");
19136 return NULL;
19137 }
19138
19139 /* Implement TARGET_PROMOTED_TYPE. */
19140
19141 static tree
19142 arm_promoted_type (const_tree t)
19143 {
19144 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
19145 return float_type_node;
19146 return NULL_TREE;
19147 }
19148
19149 /* Implement TARGET_CONVERT_TO_TYPE.
19150 Specifically, this hook implements the peculiarity of the ARM
19151 half-precision floating-point C semantics that requires conversions between
19152 __fp16 and wider types such as double to go through an intermediate conversion to float. */
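/* For example, given __fp16 x, (double) x is evaluated as (double) (float) x,
   and assigning a double value to an __fp16 object likewise narrows through
   float first.  */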
19153
19154 static tree
19155 arm_convert_to_type (tree type, tree expr)
19156 {
19157 tree fromtype = TREE_TYPE (expr);
19158 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
19159 return NULL_TREE;
19160 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
19161 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
19162 return convert (type, convert (float_type_node, expr));
19163 return NULL_TREE;
19164 }
19165
19166 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19167 This simply adds HFmode as a supported mode; even though we don't
19168 implement arithmetic on this type directly, it's supported by
19169 optabs conversions, much the way the double-word arithmetic is
19170 special-cased in the default hook. */
19171
19172 static bool
19173 arm_scalar_mode_supported_p (enum machine_mode mode)
19174 {
19175 if (mode == HFmode)
19176 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
19177 else
19178 return default_scalar_mode_supported_p (mode);
19179 }
19180
19181 /* Errors in the source file can cause expand_expr to return const0_rtx
19182 where we expect a vector. To avoid crashing, use one of the vector
19183 clear instructions. */
19184
19185 static rtx
19186 safe_vector_operand (rtx x, enum machine_mode mode)
19187 {
19188 if (x != const0_rtx)
19189 return x;
19190 x = gen_reg_rtx (mode);
19191
19192 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
19193 : gen_rtx_SUBREG (DImode, x, 0)));
19194 return x;
19195 }
19196
19197 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19198
19199 static rtx
19200 arm_expand_binop_builtin (enum insn_code icode,
19201 tree exp, rtx target)
19202 {
19203 rtx pat;
19204 tree arg0 = CALL_EXPR_ARG (exp, 0);
19205 tree arg1 = CALL_EXPR_ARG (exp, 1);
19206 rtx op0 = expand_normal (arg0);
19207 rtx op1 = expand_normal (arg1);
19208 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19209 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19210 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19211
19212 if (VECTOR_MODE_P (mode0))
19213 op0 = safe_vector_operand (op0, mode0);
19214 if (VECTOR_MODE_P (mode1))
19215 op1 = safe_vector_operand (op1, mode1);
19216
19217 if (! target
19218 || GET_MODE (target) != tmode
19219 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19220 target = gen_reg_rtx (tmode);
19221
19222 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
19223
19224 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19225 op0 = copy_to_mode_reg (mode0, op0);
19226 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19227 op1 = copy_to_mode_reg (mode1, op1);
19228
19229 pat = GEN_FCN (icode) (target, op0, op1);
19230 if (! pat)
19231 return 0;
19232 emit_insn (pat);
19233 return target;
19234 }
19235
19236 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19237
19238 static rtx
19239 arm_expand_unop_builtin (enum insn_code icode,
19240 tree exp, rtx target, int do_load)
19241 {
19242 rtx pat;
19243 tree arg0 = CALL_EXPR_ARG (exp, 0);
19244 rtx op0 = expand_normal (arg0);
19245 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19246 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19247
19248 if (! target
19249 || GET_MODE (target) != tmode
19250 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19251 target = gen_reg_rtx (tmode);
19252 if (do_load)
19253 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19254 else
19255 {
19256 if (VECTOR_MODE_P (mode0))
19257 op0 = safe_vector_operand (op0, mode0);
19258
19259 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19260 op0 = copy_to_mode_reg (mode0, op0);
19261 }
19262
19263 pat = GEN_FCN (icode) (target, op0);
19264 if (! pat)
19265 return 0;
19266 emit_insn (pat);
19267 return target;
19268 }
19269
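/* How arm_expand_neon_args should prepare each argument of a builtin call:
   copy it into a register, require it to satisfy the operand predicate as
   given (normally a constant), treat it as a memory reference derived from
   a pointer argument, or stop processing arguments.  */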
19270 typedef enum {
19271 NEON_ARG_COPY_TO_REG,
19272 NEON_ARG_CONSTANT,
19273 NEON_ARG_MEMORY,
19274 NEON_ARG_STOP
19275 } builtin_arg;
19276
19277 #define NEON_MAX_BUILTIN_ARGS 5
19278
19279 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
19280 and return an expression for the accessed memory.
19281
19282 The intrinsic function operates on a block of registers that has
19283 mode REG_MODE. This block contains vectors of type TYPE_MODE.
19284 The function references the memory at EXP in mode MEM_MODE;
19285 this mode may be BLKmode if no more suitable mode is available. */
19286
19287 static tree
19288 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
19289 enum machine_mode reg_mode,
19290 neon_builtin_type_mode type_mode)
19291 {
19292 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
19293 tree elem_type, upper_bound, array_type;
19294
19295 /* Work out the size of the register block in bytes. */
19296 reg_size = GET_MODE_SIZE (reg_mode);
19297
19298 /* Work out the size of each vector in bytes. */
19299 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
19300 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
19301
19302 /* Work out how many vectors there are. */
19303 gcc_assert (reg_size % vector_size == 0);
19304 nvectors = reg_size / vector_size;
19305
19306 /* Work out how many elements are being loaded or stored.
19307 MEM_MODE == REG_MODE implies a one-to-one mapping between register
19308 and memory elements; anything else implies a lane load or store. */
19309 if (mem_mode == reg_mode)
19310 nelems = vector_size * nvectors;
19311 else
19312 nelems = nvectors;
19313
19314 /* Work out the type of each element. */
19315 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
19316 elem_type = TREE_TYPE (TREE_TYPE (exp));
19317
19318 /* Create a type that describes the full access. */
19319 upper_bound = build_int_cst (size_type_node, nelems - 1);
19320 array_type = build_array_type (elem_type, build_index_type (upper_bound));
19321
19322 /* Dereference EXP using that type. */
19323 exp = convert (build_pointer_type (array_type), exp);
19324 return fold_build2 (MEM_REF, array_type, exp,
19325 build_int_cst (TREE_TYPE (exp), 0));
19326 }
19327
19328 /* Expand a Neon builtin. */
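/* TARGET is a suggested destination, ICODE the instruction pattern to use and
   HAVE_RETVAL is nonzero if operand 0 of the pattern is a result rather than
   an input.  TYPE_MODE identifies the builtin's type variant, and the variadic
   tail is a list of builtin_arg codes, terminated by NEON_ARG_STOP, saying
   how each argument of EXP should be prepared.  */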
19329 static rtx
19330 arm_expand_neon_args (rtx target, int icode, int have_retval,
19331 neon_builtin_type_mode type_mode,
19332 tree exp, ...)
19333 {
19334 va_list ap;
19335 rtx pat;
19336 tree arg[NEON_MAX_BUILTIN_ARGS];
19337 rtx op[NEON_MAX_BUILTIN_ARGS];
19338 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19339 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
19340 enum machine_mode other_mode;
19341 int argc = 0;
19342 int opno;
19343
19344 if (have_retval
19345 && (!target
19346 || GET_MODE (target) != tmode
19347 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
19348 target = gen_reg_rtx (tmode);
19349
19350 va_start (ap, exp);
19351
19352 for (;;)
19353 {
19354 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
19355
19356 if (thisarg == NEON_ARG_STOP)
19357 break;
19358 else
19359 {
19360 opno = argc + have_retval;
19361 mode[argc] = insn_data[icode].operand[opno].mode;
19362 arg[argc] = CALL_EXPR_ARG (exp, argc);
19363 if (thisarg == NEON_ARG_MEMORY)
19364 {
19365 other_mode = insn_data[icode].operand[1 - opno].mode;
19366 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
19367 other_mode, type_mode);
19368 }
19369 op[argc] = expand_normal (arg[argc]);
19370
19371 switch (thisarg)
19372 {
19373 case NEON_ARG_COPY_TO_REG:
19374 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19375 if (!(*insn_data[icode].operand[opno].predicate)
19376 (op[argc], mode[argc]))
19377 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
19378 break;
19379
19380 case NEON_ARG_CONSTANT:
19381 /* FIXME: This error message is somewhat unhelpful. */
19382 if (!(*insn_data[icode].operand[opno].predicate)
19383 (op[argc], mode[argc]))
19384 error ("argument must be a constant");
19385 break;
19386
19387 case NEON_ARG_MEMORY:
19388 gcc_assert (MEM_P (op[argc]));
19389 PUT_MODE (op[argc], mode[argc]);
19390 /* ??? arm_neon.h uses the same built-in functions for signed
19391 and unsigned accesses, casting where necessary. This isn't
19392 alias safe. */
19393 set_mem_alias_set (op[argc], 0);
19394 if (!(*insn_data[icode].operand[opno].predicate)
19395 (op[argc], mode[argc]))
19396 op[argc] = (replace_equiv_address
19397 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
19398 break;
19399
19400 case NEON_ARG_STOP:
19401 gcc_unreachable ();
19402 }
19403
19404 argc++;
19405 }
19406 }
19407
19408 va_end (ap);
19409
19410 if (have_retval)
19411 switch (argc)
19412 {
19413 case 1:
19414 pat = GEN_FCN (icode) (target, op[0]);
19415 break;
19416
19417 case 2:
19418 pat = GEN_FCN (icode) (target, op[0], op[1]);
19419 break;
19420
19421 case 3:
19422 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
19423 break;
19424
19425 case 4:
19426 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
19427 break;
19428
19429 case 5:
19430 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
19431 break;
19432
19433 default:
19434 gcc_unreachable ();
19435 }
19436 else
19437 switch (argc)
19438 {
19439 case 1:
19440 pat = GEN_FCN (icode) (op[0]);
19441 break;
19442
19443 case 2:
19444 pat = GEN_FCN (icode) (op[0], op[1]);
19445 break;
19446
19447 case 3:
19448 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
19449 break;
19450
19451 case 4:
19452 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
19453 break;
19454
19455 case 5:
19456 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
19457 break;
19458
19459 default:
19460 gcc_unreachable ();
19461 }
19462
19463 if (!pat)
19464 return 0;
19465
19466 emit_insn (pat);
19467
19468 return target;
19469 }
19470
19471 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19472 constants defined per-instruction or per instruction-variant. Instead, the
19473 required info is looked up in the table neon_builtin_data. */
19474 static rtx
19475 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
19476 {
19477 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
19478 neon_itype itype = d->itype;
19479 enum insn_code icode = d->code;
19480 neon_builtin_type_mode type_mode = d->mode;
19481
19482 switch (itype)
19483 {
19484 case NEON_UNOP:
19485 case NEON_CONVERT:
19486 case NEON_DUPLANE:
19487 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19488 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19489
19490 case NEON_BINOP:
19491 case NEON_SETLANE:
19492 case NEON_SCALARMUL:
19493 case NEON_SCALARMULL:
19494 case NEON_SCALARMULH:
19495 case NEON_SHIFTINSERT:
19496 case NEON_LOGICBINOP:
19497 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19498 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19499 NEON_ARG_STOP);
19500
19501 case NEON_TERNOP:
19502 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19503 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19504 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19505
19506 case NEON_GETLANE:
19507 case NEON_FIXCONV:
19508 case NEON_SHIFTIMM:
19509 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19510 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
19511 NEON_ARG_STOP);
19512
19513 case NEON_CREATE:
19514 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19515 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19516
19517 case NEON_DUP:
19518 case NEON_SPLIT:
19519 case NEON_REINTERP:
19520 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19521 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19522
19523 case NEON_COMBINE:
19524 case NEON_VTBL:
19525 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19526 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19527
19528 case NEON_RESULTPAIR:
19529 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19530 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19531 NEON_ARG_STOP);
19532
19533 case NEON_LANEMUL:
19534 case NEON_LANEMULL:
19535 case NEON_LANEMULH:
19536 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19537 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19538 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19539
19540 case NEON_LANEMAC:
19541 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19542 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19543 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
19544
19545 case NEON_SHIFTACC:
19546 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19547 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19548 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19549
19550 case NEON_SCALARMAC:
19551 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19552 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19553 NEON_ARG_CONSTANT, NEON_ARG_STOP);
19554
19555 case NEON_SELECT:
19556 case NEON_VTBX:
19557 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19558 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
19559 NEON_ARG_STOP);
19560
19561 case NEON_LOAD1:
19562 case NEON_LOADSTRUCT:
19563 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19564 NEON_ARG_MEMORY, NEON_ARG_STOP);
19565
19566 case NEON_LOAD1LANE:
19567 case NEON_LOADSTRUCTLANE:
19568 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
19569 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19570 NEON_ARG_STOP);
19571
19572 case NEON_STORE1:
19573 case NEON_STORESTRUCT:
19574 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19575 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
19576
19577 case NEON_STORE1LANE:
19578 case NEON_STORESTRUCTLANE:
19579 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
19580 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
19581 NEON_ARG_STOP);
19582 }
19583
19584 gcc_unreachable ();
19585 }
19586
19587 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19588 void
19589 neon_reinterpret (rtx dest, rtx src)
19590 {
19591 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
19592 }
19593
19594 /* Emit code to place a Neon pair result in two consecutive memory locations,
19595 computing the two halves into equal-mode temporary registers first. */
19596 void
19597 neon_emit_pair_result_insn (enum machine_mode mode,
19598 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
19599 rtx op1, rtx op2)
19600 {
19601 rtx mem = gen_rtx_MEM (mode, destaddr);
19602 rtx tmp1 = gen_reg_rtx (mode);
19603 rtx tmp2 = gen_reg_rtx (mode);
19604
19605 emit_insn (intfn (tmp1, op1, op2, tmp2));
19606
19607 emit_move_insn (mem, tmp1);
19608 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
19609 emit_move_insn (mem, tmp2);
19610 }
19611
19612 /* Set up operands for a register copy from src to dest, taking care not to
19613 clobber registers in the process.
19614 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19615 be called with a large N, so that should be OK. */
19616
19617 void
19618 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
19619 {
19620 unsigned int copied = 0, opctr = 0;
19621 unsigned int done = (1 << count) - 1;
19622 unsigned int i, j;
19623
19624 while (copied != done)
19625 {
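/* In each pass, schedule every copy whose destination does not overlap the
   source of a copy that has not been emitted yet; repeat until all COUNT
   copies have been ordered.  */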
19626 for (i = 0; i < count; i++)
19627 {
19628 int good = 1;
19629
19630 for (j = 0; good && j < count; j++)
19631 if (i != j && (copied & (1 << j)) == 0
19632 && reg_overlap_mentioned_p (src[j], dest[i]))
19633 good = 0;
19634
19635 if (good)
19636 {
19637 operands[opctr++] = dest[i];
19638 operands[opctr++] = src[i];
19639 copied |= 1 << i;
19640 }
19641 }
19642 }
19643
19644 gcc_assert (opctr == count * 2);
19645 }
19646
19647 /* Expand an expression EXP that calls a built-in function,
19648 with result going to TARGET if that's convenient
19649 (and in mode MODE if that's convenient).
19650 SUBTARGET may be used as the target for computing one of EXP's operands.
19651 IGNORE is nonzero if the value is to be ignored. */
19652
19653 static rtx
19654 arm_expand_builtin (tree exp,
19655 rtx target,
19656 rtx subtarget ATTRIBUTE_UNUSED,
19657 enum machine_mode mode ATTRIBUTE_UNUSED,
19658 int ignore ATTRIBUTE_UNUSED)
19659 {
19660 const struct builtin_description * d;
19661 enum insn_code icode;
19662 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19663 tree arg0;
19664 tree arg1;
19665 tree arg2;
19666 rtx op0;
19667 rtx op1;
19668 rtx op2;
19669 rtx pat;
19670 int fcode = DECL_FUNCTION_CODE (fndecl);
19671 size_t i;
19672 enum machine_mode tmode;
19673 enum machine_mode mode0;
19674 enum machine_mode mode1;
19675 enum machine_mode mode2;
19676
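/* Function codes at or above ARM_BUILTIN_NEON_BASE belong to the Neon
   builtins created by arm_init_neon_builtins; everything below is handled
   by the iWMMXt and TLS cases and the lookup tables that follow.  */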
19677 if (fcode >= ARM_BUILTIN_NEON_BASE)
19678 return arm_expand_neon_builtin (fcode, exp, target);
19679
19680 switch (fcode)
19681 {
19682 case ARM_BUILTIN_TEXTRMSB:
19683 case ARM_BUILTIN_TEXTRMUB:
19684 case ARM_BUILTIN_TEXTRMSH:
19685 case ARM_BUILTIN_TEXTRMUH:
19686 case ARM_BUILTIN_TEXTRMSW:
19687 case ARM_BUILTIN_TEXTRMUW:
19688 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
19689 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
19690 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
19691 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
19692 : CODE_FOR_iwmmxt_textrmw);
19693
19694 arg0 = CALL_EXPR_ARG (exp, 0);
19695 arg1 = CALL_EXPR_ARG (exp, 1);
19696 op0 = expand_normal (arg0);
19697 op1 = expand_normal (arg1);
19698 tmode = insn_data[icode].operand[0].mode;
19699 mode0 = insn_data[icode].operand[1].mode;
19700 mode1 = insn_data[icode].operand[2].mode;
19701
19702 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19703 op0 = copy_to_mode_reg (mode0, op0);
19704 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19705 {
19706 /* @@@ better error message */
19707 error ("selector must be an immediate");
19708 return gen_reg_rtx (tmode);
19709 }
19710 if (target == 0
19711 || GET_MODE (target) != tmode
19712 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19713 target = gen_reg_rtx (tmode);
19714 pat = GEN_FCN (icode) (target, op0, op1);
19715 if (! pat)
19716 return 0;
19717 emit_insn (pat);
19718 return target;
19719
19720 case ARM_BUILTIN_TINSRB:
19721 case ARM_BUILTIN_TINSRH:
19722 case ARM_BUILTIN_TINSRW:
19723 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
19724 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
19725 : CODE_FOR_iwmmxt_tinsrw);
19726 arg0 = CALL_EXPR_ARG (exp, 0);
19727 arg1 = CALL_EXPR_ARG (exp, 1);
19728 arg2 = CALL_EXPR_ARG (exp, 2);
19729 op0 = expand_normal (arg0);
19730 op1 = expand_normal (arg1);
19731 op2 = expand_normal (arg2);
19732 tmode = insn_data[icode].operand[0].mode;
19733 mode0 = insn_data[icode].operand[1].mode;
19734 mode1 = insn_data[icode].operand[2].mode;
19735 mode2 = insn_data[icode].operand[3].mode;
19736
19737 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19738 op0 = copy_to_mode_reg (mode0, op0);
19739 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19740 op1 = copy_to_mode_reg (mode1, op1);
19741 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19742 {
19743 /* @@@ better error message */
19744 error ("selector must be an immediate");
19745 return const0_rtx;
19746 }
19747 if (target == 0
19748 || GET_MODE (target) != tmode
19749 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19750 target = gen_reg_rtx (tmode);
19751 pat = GEN_FCN (icode) (target, op0, op1, op2);
19752 if (! pat)
19753 return 0;
19754 emit_insn (pat);
19755 return target;
19756
19757 case ARM_BUILTIN_SETWCX:
19758 arg0 = CALL_EXPR_ARG (exp, 0);
19759 arg1 = CALL_EXPR_ARG (exp, 1);
19760 op0 = force_reg (SImode, expand_normal (arg0));
19761 op1 = expand_normal (arg1);
19762 emit_insn (gen_iwmmxt_tmcr (op1, op0));
19763 return 0;
19764
19765 case ARM_BUILTIN_GETWCX:
19766 arg0 = CALL_EXPR_ARG (exp, 0);
19767 op0 = expand_normal (arg0);
19768 target = gen_reg_rtx (SImode);
19769 emit_insn (gen_iwmmxt_tmrc (target, op0));
19770 return target;
19771
19772 case ARM_BUILTIN_WSHUFH:
19773 icode = CODE_FOR_iwmmxt_wshufh;
19774 arg0 = CALL_EXPR_ARG (exp, 0);
19775 arg1 = CALL_EXPR_ARG (exp, 1);
19776 op0 = expand_normal (arg0);
19777 op1 = expand_normal (arg1);
19778 tmode = insn_data[icode].operand[0].mode;
19779 mode1 = insn_data[icode].operand[1].mode;
19780 mode2 = insn_data[icode].operand[2].mode;
19781
19782 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19783 op0 = copy_to_mode_reg (mode1, op0);
19784 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19785 {
19786 /* @@@ better error message */
19787 error ("mask must be an immediate");
19788 return const0_rtx;
19789 }
19790 if (target == 0
19791 || GET_MODE (target) != tmode
19792 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19793 target = gen_reg_rtx (tmode);
19794 pat = GEN_FCN (icode) (target, op0, op1);
19795 if (! pat)
19796 return 0;
19797 emit_insn (pat);
19798 return target;
19799
19800 case ARM_BUILTIN_WSADB:
19801 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
19802 case ARM_BUILTIN_WSADH:
19803 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
19804 case ARM_BUILTIN_WSADBZ:
19805 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
19806 case ARM_BUILTIN_WSADHZ:
19807 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
19808
19809 /* Several three-argument builtins. */
19810 case ARM_BUILTIN_WMACS:
19811 case ARM_BUILTIN_WMACU:
19812 case ARM_BUILTIN_WALIGN:
19813 case ARM_BUILTIN_TMIA:
19814 case ARM_BUILTIN_TMIAPH:
19815 case ARM_BUILTIN_TMIATT:
19816 case ARM_BUILTIN_TMIATB:
19817 case ARM_BUILTIN_TMIABT:
19818 case ARM_BUILTIN_TMIABB:
19819 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
19820 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
19821 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
19822 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
19823 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
19824 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
19825 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
19826 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
19827 : CODE_FOR_iwmmxt_walign);
19828 arg0 = CALL_EXPR_ARG (exp, 0);
19829 arg1 = CALL_EXPR_ARG (exp, 1);
19830 arg2 = CALL_EXPR_ARG (exp, 2);
19831 op0 = expand_normal (arg0);
19832 op1 = expand_normal (arg1);
19833 op2 = expand_normal (arg2);
19834 tmode = insn_data[icode].operand[0].mode;
19835 mode0 = insn_data[icode].operand[1].mode;
19836 mode1 = insn_data[icode].operand[2].mode;
19837 mode2 = insn_data[icode].operand[3].mode;
19838
19839 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19840 op0 = copy_to_mode_reg (mode0, op0);
19841 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19842 op1 = copy_to_mode_reg (mode1, op1);
19843 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19844 op2 = copy_to_mode_reg (mode2, op2);
19845 if (target == 0
19846 || GET_MODE (target) != tmode
19847 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19848 target = gen_reg_rtx (tmode);
19849 pat = GEN_FCN (icode) (target, op0, op1, op2);
19850 if (! pat)
19851 return 0;
19852 emit_insn (pat);
19853 return target;
19854
19855 case ARM_BUILTIN_WZERO:
19856 target = gen_reg_rtx (DImode);
19857 emit_insn (gen_iwmmxt_clrdi (target));
19858 return target;
19859
19860 case ARM_BUILTIN_THREAD_POINTER:
19861 return arm_load_tp (target);
19862
19863 default:
19864 break;
19865 }
19866
19867 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19868 if (d->code == (const enum arm_builtins) fcode)
19869 return arm_expand_binop_builtin (d->icode, exp, target);
19870
19871 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19872 if (d->code == (const enum arm_builtins) fcode)
19873 return arm_expand_unop_builtin (d->icode, exp, target, 0);
19874
19875 /* @@@ Should really do something sensible here. */
19876 return NULL_RTX;
19877 }
19878 \f
19879 /* Return the number (counting from 0) of
19880 the least significant set bit in MASK. */
19881
19882 inline static int
19883 number_of_first_bit_set (unsigned mask)
19884 {
19885 int bit;
19886
19887 for (bit = 0;
19888 (mask & (1 << bit)) == 0;
19889 ++bit)
19890 continue;
19891
19892 return bit;
19893 }
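/* Worked example (editorial illustration, not part of the original sources):
   for MASK == 0x28 (binary 101000) the loop above tests bits 0, 1 and 2,
   finds bit 3 set and returns 3.  Since callers only pass a nonzero mask,
   an equivalent branch-free sketch on a GCC host compiler would be
   __builtin_ctz (mask), which likewise counts the trailing zero bits.  */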
19894
19895 /* Emit code to push or pop registers to or from the stack. F is the
19896 assembly file. MASK is the registers to push or pop. PUSH is
19897 nonzero if we should push, and zero if we should pop. For debugging
19898 output, if pushing, adjust CFA_OFFSET by the amount of space added
19899 to the stack. REAL_REGS should have the same number of bits set as
19900 MASK, and will be used instead (in the same order) to describe which
19901 registers were saved - this is used to mark the save slots when we
19902 push high registers after moving them to low registers. */
19903 static void
19904 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
19905 unsigned long real_regs)
19906 {
19907 int regno;
19908 int lo_mask = mask & 0xFF;
19909 int pushed_words = 0;
19910
19911 gcc_assert (mask);
19912
19913 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
19914 {
19915 /* Special case.  Do not generate a POP PC statement here; do it in
19916 thumb_exit ().  */
19917 thumb_exit (f, -1);
19918 return;
19919 }
19920
19921 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
19922 {
19923 fprintf (f, "\t.save\t{");
19924 for (regno = 0; regno < 15; regno++)
19925 {
19926 if (real_regs & (1 << regno))
19927 {
19928 if (real_regs & ((1 << regno) - 1))
19929 fprintf (f, ", ");
19930 asm_fprintf (f, "%r", regno);
19931 }
19932 }
19933 fprintf (f, "}\n");
19934 }
19935
19936 fprintf (f, "\t%s\t{", push ? "push" : "pop");
19937
19938 /* Look at the low registers first. */
19939 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
19940 {
19941 if (lo_mask & 1)
19942 {
19943 asm_fprintf (f, "%r", regno);
19944
19945 if ((lo_mask & ~1) != 0)
19946 fprintf (f, ", ");
19947
19948 pushed_words++;
19949 }
19950 }
19951
19952 if (push && (mask & (1 << LR_REGNUM)))
19953 {
19954 /* Catch pushing the LR. */
19955 if (mask & 0xFF)
19956 fprintf (f, ", ");
19957
19958 asm_fprintf (f, "%r", LR_REGNUM);
19959
19960 pushed_words++;
19961 }
19962 else if (!push && (mask & (1 << PC_REGNUM)))
19963 {
19964 /* Catch popping the PC. */
19965 if (TARGET_INTERWORK || TARGET_BACKTRACE
19966 || crtl->calls_eh_return)
19967 {
19968 /* The PC is never popped directly; instead
19969 it is popped into r3 and then BX is used. */
19970 fprintf (f, "}\n");
19971
19972 thumb_exit (f, -1);
19973
19974 return;
19975 }
19976 else
19977 {
19978 if (mask & 0xFF)
19979 fprintf (f, ", ");
19980
19981 asm_fprintf (f, "%r", PC_REGNUM);
19982 }
19983 }
19984
19985 fprintf (f, "}\n");
19986
19987 if (push && pushed_words && dwarf2out_do_frame ())
19988 {
19989 char *l = dwarf2out_cfi_label (false);
19990 int pushed_mask = real_regs;
19991
19992 *cfa_offset += pushed_words * 4;
19993 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
19994
19995 pushed_words = 0;
19996 pushed_mask = real_regs;
19997 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
19998 {
19999 if (pushed_mask & 1)
20000 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
20001 }
20002 }
20003 }
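/* Worked example (editorial illustration): for MASK == (1 << 4) | (1 << 5)
   | (1 << LR_REGNUM) and PUSH == 1 on an EABI unwinding target, the code
   above emits

	.save	{r4, r5, lr}
	push	{r4, r5, lr}

   and, when dwarf frame output is enabled, advances *CFA_OFFSET by 12
   (three words) before recording the save slots taken from REAL_REGS.  */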
20004
20005 /* Generate code to return from a thumb function.
20006 If 'reg_containing_return_addr' is -1, then the return address is
20007 actually on the stack, at the stack pointer. */
20008 static void
20009 thumb_exit (FILE *f, int reg_containing_return_addr)
20010 {
20011 unsigned regs_available_for_popping;
20012 unsigned regs_to_pop;
20013 int pops_needed;
20014 unsigned available;
20015 unsigned required;
20016 int mode;
20017 int size;
20018 int restore_a4 = FALSE;
20019
20020 /* Compute the registers we need to pop. */
20021 regs_to_pop = 0;
20022 pops_needed = 0;
20023
20024 if (reg_containing_return_addr == -1)
20025 {
20026 regs_to_pop |= 1 << LR_REGNUM;
20027 ++pops_needed;
20028 }
20029
20030 if (TARGET_BACKTRACE)
20031 {
20032 /* Restore the (ARM) frame pointer and stack pointer. */
20033 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
20034 pops_needed += 2;
20035 }
20036
20037 /* If there is nothing to pop then just emit the BX instruction and
20038 return. */
20039 if (pops_needed == 0)
20040 {
20041 if (crtl->calls_eh_return)
20042 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20043
20044 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20045 return;
20046 }
20047 /* Otherwise if we are not supporting interworking and we have not created
20048 a backtrace structure and the function was not entered in ARM mode then
20049 just pop the return address straight into the PC. */
20050 else if (!TARGET_INTERWORK
20051 && !TARGET_BACKTRACE
20052 && !is_called_in_ARM_mode (current_function_decl)
20053 && !crtl->calls_eh_return)
20054 {
20055 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
20056 return;
20057 }
20058
20059 /* Find out how many of the (return) argument registers we can corrupt. */
20060 regs_available_for_popping = 0;
20061
20062 /* If returning via __builtin_eh_return, the bottom three registers
20063 all contain information needed for the return. */
20064 if (crtl->calls_eh_return)
20065 size = 12;
20066 else
20067 {
20068 /* We can deduce the registers used from the function's
20069 return value.  This is more reliable than examining
20070 df_regs_ever_live_p () because that will be set if the register is
20071 ever used in the function, not just if the register is used
20072 to hold a return value. */
20073
20074 if (crtl->return_rtx != 0)
20075 mode = GET_MODE (crtl->return_rtx);
20076 else
20077 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20078
20079 size = GET_MODE_SIZE (mode);
20080
20081 if (size == 0)
20082 {
20083 /* In a void function we can use any argument register.
20084 In a function that returns a structure on the stack
20085 we can use the second and third argument registers. */
20086 if (mode == VOIDmode)
20087 regs_available_for_popping =
20088 (1 << ARG_REGISTER (1))
20089 | (1 << ARG_REGISTER (2))
20090 | (1 << ARG_REGISTER (3));
20091 else
20092 regs_available_for_popping =
20093 (1 << ARG_REGISTER (2))
20094 | (1 << ARG_REGISTER (3));
20095 }
20096 else if (size <= 4)
20097 regs_available_for_popping =
20098 (1 << ARG_REGISTER (2))
20099 | (1 << ARG_REGISTER (3));
20100 else if (size <= 8)
20101 regs_available_for_popping =
20102 (1 << ARG_REGISTER (3));
20103 }
20104
20105 /* Match registers to be popped with registers into which we pop them. */
20106 for (available = regs_available_for_popping,
20107 required = regs_to_pop;
20108 required != 0 && available != 0;
20109 available &= ~(available & - available),
20110 required &= ~(required & - required))
20111 -- pops_needed;
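/* Editorial note: X & -X isolates the lowest set bit of X, so
   "X &= ~(X & -X)" clears it.  Each iteration above therefore pairs the
   lowest remaining register to pop with the lowest remaining register it
   can be popped into; e.g. AVAILABLE == 0b1100 and REQUIRED == 0b10
   become 0b1000 and 0 after one iteration, with POPS_NEEDED decremented
   once.  */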
20112
20113 /* If we have any popping registers left over, remove them. */
20114 if (available > 0)
20115 regs_available_for_popping &= ~available;
20116
20117 /* Otherwise if we need another popping register we can use
20118 the fourth argument register. */
20119 else if (pops_needed)
20120 {
20121 /* If we have not found any free argument registers and
20122 reg a4 contains the return address, we must move it. */
20123 if (regs_available_for_popping == 0
20124 && reg_containing_return_addr == LAST_ARG_REGNUM)
20125 {
20126 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20127 reg_containing_return_addr = LR_REGNUM;
20128 }
20129 else if (size > 12)
20130 {
20131 /* Register a4 is being used to hold part of the return value,
20132 but we have dire need of a free, low register. */
20133 restore_a4 = TRUE;
20134
20135 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
20136 }
20137
20138 if (reg_containing_return_addr != LAST_ARG_REGNUM)
20139 {
20140 /* The fourth argument register is available. */
20141 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
20142
20143 --pops_needed;
20144 }
20145 }
20146
20147 /* Pop as many registers as we can. */
20148 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20149 regs_available_for_popping);
20150
20151 /* Process the registers we popped. */
20152 if (reg_containing_return_addr == -1)
20153 {
20154 /* The return address was popped into the lowest numbered register. */
20155 regs_to_pop &= ~(1 << LR_REGNUM);
20156
20157 reg_containing_return_addr =
20158 number_of_first_bit_set (regs_available_for_popping);
20159
20160 /* Remove this register from the mask of available registers, so that
20161 the return address will not be corrupted by further pops. */
20162 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
20163 }
20164
20165 /* If we popped other registers then handle them here. */
20166 if (regs_available_for_popping)
20167 {
20168 int frame_pointer;
20169
20170 /* Work out which register currently contains the frame pointer. */
20171 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
20172
20173 /* Move it into the correct place. */
20174 asm_fprintf (f, "\tmov\t%r, %r\n",
20175 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
20176
20177 /* (Temporarily) remove it from the mask of popped registers. */
20178 regs_available_for_popping &= ~(1 << frame_pointer);
20179 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
20180
20181 if (regs_available_for_popping)
20182 {
20183 int stack_pointer;
20184
20185 /* We popped the stack pointer as well;
20186 find the register that contains it. */
20187 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
20188
20189 /* Move it into the stack register. */
20190 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
20191
20192 /* At this point we have popped all necessary registers, so
20193 do not worry about restoring regs_available_for_popping
20194 to its correct value:
20195
20196 assert (pops_needed == 0)
20197 assert (regs_available_for_popping == (1 << frame_pointer))
20198 assert (regs_to_pop == (1 << STACK_POINTER)) */
20199 }
20200 else
20201 {
20202 /* Since we have just moved the popped value into the frame
20203 pointer, the popping register is available for reuse, and
20204 we know that we still have the stack pointer left to pop. */
20205 regs_available_for_popping |= (1 << frame_pointer);
20206 }
20207 }
20208
20209 /* If we still have registers left on the stack, but we no longer have
20210 any registers into which we can pop them, then we must move the return
20211 address into the link register and make available the register that
20212 contained it. */
20213 if (regs_available_for_popping == 0 && pops_needed > 0)
20214 {
20215 regs_available_for_popping |= 1 << reg_containing_return_addr;
20216
20217 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
20218 reg_containing_return_addr);
20219
20220 reg_containing_return_addr = LR_REGNUM;
20221 }
20222
20223 /* If we have registers left on the stack then pop some more.
20224 We know that at most we will want to pop FP and SP. */
20225 if (pops_needed > 0)
20226 {
20227 int popped_into;
20228 int move_to;
20229
20230 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20231 regs_available_for_popping);
20232
20233 /* We have popped either FP or SP.
20234 Move whichever one it is into the correct register. */
20235 popped_into = number_of_first_bit_set (regs_available_for_popping);
20236 move_to = number_of_first_bit_set (regs_to_pop);
20237
20238 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
20239
20240 regs_to_pop &= ~(1 << move_to);
20241
20242 --pops_needed;
20243 }
20244
20245 /* If we still have not popped everything then we must have only
20246 had one register available to us and we are now popping the SP. */
20247 if (pops_needed > 0)
20248 {
20249 int popped_into;
20250
20251 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
20252 regs_available_for_popping);
20253
20254 popped_into = number_of_first_bit_set (regs_available_for_popping);
20255
20256 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
20257 /*
20258 assert (regs_to_pop == (1 << STACK_POINTER))
20259 assert (pops_needed == 1)
20260 */
20261 }
20262
20263 /* If necessary restore the a4 register. */
20264 if (restore_a4)
20265 {
20266 if (reg_containing_return_addr != LR_REGNUM)
20267 {
20268 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
20269 reg_containing_return_addr = LR_REGNUM;
20270 }
20271
20272 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
20273 }
20274
20275 if (crtl->calls_eh_return)
20276 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
20277
20278 /* Return to caller. */
20279 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
20280 }
20281 \f
20282 /* Scan INSN just before assembler is output for it.
20283 For Thumb-1, we track the status of the condition codes; this
20284 information is used in the cbranchsi4_insn pattern. */
20285 void
20286 thumb1_final_prescan_insn (rtx insn)
20287 {
20288 if (flag_print_asm_name)
20289 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
20290 INSN_ADDRESSES (INSN_UID (insn)));
20291 /* Don't overwrite the previous setter when we get to a cbranch. */
20292 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20293 {
20294 enum attr_conds conds;
20295
20296 if (cfun->machine->thumb1_cc_insn)
20297 {
20298 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20299 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20300 CC_STATUS_INIT;
20301 }
20302 conds = get_attr_conds (insn);
20303 if (conds == CONDS_SET)
20304 {
20305 rtx set = single_set (insn);
20306 cfun->machine->thumb1_cc_insn = insn;
20307 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20308 cfun->machine->thumb1_cc_op1 = const0_rtx;
20309 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20310 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20311 {
20312 rtx src1 = XEXP (SET_SRC (set), 1);
20313 if (src1 == const0_rtx)
20314 cfun->machine->thumb1_cc_mode = CCmode;
20315 }
20316 }
20317 else if (conds != CONDS_NOCOND)
20318 cfun->machine->thumb1_cc_insn = NULL_RTX;
20319 }
20320 }
20321
20322 int
20323 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
20324 {
20325 unsigned HOST_WIDE_INT mask = 0xff;
20326 int i;
20327
20328 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
20329 if (val == 0) /* XXX */
20330 return 0;
20331
20332 for (i = 0; i < 25; i++)
20333 if ((val & (mask << i)) == val)
20334 return 1;
20335
20336 return 0;
20337 }
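/* Worked example (editorial illustration): VAL == 0x1FE00 is 0xFF shifted
   left by 9, so the loop above finds (0xFF << 9) == VAL at I == 9 and
   returns 1.  VAL == 0x101 spans nine bits and never matches a shifted
   8-bit mask, so the function returns 0.  The loop bound of 25 allows
   shifts up to 24, the last position at which an 8-bit mask still fits
   within 32 bits.  */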
20338
20339 /* Returns nonzero if the current function contains,
20340 or might contain, a far jump. */
20341 static int
20342 thumb_far_jump_used_p (void)
20343 {
20344 rtx insn;
20345
20346 /* This test is only important for leaf functions. */
20347 /* assert (!leaf_function_p ()); */
20348
20349 /* If we have already decided that far jumps may be used,
20350 do not bother checking again, and always return true even if
20351 it turns out that they are not being used. Once we have made
20352 the decision that far jumps are present (and that hence the link
20353 register will be pushed onto the stack) we cannot go back on it. */
20354 if (cfun->machine->far_jump_used)
20355 return 1;
20356
20357 /* If this function is not being called from the prologue/epilogue
20358 generation code then it must be being called from the
20359 INITIAL_ELIMINATION_OFFSET macro. */
20360 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
20361 {
20362 /* In this case we know that we are being asked about the elimination
20363 of the arg pointer register. If that register is not being used,
20364 then there are no arguments on the stack, and we do not have to
20365 worry that a far jump might force the prologue to push the link
20366 register, changing the stack offsets. In this case we can just
20367 return false, since the presence of far jumps in the function will
20368 not affect stack offsets.
20369
20370 If the arg pointer is live (or if it was live, but has now been
20371 eliminated and so set to dead) then we do have to test to see if
20372 the function might contain a far jump. This test can lead to some
20373 false positives, since before reload is completed, the length of
20374 branch instructions is not known, so gcc defaults to returning their
20375 longest length, which in turn sets the far jump attribute to true.
20376
20377 A false positive will not result in bad code being generated, but it
20378 will result in a needless push and pop of the link register. We
20379 hope that this does not occur too often.
20380
20381 If we need doubleword stack alignment this could affect the other
20382 elimination offsets so we can't risk getting it wrong. */
20383 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
20384 cfun->machine->arg_pointer_live = 1;
20385 else if (!cfun->machine->arg_pointer_live)
20386 return 0;
20387 }
20388
20389 /* Check to see if the function contains a branch
20390 insn with the far jump attribute set. */
20391 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20392 {
20393 if (GET_CODE (insn) == JUMP_INSN
20394 /* Ignore tablejump patterns. */
20395 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20396 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
20397 && get_attr_far_jump (insn) == FAR_JUMP_YES
20398 )
20399 {
20400 /* Record the fact that we have decided that
20401 the function does use far jumps. */
20402 cfun->machine->far_jump_used = 1;
20403 return 1;
20404 }
20405 }
20406
20407 return 0;
20408 }
20409
20410 /* Return nonzero if FUNC must be entered in ARM mode. */
20411 int
20412 is_called_in_ARM_mode (tree func)
20413 {
20414 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
20415
20416 /* Ignore the problem of functions whose address is taken. */
20417 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
20418 return TRUE;
20419
20420 #ifdef ARM_PE
20421 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
20422 #else
20423 return FALSE;
20424 #endif
20425 }
20426
20427 /* Given the stack offsets and register mask in OFFSETS, decide how
20428 many additional registers to push instead of subtracting a constant
20429 from SP. For epilogues the principle is the same except we use pop.
20430 FOR_PROLOGUE indicates which we're generating. */
20431 static int
20432 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20433 {
20434 HOST_WIDE_INT amount;
20435 unsigned long live_regs_mask = offsets->saved_regs_mask;
20436 /* Extract a mask of the ones we can give to the Thumb's push/pop
20437 instruction. */
20438 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20439 /* Then count how many other high registers will need to be pushed. */
20440 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20441 int n_free, reg_base;
20442
20443 if (!for_prologue && frame_pointer_needed)
20444 amount = offsets->locals_base - offsets->saved_regs;
20445 else
20446 amount = offsets->outgoing_args - offsets->saved_regs;
20447
20448 /* If the stack frame size is 512 exactly, we can save one load
20449 instruction, which should make this a win even when optimizing
20450 for speed. */
20451 if (!optimize_size && amount != 512)
20452 return 0;
20453
20454 /* Can't do this if there are high registers to push. */
20455 if (high_regs_pushed != 0)
20456 return 0;
20457
20458 /* Shouldn't do it in the prologue if no registers would normally
20459 be pushed at all. In the epilogue, also allow it if we'll have
20460 a pop insn for the PC. */
20461 if (l_mask == 0
20462 && (for_prologue
20463 || TARGET_BACKTRACE
20464 || (live_regs_mask & 1 << LR_REGNUM) == 0
20465 || TARGET_INTERWORK
20466 || crtl->args.pretend_args_size != 0))
20467 return 0;
20468
20469 /* Don't do this if thumb_expand_prologue wants to emit instructions
20470 between the push and the stack frame allocation. */
20471 if (for_prologue
20472 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20473 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20474 return 0;
20475
20476 reg_base = 0;
20477 n_free = 0;
20478 if (!for_prologue)
20479 {
20480 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20481 live_regs_mask >>= reg_base;
20482 }
20483
20484 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20485 && (for_prologue || call_used_regs[reg_base + n_free]))
20486 {
20487 live_regs_mask >>= 1;
20488 n_free++;
20489 }
20490
20491 if (n_free == 0)
20492 return 0;
20493 gcc_assert (amount / 4 * 4 == amount);
20494
20495 if (amount >= 512 && (amount - n_free * 4) < 512)
20496 return (amount - 508) / 4;
20497 if (amount <= n_free * 4)
20498 return amount / 4;
20499 return 0;
20500 }
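/* Worked example (editorial illustration): with AMOUNT == 516 and two free
   low registers (N_FREE == 2), AMOUNT - N_FREE * 4 == 508 < 512, so the
   function returns (516 - 508) / 4 == 2: pushing two extra registers
   shrinks the remaining SP adjustment to 508, which fits in a single
   Thumb immediate.  */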
20501
20502 /* The bits which aren't usefully expanded as rtl. */
20503 const char *
20504 thumb_unexpanded_epilogue (void)
20505 {
20506 arm_stack_offsets *offsets;
20507 int regno;
20508 unsigned long live_regs_mask = 0;
20509 int high_regs_pushed = 0;
20510 int extra_pop;
20511 int had_to_push_lr;
20512 int size;
20513
20514 if (cfun->machine->return_used_this_function != 0)
20515 return "";
20516
20517 if (IS_NAKED (arm_current_func_type ()))
20518 return "";
20519
20520 offsets = arm_get_frame_offsets ();
20521 live_regs_mask = offsets->saved_regs_mask;
20522 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20523
20524 /* We can deduce the registers used from the function's return value.
20525 This is more reliable than examining df_regs_ever_live_p () because that
20526 will be set if the register is ever used in the function, not just if
20527 the register is used to hold a return value. */
20528 size = arm_size_return_regs ();
20529
20530 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20531 if (extra_pop > 0)
20532 {
20533 unsigned long extra_mask = (1 << extra_pop) - 1;
20534 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20535 }
20536
20537 /* The prolog may have pushed some high registers to use as
20538 work registers, e.g. the testsuite file:
20539 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20540 compiles to produce:
20541 push {r4, r5, r6, r7, lr}
20542 mov r7, r9
20543 mov r6, r8
20544 push {r6, r7}
20545 as part of the prolog. We have to undo that pushing here. */
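/* Editorial illustration of the undo sequence generated below for that
   example, assuming the return value needs at most 8 bytes so r2 and r3
   are free: the high registers are popped into low registers and then
   moved back up,

	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3

   before the remaining low registers and the return address are popped.  */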
20546
20547 if (high_regs_pushed)
20548 {
20549 unsigned long mask = live_regs_mask & 0xff;
20550 int next_hi_reg;
20551
20552 /* The available low registers depend on the size of the value we are
20553 returning. */
20554 if (size <= 12)
20555 mask |= 1 << 3;
20556 if (size <= 8)
20557 mask |= 1 << 2;
20558
20559 if (mask == 0)
20560 /* Oh dear! We have no low registers into which we can pop
20561 high registers! */
20562 internal_error
20563 ("no low registers available for popping high registers");
20564
20565 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
20566 if (live_regs_mask & (1 << next_hi_reg))
20567 break;
20568
20569 while (high_regs_pushed)
20570 {
20571 /* Find lo register(s) into which the high register(s) can
20572 be popped. */
20573 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20574 {
20575 if (mask & (1 << regno))
20576 high_regs_pushed--;
20577 if (high_regs_pushed == 0)
20578 break;
20579 }
20580
20581 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
20582
20583 /* Pop the values into the low register(s). */
20584 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
20585
20586 /* Move the value(s) into the high registers. */
20587 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
20588 {
20589 if (mask & (1 << regno))
20590 {
20591 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
20592 regno);
20593
20594 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
20595 if (live_regs_mask & (1 << next_hi_reg))
20596 break;
20597 }
20598 }
20599 }
20600 live_regs_mask &= ~0x0f00;
20601 }
20602
20603 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
20604 live_regs_mask &= 0xff;
20605
20606 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
20607 {
20608 /* Pop the return address into the PC. */
20609 if (had_to_push_lr)
20610 live_regs_mask |= 1 << PC_REGNUM;
20611
20612 /* Either no argument registers were pushed or a backtrace
20613 structure was created which includes an adjusted stack
20614 pointer, so just pop everything. */
20615 if (live_regs_mask)
20616 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20617 live_regs_mask);
20618
20619 /* We have either just popped the return address into the
20620 PC or it was kept in LR for the entire function.
20621 Note that thumb_pushpop has already called thumb_exit if the
20622 PC was in the list. */
20623 if (!had_to_push_lr)
20624 thumb_exit (asm_out_file, LR_REGNUM);
20625 }
20626 else
20627 {
20628 /* Pop everything but the return address. */
20629 if (live_regs_mask)
20630 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
20631 live_regs_mask);
20632
20633 if (had_to_push_lr)
20634 {
20635 if (size > 12)
20636 {
20637 /* We have no free low regs, so save one. */
20638 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
20639 LAST_ARG_REGNUM);
20640 }
20641
20642 /* Get the return address into a temporary register. */
20643 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
20644 1 << LAST_ARG_REGNUM);
20645
20646 if (size > 12)
20647 {
20648 /* Move the return address to lr. */
20649 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
20650 LAST_ARG_REGNUM);
20651 /* Restore the low register. */
20652 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
20653 IP_REGNUM);
20654 regno = LR_REGNUM;
20655 }
20656 else
20657 regno = LAST_ARG_REGNUM;
20658 }
20659 else
20660 regno = LR_REGNUM;
20661
20662 /* Remove the argument registers that were pushed onto the stack. */
20663 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
20664 SP_REGNUM, SP_REGNUM,
20665 crtl->args.pretend_args_size);
20666
20667 thumb_exit (asm_out_file, regno);
20668 }
20669
20670 return "";
20671 }
20672
20673 /* Functions to save and restore machine-specific function data. */
20674 static struct machine_function *
20675 arm_init_machine_status (void)
20676 {
20677 struct machine_function *machine;
20678 machine = ggc_alloc_cleared_machine_function ();
20679
20680 #if ARM_FT_UNKNOWN != 0
20681 machine->func_type = ARM_FT_UNKNOWN;
20682 #endif
20683 return machine;
20684 }
20685
20686 /* Return an RTX indicating where the return address to the
20687 calling function can be found. */
20688 rtx
20689 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
20690 {
20691 if (count != 0)
20692 return NULL_RTX;
20693
20694 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
20695 }
20696
20697 /* Do anything needed before RTL is emitted for each function. */
20698 void
20699 arm_init_expanders (void)
20700 {
20701 /* Arrange to initialize and mark the machine per-function status. */
20702 init_machine_status = arm_init_machine_status;
20703
20704 /* This is to stop the combine pass optimizing away the alignment
20705 adjustment of va_arg. */
20706 /* ??? It is claimed that this should not be necessary. */
20707 if (cfun)
20708 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
20709 }
20710
20711
20712 /* Like arm_compute_initial_elimination_offset.  Simpler because there
20713 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20714 to point at the base of the local variables after static stack
20715 space for a function has been allocated. */
20716
20717 HOST_WIDE_INT
20718 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20719 {
20720 arm_stack_offsets *offsets;
20721
20722 offsets = arm_get_frame_offsets ();
20723
20724 switch (from)
20725 {
20726 case ARG_POINTER_REGNUM:
20727 switch (to)
20728 {
20729 case STACK_POINTER_REGNUM:
20730 return offsets->outgoing_args - offsets->saved_args;
20731
20732 case FRAME_POINTER_REGNUM:
20733 return offsets->soft_frame - offsets->saved_args;
20734
20735 case ARM_HARD_FRAME_POINTER_REGNUM:
20736 return offsets->saved_regs - offsets->saved_args;
20737
20738 case THUMB_HARD_FRAME_POINTER_REGNUM:
20739 return offsets->locals_base - offsets->saved_args;
20740
20741 default:
20742 gcc_unreachable ();
20743 }
20744 break;
20745
20746 case FRAME_POINTER_REGNUM:
20747 switch (to)
20748 {
20749 case STACK_POINTER_REGNUM:
20750 return offsets->outgoing_args - offsets->soft_frame;
20751
20752 case ARM_HARD_FRAME_POINTER_REGNUM:
20753 return offsets->saved_regs - offsets->soft_frame;
20754
20755 case THUMB_HARD_FRAME_POINTER_REGNUM:
20756 return offsets->locals_base - offsets->soft_frame;
20757
20758 default:
20759 gcc_unreachable ();
20760 }
20761 break;
20762
20763 default:
20764 gcc_unreachable ();
20765 }
20766 }
20767
20768 /* Generate the rest of a function's prologue. */
20769 void
20770 thumb1_expand_prologue (void)
20771 {
20772 rtx insn, dwarf;
20773
20774 HOST_WIDE_INT amount;
20775 arm_stack_offsets *offsets;
20776 unsigned long func_type;
20777 int regno;
20778 unsigned long live_regs_mask;
20779
20780 func_type = arm_current_func_type ();
20781
20782 /* Naked functions don't have prologues. */
20783 if (IS_NAKED (func_type))
20784 return;
20785
20786 if (IS_INTERRUPT (func_type))
20787 {
20788 error ("interrupt Service Routines cannot be coded in Thumb mode");
20789 return;
20790 }
20791
20792 offsets = arm_get_frame_offsets ();
20793 live_regs_mask = offsets->saved_regs_mask;
20794 /* Load the pic register before setting the frame pointer,
20795 so we can use r7 as a temporary work register. */
20796 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20797 arm_load_pic_register (live_regs_mask);
20798
20799 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
20800 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
20801 stack_pointer_rtx);
20802
20803 if (flag_stack_usage_info)
20804 current_function_static_stack_size
20805 = offsets->outgoing_args - offsets->saved_args;
20806
20807 amount = offsets->outgoing_args - offsets->saved_regs;
20808 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
20809 if (amount)
20810 {
20811 if (amount < 512)
20812 {
20813 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20814 GEN_INT (- amount)));
20815 RTX_FRAME_RELATED_P (insn) = 1;
20816 }
20817 else
20818 {
20819 rtx reg;
20820
20821 /* The stack decrement is too big for an immediate value in a single
20822 insn. In theory we could issue multiple subtracts, but after
20823 three of them it becomes more space efficient to place the full
20824 value in the constant pool and load into a register. (Also the
20825 ARM debugger really likes to see only one stack decrement per
20826 function). So instead we look for a scratch register into which
20827 we can load the decrement, and then we subtract this from the
20828 stack pointer. Unfortunately on the thumb the only available
20829 scratch registers are the argument registers, and we cannot use
20830 these as they may hold arguments to the function. Instead we
20831 attempt to locate a call preserved register which is used by this
20832 function. If we can find one, then we know that it will have
20833 been pushed at the start of the prologue and so we can corrupt
20834 it now. */
20835 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
20836 if (live_regs_mask & (1 << regno))
20837 break;
20838
20839 gcc_assert (regno <= LAST_LO_REGNUM);
20840
20841 reg = gen_rtx_REG (SImode, regno);
20842
20843 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
20844
20845 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
20846 stack_pointer_rtx, reg));
20847 RTX_FRAME_RELATED_P (insn) = 1;
20848 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20849 plus_constant (stack_pointer_rtx,
20850 -amount));
20851 RTX_FRAME_RELATED_P (dwarf) = 1;
20852 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20853 }
20854 }
20855
20856 if (frame_pointer_needed)
20857 thumb_set_frame_pointer (offsets);
20858
20859 /* If we are profiling, make sure no instructions are scheduled before
20860 the call to mcount. Similarly if the user has requested no
20861 scheduling in the prolog. Similarly if we want non-call exceptions
20862 using the EABI unwinder, to prevent faulting instructions from being
20863 swapped with a stack adjustment. */
20864 if (crtl->profile || !TARGET_SCHED_PROLOG
20865 || (arm_except_unwind_info (&global_options) == UI_TARGET
20866 && cfun->can_throw_non_call_exceptions))
20867 emit_insn (gen_blockage ());
20868
20869 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
20870 if (live_regs_mask & 0xff)
20871 cfun->machine->lr_save_eliminated = 0;
20872 }
20873
20874
20875 void
20876 thumb1_expand_epilogue (void)
20877 {
20878 HOST_WIDE_INT amount;
20879 arm_stack_offsets *offsets;
20880 int regno;
20881
20882 /* Naked functions don't have epilogues. */
20883 if (IS_NAKED (arm_current_func_type ()))
20884 return;
20885
20886 offsets = arm_get_frame_offsets ();
20887 amount = offsets->outgoing_args - offsets->saved_regs;
20888
20889 if (frame_pointer_needed)
20890 {
20891 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
20892 amount = offsets->locals_base - offsets->saved_regs;
20893 }
20894 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
20895
20896 gcc_assert (amount >= 0);
20897 if (amount)
20898 {
20899 if (amount < 512)
20900 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20901 GEN_INT (amount)));
20902 else
20903 {
20904 /* r3 is always free in the epilogue. */
20905 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
20906
20907 emit_insn (gen_movsi (reg, GEN_INT (amount)));
20908 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
20909 }
20910 }
20911
20912 /* Emit a USE (stack_pointer_rtx), so that
20913 the stack adjustment will not be deleted. */
20914 emit_insn (gen_prologue_use (stack_pointer_rtx));
20915
20916 if (crtl->profile || !TARGET_SCHED_PROLOG)
20917 emit_insn (gen_blockage ());
20918
20919 /* Emit a clobber for each insn that will be restored in the epilogue,
20920 so that flow2 will get register lifetimes correct. */
20921 for (regno = 0; regno < 13; regno++)
20922 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
20923 emit_clobber (gen_rtx_REG (SImode, regno));
20924
20925 if (! df_regs_ever_live_p (LR_REGNUM))
20926 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
20927 }
20928
20929 static void
20930 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
20931 {
20932 arm_stack_offsets *offsets;
20933 unsigned long live_regs_mask = 0;
20934 unsigned long l_mask;
20935 unsigned high_regs_pushed = 0;
20936 int cfa_offset = 0;
20937 int regno;
20938
20939 if (IS_NAKED (arm_current_func_type ()))
20940 return;
20941
20942 if (is_called_in_ARM_mode (current_function_decl))
20943 {
20944 const char * name;
20945
20946 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
20947 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
20948 == SYMBOL_REF);
20949 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
20950
20951 /* Generate code sequence to switch us into Thumb mode. */
20952 /* The .code 32 directive has already been emitted by
20953 ASM_DECLARE_FUNCTION_NAME. */
20954 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
20955 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
20956
20957 /* Generate a label, so that the debugger will notice the
20958 change in instruction sets. This label is also used by
20959 the assembler to bypass the ARM code when this function
20960 is called from a Thumb encoded function elsewhere in the
20961 same file. Hence the definition of STUB_NAME here must
20962 agree with the definition in gas/config/tc-arm.c. */
20963
20964 #define STUB_NAME ".real_start_of"
20965
20966 fprintf (f, "\t.code\t16\n");
20967 #ifdef ARM_PE
20968 if (arm_dllexport_name_p (name))
20969 name = arm_strip_name_encoding (name);
20970 #endif
20971 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
20972 fprintf (f, "\t.thumb_func\n");
20973 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
20974 }
20975
20976 if (crtl->args.pretend_args_size)
20977 {
20978 /* Output unwind directive for the stack adjustment. */
20979 if (arm_except_unwind_info (&global_options) == UI_TARGET)
20980 fprintf (f, "\t.pad #%d\n",
20981 crtl->args.pretend_args_size);
20982
20983 if (cfun->machine->uses_anonymous_args)
20984 {
20985 int num_pushes;
20986
20987 fprintf (f, "\tpush\t{");
20988
20989 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
20990
20991 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
20992 regno <= LAST_ARG_REGNUM;
20993 regno++)
20994 asm_fprintf (f, "%r%s", regno,
20995 regno == LAST_ARG_REGNUM ? "" : ", ");
20996
20997 fprintf (f, "}\n");
20998 }
20999 else
21000 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
21001 SP_REGNUM, SP_REGNUM,
21002 crtl->args.pretend_args_size);
21003
21004 /* We don't need to record the stores for unwinding (would it
21005 help the debugger any if we did?), but record the change in
21006 the stack pointer. */
21007 if (dwarf2out_do_frame ())
21008 {
21009 char *l = dwarf2out_cfi_label (false);
21010
21011 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
21012 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21013 }
21014 }
21015
21016 /* Get the registers we are going to push. */
21017 offsets = arm_get_frame_offsets ();
21018 live_regs_mask = offsets->saved_regs_mask;
21019 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21020 l_mask = live_regs_mask & 0x40ff;
21021 /* Then count how many other high registers will need to be pushed. */
21022 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21023
21024 if (TARGET_BACKTRACE)
21025 {
21026 unsigned offset;
21027 unsigned work_register;
21028
21029 /* We have been asked to create a stack backtrace structure.
21030 The code looks like this:
21031
21032 0 .align 2
21033 0 func:
21034 0 sub SP, #16 Reserve space for 4 registers.
21035 2 push {R7} Push low registers.
21036 4 add R7, SP, #20 Get the stack pointer before the push.
21037 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
21038 8 mov R7, PC Get hold of the start of this code plus 12.
21039 10 str R7, [SP, #16] Store it.
21040 12 mov R7, FP Get hold of the current frame pointer.
21041 14 str R7, [SP, #4] Store it.
21042 16 mov R7, LR Get hold of the current return address.
21043 18 str R7, [SP, #12] Store it.
21044 20 add R7, SP, #16 Point at the start of the backtrace structure.
21045 22 mov FP, R7 Put this value into the frame pointer. */
21046
21047 work_register = thumb_find_work_register (live_regs_mask);
21048
21049 if (arm_except_unwind_info (&global_options) == UI_TARGET)
21050 asm_fprintf (f, "\t.pad #16\n");
21051
21052 asm_fprintf
21053 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
21054 SP_REGNUM, SP_REGNUM);
21055
21056 if (dwarf2out_do_frame ())
21057 {
21058 char *l = dwarf2out_cfi_label (false);
21059
21060 cfa_offset = cfa_offset + 16;
21061 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
21062 }
21063
21064 if (l_mask)
21065 {
21066 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
21067 offset = bit_count (l_mask) * UNITS_PER_WORD;
21068 }
21069 else
21070 offset = 0;
21071
21072 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21073 offset + 16 + crtl->args.pretend_args_size);
21074
21075 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21076 offset + 4);
21077
21078 /* Make sure that the instruction fetching the PC is in the right place
21079 to calculate "start of backtrace creation code + 12". */
21080 if (l_mask)
21081 {
21082 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21083 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21084 offset + 12);
21085 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21086 ARM_HARD_FRAME_POINTER_REGNUM);
21087 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21088 offset);
21089 }
21090 else
21091 {
21092 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
21093 ARM_HARD_FRAME_POINTER_REGNUM);
21094 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21095 offset);
21096 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
21097 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21098 offset + 12);
21099 }
21100
21101 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
21102 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
21103 offset + 8);
21104 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
21105 offset + 12);
21106 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
21107 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
21108 }
21109 /* Optimization: If we are not pushing any low registers but we are going
21110 to push some high registers then delay our first push. This will just
21111 be a push of LR and we can combine it with the push of the first high
21112 register. */
21113 else if ((l_mask & 0xff) != 0
21114 || (high_regs_pushed == 0 && l_mask))
21115 {
21116 unsigned long mask = l_mask;
21117 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
21118 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
21119 }
21120
21121 if (high_regs_pushed)
21122 {
21123 unsigned pushable_regs;
21124 unsigned next_hi_reg;
21125
21126 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
21127 if (live_regs_mask & (1 << next_hi_reg))
21128 break;
21129
21130 pushable_regs = l_mask & 0xff;
21131
21132 if (pushable_regs == 0)
21133 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
21134
21135 while (high_regs_pushed > 0)
21136 {
21137 unsigned long real_regs_mask = 0;
21138
21139 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
21140 {
21141 if (pushable_regs & (1 << regno))
21142 {
21143 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
21144
21145 high_regs_pushed --;
21146 real_regs_mask |= (1 << next_hi_reg);
21147
21148 if (high_regs_pushed)
21149 {
21150 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
21151 next_hi_reg --)
21152 if (live_regs_mask & (1 << next_hi_reg))
21153 break;
21154 }
21155 else
21156 {
21157 pushable_regs &= ~((1 << regno) - 1);
21158 break;
21159 }
21160 }
21161 }
21162
21163 /* If we had to find a work register and we have not yet
21164 saved the LR then add it to the list of regs to push. */
21165 if (l_mask == (1 << LR_REGNUM))
21166 {
21167 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
21168 1, &cfa_offset,
21169 real_regs_mask | (1 << LR_REGNUM));
21170 l_mask = 0;
21171 }
21172 else
21173 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
21174 }
21175 }
21176 }
21177
21178 /* Handle the case of a double word load into a low register from
21179 a computed memory address. The computed address may involve a
21180 register which is overwritten by the load. */
21181 const char *
21182 thumb_load_double_from_address (rtx *operands)
21183 {
21184 rtx addr;
21185 rtx base;
21186 rtx offset;
21187 rtx arg1;
21188 rtx arg2;
21189
21190 gcc_assert (GET_CODE (operands[0]) == REG);
21191 gcc_assert (GET_CODE (operands[1]) == MEM);
21192
21193 /* Get the memory address. */
21194 addr = XEXP (operands[1], 0);
21195
21196 /* Work out how the memory address is computed. */
21197 switch (GET_CODE (addr))
21198 {
21199 case REG:
21200 operands[2] = adjust_address (operands[1], SImode, 4);
21201
21202 if (REGNO (operands[0]) == REGNO (addr))
21203 {
21204 output_asm_insn ("ldr\t%H0, %2", operands);
21205 output_asm_insn ("ldr\t%0, %1", operands);
21206 }
21207 else
21208 {
21209 output_asm_insn ("ldr\t%0, %1", operands);
21210 output_asm_insn ("ldr\t%H0, %2", operands);
21211 }
21212 break;
21213
21214 case CONST:
21215 /* Compute <address> + 4 for the high order load. */
21216 operands[2] = adjust_address (operands[1], SImode, 4);
21217
21218 output_asm_insn ("ldr\t%0, %1", operands);
21219 output_asm_insn ("ldr\t%H0, %2", operands);
21220 break;
21221
21222 case PLUS:
21223 arg1 = XEXP (addr, 0);
21224 arg2 = XEXP (addr, 1);
21225
21226 if (CONSTANT_P (arg1))
21227 base = arg2, offset = arg1;
21228 else
21229 base = arg1, offset = arg2;
21230
21231 gcc_assert (GET_CODE (base) == REG);
21232
21233 /* Catch the case of <address> = <reg> + <reg> */
21234 if (GET_CODE (offset) == REG)
21235 {
21236 int reg_offset = REGNO (offset);
21237 int reg_base = REGNO (base);
21238 int reg_dest = REGNO (operands[0]);
21239
21240 /* Add the base and offset registers together into the
21241 higher destination register. */
21242 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
21243 reg_dest + 1, reg_base, reg_offset);
21244
21245 /* Load the lower destination register from the address in
21246 the higher destination register. */
21247 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
21248 reg_dest, reg_dest + 1);
21249
21250 /* Load the higher destination register from its own address
21251 plus 4. */
21252 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
21253 reg_dest + 1, reg_dest + 1);
21254 }
21255 else
21256 {
21257 /* Compute <address> + 4 for the high order load. */
21258 operands[2] = adjust_address (operands[1], SImode, 4);
21259
21260 /* If the computed address is held in the low order register
21261 then load the high order register first, otherwise always
21262 load the low order register first. */
21263 if (REGNO (operands[0]) == REGNO (base))
21264 {
21265 output_asm_insn ("ldr\t%H0, %2", operands);
21266 output_asm_insn ("ldr\t%0, %1", operands);
21267 }
21268 else
21269 {
21270 output_asm_insn ("ldr\t%0, %1", operands);
21271 output_asm_insn ("ldr\t%H0, %2", operands);
21272 }
21273 }
21274 break;
21275
21276 case LABEL_REF:
21277 /* With no registers to worry about we can just load the value
21278 directly. */
21279 operands[2] = adjust_address (operands[1], SImode, 4);
21280
21281 output_asm_insn ("ldr\t%H0, %2", operands);
21282 output_asm_insn ("ldr\t%0, %1", operands);
21283 break;
21284
21285 default:
21286 gcc_unreachable ();
21287 }
21288
21289 return "";
21290 }
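/* Worked example (editorial illustration): for a DImode load where
   operands[0] is r2 and the address is simply r2 (the REG case above),
   the destination overlaps the base, so the high word is loaded first:

	ldr	r3, [r2, #4]
	ldr	r2, [r2]

   Loading in the other order would clobber the base register before the
   second load.  */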
21291
21292 const char *
21293 thumb_output_move_mem_multiple (int n, rtx *operands)
21294 {
21295 rtx tmp;
21296
21297 switch (n)
21298 {
21299 case 2:
21300 if (REGNO (operands[4]) > REGNO (operands[5]))
21301 {
21302 tmp = operands[4];
21303 operands[4] = operands[5];
21304 operands[5] = tmp;
21305 }
21306 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
21307 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
21308 break;
21309
21310 case 3:
21311 if (REGNO (operands[4]) > REGNO (operands[5]))
21312 {
21313 tmp = operands[4];
21314 operands[4] = operands[5];
21315 operands[5] = tmp;
21316 }
21317 if (REGNO (operands[5]) > REGNO (operands[6]))
21318 {
21319 tmp = operands[5];
21320 operands[5] = operands[6];
21321 operands[6] = tmp;
21322 }
21323 if (REGNO (operands[4]) > REGNO (operands[5]))
21324 {
21325 tmp = operands[4];
21326 operands[4] = operands[5];
21327 operands[5] = tmp;
21328 }
21329
21330 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
21331 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
21332 break;
21333
21334 default:
21335 gcc_unreachable ();
21336 }
21337
21338 return "";
21339 }
21340
21341 /* Output a call-via instruction for thumb state. */
21342 const char *
21343 thumb_call_via_reg (rtx reg)
21344 {
21345 int regno = REGNO (reg);
21346 rtx *labelp;
21347
21348 gcc_assert (regno < LR_REGNUM);
21349
21350 /* If we are in the normal text section we can use a single instance
21351 per compilation unit. If we are doing function sections, then we need
21352 an entry per section, since we can't rely on reachability. */
21353 if (in_section == text_section)
21354 {
21355 thumb_call_reg_needed = 1;
21356
21357 if (thumb_call_via_label[regno] == NULL)
21358 thumb_call_via_label[regno] = gen_label_rtx ();
21359 labelp = thumb_call_via_label + regno;
21360 }
21361 else
21362 {
21363 if (cfun->machine->call_via[regno] == NULL)
21364 cfun->machine->call_via[regno] = gen_label_rtx ();
21365 labelp = cfun->machine->call_via + regno;
21366 }
21367
21368 output_asm_insn ("bl\t%a0", labelp);
21369 return "";
21370 }
21371
21372 /* Routines for generating rtl. */
21373 void
21374 thumb_expand_movmemqi (rtx *operands)
21375 {
21376 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
21377 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
21378 HOST_WIDE_INT len = INTVAL (operands[2]);
21379 HOST_WIDE_INT offset = 0;
21380
21381 while (len >= 12)
21382 {
21383 emit_insn (gen_movmem12b (out, in, out, in));
21384 len -= 12;
21385 }
21386
21387 if (len >= 8)
21388 {
21389 emit_insn (gen_movmem8b (out, in, out, in));
21390 len -= 8;
21391 }
21392
21393 if (len >= 4)
21394 {
21395 rtx reg = gen_reg_rtx (SImode);
21396 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
21397 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
21398 len -= 4;
21399 offset += 4;
21400 }
21401
21402 if (len >= 2)
21403 {
21404 rtx reg = gen_reg_rtx (HImode);
21405 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
21406 plus_constant (in, offset))));
21407 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
21408 reg));
21409 len -= 2;
21410 offset += 2;
21411 }
21412
21413 if (len)
21414 {
21415 rtx reg = gen_reg_rtx (QImode);
21416 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
21417 plus_constant (in, offset))));
21418 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
21419 reg));
21420 }
21421 }
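/* Worked example (editorial illustration): for a 23-byte copy the code
   above emits one 12-byte and one 8-byte block move (each of which
   post-increments the IN and OUT pointers), then a 2-byte and a 1-byte
   tail copy at offsets 0 and 2 from the updated pointers:
   23 = 12 + 8 + 2 + 1.  */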
21422
21423 void
21424 thumb_reload_out_hi (rtx *operands)
21425 {
21426 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
21427 }
21428
21429 /* Handle reading a half-word from memory during reload. */
21430 void
21431 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
21432 {
21433 gcc_unreachable ();
21434 }
21435
21436 /* Return the length of a function name prefix
21437 that starts with the character 'c'. */
21438 static int
21439 arm_get_strip_length (int c)
21440 {
21441 switch (c)
21442 {
21443 ARM_NAME_ENCODING_LENGTHS
21444 default: return 0;
21445 }
21446 }
21447
21448 /* Return a pointer to a function's name with any
21449 and all prefix encodings stripped from it. */
21450 const char *
21451 arm_strip_name_encoding (const char *name)
21452 {
21453 int skip;
21454
21455 while ((skip = arm_get_strip_length (* name)))
21456 name += skip;
21457
21458 return name;
21459 }
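/* Editorial illustration: assuming the target's ARM_NAME_ENCODING_LENGTHS
   treats '*' as a one-character prefix, arm_strip_name_encoding ("*foo")
   skips the '*' and returns a pointer to "foo"; names with no recognised
   prefix are returned unchanged.  */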
21460
21461 /* If there is a '*' anywhere in the name's prefix, then
21462 emit the stripped name verbatim, otherwise prepend an
21463 underscore if leading underscores are being used. */
21464 void
21465 arm_asm_output_labelref (FILE *stream, const char *name)
21466 {
21467 int skip;
21468 int verbatim = 0;
21469
21470 while ((skip = arm_get_strip_length (* name)))
21471 {
21472 verbatim |= (*name == '*');
21473 name += skip;
21474 }
21475
21476 if (verbatim)
21477 fputs (name, stream);
21478 else
21479 asm_fprintf (stream, "%U%s", name);
21480 }
21481
21482 static void
21483 arm_file_start (void)
21484 {
21485 int val;
21486
21487 if (TARGET_UNIFIED_ASM)
21488 asm_fprintf (asm_out_file, "\t.syntax unified\n");
21489
21490 if (TARGET_BPABI)
21491 {
21492 const char *fpu_name;
21493 if (arm_selected_arch)
21494 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
21495 else
21496 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
21497
21498 if (TARGET_SOFT_FLOAT)
21499 {
21500 if (TARGET_VFP)
21501 fpu_name = "softvfp";
21502 else
21503 fpu_name = "softfpa";
21504 }
21505 else
21506 {
21507 fpu_name = arm_fpu_desc->name;
21508 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
21509 {
21510 if (TARGET_HARD_FLOAT)
21511 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
21512 if (TARGET_HARD_FLOAT_ABI)
21513 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
21514 }
21515 }
21516 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
21517
21518 /* Some of these attributes only apply when the corresponding features
21519 are used. However we don't have any easy way of figuring this out.
21520 Conservatively record the setting that would have been used. */
21521
21522 /* Tag_ABI_FP_rounding. */
21523 if (flag_rounding_math)
21524 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
21525 if (!flag_unsafe_math_optimizations)
21526 {
21527 /* Tag_ABI_FP_denormal. */
21528 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
21529 /* Tag_ABI_FP_exceptions. */
21530 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
21531 }
21532 /* Tag_ABI_FP_user_exceptions. */
21533 if (flag_signaling_nans)
21534 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
21535 /* Tag_ABI_FP_number_model. */
21536 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
21537 flag_finite_math_only ? 1 : 3);
21538
21539 /* Tag_ABI_align8_needed. */
21540 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
21541 /* Tag_ABI_align8_preserved. */
21542 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
21543 /* Tag_ABI_enum_size. */
21544 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
21545 flag_short_enums ? 1 : 2);
21546
21547 /* Tag_ABI_optimization_goals. */
21548 if (optimize_size)
21549 val = 4;
21550 else if (optimize >= 2)
21551 val = 2;
21552 else if (optimize)
21553 val = 1;
21554 else
21555 val = 6;
21556 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
21557
21558 /* Tag_ABI_FP_16bit_format. */
21559 if (arm_fp16_format)
21560 asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
21561 (int)arm_fp16_format);
21562
21563 if (arm_lang_output_object_attributes_hook)
21564 arm_lang_output_object_attributes_hook();
21565 }
21566 default_file_start();
21567 }
21568
21569 static void
21570 arm_file_end (void)
21571 {
21572 int regno;
21573
21574 if (NEED_INDICATE_EXEC_STACK)
21575 /* Add .note.GNU-stack. */
21576 file_end_indicate_exec_stack ();
21577
21578 if (! thumb_call_reg_needed)
21579 return;
21580
21581 switch_to_section (text_section);
21582 asm_fprintf (asm_out_file, "\t.code 16\n");
21583 ASM_OUTPUT_ALIGN (asm_out_file, 1);
21584
21585 for (regno = 0; regno < LR_REGNUM; regno++)
21586 {
21587 rtx label = thumb_call_via_label[regno];
21588
21589 if (label != 0)
21590 {
21591 targetm.asm_out.internal_label (asm_out_file, "L",
21592 CODE_LABEL_NUMBER (label));
21593 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
21594 }
21595 }
21596 }
21597
21598 #ifndef ARM_PE
21599 /* Symbols in the text segment can be accessed without indirecting via the
21600 constant pool; it may take an extra binary operation, but this is still
21601 faster than indirecting via memory. Don't do this when not optimizing,
21602 since we won't be calculating all of the offsets necessary to do this
21603 simplification. */
21604
21605 static void
21606 arm_encode_section_info (tree decl, rtx rtl, int first)
21607 {
21608 if (optimize > 0 && TREE_CONSTANT (decl))
21609 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
21610
21611 default_encode_section_info (decl, rtl, first);
21612 }
21613 #endif /* !ARM_PE */
21614
21615 static void
21616 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
21617 {
21618 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
21619 && !strcmp (prefix, "L"))
21620 {
21621 arm_ccfsm_state = 0;
21622 arm_target_insn = NULL;
21623 }
21624 default_internal_label (stream, prefix, labelno);
21625 }
21626
21627 /* Output code to add DELTA to the first argument, and then jump
21628 to FUNCTION. Used for C++ multiple inheritance. */
21629 static void
21630 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
21631 HOST_WIDE_INT delta,
21632 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
21633 tree function)
21634 {
21635 static int thunk_label = 0;
21636 char label[256];
21637 char labelpc[256];
21638 int mi_delta = delta;
21639 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
21640 int shift = 0;
21641 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
21642 ? 1 : 0);
21643 if (mi_delta < 0)
21644 mi_delta = - mi_delta;
21645
21646 if (TARGET_THUMB1)
21647 {
21648 int labelno = thunk_label++;
21649 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
21650 /* Thunks are entered in ARM mode when available. */
21651 if (TARGET_THUMB1_ONLY)
21652 {
21653 /* push r3 so we can use it as a temporary. */
21654 /* TODO: Omit this save if r3 is not used. */
21655 fputs ("\tpush {r3}\n", file);
21656 fputs ("\tldr\tr3, ", file);
21657 }
21658 else
21659 {
21660 fputs ("\tldr\tr12, ", file);
21661 }
21662 assemble_name (file, label);
21663 fputc ('\n', file);
21664 if (flag_pic)
21665 {
21666 /* If we are generating PIC, the ldr instruction below loads
21667 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21668 the address of the add + 8, so we have:
21669
21670 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21671 = target + 1.
21672
21673 Note that we have "+ 1" because some versions of GNU ld
21674 don't set the low bit of the result for R_ARM_REL32
21675 relocations against thumb function symbols.
21676 On ARMv6M this is +4, not +8. */
21677 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
21678 assemble_name (file, labelpc);
21679 fputs (":\n", file);
21680 if (TARGET_THUMB1_ONLY)
21681 {
21682 /* This is 2 insns after the start of the thunk, so we know it
21683 is 4-byte aligned. */
21684 fputs ("\tadd\tr3, pc, r3\n", file);
21685 fputs ("\tmov r12, r3\n", file);
21686 }
21687 else
21688 fputs ("\tadd\tr12, pc, r12\n", file);
21689 }
21690 else if (TARGET_THUMB1_ONLY)
21691 fputs ("\tmov r12, r3\n", file);
21692 }
21693 if (TARGET_THUMB1_ONLY)
21694 {
21695 if (mi_delta > 255)
21696 {
21697 fputs ("\tldr\tr3, ", file);
21698 assemble_name (file, label);
21699 fputs ("+4\n", file);
21700 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
21701 mi_op, this_regno, this_regno);
21702 }
21703 else if (mi_delta != 0)
21704 {
21705 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21706 mi_op, this_regno, this_regno,
21707 mi_delta);
21708 }
21709 }
21710 else
21711 {
21712 /* TODO: Use movw/movt for large constants when available. */
21713 while (mi_delta != 0)
21714 {
21715 if ((mi_delta & (3 << shift)) == 0)
21716 shift += 2;
21717 else
21718 {
21719 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
21720 mi_op, this_regno, this_regno,
21721 mi_delta & (0xff << shift));
21722 mi_delta &= ~(0xff << shift);
21723 shift += 8;
21724 }
21725 }
21726 }
21727 if (TARGET_THUMB1)
21728 {
21729 if (TARGET_THUMB1_ONLY)
21730 fputs ("\tpop\t{r3}\n", file);
21731
21732 fprintf (file, "\tbx\tr12\n");
21733 ASM_OUTPUT_ALIGN (file, 2);
21734 assemble_name (file, label);
21735 fputs (":\n", file);
21736 if (flag_pic)
21737 {
21738 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21739 rtx tem = XEXP (DECL_RTL (function), 0);
21740 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
21741 tem = gen_rtx_MINUS (GET_MODE (tem),
21742 tem,
21743 gen_rtx_SYMBOL_REF (Pmode,
21744 ggc_strdup (labelpc)));
21745 assemble_integer (tem, 4, BITS_PER_WORD, 1);
21746 }
21747 else
21748 /* Output ".word .LTHUNKn". */
21749 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
21750
21751 if (TARGET_THUMB1_ONLY && mi_delta > 255)
21752 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
21753 }
21754 else
21755 {
21756 fputs ("\tb\t", file);
21757 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
21758 if (NEED_PLT_RELOC)
21759 fputs ("(PLT)", file);
21760 fputc ('\n', file);
21761 }
21762 }
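/* A minimal sketch of what arm_output_mi_thunk emits for an ARM-mode
   (non-Thumb-1) thunk with a small positive DELTA and a non-aggregate
   return value, so `this' lives in r0.  The target symbol below is only
   a placeholder:

	add	r0, r0, #8
	b	_ZThunkTarget		@ "(PLT)" is appended when
					@ NEED_PLT_RELOC is set
*/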
21763
21764 int
21765 arm_emit_vector_const (FILE *file, rtx x)
21766 {
21767 int i;
21768 const char * pattern;
21769
21770 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21771
21772 switch (GET_MODE (x))
21773 {
21774 case V2SImode: pattern = "%08x"; break;
21775 case V4HImode: pattern = "%04x"; break;
21776 case V8QImode: pattern = "%02x"; break;
21777 default: gcc_unreachable ();
21778 }
21779
21780 fprintf (file, "0x");
21781 for (i = CONST_VECTOR_NUNITS (x); i--;)
21782 {
21783 rtx element;
21784
21785 element = CONST_VECTOR_ELT (x, i);
21786 fprintf (file, pattern, INTVAL (element));
21787 }
21788
21789 return 1;
21790 }
21791
21792 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
21793 HFmode constant pool entries are actually loaded with ldr. */
21794 void
21795 arm_emit_fp16_const (rtx c)
21796 {
21797 REAL_VALUE_TYPE r;
21798 long bits;
21799
21800 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
21801 bits = real_to_target (NULL, &r, HFmode);
21802 if (WORDS_BIG_ENDIAN)
21803 assemble_zeros (2);
21804 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
21805 if (!WORDS_BIG_ENDIAN)
21806 assemble_zeros (2);
21807 }
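/* For example (little-endian, !WORDS_BIG_ENDIAN): the HFmode constant 1.0
   has the half-precision bit pattern 0x3c00, so the pool entry becomes the
   bytes 00 3c 00 00, i.e. the value sits in the low halfword of a 4-byte
   word that ldr can load directly.  */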
21808
21809 const char *
21810 arm_output_load_gr (rtx *operands)
21811 {
21812 rtx reg;
21813 rtx offset;
21814 rtx wcgr;
21815 rtx sum;
21816
21817 if (GET_CODE (operands [1]) != MEM
21818 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
21819 || GET_CODE (reg = XEXP (sum, 0)) != REG
21820 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
21821 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
21822 return "wldrw%?\t%0, %1";
21823
21824 /* Fix up an out-of-range load of a GR register. */
21825 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
21826 wcgr = operands[0];
21827 operands[0] = reg;
21828 output_asm_insn ("ldr%?\t%0, %1", operands);
21829
21830 operands[0] = wcgr;
21831 operands[1] = reg;
21832 output_asm_insn ("tmcr%?\t%0, %1", operands);
21833 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
21834
21835 return "";
21836 }
21837
21838 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21839
21840 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21841 named arg and all anonymous args onto the stack.
21842 XXX I know the prologue shouldn't be pushing registers, but it is faster
21843 that way. */
21844
21845 static void
21846 arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
21847 enum machine_mode mode,
21848 tree type,
21849 int *pretend_size,
21850 int second_time ATTRIBUTE_UNUSED)
21851 {
21852 int nregs;
21853
21854 cfun->machine->uses_anonymous_args = 1;
21855 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
21856 {
21857 nregs = pcum->aapcs_ncrn;
21858 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
21859 nregs++;
21860 }
21861 else
21862 nregs = pcum->nregs;
21863
21864 if (nregs < NUM_ARG_REGS)
21865 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
21866 }
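/* For example, a varargs callee whose named arguments consume only r0 has
   nregs == 1, so *pretend_size becomes (4 - 1) * 4 = 12 and the prologue
   pushes r1-r3, making the anonymous arguments contiguous with any
   stack-passed ones.  */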
21867
21868 /* Return nonzero if the CONSUMER instruction (a store) does not need
21869 PRODUCER's value to calculate the address. */
21870
21871 int
21872 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
21873 {
21874 rtx value = PATTERN (producer);
21875 rtx addr = PATTERN (consumer);
21876
21877 if (GET_CODE (value) == COND_EXEC)
21878 value = COND_EXEC_CODE (value);
21879 if (GET_CODE (value) == PARALLEL)
21880 value = XVECEXP (value, 0, 0);
21881 value = XEXP (value, 0);
21882 if (GET_CODE (addr) == COND_EXEC)
21883 addr = COND_EXEC_CODE (addr);
21884 if (GET_CODE (addr) == PARALLEL)
21885 addr = XVECEXP (addr, 0, 0);
21886 addr = XEXP (addr, 0);
21887
21888 return !reg_overlap_mentioned_p (value, addr);
21889 }
21890
21891 /* Return nonzero if the CONSUMER instruction (a store) does need
21892 PRODUCER's value to calculate the address. */
21893
21894 int
21895 arm_early_store_addr_dep (rtx producer, rtx consumer)
21896 {
21897 return !arm_no_early_store_addr_dep (producer, consumer);
21898 }
21899
21900 /* Return nonzero if the CONSUMER instruction (a load) does need
21901 PRODUCER's value to calculate the address. */
21902
21903 int
21904 arm_early_load_addr_dep (rtx producer, rtx consumer)
21905 {
21906 rtx value = PATTERN (producer);
21907 rtx addr = PATTERN (consumer);
21908
21909 if (GET_CODE (value) == COND_EXEC)
21910 value = COND_EXEC_CODE (value);
21911 if (GET_CODE (value) == PARALLEL)
21912 value = XVECEXP (value, 0, 0);
21913 value = XEXP (value, 0);
21914 if (GET_CODE (addr) == COND_EXEC)
21915 addr = COND_EXEC_CODE (addr);
21916 if (GET_CODE (addr) == PARALLEL)
21917 addr = XVECEXP (addr, 0, 0);
21918 addr = XEXP (addr, 1);
21919
21920 return reg_overlap_mentioned_p (value, addr);
21921 }
21922
21923 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21924 have an early register shift value or amount dependency on the
21925 result of PRODUCER. */
21926
21927 int
21928 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
21929 {
21930 rtx value = PATTERN (producer);
21931 rtx op = PATTERN (consumer);
21932 rtx early_op;
21933
21934 if (GET_CODE (value) == COND_EXEC)
21935 value = COND_EXEC_CODE (value);
21936 if (GET_CODE (value) == PARALLEL)
21937 value = XVECEXP (value, 0, 0);
21938 value = XEXP (value, 0);
21939 if (GET_CODE (op) == COND_EXEC)
21940 op = COND_EXEC_CODE (op);
21941 if (GET_CODE (op) == PARALLEL)
21942 op = XVECEXP (op, 0, 0);
21943 op = XEXP (op, 1);
21944
21945 early_op = XEXP (op, 0);
21946 /* This is either an actual independent shift, or a shift applied to
21947 the first operand of another operation. We want the whole shift
21948 operation. */
21949 if (GET_CODE (early_op) == REG)
21950 early_op = op;
21951
21952 return !reg_overlap_mentioned_p (value, early_op);
21953 }
21954
21955 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21956 have an early register shift value dependency on the result of
21957 PRODUCER. */
21958
21959 int
21960 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
21961 {
21962 rtx value = PATTERN (producer);
21963 rtx op = PATTERN (consumer);
21964 rtx early_op;
21965
21966 if (GET_CODE (value) == COND_EXEC)
21967 value = COND_EXEC_CODE (value);
21968 if (GET_CODE (value) == PARALLEL)
21969 value = XVECEXP (value, 0, 0);
21970 value = XEXP (value, 0);
21971 if (GET_CODE (op) == COND_EXEC)
21972 op = COND_EXEC_CODE (op);
21973 if (GET_CODE (op) == PARALLEL)
21974 op = XVECEXP (op, 0, 0);
21975 op = XEXP (op, 1);
21976
21977 early_op = XEXP (op, 0);
21978
21979 /* This is either an actual independent shift, or a shift applied to
21980 the first operand of another operation. We want the value being
21981 shifted, in either case. */
21982 if (GET_CODE (early_op) != REG)
21983 early_op = XEXP (early_op, 0);
21984
21985 return !reg_overlap_mentioned_p (value, early_op);
21986 }
21987
21988 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21989 have an early register mult dependency on the result of
21990 PRODUCER. */
21991
21992 int
21993 arm_no_early_mul_dep (rtx producer, rtx consumer)
21994 {
21995 rtx value = PATTERN (producer);
21996 rtx op = PATTERN (consumer);
21997
21998 if (GET_CODE (value) == COND_EXEC)
21999 value = COND_EXEC_CODE (value);
22000 if (GET_CODE (value) == PARALLEL)
22001 value = XVECEXP (value, 0, 0);
22002 value = XEXP (value, 0);
22003 if (GET_CODE (op) == COND_EXEC)
22004 op = COND_EXEC_CODE (op);
22005 if (GET_CODE (op) == PARALLEL)
22006 op = XVECEXP (op, 0, 0);
22007 op = XEXP (op, 1);
22008
22009 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
22010 {
22011 if (GET_CODE (XEXP (op, 0)) == MULT)
22012 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
22013 else
22014 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
22015 }
22016
22017 return 0;
22018 }
22019
22020 /* We can't rely on the caller doing the proper promotion when
22021 using APCS or ATPCS. */
22022
22023 static bool
22024 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
22025 {
22026 return !TARGET_AAPCS_BASED;
22027 }
22028
22029 static enum machine_mode
22030 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
22031 enum machine_mode mode,
22032 int *punsignedp ATTRIBUTE_UNUSED,
22033 const_tree fntype ATTRIBUTE_UNUSED,
22034 int for_return ATTRIBUTE_UNUSED)
22035 {
22036 if (GET_MODE_CLASS (mode) == MODE_INT
22037 && GET_MODE_SIZE (mode) < 4)
22038 return SImode;
22039
22040 return mode;
22041 }
22042
22043 /* AAPCS based ABIs use short enums by default. */
22044
22045 static bool
22046 arm_default_short_enums (void)
22047 {
22048 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
22049 }
22050
22051
22052 /* AAPCS requires that anonymous bitfields affect structure alignment. */
22053
22054 static bool
22055 arm_align_anon_bitfield (void)
22056 {
22057 return TARGET_AAPCS_BASED;
22058 }
22059
22060
22061 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
22062
22063 static tree
22064 arm_cxx_guard_type (void)
22065 {
22066 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
22067 }
22068
22069 /* Return non-zero if the consumer (a multiply-accumulate instruction)
22070 has an accumulator dependency on the result of the producer (a
22071 multiplication instruction) and no other dependency on that result. */
22072 int
22073 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
22074 {
22075 rtx mul = PATTERN (producer);
22076 rtx mac = PATTERN (consumer);
22077 rtx mul_result;
22078 rtx mac_op0, mac_op1, mac_acc;
22079
22080 if (GET_CODE (mul) == COND_EXEC)
22081 mul = COND_EXEC_CODE (mul);
22082 if (GET_CODE (mac) == COND_EXEC)
22083 mac = COND_EXEC_CODE (mac);
22084
22085 /* Check that mul is of the form (set (...) (mult ...))
22086 and mla is of the form (set (...) (plus (mult ...) (...))). */
22087 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
22088 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
22089 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
22090 return 0;
22091
22092 mul_result = XEXP (mul, 0);
22093 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
22094 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
22095 mac_acc = XEXP (XEXP (mac, 1), 1);
22096
22097 return (reg_overlap_mentioned_p (mul_result, mac_acc)
22098 && !reg_overlap_mentioned_p (mul_result, mac_op0)
22099 && !reg_overlap_mentioned_p (mul_result, mac_op1));
22100 }
22101
22102
22103 /* The EABI says test the least significant bit of a guard variable. */
22104
22105 static bool
22106 arm_cxx_guard_mask_bit (void)
22107 {
22108 return TARGET_AAPCS_BASED;
22109 }
22110
22111
22112 /* The EABI specifies that all array cookies are 8 bytes long. */
22113
22114 static tree
22115 arm_get_cookie_size (tree type)
22116 {
22117 tree size;
22118
22119 if (!TARGET_AAPCS_BASED)
22120 return default_cxx_get_cookie_size (type);
22121
22122 size = build_int_cst (sizetype, 8);
22123 return size;
22124 }
22125
22126
22127 /* The EABI says that array cookies should also contain the element size. */
22128
22129 static bool
22130 arm_cookie_has_size (void)
22131 {
22132 return TARGET_AAPCS_BASED;
22133 }
22134
22135
22136 /* The EABI says constructors and destructors should return a pointer to
22137 the object constructed/destroyed. */
22138
22139 static bool
22140 arm_cxx_cdtor_returns_this (void)
22141 {
22142 return TARGET_AAPCS_BASED;
22143 }
22144
22145 /* The EABI says that an inline function may never be the key
22146 method. */
22147
22148 static bool
22149 arm_cxx_key_method_may_be_inline (void)
22150 {
22151 return !TARGET_AAPCS_BASED;
22152 }
22153
22154 static void
22155 arm_cxx_determine_class_data_visibility (tree decl)
22156 {
22157 if (!TARGET_AAPCS_BASED
22158 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
22159 return;
22160
22161 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22162 is exported. However, on systems without dynamic vague linkage,
22163 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22164 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
22165 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
22166 else
22167 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
22168 DECL_VISIBILITY_SPECIFIED (decl) = 1;
22169 }
22170
22171 static bool
22172 arm_cxx_class_data_always_comdat (void)
22173 {
22174 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22175 vague linkage if the class has no key function. */
22176 return !TARGET_AAPCS_BASED;
22177 }
22178
22179
22180 /* The EABI says __aeabi_atexit should be used to register static
22181 destructors. */
22182
22183 static bool
22184 arm_cxx_use_aeabi_atexit (void)
22185 {
22186 return TARGET_AAPCS_BASED;
22187 }
22188
22189
22190 void
22191 arm_set_return_address (rtx source, rtx scratch)
22192 {
22193 arm_stack_offsets *offsets;
22194 HOST_WIDE_INT delta;
22195 rtx addr;
22196 unsigned long saved_regs;
22197
22198 offsets = arm_get_frame_offsets ();
22199 saved_regs = offsets->saved_regs_mask;
22200
22201 if ((saved_regs & (1 << LR_REGNUM)) == 0)
22202 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22203 else
22204 {
22205 if (frame_pointer_needed)
22206 addr = plus_constant(hard_frame_pointer_rtx, -4);
22207 else
22208 {
22209 /* LR will be the first saved register. */
22210 delta = offsets->outgoing_args - (offsets->frame + 4);
22211
22212
22213 if (delta >= 4096)
22214 {
22215 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
22216 GEN_INT (delta & ~4095)));
22217 addr = scratch;
22218 delta &= 4095;
22219 }
22220 else
22221 addr = stack_pointer_rtx;
22222
22223 addr = plus_constant (addr, delta);
22224 }
22225 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22226 }
22227 }
22228
22229
22230 void
22231 thumb_set_return_address (rtx source, rtx scratch)
22232 {
22233 arm_stack_offsets *offsets;
22234 HOST_WIDE_INT delta;
22235 HOST_WIDE_INT limit;
22236 int reg;
22237 rtx addr;
22238 unsigned long mask;
22239
22240 emit_use (source);
22241
22242 offsets = arm_get_frame_offsets ();
22243 mask = offsets->saved_regs_mask;
22244 if (mask & (1 << LR_REGNUM))
22245 {
22246 limit = 1024;
22247 /* Find the saved regs. */
22248 if (frame_pointer_needed)
22249 {
22250 delta = offsets->soft_frame - offsets->saved_args;
22251 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
22252 if (TARGET_THUMB1)
22253 limit = 128;
22254 }
22255 else
22256 {
22257 delta = offsets->outgoing_args - offsets->saved_args;
22258 reg = SP_REGNUM;
22259 }
22260 /* Allow for the stack frame. */
22261 if (TARGET_THUMB1 && TARGET_BACKTRACE)
22262 delta -= 16;
22263 /* The link register is always the first saved register. */
22264 delta -= 4;
22265
22266 /* Construct the address. */
22267 addr = gen_rtx_REG (SImode, reg);
22268 if (delta > limit)
22269 {
22270 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
22271 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
22272 addr = scratch;
22273 }
22274 else
22275 addr = plus_constant (addr, delta);
22276
22277 emit_move_insn (gen_frame_mem (Pmode, addr), source);
22278 }
22279 else
22280 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
22281 }
22282
22283 /* Implements target hook vector_mode_supported_p. */
22284 bool
22285 arm_vector_mode_supported_p (enum machine_mode mode)
22286 {
22287 /* Neon also supports V2SImode, etc. listed in the clause below. */
22288 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
22289 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
22290 return true;
22291
22292 if ((TARGET_NEON || TARGET_IWMMXT)
22293 && ((mode == V2SImode)
22294 || (mode == V4HImode)
22295 || (mode == V8QImode)))
22296 return true;
22297
22298 return false;
22299 }
22300
22301 /* Implements target hook array_mode_supported_p. */
22302
22303 static bool
22304 arm_array_mode_supported_p (enum machine_mode mode,
22305 unsigned HOST_WIDE_INT nelems)
22306 {
22307 if (TARGET_NEON
22308 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
22309 && (nelems >= 2 && nelems <= 4))
22310 return true;
22311
22312 return false;
22313 }
22314
22315 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22316 registers when autovectorizing for Neon, at least until multiple vector
22317 widths are supported properly by the middle-end. */
22318
22319 static enum machine_mode
22320 arm_preferred_simd_mode (enum machine_mode mode)
22321 {
22322 if (TARGET_NEON)
22323 switch (mode)
22324 {
22325 case SFmode:
22326 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22327 case SImode:
22328 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22329 case HImode:
22330 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22331 case QImode:
22332 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22333 case DImode:
22334 if (TARGET_NEON_VECTORIZE_QUAD)
22335 return V2DImode;
22336 break;
22337
22338 default:;
22339 }
22340
22341 if (TARGET_REALLY_IWMMXT)
22342 switch (mode)
22343 {
22344 case SImode:
22345 return V2SImode;
22346 case HImode:
22347 return V4HImode;
22348 case QImode:
22349 return V8QImode;
22350
22351 default:;
22352 }
22353
22354 return word_mode;
22355 }
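/* As an illustration: when vectorizing SImode data with Neon enabled this
   returns V2SImode (doubleword) by default and V4SImode (quadword) under
   -mvectorize-with-neon-quad; on iWMMXt the same request yields V2SImode;
   otherwise the vectorizer falls back to word_mode.  */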
22356
22357 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22358
22359 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
22360 using r0-r4 for function arguments, r7 for the stack frame and don't have
22361 enough left over to do doubleword arithmetic. For Thumb-2 all the
22362 potentially problematic instructions accept high registers so this is not
22363 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
22364 that require many low registers. */
22365 static bool
22366 arm_class_likely_spilled_p (reg_class_t rclass)
22367 {
22368 if ((TARGET_THUMB1 && rclass == LO_REGS)
22369 || rclass == CC_REG)
22370 return true;
22371
22372 return false;
22373 }
22374
22375 /* Implements target hook small_register_classes_for_mode_p. */
22376 bool
22377 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
22378 {
22379 return TARGET_THUMB1;
22380 }
22381
22382 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22383 ARM insns and therefore guarantee that the shift count is modulo 256.
22384 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22385 guarantee no particular behavior for out-of-range counts. */
22386
22387 static unsigned HOST_WIDE_INT
22388 arm_shift_truncation_mask (enum machine_mode mode)
22389 {
22390 return mode == SImode ? 255 : 0;
22391 }
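/* For instance, an SImode shift whose variable count turns out to be 257
   behaves like a shift by 1 on the core shift insns, so returning 255 here
   lets the middle end delete a redundant masking "and" of the count.
   DImode shifts get mask 0 and no such guarantee.  */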
22392
22393
22394 /* Map internal gcc register numbers to DWARF2 register numbers. */
22395
22396 unsigned int
22397 arm_dbx_register_number (unsigned int regno)
22398 {
22399 if (regno < 16)
22400 return regno;
22401
22402 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22403 compatibility. The EABI defines them as registers 96-103. */
22404 if (IS_FPA_REGNUM (regno))
22405 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
22406
22407 if (IS_VFP_REGNUM (regno))
22408 {
22409 /* See comment in arm_dwarf_register_span. */
22410 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22411 return 64 + regno - FIRST_VFP_REGNUM;
22412 else
22413 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
22414 }
22415
22416 if (IS_IWMMXT_GR_REGNUM (regno))
22417 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
22418
22419 if (IS_IWMMXT_REGNUM (regno))
22420 return 112 + regno - FIRST_IWMMXT_REGNUM;
22421
22422 gcc_unreachable ();
22423 }
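/* Some illustrative mappings produced by the rules above on an AAPCS-based
   target:
	r0-r15			-> 0-15
	f0 (FPA)		-> 96
	s0, s1 (VFP)		-> 64, 65
	d16 (VFPv3)		-> 272
	wcgr0 / wr0 (iWMMXt)	-> 104 / 112  */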
22424
22425 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22426    GCC models them as 64 32-bit registers, so we need to describe this to
22427 the DWARF generation code. Other registers can use the default. */
22428 static rtx
22429 arm_dwarf_register_span (rtx rtl)
22430 {
22431 unsigned regno;
22432 int nregs;
22433 int i;
22434 rtx p;
22435
22436 regno = REGNO (rtl);
22437 if (!IS_VFP_REGNUM (regno))
22438 return NULL_RTX;
22439
22440 /* XXX FIXME: The EABI defines two VFP register ranges:
22441 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22442 256-287: D0-D31
22443 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22444 corresponding D register. Until GDB supports this, we shall use the
22445 legacy encodings. We also use these encodings for D0-D15 for
22446 compatibility with older debuggers. */
22447 if (VFP_REGNO_OK_FOR_SINGLE (regno))
22448 return NULL_RTX;
22449
22450 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
22451 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
22452 regno = (regno - FIRST_VFP_REGNUM) / 2;
22453 for (i = 0; i < nregs; i++)
22454 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
22455
22456 return p;
22457 }
22458
22459 #if ARM_UNWIND_INFO
22460 /* Emit unwind directives for a store-multiple instruction or stack pointer
22461 push during alignment.
22462 These should only ever be generated by the function prologue code, so
22463 expect them to have a particular form. */
22464
22465 static void
22466 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
22467 {
22468 int i;
22469 HOST_WIDE_INT offset;
22470 HOST_WIDE_INT nregs;
22471 int reg_size;
22472 unsigned reg;
22473 unsigned lastreg;
22474 rtx e;
22475
22476 e = XVECEXP (p, 0, 0);
22477 if (GET_CODE (e) != SET)
22478 abort ();
22479
22480 /* First insn will adjust the stack pointer. */
22481 if (GET_CODE (e) != SET
22482 || GET_CODE (XEXP (e, 0)) != REG
22483 || REGNO (XEXP (e, 0)) != SP_REGNUM
22484 || GET_CODE (XEXP (e, 1)) != PLUS)
22485 abort ();
22486
22487 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
22488 nregs = XVECLEN (p, 0) - 1;
22489
22490 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
22491 if (reg < 16)
22492 {
22493 /* The function prologue may also push pc, but not annotate it as it is
22494 never restored. We turn this into a stack pointer adjustment. */
22495 if (nregs * 4 == offset - 4)
22496 {
22497 fprintf (asm_out_file, "\t.pad #4\n");
22498 offset -= 4;
22499 }
22500 reg_size = 4;
22501 fprintf (asm_out_file, "\t.save {");
22502 }
22503 else if (IS_VFP_REGNUM (reg))
22504 {
22505 reg_size = 8;
22506 fprintf (asm_out_file, "\t.vsave {");
22507 }
22508 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
22509 {
22510 /* FPA registers are done differently. */
22511 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
22512 return;
22513 }
22514 else
22515 /* Unknown register type. */
22516 abort ();
22517
22518 /* If the stack increment doesn't match the size of the saved registers,
22519 something has gone horribly wrong. */
22520 if (offset != nregs * reg_size)
22521 abort ();
22522
22523 offset = 0;
22524 lastreg = 0;
22525 /* The remaining insns will describe the stores. */
22526 for (i = 1; i <= nregs; i++)
22527 {
22528 /* Expect (set (mem <addr>) (reg)).
22529 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22530 e = XVECEXP (p, 0, i);
22531 if (GET_CODE (e) != SET
22532 || GET_CODE (XEXP (e, 0)) != MEM
22533 || GET_CODE (XEXP (e, 1)) != REG)
22534 abort ();
22535
22536 reg = REGNO (XEXP (e, 1));
22537 if (reg < lastreg)
22538 abort ();
22539
22540 if (i != 1)
22541 fprintf (asm_out_file, ", ");
22542 /* We can't use %r for vfp because we need to use the
22543 double precision register names. */
22544 if (IS_VFP_REGNUM (reg))
22545 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
22546 else
22547 asm_fprintf (asm_out_file, "%r", reg);
22548
22549 #ifdef ENABLE_CHECKING
22550 /* Check that the addresses are consecutive. */
22551 e = XEXP (XEXP (e, 0), 0);
22552 if (GET_CODE (e) == PLUS)
22553 {
22554 offset += reg_size;
22555 if (GET_CODE (XEXP (e, 0)) != REG
22556 || REGNO (XEXP (e, 0)) != SP_REGNUM
22557 || GET_CODE (XEXP (e, 1)) != CONST_INT
22558 || offset != INTVAL (XEXP (e, 1)))
22559 abort ();
22560 }
22561 else if (i != 1
22562 || GET_CODE (e) != REG
22563 || REGNO (e) != SP_REGNUM)
22564 abort ();
22565 #endif
22566 }
22567 fprintf (asm_out_file, "}\n");
22568 }
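/* By way of example (register choices are illustrative only), the prologue
   store-multiple "push {r4, r5, lr}" is annotated as

	.save {r4, r5, lr}

   a VFP save of d8-d9 becomes

	.vsave {d8, d9}

   and an extra pc push that is never restored is folded into a ".pad #4"
   adjustment.  */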
22569
22570 /* Emit unwind directives for a SET. */
22571
22572 static void
22573 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
22574 {
22575 rtx e0;
22576 rtx e1;
22577 unsigned reg;
22578
22579 e0 = XEXP (p, 0);
22580 e1 = XEXP (p, 1);
22581 switch (GET_CODE (e0))
22582 {
22583 case MEM:
22584 /* Pushing a single register. */
22585 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
22586 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
22587 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
22588 abort ();
22589
22590 asm_fprintf (asm_out_file, "\t.save ");
22591 if (IS_VFP_REGNUM (REGNO (e1)))
22592 asm_fprintf(asm_out_file, "{d%d}\n",
22593 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
22594 else
22595 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
22596 break;
22597
22598 case REG:
22599 if (REGNO (e0) == SP_REGNUM)
22600 {
22601 /* A stack increment. */
22602 if (GET_CODE (e1) != PLUS
22603 || GET_CODE (XEXP (e1, 0)) != REG
22604 || REGNO (XEXP (e1, 0)) != SP_REGNUM
22605 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22606 abort ();
22607
22608 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
22609 -INTVAL (XEXP (e1, 1)));
22610 }
22611 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
22612 {
22613 HOST_WIDE_INT offset;
22614
22615 if (GET_CODE (e1) == PLUS)
22616 {
22617 if (GET_CODE (XEXP (e1, 0)) != REG
22618 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
22619 abort ();
22620 reg = REGNO (XEXP (e1, 0));
22621 offset = INTVAL (XEXP (e1, 1));
22622 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
22623 HARD_FRAME_POINTER_REGNUM, reg,
22624 offset);
22625 }
22626 else if (GET_CODE (e1) == REG)
22627 {
22628 reg = REGNO (e1);
22629 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
22630 HARD_FRAME_POINTER_REGNUM, reg);
22631 }
22632 else
22633 abort ();
22634 }
22635 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
22636 {
22637 /* Move from sp to reg. */
22638 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
22639 }
22640 else if (GET_CODE (e1) == PLUS
22641 && GET_CODE (XEXP (e1, 0)) == REG
22642 && REGNO (XEXP (e1, 0)) == SP_REGNUM
22643 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
22644 {
22645 /* Set reg to offset from sp. */
22646 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
22647 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
22648 }
22649 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
22650 {
22651 /* Stack pointer save before alignment. */
22652 reg = REGNO (e0);
22653 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22654 reg + 0x90, reg);
22655 }
22656 else
22657 abort ();
22658 break;
22659
22660 default:
22661 abort ();
22662 }
22663 }
22664
22665
22666 /* Emit unwind directives for the given insn. */
22667
22668 static void
22669 arm_unwind_emit (FILE * asm_out_file, rtx insn)
22670 {
22671 rtx pat;
22672
22673 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22674 return;
22675
22676 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22677 && (TREE_NOTHROW (current_function_decl)
22678 || crtl->all_throwers_are_sibcalls))
22679 return;
22680
22681 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
22682 return;
22683
22684 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
22685 if (pat)
22686 pat = XEXP (pat, 0);
22687 else
22688 pat = PATTERN (insn);
22689
22690 switch (GET_CODE (pat))
22691 {
22692 case SET:
22693 arm_unwind_emit_set (asm_out_file, pat);
22694 break;
22695
22696 case SEQUENCE:
22697 /* Store multiple. */
22698 arm_unwind_emit_sequence (asm_out_file, pat);
22699 break;
22700
22701 default:
22702 abort();
22703 }
22704 }
22705
22706
22707 /* Output a reference from a function exception table to the type_info
22708 object X. The EABI specifies that the symbol should be relocated by
22709 an R_ARM_TARGET2 relocation. */
22710
22711 static bool
22712 arm_output_ttype (rtx x)
22713 {
22714 fputs ("\t.word\t", asm_out_file);
22715 output_addr_const (asm_out_file, x);
22716 /* Use special relocations for symbol references. */
22717 if (GET_CODE (x) != CONST_INT)
22718 fputs ("(TARGET2)", asm_out_file);
22719 fputc ('\n', asm_out_file);
22720
22721 return TRUE;
22722 }
22723
22724 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22725
22726 static void
22727 arm_asm_emit_except_personality (rtx personality)
22728 {
22729 fputs ("\t.personality\t", asm_out_file);
22730 output_addr_const (asm_out_file, personality);
22731 fputc ('\n', asm_out_file);
22732 }
22733
22734 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22735
22736 static void
22737 arm_asm_init_sections (void)
22738 {
22739 exception_section = get_unnamed_section (0, output_section_asm_op,
22740 "\t.handlerdata");
22741 }
22742 #endif /* ARM_UNWIND_INFO */
22743
22744 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22745
22746 static enum unwind_info_type
22747 arm_except_unwind_info (struct gcc_options *opts)
22748 {
22749 /* Honor the --enable-sjlj-exceptions configure switch. */
22750 #ifdef CONFIG_SJLJ_EXCEPTIONS
22751 if (CONFIG_SJLJ_EXCEPTIONS)
22752 return UI_SJLJ;
22753 #endif
22754
22755 /* If not using ARM EABI unwind tables... */
22756 if (ARM_UNWIND_INFO)
22757 {
22758 /* For simplicity elsewhere in this file, indicate that all unwind
22759 info is disabled if we're not emitting unwind tables. */
22760 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
22761 return UI_NONE;
22762 else
22763 return UI_TARGET;
22764 }
22765
22766 /* ... we use sjlj exceptions for backwards compatibility. */
22767 return UI_SJLJ;
22768 }
22769
22770
22771 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22772 stack alignment. */
22773
22774 static void
22775 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
22776 {
22777 rtx unspec = SET_SRC (pattern);
22778 gcc_assert (GET_CODE (unspec) == UNSPEC);
22779
22780 switch (index)
22781 {
22782 case UNSPEC_STACK_ALIGN:
22783 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22784 put anything on the stack, so hopefully it won't matter.
22785 CFA = SP will be correct after alignment. */
22786 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
22787 SET_DEST (pattern));
22788 break;
22789 default:
22790 gcc_unreachable ();
22791 }
22792 }
22793
22794
22795 /* Output unwind directives for the start/end of a function. */
22796
22797 void
22798 arm_output_fn_unwind (FILE * f, bool prologue)
22799 {
22800 if (arm_except_unwind_info (&global_options) != UI_TARGET)
22801 return;
22802
22803 if (prologue)
22804 fputs ("\t.fnstart\n", f);
22805 else
22806 {
22807 /* If this function will never be unwound, then mark it as such.
22808    The same condition is used in arm_unwind_emit to suppress
22809 the frame annotations. */
22810 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
22811 && (TREE_NOTHROW (current_function_decl)
22812 || crtl->all_throwers_are_sibcalls))
22813 fputs("\t.cantunwind\n", f);
22814
22815 fputs ("\t.fnend\n", f);
22816 }
22817 }
22818
22819 static bool
22820 arm_emit_tls_decoration (FILE *fp, rtx x)
22821 {
22822 enum tls_reloc reloc;
22823 rtx val;
22824
22825 val = XVECEXP (x, 0, 0);
22826 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
22827
22828 output_addr_const (fp, val);
22829
22830 switch (reloc)
22831 {
22832 case TLS_GD32:
22833 fputs ("(tlsgd)", fp);
22834 break;
22835 case TLS_LDM32:
22836 fputs ("(tlsldm)", fp);
22837 break;
22838 case TLS_LDO32:
22839 fputs ("(tlsldo)", fp);
22840 break;
22841 case TLS_IE32:
22842 fputs ("(gottpoff)", fp);
22843 break;
22844 case TLS_LE32:
22845 fputs ("(tpoff)", fp);
22846 break;
22847 default:
22848 gcc_unreachable ();
22849 }
22850
22851 switch (reloc)
22852 {
22853 case TLS_GD32:
22854 case TLS_LDM32:
22855 case TLS_IE32:
22856 fputs (" + (. - ", fp);
22857 output_addr_const (fp, XVECEXP (x, 0, 2));
22858 fputs (" - ", fp);
22859 output_addr_const (fp, XVECEXP (x, 0, 3));
22860 fputc (')', fp);
22861 break;
22862 default:
22863 break;
22864 }
22865
22866 return TRUE;
22867 }
22868
22869 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22870
22871 static void
22872 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
22873 {
22874 gcc_assert (size == 4);
22875 fputs ("\t.word\t", file);
22876 output_addr_const (file, x);
22877 fputs ("(tlsldo)", file);
22878 }
22879
22880 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22881
22882 static bool
22883 arm_output_addr_const_extra (FILE *fp, rtx x)
22884 {
22885 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
22886 return arm_emit_tls_decoration (fp, x);
22887 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
22888 {
22889 char label[256];
22890 int labelno = INTVAL (XVECEXP (x, 0, 0));
22891
22892 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
22893 assemble_name_raw (fp, label);
22894
22895 return TRUE;
22896 }
22897 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
22898 {
22899 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
22900 if (GOT_PCREL)
22901 fputs ("+.", fp);
22902 fputs ("-(", fp);
22903 output_addr_const (fp, XVECEXP (x, 0, 0));
22904 fputc (')', fp);
22905 return TRUE;
22906 }
22907 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
22908 {
22909 output_addr_const (fp, XVECEXP (x, 0, 0));
22910 if (GOT_PCREL)
22911 fputs ("+.", fp);
22912 fputs ("-(", fp);
22913 output_addr_const (fp, XVECEXP (x, 0, 1));
22914 fputc (')', fp);
22915 return TRUE;
22916 }
22917 else if (GET_CODE (x) == CONST_VECTOR)
22918 return arm_emit_vector_const (fp, x);
22919
22920 return FALSE;
22921 }
22922
22923 /* Output assembly for a shift instruction.
22924 SET_FLAGS determines how the instruction modifies the condition codes.
22925 0 - Do not set condition codes.
22926 1 - Set condition codes.
22927 2 - Use smallest instruction. */
22928 const char *
22929 arm_output_shift(rtx * operands, int set_flags)
22930 {
22931 char pattern[100];
22932 static const char flag_chars[3] = {'?', '.', '!'};
22933 const char *shift;
22934 HOST_WIDE_INT val;
22935 char c;
22936
22937 c = flag_chars[set_flags];
22938 if (TARGET_UNIFIED_ASM)
22939 {
22940 shift = shift_op(operands[3], &val);
22941 if (shift)
22942 {
22943 if (val != -1)
22944 operands[2] = GEN_INT(val);
22945 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
22946 }
22947 else
22948 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
22949 }
22950 else
22951 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
22952 output_asm_insn (pattern, operands);
22953 return "";
22954 }
22955
22956 /* Output a Thumb-1 casesi dispatch sequence. */
22957 const char *
22958 thumb1_output_casesi (rtx *operands)
22959 {
22960 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
22961
22962 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22963
22964 switch (GET_MODE(diff_vec))
22965 {
22966 case QImode:
22967 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22968 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22969 case HImode:
22970 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
22971 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22972 case SImode:
22973 return "bl\t%___gnu_thumb1_case_si";
22974 default:
22975 gcc_unreachable ();
22976 }
22977 }
22978
22979 /* Output a Thumb-2 casesi instruction. */
22980 const char *
22981 thumb2_output_casesi (rtx *operands)
22982 {
22983 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
22984
22985 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
22986
22987 output_asm_insn ("cmp\t%0, %1", operands);
22988 output_asm_insn ("bhi\t%l3", operands);
22989 switch (GET_MODE(diff_vec))
22990 {
22991 case QImode:
22992 return "tbb\t[%|pc, %0]";
22993 case HImode:
22994 return "tbh\t[%|pc, %0, lsl #1]";
22995 case SImode:
22996 if (flag_pic)
22997 {
22998 output_asm_insn ("adr\t%4, %l2", operands);
22999 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
23000 output_asm_insn ("add\t%4, %4, %5", operands);
23001 return "bx\t%4";
23002 }
23003 else
23004 {
23005 output_asm_insn ("adr\t%4, %l2", operands);
23006 return "ldr\t%|pc, [%4, %0, lsl #2]";
23007 }
23008 default:
23009 gcc_unreachable ();
23010 }
23011 }
23012
23013 /* Most ARM cores are single issue, but some newer ones can dual issue.
23014 The scheduler descriptions rely on this being correct. */
23015 static int
23016 arm_issue_rate (void)
23017 {
23018 switch (arm_tune)
23019 {
23020 case cortexr4:
23021 case cortexr4f:
23022 case cortexa5:
23023 case cortexa8:
23024 case cortexa9:
23025 case fa726te:
23026 return 2;
23027
23028 default:
23029 return 1;
23030 }
23031 }
23032
23033 /* A table and a function to perform ARM-specific name mangling for
23034 NEON vector types in order to conform to the AAPCS (see "Procedure
23035 Call Standard for the ARM Architecture", Appendix A). To qualify
23036 for emission with the mangled names defined in that document, a
23037 vector type must not only be of the correct mode but also be
23038 composed of NEON vector element types (e.g. __builtin_neon_qi). */
23039 typedef struct
23040 {
23041 enum machine_mode mode;
23042 const char *element_type_name;
23043 const char *aapcs_name;
23044 } arm_mangle_map_entry;
23045
23046 static arm_mangle_map_entry arm_mangle_map[] = {
23047 /* 64-bit containerized types. */
23048 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
23049 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23050 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
23051 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23052 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
23053 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
23054 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
23055 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
23056 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
23057 /* 128-bit containerized types. */
23058 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
23059 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
23060 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
23061 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
23062 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
23063 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
23064 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
23065 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
23066 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
23067 { VOIDmode, NULL, NULL }
23068 };
23069
23070 const char *
23071 arm_mangle_type (const_tree type)
23072 {
23073 arm_mangle_map_entry *pos = arm_mangle_map;
23074
23075 /* The ARM ABI documents (10th October 2008) say that "__va_list"
23076    has to be mangled as if it is in the "std" namespace.  */
23077 if (TARGET_AAPCS_BASED
23078 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
23079 {
23080 static bool warned;
23081 if (!warned && warn_psabi && !in_system_header)
23082 {
23083 warned = true;
23084 inform (input_location,
23085 "the mangling of %<va_list%> has changed in GCC 4.4");
23086 }
23087 return "St9__va_list";
23088 }
23089
23090 /* Half-precision float. */
23091 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
23092 return "Dh";
23093
23094 if (TREE_CODE (type) != VECTOR_TYPE)
23095 return NULL;
23096
23097 /* Check the mode of the vector type, and the name of the vector
23098 element type, against the table. */
23099 while (pos->mode != VOIDmode)
23100 {
23101 tree elt_type = TREE_TYPE (type);
23102
23103 if (pos->mode == TYPE_MODE (type)
23104 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
23105 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
23106 pos->element_type_name))
23107 return pos->aapcs_name;
23108
23109 pos++;
23110 }
23111
23112 /* Use the default mangling for unrecognized (possibly user-defined)
23113 vector types. */
23114 return NULL;
23115 }
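/* For instance, an arm_neon.h "int8x8_t" argument (V8QImode with element
   type __builtin_neon_qi) picks up the table entry above and is mangled as
   "15__simd64_int8_t", whereas a user-defined generic vector of the same
   mode misses the element-type check and keeps the default mangling.  */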
23116
23117 /* Order of allocation of core registers for Thumb: this allocation is
23118 written over the corresponding initial entries of the array
23119 initialized with REG_ALLOC_ORDER. We allocate all low registers
23120 first. Saving and restoring a low register is usually cheaper than
23121 using a call-clobbered high register. */
23122
23123 static const int thumb_core_reg_alloc_order[] =
23124 {
23125 3, 2, 1, 0, 4, 5, 6, 7,
23126 14, 12, 8, 9, 10, 11, 13, 15
23127 };
23128
23129 /* Adjust register allocation order when compiling for Thumb. */
23130
23131 void
23132 arm_order_regs_for_local_alloc (void)
23133 {
23134 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
23135 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
23136 if (TARGET_THUMB)
23137 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
23138 sizeof (thumb_core_reg_alloc_order));
23139 }
23140
23141 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23142
23143 bool
23144 arm_frame_pointer_required (void)
23145 {
23146 return (cfun->has_nonlocal_label
23147 || SUBTARGET_FRAME_POINTER_REQUIRED
23148 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
23149 }
23150
23151 /* Thumb-1 is the only target that cannot support conditional execution,
23152    so return true if the target is not Thumb-1.  */
23153 static bool
23154 arm_have_conditional_execution (void)
23155 {
23156 return !TARGET_THUMB1;
23157 }
23158
23159 /* Legitimize a memory reference for a sync primitive implemented using
23160 ldrex / strex. We currently force the form of the reference to be
23161 indirect without offset. We do not yet support the indirect offset
23162 addressing supported by some ARM targets for these
23163 instructions. */
23164 static rtx
23165 arm_legitimize_sync_memory (rtx memory)
23166 {
23167 rtx addr = force_reg (Pmode, XEXP (memory, 0));
23168 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
23169
23170 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
23171 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
23172 return legitimate_memory;
23173 }
23174
23175 /* An instruction emitter. */
23176 typedef void (* emit_f) (int label, const char *, rtx *);
23177
23178 /* An instruction emitter that emits via the conventional
23179 output_asm_insn. */
23180 static void
23181 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23182 {
23183 output_asm_insn (pattern, operands);
23184 }
23185
23186 /* Count the number of emitted synchronization instructions. */
23187 static unsigned arm_insn_count;
23188
23189 /* An emitter that counts emitted instructions but does not actually
23190    emit instructions into the instruction stream.  */
23191 static void
23192 arm_count (int label,
23193 const char *pattern ATTRIBUTE_UNUSED,
23194 rtx *operands ATTRIBUTE_UNUSED)
23195 {
23196 if (! label)
23197 ++ arm_insn_count;
23198 }
23199
23200 /* Construct a pattern using conventional output formatting and feed
23201 it to output_asm_insn. Provides a mechanism to construct the
23202 output pattern on the fly. Note the hard limit on the pattern
23203 buffer size. */
23204 static void ATTRIBUTE_PRINTF_4
23205 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23206 const char *pattern, ...)
23207 {
23208 va_list ap;
23209 char buffer[256];
23210
23211 va_start (ap, pattern);
23212 vsprintf (buffer, pattern, ap);
23213 va_end (ap);
23214 emit (label, buffer, operands);
23215 }
23216
23217 /* Emit the memory barrier instruction, if any, provided by this
23218 target to a specified emitter. */
23219 static void
23220 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23221 {
23222 if (TARGET_HAVE_DMB)
23223 {
23224 /* Note we issue a system level barrier. We should consider
23225       issuing an inner shareability zone barrier here instead, i.e.
23226 "DMB ISH". */
23227 emit (0, "dmb\tsy", operands);
23228 return;
23229 }
23230
23231 if (TARGET_HAVE_DMB_MCR)
23232 {
23233 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23234 return;
23235 }
23236
23237 gcc_unreachable ();
23238 }
23239
23240 /* Emit the memory barrier instruction, if any, provided by this
23241 target. */
23242 const char *
23243 arm_output_memory_barrier (rtx *operands)
23244 {
23245 arm_process_output_memory_barrier (arm_emit, operands);
23246 return "";
23247 }
23248
23249 /* Helper to figure out the instruction suffix required on ldrex/strex
23250 for operations on an object of the specified mode. */
23251 static const char *
23252 arm_ldrex_suffix (enum machine_mode mode)
23253 {
23254 switch (mode)
23255 {
23256 case QImode: return "b";
23257 case HImode: return "h";
23258 case SImode: return "";
23259 case DImode: return "d";
23260 default:
23261 gcc_unreachable ();
23262 }
23263 return "";
23264 }
23265
23266 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23267 mode. */
23268 static void
23269 arm_output_ldrex (emit_f emit,
23270 enum machine_mode mode,
23271 rtx target,
23272 rtx memory)
23273 {
23274 const char *suffix = arm_ldrex_suffix (mode);
23275 rtx operands[2];
23276
23277 operands[0] = target;
23278 operands[1] = memory;
23279 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23280 }
23281
23282 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23283 mode. */
23284 static void
23285 arm_output_strex (emit_f emit,
23286 enum machine_mode mode,
23287 const char *cc,
23288 rtx result,
23289 rtx value,
23290 rtx memory)
23291 {
23292 const char *suffix = arm_ldrex_suffix (mode);
23293 rtx operands[3];
23294
23295 operands[0] = result;
23296 operands[1] = value;
23297 operands[2] = memory;
23298 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23299 cc);
23300 }
23301
23302 /* Helper to emit a two operand instruction. */
23303 static void
23304 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23305 {
23306 rtx operands[2];
23307
23308 operands[0] = d;
23309 operands[1] = s;
23310 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23311 }
23312
23313 /* Helper to emit a three operand instruction. */
23314 static void
23315 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23316 {
23317 rtx operands[3];
23318
23319 operands[0] = d;
23320 operands[1] = a;
23321 operands[2] = b;
23322 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23323 }
23324
23325 /* Emit a load store exclusive synchronization loop.
23326
23327 do
23328 old_value = [mem]
23329 if old_value != required_value
23330 break;
23331 t1 = sync_op (old_value, new_value)
23332 [mem] = t1, t2 = [0|1]
23333 while ! t2
23334
23335 Note:
23336 t1 == t2 is not permitted
23337 t1 == old_value is permitted
23338
23339 required_value:
23340
23341 RTX register or const_int representing the required old_value for
23342    the modify to continue, if NULL no comparison is performed.  */
23343 static void
23344 arm_output_sync_loop (emit_f emit,
23345 enum machine_mode mode,
23346 rtx old_value,
23347 rtx memory,
23348 rtx required_value,
23349 rtx new_value,
23350 rtx t1,
23351 rtx t2,
23352 enum attr_sync_op sync_op,
23353 int early_barrier_required)
23354 {
23355 rtx operands[1];
23356
23357 gcc_assert (t1 != t2);
23358
23359 if (early_barrier_required)
23360 arm_process_output_memory_barrier (emit, NULL);
23361
23362 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23363
23364 arm_output_ldrex (emit, mode, old_value, memory);
23365
23366 if (required_value)
23367 {
23368 rtx operands[2];
23369
23370 operands[0] = old_value;
23371 operands[1] = required_value;
23372 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23373 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23374 }
23375
23376 switch (sync_op)
23377 {
23378 case SYNC_OP_ADD:
23379 arm_output_op3 (emit, "add", t1, old_value, new_value);
23380 break;
23381
23382 case SYNC_OP_SUB:
23383 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23384 break;
23385
23386 case SYNC_OP_IOR:
23387 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23388 break;
23389
23390 case SYNC_OP_XOR:
23391 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23392 break;
23393
23394 case SYNC_OP_AND:
23395 arm_output_op3 (emit,"and", t1, old_value, new_value);
23396 break;
23397
23398 case SYNC_OP_NAND:
23399 arm_output_op3 (emit, "and", t1, old_value, new_value);
23400 arm_output_op2 (emit, "mvn", t1, t1);
23401 break;
23402
23403 case SYNC_OP_NONE:
23404 t1 = new_value;
23405 break;
23406 }
23407
23408 if (t2)
23409 {
23410 arm_output_strex (emit, mode, "", t2, t1, memory);
23411 operands[0] = t2;
23412 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23413 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23414 LOCAL_LABEL_PREFIX);
23415 }
23416 else
23417 {
23418 /* Use old_value for the return value because for some operations
23419 the old_value can easily be restored. This saves one register. */
23420 arm_output_strex (emit, mode, "", old_value, t1, memory);
23421 operands[0] = old_value;
23422 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23423 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23424 LOCAL_LABEL_PREFIX);
23425
23426 switch (sync_op)
23427 {
23428 case SYNC_OP_ADD:
23429 arm_output_op3 (emit, "sub", old_value, t1, new_value);
23430 break;
23431
23432 case SYNC_OP_SUB:
23433 arm_output_op3 (emit, "add", old_value, t1, new_value);
23434 break;
23435
23436 case SYNC_OP_XOR:
23437 arm_output_op3 (emit, "eor", old_value, t1, new_value);
23438 break;
23439
23440 case SYNC_OP_NONE:
23441 arm_output_op2 (emit, "mov", old_value, required_value);
23442 break;
23443
23444 default:
23445 gcc_unreachable ();
23446 }
23447 }
23448
23449 arm_process_output_memory_barrier (emit, NULL);
23450 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
23451 }
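/* A rough sketch of the code emitted by the loop above for an SImode atomic
   add when both temporaries are available (barriers and exact label
   spellings elided; register names are placeholders, the real labels use
   LOCAL_LABEL_PREFIX plus LSYT/LSYB and the insn's %= counter):

   1:	ldrex	r_old, [r_mem]
	add	r_t1, r_old, r_new
	strex	r_t2, r_t1, [r_mem]
	teq	r_t2, #0
	bne	1b
*/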
23452
23453 static rtx
23454 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23455 {
23456 if (index > 0)
23457 default_value = operands[index - 1];
23458
23459 return default_value;
23460 }
23461
23462 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23463 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23464
23465 /* Extract the operands for a synchronization instruction from the
23466    instruction's attributes and emit the instruction.  */
23467 static void
23468 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23469 {
23470 rtx result, memory, required_value, new_value, t1, t2;
23471 int early_barrier;
23472 enum machine_mode mode;
23473 enum attr_sync_op sync_op;
23474
23475 result = FETCH_SYNC_OPERAND(result, 0);
23476 memory = FETCH_SYNC_OPERAND(memory, 0);
23477 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23478 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23479 t1 = FETCH_SYNC_OPERAND(t1, 0);
23480 t2 = FETCH_SYNC_OPERAND(t2, 0);
23481 early_barrier =
23482 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23483 sync_op = get_attr_sync_op (insn);
23484 mode = GET_MODE (memory);
23485
23486 arm_output_sync_loop (emit, mode, result, memory, required_value,
23487 new_value, t1, t2, sync_op, early_barrier);
23488 }
23489
23490 /* Emit a synchronization instruction loop. */
23491 const char *
23492 arm_output_sync_insn (rtx insn, rtx *operands)
23493 {
23494 arm_process_output_sync_insn (arm_emit, insn, operands);
23495 return "";
23496 }
23497
23498 /* Count the number of machine instructions that will be emitted for a
23499    synchronization instruction.  Note that the emitter used does not
23500    emit instructions, it just counts them, being careful not
23501    to count labels.  */
23502 unsigned int
23503 arm_sync_loop_insns (rtx insn, rtx *operands)
23504 {
23505 arm_insn_count = 0;
23506 arm_process_output_sync_insn (arm_count, insn, operands);
23507 return arm_insn_count;
23508 }
23509
23510 /* Helper to call a target sync instruction generator, dealing with
23511 the variation in operands required by the different generators. */
23512 static rtx
23513 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23514 rtx memory, rtx required_value, rtx new_value)
23515 {
23516 switch (generator->op)
23517 {
23518 case arm_sync_generator_omn:
23519 gcc_assert (! required_value);
23520 return generator->u.omn (old_value, memory, new_value);
23521
23522 case arm_sync_generator_omrn:
23523 gcc_assert (required_value);
23524 return generator->u.omrn (old_value, memory, required_value, new_value);
23525 }
23526
23527 return NULL;
23528 }
23529
23530 /* Expand a synchronization loop. The synchronization loop is expanded
23531 as an opaque block of instructions in order to ensure that we do
23532 not subsequently get extraneous memory accesses inserted within the
23533 critical region. The exclusive access property of ldrex/strex is
23534    only guaranteed if there are no intervening memory accesses.  */
23535 void
23536 arm_expand_sync (enum machine_mode mode,
23537 struct arm_sync_generator *generator,
23538 rtx target, rtx memory, rtx required_value, rtx new_value)
23539 {
23540 if (target == NULL)
23541 target = gen_reg_rtx (mode);
23542
23543 memory = arm_legitimize_sync_memory (memory);
23544 if (mode != SImode)
23545 {
23546 rtx load_temp = gen_reg_rtx (SImode);
23547
23548 if (required_value)
23549 required_value = convert_modes (SImode, mode, required_value, true);
23550
23551 new_value = convert_modes (SImode, mode, new_value, true);
23552 emit_insn (arm_call_generator (generator, load_temp, memory,
23553 required_value, new_value));
23554 emit_move_insn (target, gen_lowpart (mode, load_temp));
23555 }
23556 else
23557 {
23558 emit_insn (arm_call_generator (generator, target, memory, required_value,
23559 new_value));
23560 }
23561 }
23562
23563 static unsigned int
23564 arm_autovectorize_vector_sizes (void)
23565 {
23566 return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
23567 }

static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                         const_tree type, int misalignment,
                                         bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
        return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
         so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
        return true;

      /* Return true if the misalignment is a multiple of the natural alignment
         of the vector's element type.  This is probably always going to be
         true in practice, since we've already established that this isn't a
         packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
                                                      is_packed);
}
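
/* Worked example (illustrative): if TYPE_ALIGN_UNIT yields align == 4 for
   the accessed type, a known misalignment of 8 bytes is accepted because
   8 % 4 == 0, a misalignment of 6 bytes is rejected, and an unknown
   misalignment (-1) is accepted as long as the access is not to a member
   of a packed structure.  */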

static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
    {
      for (regno = FIRST_FPA_REGNUM;
           regno <= LAST_FPA_REGNUM; ++regno)
        fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
         to use the HI regs, because of the overhead of
         stacking them.  */
      for (regno = FIRST_HI_REGNUM;
           regno <= LAST_HI_REGNUM; ++regno)
        fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      if (TARGET_MAVERICK)
        {
          for (regno = FIRST_FPA_REGNUM;
               regno <= LAST_FPA_REGNUM; ++ regno)
            fixed_regs[regno] = call_used_regs[regno] = 1;
          for (regno = FIRST_CIRRUS_FP_REGNUM;
               regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
            {
              fixed_regs[regno] = 0;
              call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
            }
        }
      if (TARGET_VFP)
        {
          /* VFPv3 registers are disabled when earlier VFP
             versions are selected due to the definition of
             LAST_VFP_REGNUM.  */
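          /* Illustration of the test below (per the AAPCS): the first 16
             single-precision registers (s0-s15, i.e. d0-d7) and the
             VFPv3-only d16-d31 block are call-clobbered, while s16-s31
             (d8-d15) are call-saved, hence the two-sided comparison on
             regno.  */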
          for (regno = FIRST_VFP_REGNUM;
               regno <= LAST_VFP_REGNUM; ++ regno)
            {
              fixed_regs[regno] = 0;
              call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
                || regno >= FIRST_VFP_REGNUM + 32;
            }
        }
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
         and wCG1 as call-preserved registers.  The 2002/11/21
         revision changed this so that all wCG registers are
         scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
           regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
        fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
         the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
           regno <= LAST_IWMMXT_REGNUM; ++ regno)
        {
          fixed_regs[regno] = 0;
          call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
        }
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
        global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}

static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERAL_REGS.  During the register rename pass we therefore
     prefer LO_REGS, which can reduce code size.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}
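
/* Illustrative note: renaming a destination from a high register such as
   r8 to a low register lets patterns like "adds r1, r1, #1" use a 16-bit
   Thumb-2 encoding instead of the 32-bit encoding that "adds r8, r8, #1"
   requires, which is why LO_REGS is preferred above.  */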

/* Compute the attribute "length" of insn "*push_multi".
   This function must be kept in sync with that insn pattern.  */
int
arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
{
  int i, regno, hi_reg;
  int num_saves = XVECLEN (parallel_op, 0);

  /* ARM mode.  */
  if (TARGET_ARM)
    return 4;

  /* Thumb2 mode.  */
  regno = REGNO (first_op);
  hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
  for (i = 1; i < num_saves && !hi_reg; i++)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
      hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
    }

  if (!hi_reg)
    return 2;
  return 4;
}
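
/* Illustrative examples of the result above: "push {r0-r7, lr}" fits the
   16-bit Thumb encoding of PUSH, so the insn length is 2, whereas a list
   containing any other high register, e.g. "push {r4, r8}", needs the
   32-bit encoding and the length is 4; in ARM mode every store-multiple
   is 4 bytes.  */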

#include "gt-arm.h"