gcc/config/arm/arm.c (gcc.git, commit 9af66dd8ace6c952428bc5f604f60a6444b49be8)
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
55 #include "df.h"
56 #include "intl.h"
57 #include "libfuncs.h"
58 #include "params.h"
59 #include "opts.h"
60
61 /* Forward definitions of types. */
62 typedef struct minipool_node Mnode;
63 typedef struct minipool_fixup Mfix;
64
65 void (*arm_lang_output_object_attributes_hook)(void);
66
67 struct four_ints
68 {
69 int i[4];
70 };
71
72 /* Forward function declarations. */
73 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
74 static int arm_compute_static_chain_stack_bytes (void);
75 static arm_stack_offsets *arm_get_frame_offsets (void);
76 static void arm_add_gc_roots (void);
77 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
78 HOST_WIDE_INT, rtx, rtx, int, int);
79 static unsigned bit_count (unsigned long);
80 static int arm_address_register_rtx_p (rtx, int);
81 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
82 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
83 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
84 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
85 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
86 inline static int thumb1_index_register_rtx_p (rtx, int);
87 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
88 static int thumb_far_jump_used_p (void);
89 static bool thumb_force_lr_save (void);
90 static rtx emit_sfm (int, int);
91 static unsigned arm_size_return_regs (void);
92 static bool arm_assemble_integer (rtx, unsigned int, int);
93 static void arm_print_operand (FILE *, rtx, int);
94 static void arm_print_operand_address (FILE *, rtx);
95 static bool arm_print_operand_punct_valid_p (unsigned char code);
96 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
97 static arm_cc get_arm_condition_code (rtx);
98 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
99 static rtx is_jump_table (rtx);
100 static const char *output_multi_immediate (rtx *, const char *, const char *,
101 int, HOST_WIDE_INT);
102 static const char *shift_op (rtx, HOST_WIDE_INT *);
103 static struct machine_function *arm_init_machine_status (void);
104 static void thumb_exit (FILE *, int);
105 static rtx is_jump_table (rtx);
106 static HOST_WIDE_INT get_jump_table_size (rtx);
107 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
108 static Mnode *add_minipool_forward_ref (Mfix *);
109 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
110 static Mnode *add_minipool_backward_ref (Mfix *);
111 static void assign_minipool_offsets (Mfix *);
112 static void arm_print_value (FILE *, rtx);
113 static void dump_minipool (rtx);
114 static int arm_barrier_cost (rtx);
115 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
116 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
117 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
118 rtx);
119 static void arm_reorg (void);
120 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
121 static unsigned long arm_compute_save_reg0_reg12_mask (void);
122 static unsigned long arm_compute_save_reg_mask (void);
123 static unsigned long arm_isr_value (tree);
124 static unsigned long arm_compute_func_type (void);
125 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
126 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
127 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
128 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
129 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
130 #endif
131 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
132 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
133 static int arm_comp_type_attributes (const_tree, const_tree);
134 static void arm_set_default_type_attributes (tree);
135 static int arm_adjust_cost (rtx, rtx, rtx, int);
136 static int optimal_immediate_sequence (enum rtx_code code,
137 unsigned HOST_WIDE_INT val,
138 struct four_ints *return_sequence);
139 static int optimal_immediate_sequence_1 (enum rtx_code code,
140 unsigned HOST_WIDE_INT val,
141 struct four_ints *return_sequence,
142 int i);
143 static int arm_get_strip_length (int);
144 static bool arm_function_ok_for_sibcall (tree, tree);
145 static enum machine_mode arm_promote_function_mode (const_tree,
146 enum machine_mode, int *,
147 const_tree, int);
148 static bool arm_return_in_memory (const_tree, const_tree);
149 static rtx arm_function_value (const_tree, const_tree, bool);
150 static rtx arm_libcall_value_1 (enum machine_mode);
151 static rtx arm_libcall_value (enum machine_mode, const_rtx);
152 static bool arm_function_value_regno_p (const unsigned int);
153 static void arm_internal_label (FILE *, const char *, unsigned long);
154 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
155 tree);
156 static bool arm_have_conditional_execution (void);
157 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
158 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
159 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
160 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
161 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
163 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
165 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
166 static int arm_address_cost (rtx, bool);
167 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
168 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
169 static bool arm_memory_load_p (rtx);
170 static bool arm_cirrus_insn_p (rtx);
171 static void cirrus_reorg (rtx);
172 static void arm_init_builtins (void);
173 static void arm_init_iwmmxt_builtins (void);
174 static rtx safe_vector_operand (rtx, enum machine_mode);
175 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
176 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
177 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
178 static tree arm_builtin_decl (unsigned, bool);
179 static void emit_constant_insn (rtx cond, rtx pattern);
180 static rtx emit_set_insn (rtx, rtx);
181 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
182 tree, bool);
183 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
184 const_tree, bool);
185 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
186 const_tree, bool);
187 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
188 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
189 const_tree);
190 static rtx aapcs_libcall_value (enum machine_mode);
191 static int aapcs_select_return_coproc (const_tree, const_tree);
192
193 #ifdef OBJECT_FORMAT_ELF
194 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
195 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
196 #endif
197 #ifndef ARM_PE
198 static void arm_encode_section_info (tree, rtx, int);
199 #endif
200
201 static void arm_file_end (void);
202 static void arm_file_start (void);
203
204 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 enum machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 static void arm_asm_init_sections (void);
219 #endif
220 static rtx arm_dwarf_register_span (rtx);
221
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
237 static bool arm_cannot_copy_insn_p (rtx);
238 static bool arm_tls_symbol_p (rtx x);
239 static int arm_issue_rate (void);
240 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
241 static bool arm_output_addr_const_extra (FILE *, rtx);
242 static bool arm_allocate_stack_slots_for_args (void);
243 static const char *arm_invalid_parameter_type (const_tree t);
244 static const char *arm_invalid_return_type (const_tree t);
245 static tree arm_promoted_type (const_tree t);
246 static tree arm_convert_to_type (tree type, tree expr);
247 static bool arm_scalar_mode_supported_p (enum machine_mode);
248 static bool arm_frame_pointer_required (void);
249 static bool arm_can_eliminate (const int, const int);
250 static void arm_asm_trampoline_template (FILE *);
251 static void arm_trampoline_init (rtx, tree, rtx);
252 static rtx arm_trampoline_adjust_address (rtx);
253 static rtx arm_pic_static_addr (rtx orig, rtx reg);
254 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
255 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
256 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
257 static bool arm_array_mode_supported_p (enum machine_mode,
258 unsigned HOST_WIDE_INT);
259 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
260 static bool arm_class_likely_spilled_p (reg_class_t);
261 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
262 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
263 const_tree type,
264 int misalignment,
265 bool is_packed);
266 static void arm_conditional_register_usage (void);
267 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
268 static unsigned int arm_autovectorize_vector_sizes (void);
269 static int arm_default_branch_cost (bool, bool);
270 static int arm_cortex_a5_branch_cost (bool, bool);
271
272 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
273 const unsigned char *sel);
274
275 \f
276 /* Table of machine attributes. */
277 static const struct attribute_spec arm_attribute_table[] =
278 {
279 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
280 affects_type_identity } */
281 /* Function calls made to this symbol must be done indirectly, because
282 it may lie outside of the 26 bit addressing range of a normal function
283 call. */
284 { "long_call", 0, 0, false, true, true, NULL, false },
285 /* Whereas these functions are always known to reside within the 26 bit
286 addressing range. */
287 { "short_call", 0, 0, false, true, true, NULL, false },
288 /* Specify the procedure call conventions for a function. */
289 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
290 false },
291 /* Interrupt Service Routines have special prologue and epilogue requirements. */
292 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
293 false },
294 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
295 false },
296 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
297 false },
298 #ifdef ARM_PE
299 /* ARM/PE has three new attributes:
300 interfacearm - ?
301 dllexport - for exporting a function/variable that will live in a dll
302 dllimport - for importing a function/variable from a dll
303
304 Microsoft allows multiple declspecs in one __declspec, separating
305 them with spaces. We do NOT support this. Instead, use __declspec
306 multiple times.
307 */
308 { "dllimport", 0, 0, true, false, false, NULL, false },
309 { "dllexport", 0, 0, true, false, false, NULL, false },
310 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
311 false },
312 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
313 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
314 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
315 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
316 false },
317 #endif
318 { NULL, 0, 0, false, false, false, NULL, false }
319 };
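/* A rough sketch of how the attributes registered above are spelled in
   user code (illustrative only; which attributes are accepted, and with
   which arguments, depends on the target configuration):

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void asm_only_stub (void) __attribute__ ((naked));
     double vfp_variant (double x) __attribute__ ((pcs ("aapcs-vfp")));  */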
320 \f
321 /* Initialize the GCC target structure. */
322 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
323 #undef TARGET_MERGE_DECL_ATTRIBUTES
324 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
325 #endif
326
327 #undef TARGET_LEGITIMIZE_ADDRESS
328 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
329
330 #undef TARGET_ATTRIBUTE_TABLE
331 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
332
333 #undef TARGET_ASM_FILE_START
334 #define TARGET_ASM_FILE_START arm_file_start
335 #undef TARGET_ASM_FILE_END
336 #define TARGET_ASM_FILE_END arm_file_end
337
338 #undef TARGET_ASM_ALIGNED_SI_OP
339 #define TARGET_ASM_ALIGNED_SI_OP NULL
340 #undef TARGET_ASM_INTEGER
341 #define TARGET_ASM_INTEGER arm_assemble_integer
342
343 #undef TARGET_PRINT_OPERAND
344 #define TARGET_PRINT_OPERAND arm_print_operand
345 #undef TARGET_PRINT_OPERAND_ADDRESS
346 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
347 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
348 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
349
350 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
351 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
352
353 #undef TARGET_ASM_FUNCTION_PROLOGUE
354 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
355
356 #undef TARGET_ASM_FUNCTION_EPILOGUE
357 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
358
359 #undef TARGET_OPTION_OVERRIDE
360 #define TARGET_OPTION_OVERRIDE arm_option_override
361
362 #undef TARGET_COMP_TYPE_ATTRIBUTES
363 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
364
365 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
366 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
367
368 #undef TARGET_SCHED_ADJUST_COST
369 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
370
371 #undef TARGET_REGISTER_MOVE_COST
372 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
373
374 #undef TARGET_MEMORY_MOVE_COST
375 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
376
377 #undef TARGET_ENCODE_SECTION_INFO
378 #ifdef ARM_PE
379 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
380 #else
381 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
382 #endif
383
384 #undef TARGET_STRIP_NAME_ENCODING
385 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
386
387 #undef TARGET_ASM_INTERNAL_LABEL
388 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
389
390 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
391 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
392
393 #undef TARGET_FUNCTION_VALUE
394 #define TARGET_FUNCTION_VALUE arm_function_value
395
396 #undef TARGET_LIBCALL_VALUE
397 #define TARGET_LIBCALL_VALUE arm_libcall_value
398
399 #undef TARGET_FUNCTION_VALUE_REGNO_P
400 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
401
402 #undef TARGET_ASM_OUTPUT_MI_THUNK
403 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
404 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
405 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
406
407 #undef TARGET_RTX_COSTS
408 #define TARGET_RTX_COSTS arm_rtx_costs
409 #undef TARGET_ADDRESS_COST
410 #define TARGET_ADDRESS_COST arm_address_cost
411
412 #undef TARGET_SHIFT_TRUNCATION_MASK
413 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
414 #undef TARGET_VECTOR_MODE_SUPPORTED_P
415 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
416 #undef TARGET_ARRAY_MODE_SUPPORTED_P
417 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
418 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
419 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
420 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
421 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
422 arm_autovectorize_vector_sizes
423
424 #undef TARGET_MACHINE_DEPENDENT_REORG
425 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
426
427 #undef TARGET_INIT_BUILTINS
428 #define TARGET_INIT_BUILTINS arm_init_builtins
429 #undef TARGET_EXPAND_BUILTIN
430 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
431 #undef TARGET_BUILTIN_DECL
432 #define TARGET_BUILTIN_DECL arm_builtin_decl
433
434 #undef TARGET_INIT_LIBFUNCS
435 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
436
437 #undef TARGET_PROMOTE_FUNCTION_MODE
438 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
439 #undef TARGET_PROMOTE_PROTOTYPES
440 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
441 #undef TARGET_PASS_BY_REFERENCE
442 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
443 #undef TARGET_ARG_PARTIAL_BYTES
444 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
445 #undef TARGET_FUNCTION_ARG
446 #define TARGET_FUNCTION_ARG arm_function_arg
447 #undef TARGET_FUNCTION_ARG_ADVANCE
448 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
449 #undef TARGET_FUNCTION_ARG_BOUNDARY
450 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
451
452 #undef TARGET_SETUP_INCOMING_VARARGS
453 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
454
455 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
456 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
457
458 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
459 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
460 #undef TARGET_TRAMPOLINE_INIT
461 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
462 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
463 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
464
465 #undef TARGET_DEFAULT_SHORT_ENUMS
466 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
467
468 #undef TARGET_ALIGN_ANON_BITFIELD
469 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
470
471 #undef TARGET_NARROW_VOLATILE_BITFIELD
472 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
473
474 #undef TARGET_CXX_GUARD_TYPE
475 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
476
477 #undef TARGET_CXX_GUARD_MASK_BIT
478 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
479
480 #undef TARGET_CXX_GET_COOKIE_SIZE
481 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
482
483 #undef TARGET_CXX_COOKIE_HAS_SIZE
484 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
485
486 #undef TARGET_CXX_CDTOR_RETURNS_THIS
487 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
488
489 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
490 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
491
492 #undef TARGET_CXX_USE_AEABI_ATEXIT
493 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
494
495 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
496 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
497 arm_cxx_determine_class_data_visibility
498
499 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
500 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
501
502 #undef TARGET_RETURN_IN_MSB
503 #define TARGET_RETURN_IN_MSB arm_return_in_msb
504
505 #undef TARGET_RETURN_IN_MEMORY
506 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
507
508 #undef TARGET_MUST_PASS_IN_STACK
509 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
510
511 #if ARM_UNWIND_INFO
512 #undef TARGET_ASM_UNWIND_EMIT
513 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
514
515 /* EABI unwinding tables use a different format for the typeinfo tables. */
516 #undef TARGET_ASM_TTYPE
517 #define TARGET_ASM_TTYPE arm_output_ttype
518
519 #undef TARGET_ARM_EABI_UNWINDER
520 #define TARGET_ARM_EABI_UNWINDER true
521
522 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
523 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
524
525 #undef TARGET_ASM_INIT_SECTIONS
526 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
527 #endif /* ARM_UNWIND_INFO */
528
529 #undef TARGET_DWARF_REGISTER_SPAN
530 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
531
532 #undef TARGET_CANNOT_COPY_INSN_P
533 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
534
535 #ifdef HAVE_AS_TLS
536 #undef TARGET_HAVE_TLS
537 #define TARGET_HAVE_TLS true
538 #endif
539
540 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
541 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
542
543 #undef TARGET_LEGITIMATE_CONSTANT_P
544 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
545
546 #undef TARGET_CANNOT_FORCE_CONST_MEM
547 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
548
549 #undef TARGET_MAX_ANCHOR_OFFSET
550 #define TARGET_MAX_ANCHOR_OFFSET 4095
551
552 /* The minimum is set so that each anchor covers offsets -4088 through
553 +4095, a block of 4088 + 1 + 4095 = 8184 bytes, which is divisible by
554 eight, ensuring natural spacing of anchors. */
555 #undef TARGET_MIN_ANCHOR_OFFSET
556 #define TARGET_MIN_ANCHOR_OFFSET -4088
557
558 #undef TARGET_SCHED_ISSUE_RATE
559 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
560
561 #undef TARGET_MANGLE_TYPE
562 #define TARGET_MANGLE_TYPE arm_mangle_type
563
564 #undef TARGET_BUILD_BUILTIN_VA_LIST
565 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
566 #undef TARGET_EXPAND_BUILTIN_VA_START
567 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
568 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
569 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
570
571 #ifdef HAVE_AS_TLS
572 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
573 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
574 #endif
575
576 #undef TARGET_LEGITIMATE_ADDRESS_P
577 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
578
579 #undef TARGET_INVALID_PARAMETER_TYPE
580 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
581
582 #undef TARGET_INVALID_RETURN_TYPE
583 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
584
585 #undef TARGET_PROMOTED_TYPE
586 #define TARGET_PROMOTED_TYPE arm_promoted_type
587
588 #undef TARGET_CONVERT_TO_TYPE
589 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
590
591 #undef TARGET_SCALAR_MODE_SUPPORTED_P
592 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
593
594 #undef TARGET_FRAME_POINTER_REQUIRED
595 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
596
597 #undef TARGET_CAN_ELIMINATE
598 #define TARGET_CAN_ELIMINATE arm_can_eliminate
599
600 #undef TARGET_CONDITIONAL_REGISTER_USAGE
601 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
602
603 #undef TARGET_CLASS_LIKELY_SPILLED_P
604 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
605
606 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
607 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
608 arm_vector_alignment_reachable
609
610 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
611 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
612 arm_builtin_support_vector_misalignment
613
614 #undef TARGET_PREFERRED_RENAME_CLASS
615 #define TARGET_PREFERRED_RENAME_CLASS \
616 arm_preferred_rename_class
617
618 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
619 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
620 arm_vectorize_vec_perm_const_ok
621
622 struct gcc_target targetm = TARGET_INITIALIZER;
623 \f
624 /* Obstack for minipool constant handling. */
625 static struct obstack minipool_obstack;
626 static char * minipool_startobj;
627
628 /* The maximum number of insns skipped which
629 will be conditionalised if possible. */
630 static int max_insns_skipped = 5;
631
632 extern FILE * asm_out_file;
633
634 /* True if we are currently building a constant table. */
635 int making_const_table;
636
637 /* The processor for which instructions should be scheduled. */
638 enum processor_type arm_tune = arm_none;
639
640 /* The current tuning set. */
641 const struct tune_params *current_tune;
642
643 /* Which floating point hardware to schedule for. */
644 int arm_fpu_attr;
645
646 /* Which floating point hardware to use. */
647 const struct arm_fpu_desc *arm_fpu_desc;
648
649 /* Used for Thumb call_via trampolines. */
650 rtx thumb_call_via_label[14];
651 static int thumb_call_reg_needed;
652
653 /* Bit values used to identify processor capabilities. */
654 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
655 #define FL_ARCH3M (1 << 1) /* Extended multiply */
656 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
657 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
658 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
659 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
660 #define FL_THUMB (1 << 6) /* Thumb aware */
661 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
662 #define FL_STRONG (1 << 8) /* StrongARM */
663 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
664 #define FL_XSCALE (1 << 10) /* XScale */
665 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
666 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
667 media instructions. */
668 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
669 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
670 Note: ARM6 & 7 derivatives only. */
671 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
672 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
673 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
674 profile. */
675 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
676 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
677 #define FL_NEON (1 << 20) /* Neon instructions. */
678 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
679 architecture. */
680 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
681 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
682
683 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
684
685 /* Flags that only affect tuning, not available instructions. */
686 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
687 | FL_CO_PROC)
688
689 #define FL_FOR_ARCH2 FL_NOTM
690 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
691 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
692 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
693 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
694 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
695 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
696 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
697 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
698 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
699 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
700 #define FL_FOR_ARCH6J FL_FOR_ARCH6
701 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
702 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
703 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
704 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
705 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
706 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
707 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
708 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
709 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
710 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
711
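/* These FL_FOR_* macros nest, so each architecture mask is the union of
   its predecessor's capabilities plus its own additions (the ARCH7 entry
   strips FL_NOTM, which the A and R profile entries then restore).
   Expanding the definitions above gives, for example:

     FL_FOR_ARCH7A == FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
                      | FL_THUMB | FL_ARCH5 | FL_ARCH5E | FL_ARCH6
                      | FL_THUMB2 | FL_ARCH7 | FL_ARCH6K  */
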
712 /* The bits in this mask specify which
713 instructions we are allowed to generate. */
714 static unsigned long insn_flags = 0;
715
716 /* The bits in this mask specify which instruction scheduling options should
717 be used. */
718 static unsigned long tune_flags = 0;
719
720 /* The following are used in the arm.md file as equivalents to bits
721 in the above two flag variables. */
722
723 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
724 int arm_arch3m = 0;
725
726 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
727 int arm_arch4 = 0;
728
729 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
730 int arm_arch4t = 0;
731
732 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
733 int arm_arch5 = 0;
734
735 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
736 int arm_arch5e = 0;
737
738 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
739 int arm_arch6 = 0;
740
741 /* Nonzero if this chip supports the ARM 6K extensions. */
742 int arm_arch6k = 0;
743
744 /* Nonzero if this chip supports the ARM 7 extensions. */
745 int arm_arch7 = 0;
746
747 /* Nonzero if instructions not present in the 'M' profile can be used. */
748 int arm_arch_notm = 0;
749
750 /* Nonzero if instructions present in ARMv7E-M can be used. */
751 int arm_arch7em = 0;
752
753 /* Nonzero if this chip can benefit from load scheduling. */
754 int arm_ld_sched = 0;
755
756 /* Nonzero if this chip is a StrongARM. */
757 int arm_tune_strongarm = 0;
758
759 /* Nonzero if this chip is a Cirrus variant. */
760 int arm_arch_cirrus = 0;
761
762 /* Nonzero if this chip supports Intel Wireless MMX technology. */
763 int arm_arch_iwmmxt = 0;
764
765 /* Nonzero if this chip is an XScale. */
766 int arm_arch_xscale = 0;
767
768 /* Nonzero if tuning for XScale */
769 int arm_tune_xscale = 0;
770
771 /* Nonzero if we want to tune for stores that access the write-buffer.
772 This typically means an ARM6 or ARM7 with MMU or MPU. */
773 int arm_tune_wbuf = 0;
774
775 /* Nonzero if tuning for Cortex-A9. */
776 int arm_tune_cortex_a9 = 0;
777
778 /* Nonzero if generating Thumb instructions. */
779 int thumb_code = 0;
780
781 /* Nonzero if generating Thumb-1 instructions. */
782 int thumb1_code = 0;
783
784 /* Nonzero if we should define __THUMB_INTERWORK__ in the
785 preprocessor.
786 XXX This is a bit of a hack, it's intended to help work around
787 problems in GLD which doesn't understand that armv5t code is
788 interworking clean. */
789 int arm_cpp_interwork = 0;
790
791 /* Nonzero if chip supports Thumb 2. */
792 int arm_arch_thumb2;
793
794 /* Nonzero if chip supports integer division instruction. */
795 int arm_arch_arm_hwdiv;
796 int arm_arch_thumb_hwdiv;
797
798 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
799 we must report the mode of the memory reference from
800 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
801 enum machine_mode output_memory_reference_mode;
802
803 /* The register number to be used for the PIC offset register. */
804 unsigned arm_pic_register = INVALID_REGNUM;
805
806 /* Set to 1 after arm_reorg has started. Reset at the start of
807 the next function. */
808 static int after_arm_reorg = 0;
809
810 enum arm_pcs arm_pcs_default;
811
812 /* For an explanation of these variables, see final_prescan_insn below. */
813 int arm_ccfsm_state;
814 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
815 enum arm_cond_code arm_current_cc;
816
817 rtx arm_target_insn;
818 int arm_target_label;
819 /* The number of conditionally executed insns, including the current insn. */
820 int arm_condexec_count = 0;
821 /* A bitmask specifying the patterns for the IT block.
822 Zero means do not output an IT block before this insn. */
823 int arm_condexec_mask = 0;
824 /* The number of bits used in arm_condexec_mask. */
825 int arm_condexec_masklen = 0;
826
827 /* The condition codes of the ARM, and the inverse function. */
828 static const char * const arm_condition_codes[] =
829 {
830 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
831 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
832 };
833
834 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
835 int arm_regs_in_sequence[] =
836 {
837 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
838 };
839
840 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
841 #define streq(string1, string2) (strcmp (string1, string2) == 0)
842
843 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
844 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
845 | (1 << PIC_OFFSET_TABLE_REGNUM)))
846 \f
847 /* Initialization code. */
848
849 struct processors
850 {
851 const char *const name;
852 enum processor_type core;
853 const char *arch;
854 const unsigned long flags;
855 const struct tune_params *const tune;
856 };
857
858
859 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
860 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
861 prefetch_slots, \
862 l1_size, \
863 l1_line_size
864
865 const struct tune_params arm_slowmul_tune =
866 {
867 arm_slowmul_rtx_costs,
868 NULL,
869 3, /* Constant limit. */
870 5, /* Max cond insns. */
871 ARM_PREFETCH_NOT_BENEFICIAL,
872 true, /* Prefer constant pool. */
873 arm_default_branch_cost
874 };
875
876 const struct tune_params arm_fastmul_tune =
877 {
878 arm_fastmul_rtx_costs,
879 NULL,
880 1, /* Constant limit. */
881 5, /* Max cond insns. */
882 ARM_PREFETCH_NOT_BENEFICIAL,
883 true, /* Prefer constant pool. */
884 arm_default_branch_cost
885 };
886
887 /* StrongARM has early execution of branches, so a sequence that is worth
888 skipping is shorter. Set max_insns_skipped to a lower value. */
889
890 const struct tune_params arm_strongarm_tune =
891 {
892 arm_fastmul_rtx_costs,
893 NULL,
894 1, /* Constant limit. */
895 3, /* Max cond insns. */
896 ARM_PREFETCH_NOT_BENEFICIAL,
897 true, /* Prefer constant pool. */
898 arm_default_branch_cost
899 };
900
901 const struct tune_params arm_xscale_tune =
902 {
903 arm_xscale_rtx_costs,
904 xscale_sched_adjust_cost,
905 2, /* Constant limit. */
906 3, /* Max cond insns. */
907 ARM_PREFETCH_NOT_BENEFICIAL,
908 true, /* Prefer constant pool. */
909 arm_default_branch_cost
910 };
911
912 const struct tune_params arm_9e_tune =
913 {
914 arm_9e_rtx_costs,
915 NULL,
916 1, /* Constant limit. */
917 5, /* Max cond insns. */
918 ARM_PREFETCH_NOT_BENEFICIAL,
919 true, /* Prefer constant pool. */
920 arm_default_branch_cost
921 };
922
923 const struct tune_params arm_v6t2_tune =
924 {
925 arm_9e_rtx_costs,
926 NULL,
927 1, /* Constant limit. */
928 5, /* Max cond insns. */
929 ARM_PREFETCH_NOT_BENEFICIAL,
930 false, /* Prefer constant pool. */
931 arm_default_branch_cost
932 };
933
934 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
935 const struct tune_params arm_cortex_tune =
936 {
937 arm_9e_rtx_costs,
938 NULL,
939 1, /* Constant limit. */
940 5, /* Max cond insns. */
941 ARM_PREFETCH_NOT_BENEFICIAL,
942 false, /* Prefer constant pool. */
943 arm_default_branch_cost
944 };
945
946 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
947 less appealing. Set max_insns_skipped to a low value. */
948
949 const struct tune_params arm_cortex_a5_tune =
950 {
951 arm_9e_rtx_costs,
952 NULL,
953 1, /* Constant limit. */
954 1, /* Max cond insns. */
955 ARM_PREFETCH_NOT_BENEFICIAL,
956 false, /* Prefer constant pool. */
957 arm_cortex_a5_branch_cost
958 };
959
960 const struct tune_params arm_cortex_a9_tune =
961 {
962 arm_9e_rtx_costs,
963 cortex_a9_sched_adjust_cost,
964 1, /* Constant limit. */
965 5, /* Max cond insns. */
966 ARM_PREFETCH_BENEFICIAL(4,32,32),
967 false, /* Prefer constant pool. */
968 arm_default_branch_cost
969 };
970
971 const struct tune_params arm_fa726te_tune =
972 {
973 arm_9e_rtx_costs,
974 fa726te_sched_adjust_cost,
975 1, /* Constant limit. */
976 5, /* Max cond insns. */
977 ARM_PREFETCH_NOT_BENEFICIAL,
978 true, /* Prefer constant pool. */
979 arm_default_branch_cost
980 };
981
982
983 /* Not all of these give usefully different compilation alternatives,
984 but there is no simple way of generalizing them. */
985 static const struct processors all_cores[] =
986 {
987 /* ARM Cores */
988 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
989 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
990 #include "arm-cores.def"
991 #undef ARM_CORE
992 {NULL, arm_none, NULL, 0, NULL}
993 };
994
995 static const struct processors all_architectures[] =
996 {
997 /* ARM Architectures */
998 /* We don't specify tuning costs here as it will be figured out
999 from the core. */
1000
1001 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1002 {NAME, CORE, #ARCH, FLAGS, NULL},
1003 #include "arm-arches.def"
1004 #undef ARM_ARCH
1005 {NULL, arm_none, NULL, 0 , NULL}
1006 };
1007
1008
1009 /* These are populated as command-line arguments are processed, or NULL
1010 if not specified. */
1011 static const struct processors *arm_selected_arch;
1012 static const struct processors *arm_selected_cpu;
1013 static const struct processors *arm_selected_tune;
1014
1015 /* The name of the preprocessor macro to define for this architecture. */
1016
1017 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1018
1019 /* Available values for -mfpu=. */
1020
1021 static const struct arm_fpu_desc all_fpus[] =
1022 {
1023 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
1024 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
1025 #include "arm-fpus.def"
1026 #undef ARM_FPU
1027 };
1028
1029
1030 /* Supported TLS relocations. */
1031
1032 enum tls_reloc {
1033 TLS_GD32,
1034 TLS_LDM32,
1035 TLS_LDO32,
1036 TLS_IE32,
1037 TLS_LE32,
1038 TLS_DESCSEQ /* GNU scheme */
1039 };
1040
1041 /* The maximum number of insns to be used when loading a constant. */
1042 inline static int
1043 arm_constant_limit (bool size_p)
1044 {
1045 return size_p ? 1 : current_tune->constant_limit;
1046 }
1047
1048 /* Emit an insn that's a simple single-set. Both the operands must be known
1049 to be valid. */
1050 inline static rtx
1051 emit_set_insn (rtx x, rtx y)
1052 {
1053 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1054 }
1055
1056 /* Return the number of bits set in VALUE. */
1057 static unsigned
1058 bit_count (unsigned long value)
1059 {
1060 unsigned long count = 0;
1061
1062 while (value)
1063 {
1064 count++;
1065 value &= value - 1; /* Clear the least-significant set bit. */
1066 }
1067
1068 return count;
1069 }
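/* For example, bit_count (0x29) proceeds 0x29 -> 0x28 -> 0x20 -> 0 and
   returns 3: each "value &= value - 1" step clears exactly one set bit,
   so the loop iterates once per bit set (Kernighan's method).  */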
1070
1071 typedef struct
1072 {
1073 enum machine_mode mode;
1074 const char *name;
1075 } arm_fixed_mode_set;
1076
1077 /* A small helper for setting fixed-point optab libfuncs. */
1078
1079 static void
1080 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1081 const char *funcname, const char *modename,
1082 int num_suffix)
1083 {
1084 char buffer[50];
1085
1086 if (num_suffix == 0)
1087 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1088 else
1089 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1090
1091 set_optab_libfunc (optable, mode, buffer);
1092 }
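/* For example, the call
     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);
   (as made by the loop in arm_init_libfuncs below) builds the name
   "__gnu_addqq3" and registers it as the QQmode addition libfunc; a
   num_suffix of 0 simply omits the trailing digit.  The assumption is
   that libgcc provides helpers under these __gnu_-prefixed names.  */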
1093
1094 static void
1095 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1096 enum machine_mode from, const char *funcname,
1097 const char *toname, const char *fromname)
1098 {
1099 char buffer[50];
1100 const char *maybe_suffix_2 = "";
1101
1102 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1103 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1104 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1105 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1106 maybe_suffix_2 = "2";
1107
1108 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1109 maybe_suffix_2);
1110
1111 set_conv_libfunc (optable, to, from, buffer);
1112 }
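/* For example, a conversion from SQmode to SImode is registered as
   "__gnu_fractsqsi" (funcname, then fromname, then toname), while a
   fixed-to-fixed conversion with matching signedness and fract-ness,
   such as QQmode to HQmode, takes the "2" suffix and becomes
   "__gnu_fractqqhq2", following the fixed-bit.h convention noted
   above.  */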
1113
1114 /* Set up library functions unique to ARM. */
1115
1116 static void
1117 arm_init_libfuncs (void)
1118 {
1119 /* For Linux, we have access to kernel support for atomic operations. */
1120 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1121 init_sync_libfuncs (2 * UNITS_PER_WORD);
1122
1123 /* There are no special library functions unless we are using the
1124 ARM BPABI. */
1125 if (!TARGET_BPABI)
1126 return;
1127
1128 /* The functions below are described in Section 4 of the "Run-Time
1129 ABI for the ARM architecture", Version 1.0. */
1130
1131 /* Double-precision floating-point arithmetic. Table 2. */
1132 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1133 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1134 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1135 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1136 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1137
1138 /* Double-precision comparisons. Table 3. */
1139 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1140 set_optab_libfunc (ne_optab, DFmode, NULL);
1141 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1142 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1143 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1144 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1145 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1146
1147 /* Single-precision floating-point arithmetic. Table 4. */
1148 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1149 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1150 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1151 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1152 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1153
1154 /* Single-precision comparisons. Table 5. */
1155 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1156 set_optab_libfunc (ne_optab, SFmode, NULL);
1157 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1158 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1159 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1160 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1161 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1162
1163 /* Floating-point to integer conversions. Table 6. */
1164 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1165 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1166 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1167 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1168 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1169 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1170 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1171 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1172
1173 /* Conversions between floating types. Table 7. */
1174 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1175 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1176
1177 /* Integer to floating-point conversions. Table 8. */
1178 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1179 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1180 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1181 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1182 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1183 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1184 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1185 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1186
1187 /* Long long. Table 9. */
1188 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1189 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1190 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1191 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1192 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1193 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1194 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1195 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1196
1197 /* Integer (32/32->32) division. \S 4.3.1. */
1198 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1199 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1200
1201 /* The divmod functions are designed so that they can be used for
1202 plain division, even though they return both the quotient and the
1203 remainder. The quotient is returned in the usual location (i.e.,
1204 r0 for SImode, {r0, r1} for DImode), just as would be expected
1205 for an ordinary division routine. Because the AAPCS calling
1206 conventions specify that all of { r0, r1, r2, r3 } are
1207 call-clobbered registers, there is no need to tell the compiler
1208 explicitly that those registers are clobbered by these
1209 routines. */
1210 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1211 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1212
1213 /* For SImode division the ABI provides div-without-mod routines,
1214 which are faster. */
1215 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1216 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1217
1218 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1219 divmod libcalls instead. */
1220 set_optab_libfunc (smod_optab, DImode, NULL);
1221 set_optab_libfunc (umod_optab, DImode, NULL);
1222 set_optab_libfunc (smod_optab, SImode, NULL);
1223 set_optab_libfunc (umod_optab, SImode, NULL);
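/* The net effect, as a sketch for an AAPCS/BPABI target: "q = a / b" on
   SImode operands becomes a call to "__aeabi_idiv", while "r = a % b"
   falls back to "__aeabi_idivmod", with the remainder read from r1,
   since the Run-Time ABI returns the quotient/remainder pair in
   {r0, r1}.  */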
1224
1225 /* Half-precision float operations. The compiler handles all operations
1226 with NULL libfuncs by converting to SFmode. */
1227 switch (arm_fp16_format)
1228 {
1229 case ARM_FP16_FORMAT_IEEE:
1230 case ARM_FP16_FORMAT_ALTERNATIVE:
1231
1232 /* Conversions. */
1233 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1234 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1235 ? "__gnu_f2h_ieee"
1236 : "__gnu_f2h_alternative"));
1237 set_conv_libfunc (sext_optab, SFmode, HFmode,
1238 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1239 ? "__gnu_h2f_ieee"
1240 : "__gnu_h2f_alternative"));
1241
1242 /* Arithmetic. */
1243 set_optab_libfunc (add_optab, HFmode, NULL);
1244 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1245 set_optab_libfunc (smul_optab, HFmode, NULL);
1246 set_optab_libfunc (neg_optab, HFmode, NULL);
1247 set_optab_libfunc (sub_optab, HFmode, NULL);
1248
1249 /* Comparisons. */
1250 set_optab_libfunc (eq_optab, HFmode, NULL);
1251 set_optab_libfunc (ne_optab, HFmode, NULL);
1252 set_optab_libfunc (lt_optab, HFmode, NULL);
1253 set_optab_libfunc (le_optab, HFmode, NULL);
1254 set_optab_libfunc (ge_optab, HFmode, NULL);
1255 set_optab_libfunc (gt_optab, HFmode, NULL);
1256 set_optab_libfunc (unord_optab, HFmode, NULL);
1257 break;
1258
1259 default:
1260 break;
1261 }
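/* A sketch of what the NULL HFmode entries above imply: with
   -mfp16-format=ieee, an addition of two __fp16 values is performed by
   widening the operands (via "__gnu_h2f_ieee" or a hardware conversion),
   adding in SFmode, and narrowing the result with "__gnu_f2h_ieee";
   only the conversions themselves need libfuncs.  */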
1262
1263 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1264 {
1265 const arm_fixed_mode_set fixed_arith_modes[] =
1266 {
1267 { QQmode, "qq" },
1268 { UQQmode, "uqq" },
1269 { HQmode, "hq" },
1270 { UHQmode, "uhq" },
1271 { SQmode, "sq" },
1272 { USQmode, "usq" },
1273 { DQmode, "dq" },
1274 { UDQmode, "udq" },
1275 { TQmode, "tq" },
1276 { UTQmode, "utq" },
1277 { HAmode, "ha" },
1278 { UHAmode, "uha" },
1279 { SAmode, "sa" },
1280 { USAmode, "usa" },
1281 { DAmode, "da" },
1282 { UDAmode, "uda" },
1283 { TAmode, "ta" },
1284 { UTAmode, "uta" }
1285 };
1286 const arm_fixed_mode_set fixed_conv_modes[] =
1287 {
1288 { QQmode, "qq" },
1289 { UQQmode, "uqq" },
1290 { HQmode, "hq" },
1291 { UHQmode, "uhq" },
1292 { SQmode, "sq" },
1293 { USQmode, "usq" },
1294 { DQmode, "dq" },
1295 { UDQmode, "udq" },
1296 { TQmode, "tq" },
1297 { UTQmode, "utq" },
1298 { HAmode, "ha" },
1299 { UHAmode, "uha" },
1300 { SAmode, "sa" },
1301 { USAmode, "usa" },
1302 { DAmode, "da" },
1303 { UDAmode, "uda" },
1304 { TAmode, "ta" },
1305 { UTAmode, "uta" },
1306 { QImode, "qi" },
1307 { HImode, "hi" },
1308 { SImode, "si" },
1309 { DImode, "di" },
1310 { TImode, "ti" },
1311 { SFmode, "sf" },
1312 { DFmode, "df" }
1313 };
1314 unsigned int i, j;
1315
1316 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1317 {
1318 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1319 "add", fixed_arith_modes[i].name, 3);
1320 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1321 "ssadd", fixed_arith_modes[i].name, 3);
1322 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1323 "usadd", fixed_arith_modes[i].name, 3);
1324 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1325 "sub", fixed_arith_modes[i].name, 3);
1326 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1327 "sssub", fixed_arith_modes[i].name, 3);
1328 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1329 "ussub", fixed_arith_modes[i].name, 3);
1330 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1331 "mul", fixed_arith_modes[i].name, 3);
1332 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1333 "ssmul", fixed_arith_modes[i].name, 3);
1334 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1335 "usmul", fixed_arith_modes[i].name, 3);
1336 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1337 "div", fixed_arith_modes[i].name, 3);
1338 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1339 "udiv", fixed_arith_modes[i].name, 3);
1340 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1341 "ssdiv", fixed_arith_modes[i].name, 3);
1342 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1343 "usdiv", fixed_arith_modes[i].name, 3);
1344 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1345 "neg", fixed_arith_modes[i].name, 2);
1346 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1347 "ssneg", fixed_arith_modes[i].name, 2);
1348 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1349 "usneg", fixed_arith_modes[i].name, 2);
1350 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1351 "ashl", fixed_arith_modes[i].name, 3);
1352 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1353 "ashr", fixed_arith_modes[i].name, 3);
1354 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1355 "lshr", fixed_arith_modes[i].name, 3);
1356 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1357 "ssashl", fixed_arith_modes[i].name, 3);
1358 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1359 "usashl", fixed_arith_modes[i].name, 3);
1360 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1361 "cmp", fixed_arith_modes[i].name, 2);
1362 }
1363
1364 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1365 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1366 {
1367 if (i == j
1368 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1369 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1370 continue;
1371
1372 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1373 fixed_conv_modes[j].mode, "fract",
1374 fixed_conv_modes[i].name,
1375 fixed_conv_modes[j].name);
1376 arm_set_fixed_conv_libfunc (satfract_optab,
1377 fixed_conv_modes[i].mode,
1378 fixed_conv_modes[j].mode, "satfract",
1379 fixed_conv_modes[i].name,
1380 fixed_conv_modes[j].name);
1381 arm_set_fixed_conv_libfunc (fractuns_optab,
1382 fixed_conv_modes[i].mode,
1383 fixed_conv_modes[j].mode, "fractuns",
1384 fixed_conv_modes[i].name,
1385 fixed_conv_modes[j].name);
1386 arm_set_fixed_conv_libfunc (satfractuns_optab,
1387 fixed_conv_modes[i].mode,
1388 fixed_conv_modes[j].mode, "satfractuns",
1389 fixed_conv_modes[i].name,
1390 fixed_conv_modes[j].name);
1391 }
1392 }
1393
1394 if (TARGET_AAPCS_BASED)
1395 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1396 }
1397
1398 /* On AAPCS systems, this is the "struct __va_list". */
1399 static GTY(()) tree va_list_type;
1400
1401 /* Return the type to use as __builtin_va_list. */
1402 static tree
1403 arm_build_builtin_va_list (void)
1404 {
1405 tree va_list_name;
1406 tree ap_field;
1407
1408 if (!TARGET_AAPCS_BASED)
1409 return std_build_builtin_va_list ();
1410
1411 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1412 defined as:
1413
1414 struct __va_list
1415 {
1416 void *__ap;
1417 };
1418
1419 The C Library ABI further reinforces this definition in \S
1420 4.1.
1421
1422 We must follow this definition exactly. The structure tag
1423 name is visible in C++ mangled names, and thus forms a part
1424 of the ABI. The field name may be used by people who
1425 #include <stdarg.h>. */
1426 /* Create the type. */
1427 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1428 /* Give it the required name. */
1429 va_list_name = build_decl (BUILTINS_LOCATION,
1430 TYPE_DECL,
1431 get_identifier ("__va_list"),
1432 va_list_type);
1433 DECL_ARTIFICIAL (va_list_name) = 1;
1434 TYPE_NAME (va_list_type) = va_list_name;
1435 TYPE_STUB_DECL (va_list_type) = va_list_name;
1436 /* Create the __ap field. */
1437 ap_field = build_decl (BUILTINS_LOCATION,
1438 FIELD_DECL,
1439 get_identifier ("__ap"),
1440 ptr_type_node);
1441 DECL_ARTIFICIAL (ap_field) = 1;
1442 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1443 TYPE_FIELDS (va_list_type) = ap_field;
1444 /* Compute its layout. */
1445 layout_type (va_list_type);
1446
1447 return va_list_type;
1448 }
1449
1450 /* Return an expression of type "void *" pointing to the next
1451 available argument in a variable-argument list. VALIST is the
1452 user-level va_list object, of type __builtin_va_list. */
1453 static tree
1454 arm_extract_valist_ptr (tree valist)
1455 {
1456 if (TREE_TYPE (valist) == error_mark_node)
1457 return error_mark_node;
1458
1459 /* On an AAPCS target, the pointer is stored within "struct
1460 va_list". */
1461 if (TARGET_AAPCS_BASED)
1462 {
1463 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1464 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1465 valist, ap_field, NULL_TREE);
1466 }
1467
1468 return valist;
1469 }
1470
1471 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1472 static void
1473 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1474 {
1475 valist = arm_extract_valist_ptr (valist);
1476 std_expand_builtin_va_start (valist, nextarg);
1477 }
1478
1479 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1480 static tree
1481 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1482 gimple_seq *post_p)
1483 {
1484 valist = arm_extract_valist_ptr (valist);
1485 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1486 }
1487
1488 /* Fix up any incompatible options that the user has specified. */
1489 static void
1490 arm_option_override (void)
1491 {
1492 if (global_options_set.x_arm_arch_option)
1493 arm_selected_arch = &all_architectures[arm_arch_option];
1494
1495 if (global_options_set.x_arm_cpu_option)
1496 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1497
1498 if (global_options_set.x_arm_tune_option)
1499 arm_selected_tune = &all_cores[(int) arm_tune_option];
1500
1501 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1502 SUBTARGET_OVERRIDE_OPTIONS;
1503 #endif
1504
1505 if (arm_selected_arch)
1506 {
1507 if (arm_selected_cpu)
1508 {
1509 /* Check for conflict between mcpu and march. */
1510 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1511 {
1512 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1513 arm_selected_cpu->name, arm_selected_arch->name);
1514 /* -march wins for code generation.
1515 -mcpu wins for default tuning. */
1516 if (!arm_selected_tune)
1517 arm_selected_tune = arm_selected_cpu;
1518
1519 arm_selected_cpu = arm_selected_arch;
1520 }
1521 else
1522 /* -mcpu wins. */
1523 arm_selected_arch = NULL;
1524 }
1525 else
1526 /* Pick a CPU based on the architecture. */
1527 arm_selected_cpu = arm_selected_arch;
1528 }
1529
1530 /* If the user did not specify a processor, choose one for them. */
1531 if (!arm_selected_cpu)
1532 {
1533 const struct processors * sel;
1534 unsigned int sought;
1535
1536 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1537 if (!arm_selected_cpu->name)
1538 {
1539 #ifdef SUBTARGET_CPU_DEFAULT
1540 /* Use the subtarget default CPU if none was specified by
1541 configure. */
1542 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1543 #endif
1544 /* Default to ARM6. */
1545 if (!arm_selected_cpu->name)
1546 arm_selected_cpu = &all_cores[arm6];
1547 }
1548
1549 sel = arm_selected_cpu;
1550 insn_flags = sel->flags;
1551
1552 /* Now check to see if the user has specified any command-line
1553 switches that require certain abilities from the CPU. */
1554 sought = 0;
1555
1556 if (TARGET_INTERWORK || TARGET_THUMB)
1557 {
1558 sought |= (FL_THUMB | FL_MODE32);
1559
1560 /* There are no ARM processors that support both APCS-26 and
1561 interworking. Therefore we force FL_MODE26 to be removed
1562 from insn_flags here (if it was set), so that the search
1563 below will always be able to find a compatible processor. */
1564 insn_flags &= ~FL_MODE26;
1565 }
1566
1567 if (sought != 0 && ((sought & insn_flags) != sought))
1568 {
1569 /* Try to locate a CPU type that supports all of the abilities
1570 of the default CPU, plus the extra abilities requested by
1571 the user. */
1572 for (sel = all_cores; sel->name != NULL; sel++)
1573 if ((sel->flags & sought) == (sought | insn_flags))
1574 break;
1575
1576 if (sel->name == NULL)
1577 {
1578 unsigned current_bit_count = 0;
1579 const struct processors * best_fit = NULL;
1580
1581 /* Ideally we would like to issue an error message here
1582 saying that it was not possible to find a CPU compatible
1583 with the default CPU, but which also supports the command
1584 line options specified by the programmer, and so they
1585 ought to use the -mcpu=<name> command line option to
1586 override the default CPU type.
1587
1588 If we cannot find a cpu that has both the
1589 characteristics of the default cpu and the given
1590 command line options we scan the array again looking
1591 for a best match. */
1592 for (sel = all_cores; sel->name != NULL; sel++)
1593 if ((sel->flags & sought) == sought)
1594 {
1595 unsigned count;
1596
1597 count = bit_count (sel->flags & insn_flags);
1598
1599 if (count >= current_bit_count)
1600 {
1601 best_fit = sel;
1602 current_bit_count = count;
1603 }
1604 }
1605
1606 gcc_assert (best_fit);
1607 sel = best_fit;
1608 }
1609
1610 arm_selected_cpu = sel;
1611 }
1612 }
1613
1614 gcc_assert (arm_selected_cpu);
1615 /* The selected cpu may be an architecture, so look up tuning by core ID. */
1616 if (!arm_selected_tune)
1617 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1618
1619 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1620 insn_flags = arm_selected_cpu->flags;
1621
1622 arm_tune = arm_selected_tune->core;
1623 tune_flags = arm_selected_tune->flags;
1624 current_tune = arm_selected_tune->tune;
1625
1626 /* Make sure that the processor choice does not conflict with any of the
1627 other command line choices. */
1628 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1629 error ("target CPU does not support ARM mode");
1630
1631 /* BPABI targets use linker tricks to allow interworking on cores
1632 without thumb support. */
1633 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1634 {
1635 warning (0, "target CPU does not support interworking" );
1636 target_flags &= ~MASK_INTERWORK;
1637 }
1638
1639 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1640 {
1641 warning (0, "target CPU does not support THUMB instructions");
1642 target_flags &= ~MASK_THUMB;
1643 }
1644
1645 if (TARGET_APCS_FRAME && TARGET_THUMB)
1646 {
1647 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1648 target_flags &= ~MASK_APCS_FRAME;
1649 }
1650
1651 /* Callee super interworking implies thumb interworking. Adding
1652 this to the flags here simplifies the logic elsewhere. */
1653 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1654 target_flags |= MASK_INTERWORK;
1655
1656 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1657 from here where no function is being compiled currently. */
1658 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1659 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1660
1661 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1662 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1663
1664 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1665 {
1666 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1667 target_flags |= MASK_APCS_FRAME;
1668 }
1669
1670 if (TARGET_POKE_FUNCTION_NAME)
1671 target_flags |= MASK_APCS_FRAME;
1672
1673 if (TARGET_APCS_REENT && flag_pic)
1674 error ("-fpic and -mapcs-reent are incompatible");
1675
1676 if (TARGET_APCS_REENT)
1677 warning (0, "APCS reentrant code not supported. Ignored");
1678
1679 /* If this target is normally configured to use APCS frames, warn if they
1680 are turned off and debugging is turned on. */
1681 if (TARGET_ARM
1682 && write_symbols != NO_DEBUG
1683 && !TARGET_APCS_FRAME
1684 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1685 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1686
1687 if (TARGET_APCS_FLOAT)
1688 warning (0, "passing floating point arguments in fp regs not yet supported");
1689
1690 if (TARGET_LITTLE_WORDS)
1691 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1692 "will be removed in a future release");
1693
1694 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1695 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1696 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1697 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1698 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1699 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1700 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1701 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1702 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1703 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1704 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1705 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1706 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1707 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1708
1709 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1710 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1711 thumb_code = TARGET_ARM == 0;
1712 thumb1_code = TARGET_THUMB1 != 0;
1713 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1714 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1715 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1716 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1717 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1718 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1719
1720 /* If we are not using the default (ARM mode) section anchor offset
1721 ranges, then set the correct ranges now. */
1722 if (TARGET_THUMB1)
1723 {
1724 /* Thumb-1 LDR instructions cannot have negative offsets.
1725 Permissible positive offset ranges are 5-bit (for byte loads),
1726 6-bit (for halfword loads), or 7-bit (for word loads).
1727 Empirical results suggest a 7-bit anchor range gives the best
1728 overall code size. */
1729 targetm.min_anchor_offset = 0;
1730 targetm.max_anchor_offset = 127;
1731 }
1732 else if (TARGET_THUMB2)
1733 {
1734 /* The minimum is set such that the total size of the block
1735 for a particular anchor is 248 + 1 + 4095 bytes, which is
1736 divisible by eight, ensuring natural spacing of anchors. */
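/* (Concretely, 248 + 1 + 4095 = 4344 = 8 * 543.) */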
1737 targetm.min_anchor_offset = -248;
1738 targetm.max_anchor_offset = 4095;
1739 }
1740
1741 /* V5 code we generate is completely interworking capable, so we turn off
1742 TARGET_INTERWORK here to avoid many tests later on. */
1743
1744 /* XXX However, we must pass the right pre-processor defines to CPP
1745 or GLD can get confused. This is a hack. */
1746 if (TARGET_INTERWORK)
1747 arm_cpp_interwork = 1;
1748
1749 if (arm_arch5)
1750 target_flags &= ~MASK_INTERWORK;
1751
1752 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1753 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1754
1755 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1756 error ("iwmmxt abi requires an iwmmxt capable cpu");
1757
1758 if (!global_options_set.x_arm_fpu_index)
1759 {
1760 const char *target_fpu_name;
1761 bool ok;
1762
1763 #ifdef FPUTYPE_DEFAULT
1764 target_fpu_name = FPUTYPE_DEFAULT;
1765 #else
1766 if (arm_arch_cirrus)
1767 target_fpu_name = "maverick";
1768 else
1769 target_fpu_name = "fpe2";
1770 #endif
1771
1772 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1773 CL_TARGET);
1774 gcc_assert (ok);
1775 }
1776
1777 arm_fpu_desc = &all_fpus[arm_fpu_index];
1778
1779 switch (arm_fpu_desc->model)
1780 {
1781 case ARM_FP_MODEL_FPA:
1782 if (arm_fpu_desc->rev == 2)
1783 arm_fpu_attr = FPU_FPE2;
1784 else if (arm_fpu_desc->rev == 3)
1785 arm_fpu_attr = FPU_FPE3;
1786 else
1787 arm_fpu_attr = FPU_FPA;
1788 break;
1789
1790 case ARM_FP_MODEL_MAVERICK:
1791 arm_fpu_attr = FPU_MAVERICK;
1792 break;
1793
1794 case ARM_FP_MODEL_VFP:
1795 arm_fpu_attr = FPU_VFP;
1796 break;
1797
1798 default:
1799 gcc_unreachable();
1800 }
1801
1802 if (TARGET_AAPCS_BASED
1803 && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
1804 error ("FPA is unsupported in the AAPCS");
1805
1806 if (TARGET_AAPCS_BASED)
1807 {
1808 if (TARGET_CALLER_INTERWORKING)
1809 error ("AAPCS does not support -mcaller-super-interworking");
1810 else
1811 if (TARGET_CALLEE_INTERWORKING)
1812 error ("AAPCS does not support -mcallee-super-interworking");
1813 }
1814
1815 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1816 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1817 will ever exist. GCC makes no attempt to support this combination. */
1818 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1819 sorry ("iWMMXt and hardware floating point");
1820
1821 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1822 if (TARGET_THUMB2 && TARGET_IWMMXT)
1823 sorry ("Thumb-2 iWMMXt");
1824
1825 /* __fp16 support currently assumes the core has ldrh. */
1826 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1827 sorry ("__fp16 and no ldrh");
1828
1829 /* If soft-float is specified then don't use FPU. */
1830 if (TARGET_SOFT_FLOAT)
1831 arm_fpu_attr = FPU_NONE;
1832
1833 if (TARGET_AAPCS_BASED)
1834 {
1835 if (arm_abi == ARM_ABI_IWMMXT)
1836 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1837 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1838 && TARGET_HARD_FLOAT
1839 && TARGET_VFP)
1840 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1841 else
1842 arm_pcs_default = ARM_PCS_AAPCS;
1843 }
1844 else
1845 {
1846 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1847 sorry ("-mfloat-abi=hard and VFP");
1848
1849 if (arm_abi == ARM_ABI_APCS)
1850 arm_pcs_default = ARM_PCS_APCS;
1851 else
1852 arm_pcs_default = ARM_PCS_ATPCS;
1853 }
1854
1855 /* For arm2/3 there is no need to do any scheduling if there is only
1856 a floating point emulator, or we are doing software floating-point. */
1857 if ((TARGET_SOFT_FLOAT
1858 || (TARGET_FPA && arm_fpu_desc->rev))
1859 && (tune_flags & FL_MODE32) == 0)
1860 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1861
1862 /* Use the cp15 method if it is available. */
1863 if (target_thread_pointer == TP_AUTO)
1864 {
1865 if (arm_arch6k && !TARGET_THUMB1)
1866 target_thread_pointer = TP_CP15;
1867 else
1868 target_thread_pointer = TP_SOFT;
1869 }
1870
1871 if (TARGET_HARD_TP && TARGET_THUMB1)
1872 error ("can not use -mtp=cp15 with 16-bit Thumb");
1873
1874 /* Override the default structure alignment for AAPCS ABI. */
1875 if (!global_options_set.x_arm_structure_size_boundary)
1876 {
1877 if (TARGET_AAPCS_BASED)
1878 arm_structure_size_boundary = 8;
1879 }
1880 else
1881 {
1882 if (arm_structure_size_boundary != 8
1883 && arm_structure_size_boundary != 32
1884 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1885 {
1886 if (ARM_DOUBLEWORD_ALIGN)
1887 warning (0,
1888 "structure size boundary can only be set to 8, 32 or 64");
1889 else
1890 warning (0, "structure size boundary can only be set to 8 or 32");
1891 arm_structure_size_boundary
1892 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1893 }
1894 }
1895
1896 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1897 {
1898 error ("RTP PIC is incompatible with Thumb");
1899 flag_pic = 0;
1900 }
1901
1902 /* If stack checking is disabled, we can use r10 as the PIC register,
1903 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1904 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1905 {
1906 if (TARGET_VXWORKS_RTP)
1907 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1908 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1909 }
1910
1911 if (flag_pic && TARGET_VXWORKS_RTP)
1912 arm_pic_register = 9;
1913
1914 if (arm_pic_register_string != NULL)
1915 {
1916 int pic_register = decode_reg_name (arm_pic_register_string);
1917
1918 if (!flag_pic)
1919 warning (0, "-mpic-register= is useless without -fpic");
1920
1921 /* Prevent the user from choosing an obviously stupid PIC register. */
1922 else if (pic_register < 0 || call_used_regs[pic_register]
1923 || pic_register == HARD_FRAME_POINTER_REGNUM
1924 || pic_register == STACK_POINTER_REGNUM
1925 || pic_register >= PC_REGNUM
1926 || (TARGET_VXWORKS_RTP
1927 && (unsigned int) pic_register != arm_pic_register))
1928 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1929 else
1930 arm_pic_register = pic_register;
1931 }
1932
1933 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1934 if (fix_cm3_ldrd == 2)
1935 {
1936 if (arm_selected_cpu->core == cortexm3)
1937 fix_cm3_ldrd = 1;
1938 else
1939 fix_cm3_ldrd = 0;
1940 }
1941
1942 /* Enable -munaligned-access by default for
1943 - all ARMv6 architecture-based processors
1944 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1945
1946 Disable -munaligned-access by default for
1947 - all pre-ARMv6 architecture-based processors
1948 - ARMv6-M architecture-based processors. */
1949
1950 if (unaligned_access == 2)
1951 {
1952 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1953 unaligned_access = 1;
1954 else
1955 unaligned_access = 0;
1956 }
1957 else if (unaligned_access == 1
1958 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1959 {
1960 warning (0, "target CPU does not support unaligned accesses");
1961 unaligned_access = 0;
1962 }
1963
1964 if (TARGET_THUMB1 && flag_schedule_insns)
1965 {
1966 /* Don't warn since it's on by default in -O2. */
1967 flag_schedule_insns = 0;
1968 }
1969
1970 if (optimize_size)
1971 {
1972 /* If optimizing for size, bump the number of instructions that we
1973 are prepared to conditionally execute (even on a StrongARM). */
1974 max_insns_skipped = 6;
1975 }
1976 else
1977 max_insns_skipped = current_tune->max_insns_skipped;
1978
1979 /* Hot/Cold partitioning is not currently supported, since we can't
1980 handle literal pool placement in that case. */
1981 if (flag_reorder_blocks_and_partition)
1982 {
1983 inform (input_location,
1984 "-freorder-blocks-and-partition not supported on this architecture");
1985 flag_reorder_blocks_and_partition = 0;
1986 flag_reorder_blocks = 1;
1987 }
1988
1989 if (flag_pic)
1990 /* Hoisting PIC address calculations more aggressively provides a small,
1991 but measurable, size reduction for PIC code. Therefore, we decrease
1992 the bar for unrestricted expression hoisting to the cost of PIC address
1993 calculation, which is 2 instructions. */
1994 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
1995 global_options.x_param_values,
1996 global_options_set.x_param_values);
1997
1998 /* ARM EABI defaults to strict volatile bitfields. */
1999 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2000 && abi_version_at_least(2))
2001 flag_strict_volatile_bitfields = 1;
2002
2003 /* Enable software prefetching at -O3 for CPUs that have prefetch, where we
2004 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2005 if (flag_prefetch_loop_arrays < 0
2006 && HAVE_prefetch
2007 && optimize >= 3
2008 && current_tune->num_prefetch_slots > 0)
2009 flag_prefetch_loop_arrays = 1;
2010
2011 /* Set up parameters to be used in prefetching algorithm. Do not override the
2012 defaults unless we are tuning for a core we have researched values for. */
2013 if (current_tune->num_prefetch_slots > 0)
2014 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2015 current_tune->num_prefetch_slots,
2016 global_options.x_param_values,
2017 global_options_set.x_param_values);
2018 if (current_tune->l1_cache_line_size >= 0)
2019 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2020 current_tune->l1_cache_line_size,
2021 global_options.x_param_values,
2022 global_options_set.x_param_values);
2023 if (current_tune->l1_cache_size >= 0)
2024 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2025 current_tune->l1_cache_size,
2026 global_options.x_param_values,
2027 global_options_set.x_param_values);
2028
2029 /* Register global variables with the garbage collector. */
2030 arm_add_gc_roots ();
2031 }
2032
2033 static void
2034 arm_add_gc_roots (void)
2035 {
2036 gcc_obstack_init(&minipool_obstack);
2037 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2038 }
2039 \f
2040 /* A table of known ARM exception types.
2041 For use with the interrupt function attribute. */
2042
2043 typedef struct
2044 {
2045 const char *const arg;
2046 const unsigned long return_value;
2047 }
2048 isr_attribute_arg;
2049
2050 static const isr_attribute_arg isr_attribute_args [] =
2051 {
2052 { "IRQ", ARM_FT_ISR },
2053 { "irq", ARM_FT_ISR },
2054 { "FIQ", ARM_FT_FIQ },
2055 { "fiq", ARM_FT_FIQ },
2056 { "ABORT", ARM_FT_ISR },
2057 { "abort", ARM_FT_ISR },
2058 { "ABORT", ARM_FT_ISR },
2059 { "abort", ARM_FT_ISR },
2060 { "UNDEF", ARM_FT_EXCEPTION },
2061 { "undef", ARM_FT_EXCEPTION },
2062 { "SWI", ARM_FT_EXCEPTION },
2063 { "swi", ARM_FT_EXCEPTION },
2064 { NULL, ARM_FT_NORMAL }
2065 };
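/* For illustration (the handler name below is arbitrary): a function using
   this attribute would typically be declared as, e.g.,

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   arm_isr_value below maps the string argument to one of the ARM_FT_*
   values in this table.  */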
2066
2067 /* Returns the (interrupt) function type of the current
2068 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2069
2070 static unsigned long
2071 arm_isr_value (tree argument)
2072 {
2073 const isr_attribute_arg * ptr;
2074 const char * arg;
2075
2076 if (!arm_arch_notm)
2077 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2078
2079 /* No argument - default to IRQ. */
2080 if (argument == NULL_TREE)
2081 return ARM_FT_ISR;
2082
2083 /* Get the value of the argument. */
2084 if (TREE_VALUE (argument) == NULL_TREE
2085 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2086 return ARM_FT_UNKNOWN;
2087
2088 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2089
2090 /* Check it against the list of known arguments. */
2091 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2092 if (streq (arg, ptr->arg))
2093 return ptr->return_value;
2094
2095 /* An unrecognized interrupt type. */
2096 return ARM_FT_UNKNOWN;
2097 }
2098
2099 /* Computes the type of the current function. */
2100
2101 static unsigned long
2102 arm_compute_func_type (void)
2103 {
2104 unsigned long type = ARM_FT_UNKNOWN;
2105 tree a;
2106 tree attr;
2107
2108 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2109
2110 /* Decide if the current function is volatile. Such functions
2111 never return, and many memory cycles can be saved by not storing
2112 register values that will never be needed again. This optimization
2113 was added to speed up context switching in a kernel application. */
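/* (In GCC's IR, TREE_THIS_VOLATILE on a FUNCTION_DECL means the function
   does not return, e.g. one declared with __attribute__ ((noreturn)).)  */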
2114 if (optimize > 0
2115 && (TREE_NOTHROW (current_function_decl)
2116 || !(flag_unwind_tables
2117 || (flag_exceptions
2118 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2119 && TREE_THIS_VOLATILE (current_function_decl))
2120 type |= ARM_FT_VOLATILE;
2121
2122 if (cfun->static_chain_decl != NULL)
2123 type |= ARM_FT_NESTED;
2124
2125 attr = DECL_ATTRIBUTES (current_function_decl);
2126
2127 a = lookup_attribute ("naked", attr);
2128 if (a != NULL_TREE)
2129 type |= ARM_FT_NAKED;
2130
2131 a = lookup_attribute ("isr", attr);
2132 if (a == NULL_TREE)
2133 a = lookup_attribute ("interrupt", attr);
2134
2135 if (a == NULL_TREE)
2136 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2137 else
2138 type |= arm_isr_value (TREE_VALUE (a));
2139
2140 return type;
2141 }
2142
2143 /* Returns the type of the current function. */
2144
2145 unsigned long
2146 arm_current_func_type (void)
2147 {
2148 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2149 cfun->machine->func_type = arm_compute_func_type ();
2150
2151 return cfun->machine->func_type;
2152 }
2153
2154 bool
2155 arm_allocate_stack_slots_for_args (void)
2156 {
2157 /* Naked functions should not allocate stack slots for arguments. */
2158 return !IS_NAKED (arm_current_func_type ());
2159 }
2160
2161 \f
2162 /* Output assembler code for a block containing the constant parts
2163 of a trampoline, leaving space for the variable parts.
2164
2165 On the ARM, (if r8 is the static chain regnum, and remembering that
2166 referencing pc adds an offset of 8) the trampoline looks like:
2167 ldr r8, [pc, #0]
2168 ldr pc, [pc]
2169 .word static chain value
2170 .word function's address
2171 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2172
2173 static void
2174 arm_asm_trampoline_template (FILE *f)
2175 {
2176 if (TARGET_ARM)
2177 {
2178 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2179 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2180 }
2181 else if (TARGET_THUMB2)
2182 {
2183 /* The Thumb-2 trampoline is similar to the arm implementation.
2184 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2185 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2186 STATIC_CHAIN_REGNUM, PC_REGNUM);
2187 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2188 }
2189 else
2190 {
2191 ASM_OUTPUT_ALIGN (f, 2);
2192 fprintf (f, "\t.code\t16\n");
2193 fprintf (f, ".Ltrampoline_start:\n");
2194 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2195 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2196 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2197 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2198 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2199 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2200 }
2201 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2202 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2203 }
2204
2205 /* Emit RTL insns to initialize the variable parts of a trampoline. */
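/* For illustration: on a 32-bit target the code below stores the static
   chain value at offset 8 and the target function's address at offset 12
   of the trampoline, i.e. into the two words reserved by
   arm_asm_trampoline_template above, and then calls __clear_cache so the
   instruction and data views of the trampoline stay coherent.  */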
2206
2207 static void
2208 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2209 {
2210 rtx fnaddr, mem, a_tramp;
2211
2212 emit_block_move (m_tramp, assemble_trampoline_template (),
2213 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2214
2215 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2216 emit_move_insn (mem, chain_value);
2217
2218 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2219 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2220 emit_move_insn (mem, fnaddr);
2221
2222 a_tramp = XEXP (m_tramp, 0);
2223 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2224 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2225 plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
2226 }
2227
2228 /* Thumb trampolines should be entered in thumb mode, so set
2229 the bottom bit of the address. */
2230
2231 static rtx
2232 arm_trampoline_adjust_address (rtx addr)
2233 {
2234 if (TARGET_THUMB)
2235 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2236 NULL, 0, OPTAB_LIB_WIDEN);
2237 return addr;
2238 }
2239 \f
2240 /* Return 1 if it is possible to return using a single instruction.
2241 If SIBLING is non-null, this is a test for a return before a sibling
2242 call. SIBLING is the call insn, so we can examine its register usage. */
2243
2244 int
2245 use_return_insn (int iscond, rtx sibling)
2246 {
2247 int regno;
2248 unsigned int func_type;
2249 unsigned long saved_int_regs;
2250 unsigned HOST_WIDE_INT stack_adjust;
2251 arm_stack_offsets *offsets;
2252
2253 /* Never use a return instruction before reload has run. */
2254 if (!reload_completed)
2255 return 0;
2256
2257 func_type = arm_current_func_type ();
2258
2259 /* Naked, volatile and stack alignment functions need special
2260 consideration. */
2261 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2262 return 0;
2263
2264 /* So do interrupt functions that use the frame pointer and Thumb
2265 interrupt functions. */
2266 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2267 return 0;
2268
2269 offsets = arm_get_frame_offsets ();
2270 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2271
2272 /* As do variadic functions. */
2273 if (crtl->args.pretend_args_size
2274 || cfun->machine->uses_anonymous_args
2275 /* Or if the function calls __builtin_eh_return () */
2276 || crtl->calls_eh_return
2277 /* Or if the function calls alloca */
2278 || cfun->calls_alloca
2279 /* Or if there is a stack adjustment. However, if the stack pointer
2280 is saved on the stack, we can use a pre-incrementing stack load. */
2281 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2282 && stack_adjust == 4)))
2283 return 0;
2284
2285 saved_int_regs = offsets->saved_regs_mask;
2286
2287 /* Unfortunately, the insn
2288
2289 ldmib sp, {..., sp, ...}
2290
2291 triggers a bug on most SA-110 based devices, such that the stack
2292 pointer won't be correctly restored if the instruction takes a
2293 page fault. We work around this problem by popping r3 along with
2294 the other registers, since that is never slower than executing
2295 another instruction.
2296
2297 We test for !arm_arch5 here, because code for any architecture
2298 less than this could potentially be run on one of the buggy
2299 chips. */
2300 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2301 {
2302 /* Validate that r3 is a call-clobbered register (always true in
2303 the default abi) ... */
2304 if (!call_used_regs[3])
2305 return 0;
2306
2307 /* ... that it isn't being used for a return value ... */
2308 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2309 return 0;
2310
2311 /* ... or for a tail-call argument ... */
2312 if (sibling)
2313 {
2314 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2315
2316 if (find_regno_fusage (sibling, USE, 3))
2317 return 0;
2318 }
2319
2320 /* ... and that there are no call-saved registers in r0-r2
2321 (always true in the default ABI). */
2322 if (saved_int_regs & 0x7)
2323 return 0;
2324 }
2325
2326 /* Can't be done if interworking with Thumb, and any registers have been
2327 stacked. */
2328 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2329 return 0;
2330
2331 /* On StrongARM, conditional returns are expensive if they aren't
2332 taken and multiple registers have been stacked. */
2333 if (iscond && arm_tune_strongarm)
2334 {
2335 /* Conditional return when just the LR is stored is a simple
2336 conditional-load instruction, that's not expensive. */
2337 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2338 return 0;
2339
2340 if (flag_pic
2341 && arm_pic_register != INVALID_REGNUM
2342 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2343 return 0;
2344 }
2345
2346 /* If there are saved registers but the LR isn't saved, then we need
2347 two instructions for the return. */
2348 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2349 return 0;
2350
2351 /* Can't be done if any of the FPA regs are pushed,
2352 since this also requires an insn. */
2353 if (TARGET_HARD_FLOAT && TARGET_FPA)
2354 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
2355 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2356 return 0;
2357
2358 /* Likewise VFP regs. */
2359 if (TARGET_HARD_FLOAT && TARGET_VFP)
2360 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2361 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2362 return 0;
2363
2364 if (TARGET_REALLY_IWMMXT)
2365 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2366 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2367 return 0;
2368
2369 return 1;
2370 }
2371
2372 /* Return TRUE if int I is a valid immediate ARM constant. */
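/* For illustration: in ARM state a valid immediate is an 8-bit value
   rotated right by an even amount within a 32-bit word.  For example,
   0x000000ff, 0x00000ff0, 0xff000000 and 0xf000000f are all encodable in
   a single instruction, whereas 0x00000101 and 0x0001fe00 are not (the
   latter would need an odd rotation).  Thumb-2 additionally accepts the
   replicated byte patterns handled below.  */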
2373
2374 int
2375 const_ok_for_arm (HOST_WIDE_INT i)
2376 {
2377 int lowbit;
2378
2379 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2380 be all zero, or all one. */
2381 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2382 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2383 != ((~(unsigned HOST_WIDE_INT) 0)
2384 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2385 return FALSE;
2386
2387 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2388
2389 /* Fast return for 0 and small values. We must do this for zero, since
2390 the code below can't handle that one case. */
2391 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2392 return TRUE;
2393
2394 /* Get the number of trailing zeros. */
2395 lowbit = ffs((int) i) - 1;
2396
2397 /* Only even shifts are allowed in ARM mode so round down to the
2398 nearest even number. */
2399 if (TARGET_ARM)
2400 lowbit &= ~1;
2401
2402 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2403 return TRUE;
2404
2405 if (TARGET_ARM)
2406 {
2407 /* Allow rotated constants in ARM mode. */
2408 if (lowbit <= 4
2409 && ((i & ~0xc000003f) == 0
2410 || (i & ~0xf000000f) == 0
2411 || (i & ~0xfc000003) == 0))
2412 return TRUE;
2413 }
2414 else
2415 {
2416 HOST_WIDE_INT v;
2417
2418 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2419 v = i & 0xff;
2420 v |= v << 16;
2421 if (i == v || i == (v | (v << 8)))
2422 return TRUE;
2423
2424 /* Allow repeated pattern 0xXY00XY00. */
2425 v = i & 0xff00;
2426 v |= v << 16;
2427 if (i == v)
2428 return TRUE;
2429 }
2430
2431 return FALSE;
2432 }
2433
2434 /* Return true if I is a valid constant for the operation CODE. */
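/* For illustration: even when I itself is not encodable, the operation may
   still be possible in one instruction using the complemented or negated
   constant.  E.g. a SET of 0xffffff00 can use "mvn rd, #0xff", a PLUS of
   -16 can use "sub rd, rn, #16", and an AND with 0xffffff00 can use
   "bic rd, rn, #0xff".  */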
2435 int
2436 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2437 {
2438 if (const_ok_for_arm (i))
2439 return 1;
2440
2441 switch (code)
2442 {
2443 case SET:
2444 /* See if we can use movw. */
2445 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2446 return 1;
2447 else
2448 /* Otherwise, try mvn. */
2449 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2450
2451 case PLUS:
2452 /* See if we can use addw or subw. */
2453 if (TARGET_THUMB2
2454 && ((i & 0xfffff000) == 0
2455 || ((-i) & 0xfffff000) == 0))
2456 return 1;
2457 /* else fall through. */
2458
2459 case COMPARE:
2460 case EQ:
2461 case NE:
2462 case GT:
2463 case LE:
2464 case LT:
2465 case GE:
2466 case GEU:
2467 case LTU:
2468 case GTU:
2469 case LEU:
2470 case UNORDERED:
2471 case ORDERED:
2472 case UNEQ:
2473 case UNGE:
2474 case UNLT:
2475 case UNGT:
2476 case UNLE:
2477 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2478
2479 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2480 case XOR:
2481 return 0;
2482
2483 case IOR:
2484 if (TARGET_THUMB2)
2485 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2486 return 0;
2487
2488 case AND:
2489 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2490
2491 default:
2492 gcc_unreachable ();
2493 }
2494 }
2495
2496 /* Emit a sequence of insns to handle a large constant.
2497 CODE is the code of the operation required, it can be any of SET, PLUS,
2498 IOR, AND, XOR, MINUS;
2499 MODE is the mode in which the operation is being performed;
2500 VAL is the integer to operate on;
2501 SOURCE is the other operand (a register, or a null-pointer for SET);
2502 SUBTARGETS means it is safe to create scratch registers if that will
2503 either produce a simpler sequence, or we will want to cse the values.
2504 Return value is the number of insns emitted. */
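/* For illustration: on a core without movw/movt, a SET of 0x0000ff01
   (not a valid immediate) can be synthesized in two instructions, e.g.

     mov rd, #0xff00
     add rd, rd, #1

   whereas sufficiently expensive constants may instead be placed in a
   literal pool by arm_reorg and loaded with a PC-relative ldr.  */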
2505
2506 /* ??? Tweak this for thumb2. */
2507 int
2508 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2509 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2510 {
2511 rtx cond;
2512
2513 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2514 cond = COND_EXEC_TEST (PATTERN (insn));
2515 else
2516 cond = NULL_RTX;
2517
2518 if (subtargets || code == SET
2519 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2520 && REGNO (target) != REGNO (source)))
2521 {
2522 /* After arm_reorg has been called, we can't fix up expensive
2523 constants by pushing them into memory so we must synthesize
2524 them in-line, regardless of the cost. This is only likely to
2525 be more costly on chips that have load delay slots and we are
2526 compiling without running the scheduler (so no splitting
2527 occurred before the final instruction emission).
2528
2529 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2530 */
2531 if (!after_arm_reorg
2532 && !cond
2533 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2534 1, 0)
2535 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2536 + (code != SET))))
2537 {
2538 if (code == SET)
2539 {
2540 /* Currently SET is the only monadic value for CODE, all
2541 the rest are dyadic. */
2542 if (TARGET_USE_MOVT)
2543 arm_emit_movpair (target, GEN_INT (val));
2544 else
2545 emit_set_insn (target, GEN_INT (val));
2546
2547 return 1;
2548 }
2549 else
2550 {
2551 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2552
2553 if (TARGET_USE_MOVT)
2554 arm_emit_movpair (temp, GEN_INT (val));
2555 else
2556 emit_set_insn (temp, GEN_INT (val));
2557
2558 /* For MINUS, the value is subtracted from, since we never
2559 have subtraction of a constant. */
2560 if (code == MINUS)
2561 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2562 else
2563 emit_set_insn (target,
2564 gen_rtx_fmt_ee (code, mode, source, temp));
2565 return 2;
2566 }
2567 }
2568 }
2569
2570 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2571 1);
2572 }
2573
2574 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2575 ARM/THUMB2 immediates and add up to VAL.
2576 The function's return value gives the number of insns required. */
2577 static int
2578 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2579 struct four_ints *return_sequence)
2580 {
2581 int best_consecutive_zeros = 0;
2582 int i;
2583 int best_start = 0;
2584 int insns1, insns2;
2585 struct four_ints tmp_sequence;
2586
2587 /* If we aren't targeting ARM, the best place to start is always at
2588 the bottom, otherwise look more closely. */
2589 if (TARGET_ARM)
2590 {
2591 for (i = 0; i < 32; i += 2)
2592 {
2593 int consecutive_zeros = 0;
2594
2595 if (!(val & (3 << i)))
2596 {
2597 while ((i < 32) && !(val & (3 << i)))
2598 {
2599 consecutive_zeros += 2;
2600 i += 2;
2601 }
2602 if (consecutive_zeros > best_consecutive_zeros)
2603 {
2604 best_consecutive_zeros = consecutive_zeros;
2605 best_start = i - consecutive_zeros;
2606 }
2607 i -= 2;
2608 }
2609 }
2610 }
2611
2612 /* So long as it won't require any more insns to do so, it's
2613 desirable to emit a small constant (in bits 0...9) in the last
2614 insn. This way there is more chance that it can be combined with
2615 a later addressing insn to form a pre-indexed load or store
2616 operation. Consider:
2617
2618 *((volatile int *)0xe0000100) = 1;
2619 *((volatile int *)0xe0000110) = 2;
2620
2621 We want this to wind up as:
2622
2623 mov rA, #0xe0000000
2624 mov rB, #1
2625 str rB, [rA, #0x100]
2626 mov rB, #2
2627 str rB, [rA, #0x110]
2628
2629 rather than having to synthesize both large constants from scratch.
2630
2631 Therefore, we calculate how many insns would be required to emit
2632 the constant starting from `best_start', and also starting from
2633 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2634 yield a shorter sequence, we may as well use zero. */
2635 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2636 if (best_start != 0
2637 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2638 {
2639 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2640 if (insns2 <= insns1)
2641 {
2642 *return_sequence = tmp_sequence;
2643 insns1 = insns2;
2644 }
2645 }
2646
2647 return insns1;
2648 }
2649
2650 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2651 static int
2652 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2653 struct four_ints *return_sequence, int i)
2654 {
2655 int remainder = val & 0xffffffff;
2656 int insns = 0;
2657
2658 /* Try and find a way of doing the job in either two or three
2659 instructions.
2660
2661 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2662 location. We start at position I. This may be the MSB, or
2663 optimal_immediate_sequence may have positioned it at the largest block
2664 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2665 wrapping around to the top of the word when we drop off the bottom.
2666 In the worst case this code should produce no more than four insns.
2667
2668 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2669 constants, shifted to any arbitrary location. We should always start
2670 at the MSB. */
2671 do
2672 {
2673 int end;
2674 unsigned int b1, b2, b3, b4;
2675 unsigned HOST_WIDE_INT result;
2676 int loc;
2677
2678 gcc_assert (insns < 4);
2679
2680 if (i <= 0)
2681 i += 32;
2682
2683 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2684 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2685 {
2686 loc = i;
2687 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2688 /* We can use addw/subw for the last 12 bits. */
2689 result = remainder;
2690 else
2691 {
2692 /* Use an 8-bit shifted/rotated immediate. */
2693 end = i - 8;
2694 if (end < 0)
2695 end += 32;
2696 result = remainder & ((0x0ff << end)
2697 | ((i < end) ? (0xff >> (32 - end))
2698 : 0));
2699 i -= 8;
2700 }
2701 }
2702 else
2703 {
2704 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2705 arbitrary shifts. */
2706 i -= TARGET_ARM ? 2 : 1;
2707 continue;
2708 }
2709
2710 /* Next, see if we can do a better job with a thumb2 replicated
2711 constant.
2712
2713 We do it this way around to catch the cases like 0x01F001E0 where
2714 two 8-bit immediates would work, but a replicated constant would
2715 make it worse.
2716
2717 TODO: 16-bit constants that don't clear all the bits, but still win.
2718 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2719 if (TARGET_THUMB2)
2720 {
2721 b1 = (remainder & 0xff000000) >> 24;
2722 b2 = (remainder & 0x00ff0000) >> 16;
2723 b3 = (remainder & 0x0000ff00) >> 8;
2724 b4 = remainder & 0xff;
2725
2726 if (loc > 24)
2727 {
2728 /* The 8-bit immediate already found clears b1 (and maybe b2),
2729 but must leave b3 and b4 alone. */
2730
2731 /* First try to find a 32-bit replicated constant that clears
2732 almost everything. We can assume that we can't do it in one,
2733 or else we wouldn't be here. */
2734 unsigned int tmp = b1 & b2 & b3 & b4;
2735 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2736 + (tmp << 24);
2737 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2738 + (tmp == b3) + (tmp == b4);
2739 if (tmp
2740 && (matching_bytes >= 3
2741 || (matching_bytes == 2
2742 && const_ok_for_op (remainder & ~tmp2, code))))
2743 {
2744 /* At least 3 of the bytes match, and the fourth has at
2745 least as many bits set, or two of the bytes match
2746 and it will only require one more insn to finish. */
2747 result = tmp2;
2748 i = tmp != b1 ? 32
2749 : tmp != b2 ? 24
2750 : tmp != b3 ? 16
2751 : 8;
2752 }
2753
2754 /* Second, try to find a 16-bit replicated constant that can
2755 leave three of the bytes clear. If b2 or b4 is already
2756 zero, then we can. If the 8-bit from above would not
2757 clear b2 anyway, then we still win. */
2758 else if (b1 == b3 && (!b2 || !b4
2759 || (remainder & 0x00ff0000 & ~result)))
2760 {
2761 result = remainder & 0xff00ff00;
2762 i = 24;
2763 }
2764 }
2765 else if (loc > 16)
2766 {
2767 /* The 8-bit immediate already found clears b2 (and maybe b3)
2768 and we don't get here unless b1 is already clear, but it will
2769 leave b4 unchanged. */
2770
2771 /* If we can clear b2 and b4 at once, then we win, since the
2772 8-bits couldn't possibly reach that far. */
2773 if (b2 == b4)
2774 {
2775 result = remainder & 0x00ff00ff;
2776 i = 16;
2777 }
2778 }
2779 }
2780
2781 return_sequence->i[insns++] = result;
2782 remainder &= ~result;
2783
2784 if (code == SET || code == MINUS)
2785 code = PLUS;
2786 }
2787 while (remainder);
2788
2789 return insns;
2790 }
2791
2792 /* Emit an instruction with the indicated PATTERN. If COND is
2793 non-NULL, conditionalize the execution of the instruction on COND
2794 being true. */
2795
2796 static void
2797 emit_constant_insn (rtx cond, rtx pattern)
2798 {
2799 if (cond)
2800 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2801 emit_insn (pattern);
2802 }
2803
2804 /* As above, but extra parameter GENERATE which, if clear, suppresses
2805 RTL generation. */
2806
2807 static int
2808 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2809 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2810 int generate)
2811 {
2812 int can_invert = 0;
2813 int can_negate = 0;
2814 int final_invert = 0;
2815 int i;
2816 int set_sign_bit_copies = 0;
2817 int clear_sign_bit_copies = 0;
2818 int clear_zero_bit_copies = 0;
2819 int set_zero_bit_copies = 0;
2820 int insns = 0, neg_insns, inv_insns;
2821 unsigned HOST_WIDE_INT temp1, temp2;
2822 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2823 struct four_ints *immediates;
2824 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2825
2826 /* Find out which operations are safe for a given CODE. Also do a quick
2827 check for degenerate cases; these can occur when DImode operations
2828 are split. */
2829 switch (code)
2830 {
2831 case SET:
2832 can_invert = 1;
2833 break;
2834
2835 case PLUS:
2836 can_negate = 1;
2837 break;
2838
2839 case IOR:
2840 if (remainder == 0xffffffff)
2841 {
2842 if (generate)
2843 emit_constant_insn (cond,
2844 gen_rtx_SET (VOIDmode, target,
2845 GEN_INT (ARM_SIGN_EXTEND (val))));
2846 return 1;
2847 }
2848
2849 if (remainder == 0)
2850 {
2851 if (reload_completed && rtx_equal_p (target, source))
2852 return 0;
2853
2854 if (generate)
2855 emit_constant_insn (cond,
2856 gen_rtx_SET (VOIDmode, target, source));
2857 return 1;
2858 }
2859 break;
2860
2861 case AND:
2862 if (remainder == 0)
2863 {
2864 if (generate)
2865 emit_constant_insn (cond,
2866 gen_rtx_SET (VOIDmode, target, const0_rtx));
2867 return 1;
2868 }
2869 if (remainder == 0xffffffff)
2870 {
2871 if (reload_completed && rtx_equal_p (target, source))
2872 return 0;
2873 if (generate)
2874 emit_constant_insn (cond,
2875 gen_rtx_SET (VOIDmode, target, source));
2876 return 1;
2877 }
2878 can_invert = 1;
2879 break;
2880
2881 case XOR:
2882 if (remainder == 0)
2883 {
2884 if (reload_completed && rtx_equal_p (target, source))
2885 return 0;
2886 if (generate)
2887 emit_constant_insn (cond,
2888 gen_rtx_SET (VOIDmode, target, source));
2889 return 1;
2890 }
2891
2892 if (remainder == 0xffffffff)
2893 {
2894 if (generate)
2895 emit_constant_insn (cond,
2896 gen_rtx_SET (VOIDmode, target,
2897 gen_rtx_NOT (mode, source)));
2898 return 1;
2899 }
2900 final_invert = 1;
2901 break;
2902
2903 case MINUS:
2904 /* We treat MINUS as (val - source), since (source - val) is always
2905 passed as (source + (-val)). */
2906 if (remainder == 0)
2907 {
2908 if (generate)
2909 emit_constant_insn (cond,
2910 gen_rtx_SET (VOIDmode, target,
2911 gen_rtx_NEG (mode, source)));
2912 return 1;
2913 }
2914 if (const_ok_for_arm (val))
2915 {
2916 if (generate)
2917 emit_constant_insn (cond,
2918 gen_rtx_SET (VOIDmode, target,
2919 gen_rtx_MINUS (mode, GEN_INT (val),
2920 source)));
2921 return 1;
2922 }
2923
2924 break;
2925
2926 default:
2927 gcc_unreachable ();
2928 }
2929
2930 /* If we can do it in one insn get out quickly. */
2931 if (const_ok_for_op (val, code))
2932 {
2933 if (generate)
2934 emit_constant_insn (cond,
2935 gen_rtx_SET (VOIDmode, target,
2936 (source
2937 ? gen_rtx_fmt_ee (code, mode, source,
2938 GEN_INT (val))
2939 : GEN_INT (val))));
2940 return 1;
2941 }
2942
2943 /* Calculate a few attributes that may be useful for specific
2944 optimizations. */
2945 /* Count number of leading zeros. */
2946 for (i = 31; i >= 0; i--)
2947 {
2948 if ((remainder & (1 << i)) == 0)
2949 clear_sign_bit_copies++;
2950 else
2951 break;
2952 }
2953
2954 /* Count number of leading 1's. */
2955 for (i = 31; i >= 0; i--)
2956 {
2957 if ((remainder & (1 << i)) != 0)
2958 set_sign_bit_copies++;
2959 else
2960 break;
2961 }
2962
2963 /* Count number of trailing zero's. */
2964 for (i = 0; i <= 31; i++)
2965 {
2966 if ((remainder & (1 << i)) == 0)
2967 clear_zero_bit_copies++;
2968 else
2969 break;
2970 }
2971
2972 /* Count number of trailing 1's. */
2973 for (i = 0; i <= 31; i++)
2974 {
2975 if ((remainder & (1 << i)) != 0)
2976 set_zero_bit_copies++;
2977 else
2978 break;
2979 }
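/* For illustration: with remainder == 0x0ffff000 the loops above give
   clear_sign_bit_copies == 4, set_sign_bit_copies == 0,
   clear_zero_bit_copies == 12 and set_zero_bit_copies == 0.  */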
2980
2981 switch (code)
2982 {
2983 case SET:
2984 /* See if we can do this by sign_extending a constant that is known
2985 to be negative. This is a good way of doing it, since the shift
2986 may well merge into a subsequent insn. */
2987 if (set_sign_bit_copies > 1)
2988 {
2989 if (const_ok_for_arm
2990 (temp1 = ARM_SIGN_EXTEND (remainder
2991 << (set_sign_bit_copies - 1))))
2992 {
2993 if (generate)
2994 {
2995 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2996 emit_constant_insn (cond,
2997 gen_rtx_SET (VOIDmode, new_src,
2998 GEN_INT (temp1)));
2999 emit_constant_insn (cond,
3000 gen_ashrsi3 (target, new_src,
3001 GEN_INT (set_sign_bit_copies - 1)));
3002 }
3003 return 2;
3004 }
3005 /* For an inverted constant, we will need to set the low bits,
3006 these will be shifted out of harm's way. */
3007 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3008 if (const_ok_for_arm (~temp1))
3009 {
3010 if (generate)
3011 {
3012 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3013 emit_constant_insn (cond,
3014 gen_rtx_SET (VOIDmode, new_src,
3015 GEN_INT (temp1)));
3016 emit_constant_insn (cond,
3017 gen_ashrsi3 (target, new_src,
3018 GEN_INT (set_sign_bit_copies - 1)));
3019 }
3020 return 2;
3021 }
3022 }
3023
3024 /* See if we can calculate the value as the difference between two
3025 valid immediates. */
3026 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3027 {
3028 int topshift = clear_sign_bit_copies & ~1;
3029
3030 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3031 & (0xff000000 >> topshift));
3032
3033 /* If temp1 is zero, then that means the 9 most significant
3034 bits of remainder were 1 and we've caused it to overflow.
3035 When topshift is 0 we don't need to do anything since we
3036 can borrow from 'bit 32'. */
3037 if (temp1 == 0 && topshift != 0)
3038 temp1 = 0x80000000 >> (topshift - 1);
3039
3040 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3041
3042 if (const_ok_for_arm (temp2))
3043 {
3044 if (generate)
3045 {
3046 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3047 emit_constant_insn (cond,
3048 gen_rtx_SET (VOIDmode, new_src,
3049 GEN_INT (temp1)));
3050 emit_constant_insn (cond,
3051 gen_addsi3 (target, new_src,
3052 GEN_INT (-temp2)));
3053 }
3054
3055 return 2;
3056 }
3057 }
3058
3059 /* See if we can generate this by setting the bottom (or the top)
3060 16 bits, and then shifting these into the other half of the
3061 word. We only look for the simplest cases, to do more would cost
3062 too much. Be careful, however, not to generate this when the
3063 alternative would take fewer insns. */
3064 if (val & 0xffff0000)
3065 {
3066 temp1 = remainder & 0xffff0000;
3067 temp2 = remainder & 0x0000ffff;
3068
3069 /* Overlaps outside this range are best done using other methods. */
3070 for (i = 9; i < 24; i++)
3071 {
3072 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3073 && !const_ok_for_arm (temp2))
3074 {
3075 rtx new_src = (subtargets
3076 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3077 : target);
3078 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3079 source, subtargets, generate);
3080 source = new_src;
3081 if (generate)
3082 emit_constant_insn
3083 (cond,
3084 gen_rtx_SET
3085 (VOIDmode, target,
3086 gen_rtx_IOR (mode,
3087 gen_rtx_ASHIFT (mode, source,
3088 GEN_INT (i)),
3089 source)));
3090 return insns + 1;
3091 }
3092 }
3093
3094 /* Don't duplicate cases already considered. */
3095 for (i = 17; i < 24; i++)
3096 {
3097 if (((temp1 | (temp1 >> i)) == remainder)
3098 && !const_ok_for_arm (temp1))
3099 {
3100 rtx new_src = (subtargets
3101 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3102 : target);
3103 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3104 source, subtargets, generate);
3105 source = new_src;
3106 if (generate)
3107 emit_constant_insn
3108 (cond,
3109 gen_rtx_SET (VOIDmode, target,
3110 gen_rtx_IOR
3111 (mode,
3112 gen_rtx_LSHIFTRT (mode, source,
3113 GEN_INT (i)),
3114 source)));
3115 return insns + 1;
3116 }
3117 }
3118 }
3119 break;
3120
3121 case IOR:
3122 case XOR:
3123 /* If we have IOR or XOR, and the constant can be loaded in a
3124 single instruction, and we can find a temporary to put it in,
3125 then this can be done in two instructions instead of 3-4. */
3126 if (subtargets
3127 /* TARGET can't be NULL if SUBTARGETS is 0 */
3128 || (reload_completed && !reg_mentioned_p (target, source)))
3129 {
3130 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3131 {
3132 if (generate)
3133 {
3134 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3135
3136 emit_constant_insn (cond,
3137 gen_rtx_SET (VOIDmode, sub,
3138 GEN_INT (val)));
3139 emit_constant_insn (cond,
3140 gen_rtx_SET (VOIDmode, target,
3141 gen_rtx_fmt_ee (code, mode,
3142 source, sub)));
3143 }
3144 return 2;
3145 }
3146 }
3147
3148 if (code == XOR)
3149 break;
3150
3151 /* Convert.
3152 x = y | constant (which is composed of set_sign_bit_copies leading 1s
3153 followed by 0s in the remaining bits, e.g. 0xfff00000)
3154 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
3155
3156 This can be done in 2 instructions by using shifts with mov or mvn.
3157 e.g. for
3158 x = x | 0xfff00000;
3159 we generate.
3160 mvn r0, r0, asl #12
3161 mvn r0, r0, lsr #12 */
3162 if (set_sign_bit_copies > 8
3163 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3164 {
3165 if (generate)
3166 {
3167 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3168 rtx shift = GEN_INT (set_sign_bit_copies);
3169
3170 emit_constant_insn
3171 (cond,
3172 gen_rtx_SET (VOIDmode, sub,
3173 gen_rtx_NOT (mode,
3174 gen_rtx_ASHIFT (mode,
3175 source,
3176 shift))));
3177 emit_constant_insn
3178 (cond,
3179 gen_rtx_SET (VOIDmode, target,
3180 gen_rtx_NOT (mode,
3181 gen_rtx_LSHIFTRT (mode, sub,
3182 shift))));
3183 }
3184 return 2;
3185 }
3186
3187 /* Convert
3188 x = y | constant (which has set_zero_bit_copies trailing 1s).
3189 to
3190 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3191
3192 For example, r0 = r0 | 0xfff
3193 mvn r0, r0, lsr #12
3194 mvn r0, r0, asl #12
3195
3196 */
3197 if (set_zero_bit_copies > 8
3198 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3199 {
3200 if (generate)
3201 {
3202 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3203 rtx shift = GEN_INT (set_zero_bit_copies);
3204
3205 emit_constant_insn
3206 (cond,
3207 gen_rtx_SET (VOIDmode, sub,
3208 gen_rtx_NOT (mode,
3209 gen_rtx_LSHIFTRT (mode,
3210 source,
3211 shift))));
3212 emit_constant_insn
3213 (cond,
3214 gen_rtx_SET (VOIDmode, target,
3215 gen_rtx_NOT (mode,
3216 gen_rtx_ASHIFT (mode, sub,
3217 shift))));
3218 }
3219 return 2;
3220 }
3221
3222 /* This will never be reached for Thumb2 because orn is a valid
3223 instruction. This is for Thumb1 and the ARM 32 bit cases.
3224
3225 x = y | constant (such that ~constant is a valid constant)
3226 Transform this to
3227 x = ~(~y & ~constant).
3228 */
3229 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3230 {
3231 if (generate)
3232 {
3233 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3234 emit_constant_insn (cond,
3235 gen_rtx_SET (VOIDmode, sub,
3236 gen_rtx_NOT (mode, source)));
3237 source = sub;
3238 if (subtargets)
3239 sub = gen_reg_rtx (mode);
3240 emit_constant_insn (cond,
3241 gen_rtx_SET (VOIDmode, sub,
3242 gen_rtx_AND (mode, source,
3243 GEN_INT (temp1))));
3244 emit_constant_insn (cond,
3245 gen_rtx_SET (VOIDmode, target,
3246 gen_rtx_NOT (mode, sub)));
3247 }
3248 return 3;
3249 }
3250 break;
3251
3252 case AND:
3253 /* See if two shifts will do 2 or more insn's worth of work. */
3254 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3255 {
3256 HOST_WIDE_INT shift_mask = ((0xffffffff
3257 << (32 - clear_sign_bit_copies))
3258 & 0xffffffff);
3259
3260 if ((remainder | shift_mask) != 0xffffffff)
3261 {
3262 if (generate)
3263 {
3264 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3265 insns = arm_gen_constant (AND, mode, cond,
3266 remainder | shift_mask,
3267 new_src, source, subtargets, 1);
3268 source = new_src;
3269 }
3270 else
3271 {
3272 rtx targ = subtargets ? NULL_RTX : target;
3273 insns = arm_gen_constant (AND, mode, cond,
3274 remainder | shift_mask,
3275 targ, source, subtargets, 0);
3276 }
3277 }
3278
3279 if (generate)
3280 {
3281 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3282 rtx shift = GEN_INT (clear_sign_bit_copies);
3283
3284 emit_insn (gen_ashlsi3 (new_src, source, shift));
3285 emit_insn (gen_lshrsi3 (target, new_src, shift));
3286 }
3287
3288 return insns + 2;
3289 }
3290
3291 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3292 {
3293 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3294
3295 if ((remainder | shift_mask) != 0xffffffff)
3296 {
3297 if (generate)
3298 {
3299 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3300
3301 insns = arm_gen_constant (AND, mode, cond,
3302 remainder | shift_mask,
3303 new_src, source, subtargets, 1);
3304 source = new_src;
3305 }
3306 else
3307 {
3308 rtx targ = subtargets ? NULL_RTX : target;
3309
3310 insns = arm_gen_constant (AND, mode, cond,
3311 remainder | shift_mask,
3312 targ, source, subtargets, 0);
3313 }
3314 }
3315
3316 if (generate)
3317 {
3318 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3319 rtx shift = GEN_INT (clear_zero_bit_copies);
3320
3321 emit_insn (gen_lshrsi3 (new_src, source, shift));
3322 emit_insn (gen_ashlsi3 (target, new_src, shift));
3323 }
3324
3325 return insns + 2;
3326 }
3327
3328 break;
3329
3330 default:
3331 break;
3332 }
3333
3334 /* Calculate what the instruction sequences would be if we generated it
3335 normally, negated, or inverted. */
3336 if (code == AND)
3337 /* AND cannot be split into multiple insns, so invert and use BIC. */
3338 insns = 99;
3339 else
3340 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3341
3342 if (can_negate)
3343 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3344 &neg_immediates);
3345 else
3346 neg_insns = 99;
3347
3348 if (can_invert || final_invert)
3349 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3350 &inv_immediates);
3351 else
3352 inv_insns = 99;
3353
3354 immediates = &pos_immediates;
3355
3356 /* Is the negated immediate sequence more efficient? */
3357 if (neg_insns < insns && neg_insns <= inv_insns)
3358 {
3359 insns = neg_insns;
3360 immediates = &neg_immediates;
3361 }
3362 else
3363 can_negate = 0;
3364
3365 /* Is the inverted immediate sequence more efficient?
3366 We must allow for an extra NOT instruction for XOR operations, although
3367 there is some chance that the final 'mvn' will get optimized later. */
3368 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3369 {
3370 insns = inv_insns;
3371 immediates = &inv_immediates;
3372 }
3373 else
3374 {
3375 can_invert = 0;
3376 final_invert = 0;
3377 }
3378
3379 /* Now output the chosen sequence as instructions. */
3380 if (generate)
3381 {
3382 for (i = 0; i < insns; i++)
3383 {
3384 rtx new_src, temp1_rtx;
3385
3386 temp1 = immediates->i[i];
3387
3388 if (code == SET || code == MINUS)
3389 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3390 else if ((final_invert || i < (insns - 1)) && subtargets)
3391 new_src = gen_reg_rtx (mode);
3392 else
3393 new_src = target;
3394
3395 if (can_invert)
3396 temp1 = ~temp1;
3397 else if (can_negate)
3398 temp1 = -temp1;
3399
3400 temp1 = trunc_int_for_mode (temp1, mode);
3401 temp1_rtx = GEN_INT (temp1);
3402
3403 if (code == SET)
3404 ;
3405 else if (code == MINUS)
3406 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3407 else
3408 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3409
3410 emit_constant_insn (cond,
3411 gen_rtx_SET (VOIDmode, new_src,
3412 temp1_rtx));
3413 source = new_src;
3414
3415 if (code == SET)
3416 {
3417 can_negate = can_invert;
3418 can_invert = 0;
3419 code = PLUS;
3420 }
3421 else if (code == MINUS)
3422 code = PLUS;
3423 }
3424 }
3425
3426 if (final_invert)
3427 {
3428 if (generate)
3429 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3430 gen_rtx_NOT (mode, source)));
3431 insns++;
3432 }
3433
3434 return insns;
3435 }
3436
3437 /* Canonicalize a comparison so that we are more likely to recognize it.
3438 This can be done for a few constant compares, where we can make the
3439 immediate value easier to load. */
3440
3441 enum rtx_code
3442 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3443 {
3444 enum machine_mode mode;
3445 unsigned HOST_WIDE_INT i, maxval;
3446
3447 mode = GET_MODE (*op0);
3448 if (mode == VOIDmode)
3449 mode = GET_MODE (*op1);
3450
3451 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3452
3453 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3454 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3455 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3456 for GTU/LEU in Thumb mode. */
3457 if (mode == DImode)
3458 {
3459 rtx tem;
3460
3461 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3462 available. */
3463 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3464 return code;
3465
3466 if (code == GT || code == LE
3467 || (!TARGET_ARM && (code == GTU || code == LEU)))
3468 {
3469 /* Missing comparison. First try to use an available
3470 comparison. */
3471 if (GET_CODE (*op1) == CONST_INT)
3472 {
3473 i = INTVAL (*op1);
3474 switch (code)
3475 {
3476 case GT:
3477 case LE:
3478 if (i != maxval
3479 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3480 {
3481 *op1 = GEN_INT (i + 1);
3482 return code == GT ? GE : LT;
3483 }
3484 break;
3485 case GTU:
3486 case LEU:
3487 if (i != ~((unsigned HOST_WIDE_INT) 0)
3488 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3489 {
3490 *op1 = GEN_INT (i + 1);
3491 return code == GTU ? GEU : LTU;
3492 }
3493 break;
3494 default:
3495 gcc_unreachable ();
3496 }
3497 }
3498
3499 /* If that did not work, reverse the condition. */
3500 tem = *op0;
3501 *op0 = *op1;
3502 *op1 = tem;
3503 return swap_condition (code);
3504 }
3505
3506 return code;
3507 }
3508
3509 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3510 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3511 to facilitate possible combining with a cmp into 'ands'. */
3512 if (mode == SImode
3513 && GET_CODE (*op0) == ZERO_EXTEND
3514 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3515 && GET_MODE (XEXP (*op0, 0)) == QImode
3516 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3517 && subreg_lowpart_p (XEXP (*op0, 0))
3518 && *op1 == const0_rtx)
3519 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3520 GEN_INT (255));
3521
3522 /* Comparisons smaller than DImode. Only adjust comparisons against
3523 an out-of-range constant. */
3524 if (GET_CODE (*op1) != CONST_INT
3525 || const_ok_for_arm (INTVAL (*op1))
3526 || const_ok_for_arm (- INTVAL (*op1)))
3527 return code;
3528
3529 i = INTVAL (*op1);
3530
3531 switch (code)
3532 {
3533 case EQ:
3534 case NE:
3535 return code;
3536
3537 case GT:
3538 case LE:
3539 if (i != maxval
3540 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3541 {
3542 *op1 = GEN_INT (i + 1);
3543 return code == GT ? GE : LT;
3544 }
3545 break;
3546
3547 case GE:
3548 case LT:
3549 if (i != ~maxval
3550 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3551 {
3552 *op1 = GEN_INT (i - 1);
3553 return code == GE ? GT : LE;
3554 }
3555 break;
3556
3557 case GTU:
3558 case LEU:
3559 if (i != ~((unsigned HOST_WIDE_INT) 0)
3560 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3561 {
3562 *op1 = GEN_INT (i + 1);
3563 return code == GTU ? GEU : LTU;
3564 }
3565 break;
3566
3567 case GEU:
3568 case LTU:
3569 if (i != 0
3570 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3571 {
3572 *op1 = GEN_INT (i - 1);
3573 return code == GEU ? GTU : LEU;
3574 }
3575 break;
3576
3577 default:
3578 gcc_unreachable ();
3579 }
3580
3581 return code;
3582 }
3583
3584
3585 /* Define how to find the value returned by a function. */
3586
3587 static rtx
3588 arm_function_value(const_tree type, const_tree func,
3589 bool outgoing ATTRIBUTE_UNUSED)
3590 {
3591 enum machine_mode mode;
3592 int unsignedp ATTRIBUTE_UNUSED;
3593 rtx r ATTRIBUTE_UNUSED;
3594
3595 mode = TYPE_MODE (type);
3596
3597 if (TARGET_AAPCS_BASED)
3598 return aapcs_allocate_return_reg (mode, type, func);
3599
3600 /* Promote integer types. */
3601 if (INTEGRAL_TYPE_P (type))
3602 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3603
3604 /* Promote small structs returned in a register to full-word size
3605 for big-endian AAPCS.  */
3606 if (arm_return_in_msb (type))
3607 {
3608 HOST_WIDE_INT size = int_size_in_bytes (type);
3609 if (size % UNITS_PER_WORD != 0)
3610 {
3611 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3612 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3613 }
3614 }
3615
3616 return arm_libcall_value_1 (mode);
3617 }
3618
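/* Equality callback for the hash table of libcalls that use the AAPCS
   base calling convention.  */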
3619 static int
3620 libcall_eq (const void *p1, const void *p2)
3621 {
3622 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3623 }
3624
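/* Hash callback for the libcall hash table.  */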
3625 static hashval_t
3626 libcall_hash (const void *p1)
3627 {
3628 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3629 }
3630
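/* Record LIBCALL in the hash table HTAB.  */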
3631 static void
3632 add_libcall (htab_t htab, rtx libcall)
3633 {
3634 *htab_find_slot (htab, libcall, INSERT) = libcall;
3635 }
3636
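/* Return true if LIBCALL is one of the helper routines that always uses
   the AAPCS base PCS, so that its result is returned in core registers
   even when the hard-float ABI is in effect.  */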
3637 static bool
3638 arm_libcall_uses_aapcs_base (const_rtx libcall)
3639 {
3640 static bool init_done = false;
3641 static htab_t libcall_htab;
3642
3643 if (!init_done)
3644 {
3645 init_done = true;
3646
3647 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3648 NULL);
3649 add_libcall (libcall_htab,
3650 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3651 add_libcall (libcall_htab,
3652 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3653 add_libcall (libcall_htab,
3654 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3655 add_libcall (libcall_htab,
3656 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3657
3658 add_libcall (libcall_htab,
3659 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3660 add_libcall (libcall_htab,
3661 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3662 add_libcall (libcall_htab,
3663 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3664 add_libcall (libcall_htab,
3665 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3666
3667 add_libcall (libcall_htab,
3668 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3669 add_libcall (libcall_htab,
3670 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3671 add_libcall (libcall_htab,
3672 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3673 add_libcall (libcall_htab,
3674 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3675 add_libcall (libcall_htab,
3676 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3677 add_libcall (libcall_htab,
3678 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3679 add_libcall (libcall_htab,
3680 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3681 add_libcall (libcall_htab,
3682 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3683
3684 /* Values from double-precision helper functions are returned in core
3685 registers if the selected core only supports single-precision
3686 arithmetic, even if we are using the hard-float ABI. The same is
3687 true for single-precision helpers, but we will never be using the
3688 hard-float ABI on a CPU which doesn't support single-precision
3689 operations in hardware. */
3690 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3691 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3692 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3693 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3694 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3695 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3696 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3697 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3698 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3699 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3700 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3701 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3702 SFmode));
3703 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3704 DFmode));
3705 }
3706
3707 return libcall && htab_find (libcall_htab, libcall) != NULL;
3708 }
3709
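/* Common worker for arm_function_value and arm_libcall_value: return the
   register in which a value of mode MODE is returned under the default
   rules of the current ABI.  */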
3710 static rtx
3711 arm_libcall_value_1 (enum machine_mode mode)
3712 {
3713 if (TARGET_AAPCS_BASED)
3714 return aapcs_libcall_value (mode);
3715 else if (TARGET_32BIT
3716 && TARGET_HARD_FLOAT_ABI
3717 && TARGET_FPA
3718 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3719 return gen_rtx_REG (mode, FIRST_FPA_REGNUM);
3720 else if (TARGET_32BIT
3721 && TARGET_HARD_FLOAT_ABI
3722 && TARGET_MAVERICK
3723 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3724 return gen_rtx_REG (mode, FIRST_CIRRUS_FP_REGNUM);
3725 else if (TARGET_IWMMXT_ABI
3726 && arm_vector_mode_supported_p (mode))
3727 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3728 else
3729 return gen_rtx_REG (mode, ARG_REGISTER (1));
3730 }
3731
3732 /* Define how to find the value returned by a library function
3733 assuming the value has mode MODE. */
3734
3735 static rtx
3736 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3737 {
3738 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3739 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3740 {
3741 /* The following libcalls return their result in integer registers,
3742 even though they return a floating point value. */
3743 if (arm_libcall_uses_aapcs_base (libcall))
3744 return gen_rtx_REG (mode, ARG_REGISTER(1));
3745
3746 }
3747
3748 return arm_libcall_value_1 (mode);
3749 }
3750
3751 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3752
3753 static bool
3754 arm_function_value_regno_p (const unsigned int regno)
3755 {
3756 if (regno == ARG_REGISTER (1)
3757 || (TARGET_32BIT
3758 && TARGET_AAPCS_BASED
3759 && TARGET_VFP
3760 && TARGET_HARD_FLOAT
3761 && regno == FIRST_VFP_REGNUM)
3762 || (TARGET_32BIT
3763 && TARGET_HARD_FLOAT_ABI
3764 && TARGET_MAVERICK
3765 && regno == FIRST_CIRRUS_FP_REGNUM)
3766 || (TARGET_IWMMXT_ABI
3767 && regno == FIRST_IWMMXT_REGNUM)
3768 || (TARGET_32BIT
3769 && TARGET_HARD_FLOAT_ABI
3770 && TARGET_FPA
3771 && regno == FIRST_FPA_REGNUM))
3772 return true;
3773
3774 return false;
3775 }
3776
3777 /* Determine the amount of memory needed to store the possible return
3778 registers of an untyped call. */
3779 int
3780 arm_apply_result_size (void)
3781 {
3782 int size = 16;
3783
3784 if (TARGET_32BIT)
3785 {
3786 if (TARGET_HARD_FLOAT_ABI)
3787 {
3788 if (TARGET_VFP)
3789 size += 32;
3790 if (TARGET_FPA)
3791 size += 12;
3792 if (TARGET_MAVERICK)
3793 size += 8;
3794 }
3795 if (TARGET_IWMMXT_ABI)
3796 size += 8;
3797 }
3798
3799 return size;
3800 }
3801
3802 /* Decide whether TYPE should be returned in memory (true)
3803 or in a register (false). FNTYPE is the type of the function making
3804 the call. */
3805 static bool
3806 arm_return_in_memory (const_tree type, const_tree fntype)
3807 {
3808 HOST_WIDE_INT size;
3809
3810 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3811
3812 if (TARGET_AAPCS_BASED)
3813 {
3814 /* Simple, non-aggregate types (i.e. not including vectors and
3815 complex) are always returned in a register (or registers).
3816 We don't care about which register here, so we can short-cut
3817 some of the detail. */
3818 if (!AGGREGATE_TYPE_P (type)
3819 && TREE_CODE (type) != VECTOR_TYPE
3820 && TREE_CODE (type) != COMPLEX_TYPE)
3821 return false;
3822
3823 /* Any return value that is no larger than one word can be
3824 returned in r0. */
3825 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3826 return false;
3827
3828 /* Check any available co-processors to see if they accept the
3829 type as a register candidate (VFP, for example, can return
3830 some aggregates in consecutive registers). These aren't
3831 available if the call is variadic. */
3832 if (aapcs_select_return_coproc (type, fntype) >= 0)
3833 return false;
3834
3835 /* Vector values should be returned using ARM registers, not
3836 memory (unless they're over 16 bytes, which will break since
3837 we only have four call-clobbered registers to play with). */
3838 if (TREE_CODE (type) == VECTOR_TYPE)
3839 return (size < 0 || size > (4 * UNITS_PER_WORD));
3840
3841 /* The rest go in memory. */
3842 return true;
3843 }
3844
3845 if (TREE_CODE (type) == VECTOR_TYPE)
3846 return (size < 0 || size > (4 * UNITS_PER_WORD));
3847
3848 if (!AGGREGATE_TYPE_P (type)
3849 && (TREE_CODE (type) != VECTOR_TYPE))
3850 /* All simple types are returned in registers. */
3851 return false;
3852
3853 if (arm_abi != ARM_ABI_APCS)
3854 {
3855 /* ATPCS and later return aggregate types in memory only if they are
3856 larger than a word (or are variable size). */
3857 return (size < 0 || size > UNITS_PER_WORD);
3858 }
3859
3860 /* For the arm-wince targets we choose to be compatible with Microsoft's
3861 ARM and Thumb compilers, which always return aggregates in memory. */
3862 #ifndef ARM_WINCE
3863 /* All structures/unions bigger than one word are returned in memory.
3864 Also catch the case where int_size_in_bytes returns -1. In this case
3865 the aggregate is either huge or of variable size, and in either case
3866 we will want to return it via memory and not in a register. */
3867 if (size < 0 || size > UNITS_PER_WORD)
3868 return true;
3869
3870 if (TREE_CODE (type) == RECORD_TYPE)
3871 {
3872 tree field;
3873
3874 /* For a struct the APCS says that we only return in a register
3875 if the type is 'integer like' and every addressable element
3876 has an offset of zero. For practical purposes this means
3877 that the structure can have at most one non bit-field element
3878 and that this element must be the first one in the structure. */
3879
3880 /* Find the first field, ignoring non FIELD_DECL things which will
3881 have been created by C++. */
3882 for (field = TYPE_FIELDS (type);
3883 field && TREE_CODE (field) != FIELD_DECL;
3884 field = DECL_CHAIN (field))
3885 continue;
3886
3887 if (field == NULL)
3888 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3889
3890 /* Check that the first field is valid for returning in a register. */
3891
3892 /* ... Floats are not allowed */
3893 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3894 return true;
3895
3896 /* ... Aggregates that are not themselves valid for returning in
3897 a register are not allowed. */
3898 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3899 return true;
3900
3901 /* Now check the remaining fields, if any. Only bitfields are allowed,
3902 since they are not addressable. */
3903 for (field = DECL_CHAIN (field);
3904 field;
3905 field = DECL_CHAIN (field))
3906 {
3907 if (TREE_CODE (field) != FIELD_DECL)
3908 continue;
3909
3910 if (!DECL_BIT_FIELD_TYPE (field))
3911 return true;
3912 }
3913
3914 return false;
3915 }
3916
3917 if (TREE_CODE (type) == UNION_TYPE)
3918 {
3919 tree field;
3920
3921 /* Unions can be returned in registers if every element is
3922 integral, or can be returned in an integer register. */
3923 for (field = TYPE_FIELDS (type);
3924 field;
3925 field = DECL_CHAIN (field))
3926 {
3927 if (TREE_CODE (field) != FIELD_DECL)
3928 continue;
3929
3930 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3931 return true;
3932
3933 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3934 return true;
3935 }
3936
3937 return false;
3938 }
3939 #endif /* not ARM_WINCE */
3940
3941 /* Return all other types in memory. */
3942 return true;
3943 }
3944
3945 /* Indicate whether or not words of a double are in big-endian order. */
3946
3947 int
3948 arm_float_words_big_endian (void)
3949 {
3950 if (TARGET_MAVERICK)
3951 return 0;
3952
3953 /* For FPA, float words are always big-endian.  For VFP, float words
3954 follow the memory system mode.  */
3955
3956 if (TARGET_FPA)
3957 {
3958 return 1;
3959 }
3960
3961 if (TARGET_VFP)
3962 return (TARGET_BIG_END ? 1 : 0);
3963
3964 return 1;
3965 }
3966
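/* Table mapping the argument strings accepted by the "pcs" attribute to
   the corresponding procedure call standard.  */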
3967 const struct pcs_attribute_arg
3968 {
3969 const char *arg;
3970 enum arm_pcs value;
3971 } pcs_attribute_args[] =
3972 {
3973 {"aapcs", ARM_PCS_AAPCS},
3974 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3975 #if 0
3976 /* We could recognize these, but changes would be needed elsewhere
3977 * to implement them. */
3978 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3979 {"atpcs", ARM_PCS_ATPCS},
3980 {"apcs", ARM_PCS_APCS},
3981 #endif
3982 {NULL, ARM_PCS_UNKNOWN}
3983 };
3984
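/* Decode the argument list ATTR of a "pcs" attribute; return the PCS
   variant it names, or ARM_PCS_UNKNOWN if it is not recognized.  */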
3985 static enum arm_pcs
3986 arm_pcs_from_attribute (tree attr)
3987 {
3988 const struct pcs_attribute_arg *ptr;
3989 const char *arg;
3990
3991 /* Get the value of the argument. */
3992 if (TREE_VALUE (attr) == NULL_TREE
3993 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
3994 return ARM_PCS_UNKNOWN;
3995
3996 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
3997
3998 /* Check it against the list of known arguments. */
3999 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4000 if (streq (arg, ptr->arg))
4001 return ptr->value;
4002
4003 /* An unrecognized PCS variant.  */
4004 return ARM_PCS_UNKNOWN;
4005 }
4006
4007 /* Get the PCS variant to use for this call. TYPE is the function's type
4008 specification, DECL is the specific declaration.  DECL may be null if
4009 the call could be indirect or if this is a library call. */
4010 static enum arm_pcs
4011 arm_get_pcs_model (const_tree type, const_tree decl)
4012 {
4013 bool user_convention = false;
4014 enum arm_pcs user_pcs = arm_pcs_default;
4015 tree attr;
4016
4017 gcc_assert (type);
4018
4019 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4020 if (attr)
4021 {
4022 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4023 user_convention = true;
4024 }
4025
4026 if (TARGET_AAPCS_BASED)
4027 {
4028 /* Detect varargs functions. These always use the base rules
4029 (no argument is ever a candidate for a co-processor
4030 register). */
4031 bool base_rules = stdarg_p (type);
4032
4033 if (user_convention)
4034 {
4035 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4036 sorry ("non-AAPCS derived PCS variant");
4037 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4038 error ("variadic functions must use the base AAPCS variant");
4039 }
4040
4041 if (base_rules)
4042 return ARM_PCS_AAPCS;
4043 else if (user_convention)
4044 return user_pcs;
4045 else if (decl && flag_unit_at_a_time)
4046 {
4047 /* Local functions never leak outside this compilation unit,
4048 so we are free to use whatever conventions are
4049 appropriate. */
4050 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4051 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4052 if (i && i->local)
4053 return ARM_PCS_AAPCS_LOCAL;
4054 }
4055 }
4056 else if (user_convention && user_pcs != arm_pcs_default)
4057 sorry ("PCS variant");
4058
4059 /* For everything else we use the target's default. */
4060 return arm_pcs_default;
4061 }
4062
4063
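/* Co-processor hook: initialize the VFP-specific fields of PCUM before
   laying out the arguments of a call.  */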
4064 static void
4065 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4066 const_tree fntype ATTRIBUTE_UNUSED,
4067 rtx libcall ATTRIBUTE_UNUSED,
4068 const_tree fndecl ATTRIBUTE_UNUSED)
4069 {
4070 /* Record the unallocated VFP registers. */
4071 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4072 pcum->aapcs_vfp_reg_alloc = 0;
4073 }
4074
4075 /* Walk down the type tree of TYPE counting consecutive base elements.
4076 If *MODEP is VOIDmode, then set it to the first valid floating point
4077 type. If a non-floating point type is found, or if a floating point
4078 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4079 otherwise return the count in the sub-tree. */
4080 static int
4081 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4082 {
4083 enum machine_mode mode;
4084 HOST_WIDE_INT size;
4085
4086 switch (TREE_CODE (type))
4087 {
4088 case REAL_TYPE:
4089 mode = TYPE_MODE (type);
4090 if (mode != DFmode && mode != SFmode)
4091 return -1;
4092
4093 if (*modep == VOIDmode)
4094 *modep = mode;
4095
4096 if (*modep == mode)
4097 return 1;
4098
4099 break;
4100
4101 case COMPLEX_TYPE:
4102 mode = TYPE_MODE (TREE_TYPE (type));
4103 if (mode != DFmode && mode != SFmode)
4104 return -1;
4105
4106 if (*modep == VOIDmode)
4107 *modep = mode;
4108
4109 if (*modep == mode)
4110 return 2;
4111
4112 break;
4113
4114 case VECTOR_TYPE:
4115 /* Use V2SImode and V4SImode as representatives of all 64-bit
4116 and 128-bit vector types, whether or not those modes are
4117 supported with the present options. */
4118 size = int_size_in_bytes (type);
4119 switch (size)
4120 {
4121 case 8:
4122 mode = V2SImode;
4123 break;
4124 case 16:
4125 mode = V4SImode;
4126 break;
4127 default:
4128 return -1;
4129 }
4130
4131 if (*modep == VOIDmode)
4132 *modep = mode;
4133
4134 /* Vector modes are considered to be opaque: two vectors are
4135 equivalent for the purposes of being homogeneous aggregates
4136 if they are the same size. */
4137 if (*modep == mode)
4138 return 1;
4139
4140 break;
4141
4142 case ARRAY_TYPE:
4143 {
4144 int count;
4145 tree index = TYPE_DOMAIN (type);
4146
4147 /* Can't handle incomplete types. */
4148 if (!COMPLETE_TYPE_P(type))
4149 return -1;
4150
4151 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4152 if (count == -1
4153 || !index
4154 || !TYPE_MAX_VALUE (index)
4155 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4156 || !TYPE_MIN_VALUE (index)
4157 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4158 || count < 0)
4159 return -1;
4160
4161 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4162 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4163
4164 /* There must be no padding. */
4165 if (!host_integerp (TYPE_SIZE (type), 1)
4166 || (tree_low_cst (TYPE_SIZE (type), 1)
4167 != count * GET_MODE_BITSIZE (*modep)))
4168 return -1;
4169
4170 return count;
4171 }
4172
4173 case RECORD_TYPE:
4174 {
4175 int count = 0;
4176 int sub_count;
4177 tree field;
4178
4179 /* Can't handle incomplete types. */
4180 if (!COMPLETE_TYPE_P(type))
4181 return -1;
4182
4183 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4184 {
4185 if (TREE_CODE (field) != FIELD_DECL)
4186 continue;
4187
4188 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4189 if (sub_count < 0)
4190 return -1;
4191 count += sub_count;
4192 }
4193
4194 /* There must be no padding. */
4195 if (!host_integerp (TYPE_SIZE (type), 1)
4196 || (tree_low_cst (TYPE_SIZE (type), 1)
4197 != count * GET_MODE_BITSIZE (*modep)))
4198 return -1;
4199
4200 return count;
4201 }
4202
4203 case UNION_TYPE:
4204 case QUAL_UNION_TYPE:
4205 {
4206 /* These aren't very interesting except in a degenerate case. */
4207 int count = 0;
4208 int sub_count;
4209 tree field;
4210
4211 /* Can't handle incomplete types. */
4212 if (!COMPLETE_TYPE_P(type))
4213 return -1;
4214
4215 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4216 {
4217 if (TREE_CODE (field) != FIELD_DECL)
4218 continue;
4219
4220 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4221 if (sub_count < 0)
4222 return -1;
4223 count = count > sub_count ? count : sub_count;
4224 }
4225
4226 /* There must be no padding. */
4227 if (!host_integerp (TYPE_SIZE (type), 1)
4228 || (tree_low_cst (TYPE_SIZE (type), 1)
4229 != count * GET_MODE_BITSIZE (*modep)))
4230 return -1;
4231
4232 return count;
4233 }
4234
4235 default:
4236 break;
4237 }
4238
4239 return -1;
4240 }
4241
4242 /* Return true if PCS_VARIANT should use VFP registers. */
4243 static bool
4244 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4245 {
4246 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4247 {
4248 static bool seen_thumb1_vfp = false;
4249
4250 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4251 {
4252 sorry ("Thumb-1 hard-float VFP ABI");
4253 /* sorry() is not immediately fatal, so only display this once. */
4254 seen_thumb1_vfp = true;
4255 }
4256
4257 return true;
4258 }
4259
4260 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4261 return false;
4262
4263 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
4264 && (TARGET_VFP_DOUBLE || !is_double));
4265 }
4266
4267 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4268 suitable for passing or returning in VFP registers for the PCS
4269 variant selected. If it is, then *BASE_MODE is updated to contain
4270 a machine mode describing each element of the argument's type and
4271 *COUNT to hold the number of such elements. */
4272 static bool
4273 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4274 enum machine_mode mode, const_tree type,
4275 enum machine_mode *base_mode, int *count)
4276 {
4277 enum machine_mode new_mode = VOIDmode;
4278
4279 /* If we have the type information, prefer that to working things
4280 out from the mode. */
4281 if (type)
4282 {
4283 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4284
4285 if (ag_count > 0 && ag_count <= 4)
4286 *count = ag_count;
4287 else
4288 return false;
4289 }
4290 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4291 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4292 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4293 {
4294 *count = 1;
4295 new_mode = mode;
4296 }
4297 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4298 {
4299 *count = 2;
4300 new_mode = (mode == DCmode ? DFmode : SFmode);
4301 }
4302 else
4303 return false;
4304
4305
4306 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4307 return false;
4308
4309 *base_mode = new_mode;
4310 return true;
4311 }
4312
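/* Return true if a value of mode MODE and type TYPE can be returned in
   VFP registers under PCS_VARIANT.  */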
4313 static bool
4314 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4315 enum machine_mode mode, const_tree type)
4316 {
4317 int count ATTRIBUTE_UNUSED;
4318 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4319
4320 if (!use_vfp_abi (pcs_variant, false))
4321 return false;
4322 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4323 &ag_mode, &count);
4324 }
4325
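/* Return true if an argument of mode MODE and type TYPE is a candidate
   for the VFP argument registers under PCUM's PCS variant; if so, record
   the base element mode and element count in PCUM for the allocation
   step.  */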
4326 static bool
4327 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4328 const_tree type)
4329 {
4330 if (!use_vfp_abi (pcum->pcs_variant, false))
4331 return false;
4332
4333 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4334 &pcum->aapcs_vfp_rmode,
4335 &pcum->aapcs_vfp_rcount);
4336 }
4337
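/* Try to allocate a contiguous block of free VFP registers for the
   current argument.  On success, set pcum->aapcs_reg to the allocated
   register (a REG, or a PARALLEL of registers for BLKmode and
   unsupported TImode arguments) and return true; return false if no
   suitably aligned block is free.  */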
4338 static bool
4339 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4340 const_tree type ATTRIBUTE_UNUSED)
4341 {
4342 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4343 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4344 int regno;
4345
4346 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4347 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4348 {
4349 pcum->aapcs_vfp_reg_alloc = mask << regno;
4350 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4351 {
4352 int i;
4353 int rcount = pcum->aapcs_vfp_rcount;
4354 int rshift = shift;
4355 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4356 rtx par;
4357 if (!TARGET_NEON)
4358 {
4359 /* Avoid using unsupported vector modes. */
4360 if (rmode == V2SImode)
4361 rmode = DImode;
4362 else if (rmode == V4SImode)
4363 {
4364 rmode = DImode;
4365 rcount *= 2;
4366 rshift /= 2;
4367 }
4368 }
4369 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4370 for (i = 0; i < rcount; i++)
4371 {
4372 rtx tmp = gen_rtx_REG (rmode,
4373 FIRST_VFP_REGNUM + regno + i * rshift);
4374 tmp = gen_rtx_EXPR_LIST
4375 (VOIDmode, tmp,
4376 GEN_INT (i * GET_MODE_SIZE (rmode)));
4377 XVECEXP (par, 0, i) = tmp;
4378 }
4379
4380 pcum->aapcs_reg = par;
4381 }
4382 else
4383 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4384 return true;
4385 }
4386 return false;
4387 }
4388
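/* Return the RTX (a single REG or a PARALLEL) describing where a value
   of mode MODE and type TYPE is returned in VFP registers.  */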
4389 static rtx
4390 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4391 enum machine_mode mode,
4392 const_tree type ATTRIBUTE_UNUSED)
4393 {
4394 if (!use_vfp_abi (pcs_variant, false))
4395 return NULL;
4396
4397 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4398 {
4399 int count;
4400 enum machine_mode ag_mode;
4401 int i;
4402 rtx par;
4403 int shift;
4404
4405 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4406 &ag_mode, &count);
4407
4408 if (!TARGET_NEON)
4409 {
4410 if (ag_mode == V2SImode)
4411 ag_mode = DImode;
4412 else if (ag_mode == V4SImode)
4413 {
4414 ag_mode = DImode;
4415 count *= 2;
4416 }
4417 }
4418 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4419 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4420 for (i = 0; i < count; i++)
4421 {
4422 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4423 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4424 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4425 XVECEXP (par, 0, i) = tmp;
4426 }
4427
4428 return par;
4429 }
4430
4431 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4432 }
4433
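/* Mark the VFP registers allocated to the current argument as used and
   clear the pending allocation.  */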
4434 static void
4435 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4436 enum machine_mode mode ATTRIBUTE_UNUSED,
4437 const_tree type ATTRIBUTE_UNUSED)
4438 {
4439 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4440 pcum->aapcs_vfp_reg_alloc = 0;
4441 return;
4442 }
4443
4444 #define AAPCS_CP(X) \
4445 { \
4446 aapcs_ ## X ## _cum_init, \
4447 aapcs_ ## X ## _is_call_candidate, \
4448 aapcs_ ## X ## _allocate, \
4449 aapcs_ ## X ## _is_return_candidate, \
4450 aapcs_ ## X ## _allocate_return_reg, \
4451 aapcs_ ## X ## _advance \
4452 }
4453
4454 /* Table of co-processors that can be used to pass arguments in
4455 registers.  Ideally no argument should be a candidate for more than
4456 one co-processor table entry, but the table is processed in order
4457 and stops after the first match. If that entry then fails to put
4458 the argument into a co-processor register, the argument will go on
4459 the stack. */
4460 static struct
4461 {
4462 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4463 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4464
4465 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4466 BLKmode) is a candidate for this co-processor's registers; this
4467 function should ignore any position-dependent state in
4468 CUMULATIVE_ARGS and only use call-type dependent information. */
4469 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4470
4471 /* Return true if the argument does get a co-processor register; it
4472 should set aapcs_reg to an RTX of the register allocated as is
4473 required for a return from FUNCTION_ARG. */
4474 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4475
4476 /* Return true if a result of mode MODE (or type TYPE if MODE is
4477 BLKmode) can be returned in this co-processor's registers.  */
4478 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4479
4480 /* Allocate and return an RTX element to hold the return type of a
4481 call; this routine must not fail and will only be called if
4482 is_return_candidate returned true with the same parameters. */
4483 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4484
4485 /* Finish processing this argument and prepare to start processing
4486 the next one. */
4487 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4488 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4489 {
4490 AAPCS_CP(vfp)
4491 };
4492
4493 #undef AAPCS_CP
4494
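/* Return the index of the co-processor slot that accepts an argument of
   mode MODE and type TYPE as a call candidate, or -1 if none does.  */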
4495 static int
4496 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4497 const_tree type)
4498 {
4499 int i;
4500
4501 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4502 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4503 return i;
4504
4505 return -1;
4506 }
4507
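/* Return the index of the co-processor slot that can return a value of
   type TYPE from a function of type FNTYPE, or -1 if no co-processor
   can.  */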
4508 static int
4509 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4510 {
4511 /* We aren't passed a decl, so we can't check that a call is local.
4512 However, it isn't clear that that would be a win anyway, since it
4513 might limit some tail-calling opportunities. */
4514 enum arm_pcs pcs_variant;
4515
4516 if (fntype)
4517 {
4518 const_tree fndecl = NULL_TREE;
4519
4520 if (TREE_CODE (fntype) == FUNCTION_DECL)
4521 {
4522 fndecl = fntype;
4523 fntype = TREE_TYPE (fntype);
4524 }
4525
4526 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4527 }
4528 else
4529 pcs_variant = arm_pcs_default;
4530
4531 if (pcs_variant != ARM_PCS_AAPCS)
4532 {
4533 int i;
4534
4535 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4536 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4537 TYPE_MODE (type),
4538 type))
4539 return i;
4540 }
4541 return -1;
4542 }
4543
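/* Return the RTX describing where a value of mode MODE and type TYPE is
   returned by a function of type FNTYPE under the AAPCS.  */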
4544 static rtx
4545 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4546 const_tree fntype)
4547 {
4548 /* We aren't passed a decl, so we can't check that a call is local.
4549 However, it isn't clear that that would be a win anyway, since it
4550 might limit some tail-calling opportunities. */
4551 enum arm_pcs pcs_variant;
4552 int unsignedp ATTRIBUTE_UNUSED;
4553
4554 if (fntype)
4555 {
4556 const_tree fndecl = NULL_TREE;
4557
4558 if (TREE_CODE (fntype) == FUNCTION_DECL)
4559 {
4560 fndecl = fntype;
4561 fntype = TREE_TYPE (fntype);
4562 }
4563
4564 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4565 }
4566 else
4567 pcs_variant = arm_pcs_default;
4568
4569 /* Promote integer types. */
4570 if (type && INTEGRAL_TYPE_P (type))
4571 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4572
4573 if (pcs_variant != ARM_PCS_AAPCS)
4574 {
4575 int i;
4576
4577 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4578 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4579 type))
4580 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4581 mode, type);
4582 }
4583
4584 /* Promote small structs returned in a register to full-word size
4585 for big-endian AAPCS.  */
4586 if (type && arm_return_in_msb (type))
4587 {
4588 HOST_WIDE_INT size = int_size_in_bytes (type);
4589 if (size % UNITS_PER_WORD != 0)
4590 {
4591 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4592 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4593 }
4594 }
4595
4596 return gen_rtx_REG (mode, R0_REGNUM);
4597 }
4598
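/* Return the register in which a libcall returns a value of mode MODE
   under the AAPCS.  */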
4599 static rtx
4600 aapcs_libcall_value (enum machine_mode mode)
4601 {
4602 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4603 && GET_MODE_SIZE (mode) <= 4)
4604 mode = SImode;
4605
4606 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4607 }
4608
4609 /* Lay out a function argument using the AAPCS rules. The rule
4610 numbers referred to here are those in the AAPCS. */
4611 static void
4612 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4613 const_tree type, bool named)
4614 {
4615 int nregs, nregs2;
4616 int ncrn;
4617
4618 /* We only need to do this once per argument. */
4619 if (pcum->aapcs_arg_processed)
4620 return;
4621
4622 pcum->aapcs_arg_processed = true;
4623
4624 /* Special case: if named is false then we are handling an incoming
4625 anonymous argument which is on the stack. */
4626 if (!named)
4627 return;
4628
4629 /* Is this a potential co-processor register candidate? */
4630 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4631 {
4632 int slot = aapcs_select_call_coproc (pcum, mode, type);
4633 pcum->aapcs_cprc_slot = slot;
4634
4635 /* We don't have to apply any of the rules from part B of the
4636 preparation phase, these are handled elsewhere in the
4637 compiler. */
4638
4639 if (slot >= 0)
4640 {
4641 /* A Co-processor register candidate goes either in its own
4642 class of registers or on the stack. */
4643 if (!pcum->aapcs_cprc_failed[slot])
4644 {
4645 /* C1.cp - Try to allocate the argument to co-processor
4646 registers. */
4647 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4648 return;
4649
4650 /* C2.cp - Put the argument on the stack and note that we
4651 can't assign any more candidates in this slot. We also
4652 need to note that we have allocated stack space, so that
4653 we won't later try to split a non-cprc candidate between
4654 core registers and the stack. */
4655 pcum->aapcs_cprc_failed[slot] = true;
4656 pcum->can_split = false;
4657 }
4658
4659 /* We didn't get a register, so this argument goes on the
4660 stack. */
4661 gcc_assert (pcum->can_split == false);
4662 return;
4663 }
4664 }
4665
4666 /* C3 - For double-word aligned arguments, round the NCRN up to the
4667 next even number. */
4668 ncrn = pcum->aapcs_ncrn;
4669 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4670 ncrn++;
4671
4672 nregs = ARM_NUM_REGS2(mode, type);
4673
4674 /* Sigh, this test should really assert that nregs > 0, but a GCC
4675 extension allows empty structs and then gives them empty size; it
4676 then allows such a structure to be passed by value. For some of
4677 the code below we have to pretend that such an argument has
4678 non-zero size so that we 'locate' it correctly either in
4679 registers or on the stack. */
4680 gcc_assert (nregs >= 0);
4681
4682 nregs2 = nregs ? nregs : 1;
4683
4684 /* C4 - Argument fits entirely in core registers. */
4685 if (ncrn + nregs2 <= NUM_ARG_REGS)
4686 {
4687 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4688 pcum->aapcs_next_ncrn = ncrn + nregs;
4689 return;
4690 }
4691
4692 /* C5 - Some core registers left and there are no arguments already
4693 on the stack: split this argument between the remaining core
4694 registers and the stack. */
4695 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4696 {
4697 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4698 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4699 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4700 return;
4701 }
4702
4703 /* C6 - NCRN is set to 4. */
4704 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4705
4706 /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
4707 return;
4708 }
4709
4710 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4711 for a call to a function whose data type is FNTYPE.
4712 For a library call, FNTYPE is NULL. */
4713 void
4714 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4715 rtx libname,
4716 tree fndecl ATTRIBUTE_UNUSED)
4717 {
4718 /* Determine the calling convention in effect for this call.  */
4719 if (fntype)
4720 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4721 else
4722 pcum->pcs_variant = arm_pcs_default;
4723
4724 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4725 {
4726 if (arm_libcall_uses_aapcs_base (libname))
4727 pcum->pcs_variant = ARM_PCS_AAPCS;
4728
4729 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4730 pcum->aapcs_reg = NULL_RTX;
4731 pcum->aapcs_partial = 0;
4732 pcum->aapcs_arg_processed = false;
4733 pcum->aapcs_cprc_slot = -1;
4734 pcum->can_split = true;
4735
4736 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4737 {
4738 int i;
4739
4740 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4741 {
4742 pcum->aapcs_cprc_failed[i] = false;
4743 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4744 }
4745 }
4746 return;
4747 }
4748
4749 /* Legacy ABIs */
4750
4751 /* On the ARM, the offset starts at 0. */
4752 pcum->nregs = 0;
4753 pcum->iwmmxt_nregs = 0;
4754 pcum->can_split = true;
4755
4756 /* Varargs vectors are treated the same as long long.
4757 named_count avoids having to change the way arm handles 'named' */
4758 pcum->named_count = 0;
4759 pcum->nargs = 0;
4760
4761 if (TARGET_REALLY_IWMMXT && fntype)
4762 {
4763 tree fn_arg;
4764
4765 for (fn_arg = TYPE_ARG_TYPES (fntype);
4766 fn_arg;
4767 fn_arg = TREE_CHAIN (fn_arg))
4768 pcum->named_count += 1;
4769
4770 if (! pcum->named_count)
4771 pcum->named_count = INT_MAX;
4772 }
4773 }
4774
4775
4776 /* Return true if mode/type need doubleword alignment. */
4777 static bool
4778 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4779 {
4780 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4781 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4782 }
4783
4784
4785 /* Determine where to put an argument to a function.
4786 Value is zero to push the argument on the stack,
4787 or a hard register in which to store the argument.
4788
4789 MODE is the argument's machine mode.
4790 TYPE is the data type of the argument (as a tree).
4791 This is null for libcalls where that information may
4792 not be available.
4793 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4794 the preceding args and about the function being called.
4795 NAMED is nonzero if this argument is a named parameter
4796 (otherwise it is an extra parameter matching an ellipsis).
4797
4798 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4799 other arguments are passed on the stack. If (NAMED == 0) (which happens
4800 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4801 defined), say it is passed in the stack (function_prologue will
4802 indeed make it pass in the stack if necessary). */
4803
4804 static rtx
4805 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4806 const_tree type, bool named)
4807 {
4808 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4809 int nregs;
4810
4811 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4812 a call insn (op3 of a call_value insn). */
4813 if (mode == VOIDmode)
4814 return const0_rtx;
4815
4816 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4817 {
4818 aapcs_layout_arg (pcum, mode, type, named);
4819 return pcum->aapcs_reg;
4820 }
4821
4822 /* Varargs vectors are treated the same as long long.
4823 named_count avoids having to change the way arm handles 'named' */
4824 if (TARGET_IWMMXT_ABI
4825 && arm_vector_mode_supported_p (mode)
4826 && pcum->named_count > pcum->nargs + 1)
4827 {
4828 if (pcum->iwmmxt_nregs <= 9)
4829 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4830 else
4831 {
4832 pcum->can_split = false;
4833 return NULL_RTX;
4834 }
4835 }
4836
4837 /* Put doubleword aligned quantities in even register pairs. */
4838 if (pcum->nregs & 1
4839 && ARM_DOUBLEWORD_ALIGN
4840 && arm_needs_doubleword_align (mode, type))
4841 pcum->nregs++;
4842
4843 /* Only allow splitting an arg between regs and memory if all preceding
4844 args were allocated to regs. For args passed by reference we only count
4845 the reference pointer. */
4846 if (pcum->can_split)
4847 nregs = 1;
4848 else
4849 nregs = ARM_NUM_REGS2 (mode, type);
4850
4851 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4852 return NULL_RTX;
4853
4854 return gen_rtx_REG (mode, pcum->nregs);
4855 }
4856
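/* Return the alignment, in bits, required when passing an argument of
   mode MODE and type TYPE.  */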
4857 static unsigned int
4858 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4859 {
4860 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4861 ? DOUBLEWORD_ALIGNMENT
4862 : PARM_BOUNDARY);
4863 }
4864
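/* Return the number of bytes at the start of an argument of mode MODE
   and type TYPE that are passed in registers when the argument is split
   between registers and the stack; return zero if it is not split.  */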
4865 static int
4866 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4867 tree type, bool named)
4868 {
4869 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4870 int nregs = pcum->nregs;
4871
4872 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4873 {
4874 aapcs_layout_arg (pcum, mode, type, named);
4875 return pcum->aapcs_partial;
4876 }
4877
4878 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4879 return 0;
4880
4881 if (NUM_ARG_REGS > nregs
4882 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4883 && pcum->can_split)
4884 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4885
4886 return 0;
4887 }
4888
4889 /* Update the data in PCUM to advance over an argument
4890 of mode MODE and data type TYPE.
4891 (TYPE is null for libcalls where that information may not be available.) */
4892
4893 static void
4894 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4895 const_tree type, bool named)
4896 {
4897 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4898
4899 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4900 {
4901 aapcs_layout_arg (pcum, mode, type, named);
4902
4903 if (pcum->aapcs_cprc_slot >= 0)
4904 {
4905 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4906 type);
4907 pcum->aapcs_cprc_slot = -1;
4908 }
4909
4910 /* Generic stuff. */
4911 pcum->aapcs_arg_processed = false;
4912 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4913 pcum->aapcs_reg = NULL_RTX;
4914 pcum->aapcs_partial = 0;
4915 }
4916 else
4917 {
4918 pcum->nargs += 1;
4919 if (arm_vector_mode_supported_p (mode)
4920 && pcum->named_count > pcum->nargs
4921 && TARGET_IWMMXT_ABI)
4922 pcum->iwmmxt_nregs += 1;
4923 else
4924 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4925 }
4926 }
4927
4928 /* Variable sized types are passed by reference. This is a GCC
4929 extension to the ARM ABI. */
4930
4931 static bool
4932 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4933 enum machine_mode mode ATTRIBUTE_UNUSED,
4934 const_tree type, bool named ATTRIBUTE_UNUSED)
4935 {
4936 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4937 }
4938 \f
4939 /* Encode the current state of the #pragma [no_]long_calls. */
4940 typedef enum
4941 {
4942 OFF, /* No #pragma [no_]long_calls is in effect. */
4943 LONG, /* #pragma long_calls is in effect. */
4944 SHORT /* #pragma no_long_calls is in effect. */
4945 } arm_pragma_enum;
4946
4947 static arm_pragma_enum arm_pragma_long_calls = OFF;
4948
4949 void
4950 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4951 {
4952 arm_pragma_long_calls = LONG;
4953 }
4954
4955 void
4956 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4957 {
4958 arm_pragma_long_calls = SHORT;
4959 }
4960
4961 void
4962 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4963 {
4964 arm_pragma_long_calls = OFF;
4965 }
4966 \f
4967 /* Handle an attribute requiring a FUNCTION_DECL;
4968 arguments as in struct attribute_spec.handler. */
4969 static tree
4970 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4971 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4972 {
4973 if (TREE_CODE (*node) != FUNCTION_DECL)
4974 {
4975 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4976 name);
4977 *no_add_attrs = true;
4978 }
4979
4980 return NULL_TREE;
4981 }
4982
4983 /* Handle an "interrupt" or "isr" attribute;
4984 arguments as in struct attribute_spec.handler. */
4985 static tree
4986 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
4987 bool *no_add_attrs)
4988 {
4989 if (DECL_P (*node))
4990 {
4991 if (TREE_CODE (*node) != FUNCTION_DECL)
4992 {
4993 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4994 name);
4995 *no_add_attrs = true;
4996 }
4997 /* FIXME: the argument if any is checked for type attributes;
4998 should it be checked for decl ones? */
4999 }
5000 else
5001 {
5002 if (TREE_CODE (*node) == FUNCTION_TYPE
5003 || TREE_CODE (*node) == METHOD_TYPE)
5004 {
5005 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5006 {
5007 warning (OPT_Wattributes, "%qE attribute ignored",
5008 name);
5009 *no_add_attrs = true;
5010 }
5011 }
5012 else if (TREE_CODE (*node) == POINTER_TYPE
5013 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5014 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5015 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5016 {
5017 *node = build_variant_type_copy (*node);
5018 TREE_TYPE (*node) = build_type_attribute_variant
5019 (TREE_TYPE (*node),
5020 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5021 *no_add_attrs = true;
5022 }
5023 else
5024 {
5025 /* Possibly pass this attribute on from the type to a decl. */
5026 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5027 | (int) ATTR_FLAG_FUNCTION_NEXT
5028 | (int) ATTR_FLAG_ARRAY_NEXT))
5029 {
5030 *no_add_attrs = true;
5031 return tree_cons (name, args, NULL_TREE);
5032 }
5033 else
5034 {
5035 warning (OPT_Wattributes, "%qE attribute ignored",
5036 name);
5037 }
5038 }
5039 }
5040
5041 return NULL_TREE;
5042 }
5043
5044 /* Handle a "pcs" attribute; arguments as in struct
5045 attribute_spec.handler. */
5046 static tree
5047 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5048 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5049 {
5050 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5051 {
5052 warning (OPT_Wattributes, "%qE attribute ignored", name);
5053 *no_add_attrs = true;
5054 }
5055 return NULL_TREE;
5056 }
5057
5058 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5059 /* Handle the "notshared" attribute. This attribute is another way of
5060 requesting hidden visibility. ARM's compiler supports
5061 "__declspec(notshared)"; we support the same thing via an
5062 attribute. */
5063
5064 static tree
5065 arm_handle_notshared_attribute (tree *node,
5066 tree name ATTRIBUTE_UNUSED,
5067 tree args ATTRIBUTE_UNUSED,
5068 int flags ATTRIBUTE_UNUSED,
5069 bool *no_add_attrs)
5070 {
5071 tree decl = TYPE_NAME (*node);
5072
5073 if (decl)
5074 {
5075 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5076 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5077 *no_add_attrs = false;
5078 }
5079 return NULL_TREE;
5080 }
5081 #endif
5082
5083 /* Return 0 if the attributes for two types are incompatible, 1 if they
5084 are compatible, and 2 if they are nearly compatible (which causes a
5085 warning to be generated). */
5086 static int
5087 arm_comp_type_attributes (const_tree type1, const_tree type2)
5088 {
5089 int l1, l2, s1, s2;
5090
5091 /* Check for mismatch of non-default calling convention. */
5092 if (TREE_CODE (type1) != FUNCTION_TYPE)
5093 return 1;
5094
5095 /* Check for mismatched call attributes. */
5096 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5097 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5098 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5099 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5100
5101 /* Only bother to check if an attribute is defined. */
5102 if (l1 | l2 | s1 | s2)
5103 {
5104 /* If one type has an attribute, the other must have the same attribute. */
5105 if ((l1 != l2) || (s1 != s2))
5106 return 0;
5107
5108 /* Disallow mixed attributes. */
5109 if ((l1 & s2) || (l2 & s1))
5110 return 0;
5111 }
5112
5113 /* Check for mismatched ISR attribute. */
5114 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5115 if (! l1)
5116 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5117 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5118 if (! l2)
5119 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5120 if (l1 != l2)
5121 return 0;
5122
5123 return 1;
5124 }
5125
5126 /* Assigns default attributes to newly defined type. This is used to
5127 set short_call/long_call attributes for function types of
5128 functions defined inside corresponding #pragma scopes. */
5129 static void
5130 arm_set_default_type_attributes (tree type)
5131 {
5132 /* Add __attribute__ ((long_call)) to all functions, when
5133 inside #pragma long_calls or __attribute__ ((short_call)),
5134 when inside #pragma no_long_calls. */
5135 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5136 {
5137 tree type_attr_list, attr_name;
5138 type_attr_list = TYPE_ATTRIBUTES (type);
5139
5140 if (arm_pragma_long_calls == LONG)
5141 attr_name = get_identifier ("long_call");
5142 else if (arm_pragma_long_calls == SHORT)
5143 attr_name = get_identifier ("short_call");
5144 else
5145 return;
5146
5147 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5148 TYPE_ATTRIBUTES (type) = type_attr_list;
5149 }
5150 }
5151 \f
5152 /* Return true if DECL is known to be linked into section SECTION. */
5153
5154 static bool
5155 arm_function_in_section_p (tree decl, section *section)
5156 {
5157 /* We can only be certain about functions defined in the same
5158 compilation unit. */
5159 if (!TREE_STATIC (decl))
5160 return false;
5161
5162 /* Make sure that SYMBOL always binds to the definition in this
5163 compilation unit. */
5164 if (!targetm.binds_local_p (decl))
5165 return false;
5166
5167 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5168 if (!DECL_SECTION_NAME (decl))
5169 {
5170 /* Make sure that we will not create a unique section for DECL. */
5171 if (flag_function_sections || DECL_ONE_ONLY (decl))
5172 return false;
5173 }
5174
5175 return function_section (decl) == section;
5176 }
5177
5178 /* Return nonzero if a 32-bit "long_call" should be generated for
5179 a call from the current function to DECL. We generate a long_call
5180 if the function:
5181
5182 a. has an __attribute__ ((long_call))
5183 or b. is within the scope of a #pragma long_calls
5184 or c. the -mlong-calls command line switch has been specified
5185
5186 However we do not generate a long call if the function:
5187
5188 d. has an __attribute__ ((short_call))
5189 or e. is inside the scope of a #pragma no_long_calls
5190 or f. is defined in the same section as the current function. */
5191
5192 bool
5193 arm_is_long_call_p (tree decl)
5194 {
5195 tree attrs;
5196
5197 if (!decl)
5198 return TARGET_LONG_CALLS;
5199
5200 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5201 if (lookup_attribute ("short_call", attrs))
5202 return false;
5203
5204 /* For "f", be conservative, and only cater for cases in which the
5205 whole of the current function is placed in the same section. */
5206 if (!flag_reorder_blocks_and_partition
5207 && TREE_CODE (decl) == FUNCTION_DECL
5208 && arm_function_in_section_p (decl, current_function_section ()))
5209 return false;
5210
5211 if (lookup_attribute ("long_call", attrs))
5212 return true;
5213
5214 return TARGET_LONG_CALLS;
5215 }
5216
5217 /* Return nonzero if it is ok to make a tail-call to DECL. */
5218 static bool
5219 arm_function_ok_for_sibcall (tree decl, tree exp)
5220 {
5221 unsigned long func_type;
5222
5223 if (cfun->machine->sibcall_blocked)
5224 return false;
5225
5226 /* Never tailcall something for which we have no decl, or if we
5227 are generating code for Thumb-1. */
5228 if (decl == NULL || TARGET_THUMB1)
5229 return false;
5230
5231 /* The PIC register is live on entry to VxWorks PLT entries, so we
5232 must make the call before restoring the PIC register. */
5233 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5234 return false;
5235
5236 /* Cannot tail-call to long calls, since these are out of range of
5237 a branch instruction. */
5238 if (arm_is_long_call_p (decl))
5239 return false;
5240
5241 /* If we are interworking and the function is not declared static
5242 then we can't tail-call it unless we know that it exists in this
5243 compilation unit (since it might be a Thumb routine). */
5244 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5245 return false;
5246
5247 func_type = arm_current_func_type ();
5248 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5249 if (IS_INTERRUPT (func_type))
5250 return false;
5251
5252 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5253 {
5254 /* Check that the return value locations are the same. For
5255 example that we aren't returning a value from the sibling in
5256 a VFP register but then need to transfer it to a core
5257 register. */
5258 rtx a, b;
5259
5260 a = arm_function_value (TREE_TYPE (exp), decl, false);
5261 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5262 cfun->decl, false);
5263 if (!rtx_equal_p (a, b))
5264 return false;
5265 }
5266
5267 /* Never tailcall if function may be called with a misaligned SP. */
5268 if (IS_STACKALIGN (func_type))
5269 return false;
5270
5271 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5272 references should become a NOP. Don't convert such calls into
5273 sibling calls. */
5274 if (TARGET_AAPCS_BASED
5275 && arm_abi == ARM_ABI_AAPCS
5276 && DECL_WEAK (decl))
5277 return false;
5278
5279 /* Everything else is ok. */
5280 return true;
5281 }
5282
5283 \f
5284 /* Addressing mode support functions. */
5285
5286 /* Return nonzero if X is a legitimate immediate operand when compiling
5287 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5288 int
5289 legitimate_pic_operand_p (rtx x)
5290 {
5291 if (GET_CODE (x) == SYMBOL_REF
5292 || (GET_CODE (x) == CONST
5293 && GET_CODE (XEXP (x, 0)) == PLUS
5294 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5295 return 0;
5296
5297 return 1;
5298 }
5299
5300 /* Record that the current function needs a PIC register. Initialize
5301 cfun->machine->pic_reg if we have not already done so. */
5302
5303 static void
5304 require_pic_register (void)
5305 {
5306 /* A lot of the logic here is made obscure by the fact that this
5307 routine gets called as part of the rtx cost estimation process.
5308 We don't want those calls to affect any assumptions about the real
5309 function; and further, we can't call entry_of_function() until we
5310 start the real expansion process. */
5311 if (!crtl->uses_pic_offset_table)
5312 {
5313 gcc_assert (can_create_pseudo_p ());
5314 if (arm_pic_register != INVALID_REGNUM)
5315 {
5316 if (!cfun->machine->pic_reg)
5317 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5318
5319 /* Play games to avoid marking the function as needing pic
5320 if we are being called as part of the cost-estimation
5321 process. */
5322 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5323 crtl->uses_pic_offset_table = 1;
5324 }
5325 else
5326 {
5327 rtx seq, insn;
5328
5329 if (!cfun->machine->pic_reg)
5330 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5331
5332 /* Play games to avoid marking the function as needing pic
5333 if we are being called as part of the cost-estimation
5334 process. */
5335 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5336 {
5337 crtl->uses_pic_offset_table = 1;
5338 start_sequence ();
5339
5340 arm_load_pic_register (0UL);
5341
5342 seq = get_insns ();
5343 end_sequence ();
5344
5345 for (insn = seq; insn; insn = NEXT_INSN (insn))
5346 if (INSN_P (insn))
5347 INSN_LOCATOR (insn) = prologue_locator;
5348
5349 /* We can be called during expansion of PHI nodes, where
5350 we can't yet emit instructions directly in the final
5351 insn stream. Queue the insns on the entry edge, they will
5352 be committed after everything else is expanded. */
5353 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5354 }
5355 }
5356 }
5357 }
5358
5359 rtx
5360 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5361 {
5362 if (GET_CODE (orig) == SYMBOL_REF
5363 || GET_CODE (orig) == LABEL_REF)
5364 {
5365 rtx insn;
5366
5367 if (reg == 0)
5368 {
5369 gcc_assert (can_create_pseudo_p ());
5370 reg = gen_reg_rtx (Pmode);
5371 }
5372
5373 /* VxWorks does not impose a fixed gap between segments; the run-time
5374 gap can be different from the object-file gap. We therefore can't
5375 use GOTOFF unless we are absolutely sure that the symbol is in the
5376 same segment as the GOT. Unfortunately, the flexibility of linker
5377 scripts means that we can't be sure of that in general, so assume
5378 that GOTOFF is never valid on VxWorks. */
5379 if ((GET_CODE (orig) == LABEL_REF
5380 || (GET_CODE (orig) == SYMBOL_REF &&
5381 SYMBOL_REF_LOCAL_P (orig)))
5382 && NEED_GOT_RELOC
5383 && !TARGET_VXWORKS_RTP)
5384 insn = arm_pic_static_addr (orig, reg);
5385 else
5386 {
5387 rtx pat;
5388 rtx mem;
5389
5390 /* If this function doesn't have a pic register, create one now. */
5391 require_pic_register ();
5392
5393 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5394
5395 /* Make the MEM as close to a constant as possible. */
5396 mem = SET_SRC (pat);
5397 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5398 MEM_READONLY_P (mem) = 1;
5399 MEM_NOTRAP_P (mem) = 1;
5400
5401 insn = emit_insn (pat);
5402 }
5403
5404 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5405         by the loop optimizer.  */
5406 set_unique_reg_note (insn, REG_EQUAL, orig);
5407
5408 return reg;
5409 }
5410 else if (GET_CODE (orig) == CONST)
5411 {
5412 rtx base, offset;
5413
5414 if (GET_CODE (XEXP (orig, 0)) == PLUS
5415 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5416 return orig;
5417
5418 /* Handle the case where we have: const (UNSPEC_TLS). */
5419 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5420 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5421 return orig;
5422
5423 /* Handle the case where we have:
5424 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5425 CONST_INT. */
5426 if (GET_CODE (XEXP (orig, 0)) == PLUS
5427 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5428 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5429 {
5430 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5431 return orig;
5432 }
5433
5434 if (reg == 0)
5435 {
5436 gcc_assert (can_create_pseudo_p ());
5437 reg = gen_reg_rtx (Pmode);
5438 }
5439
5440 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5441
5442 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5443 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5444 base == reg ? 0 : reg);
5445
5446 if (GET_CODE (offset) == CONST_INT)
5447 {
5448          /* The base register doesn't really matter; we only want to
5449 test the index for the appropriate mode. */
5450 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5451 {
5452 gcc_assert (can_create_pseudo_p ());
5453 offset = force_reg (Pmode, offset);
5454 }
5455
5456 if (GET_CODE (offset) == CONST_INT)
5457 return plus_constant (base, INTVAL (offset));
5458 }
5459
5460 if (GET_MODE_SIZE (mode) > 4
5461 && (GET_MODE_CLASS (mode) == MODE_INT
5462 || TARGET_SOFT_FLOAT))
5463 {
5464 emit_insn (gen_addsi3 (reg, base, offset));
5465 return reg;
5466 }
5467
5468 return gen_rtx_PLUS (Pmode, base, offset);
5469 }
5470
5471 return orig;
5472 }
5473
5474
5475 /* Find a spare register to use during the prolog of a function. */
5476
5477 static int
5478 thumb_find_work_register (unsigned long pushed_regs_mask)
5479 {
5480 int reg;
5481
5482 /* Check the argument registers first as these are call-used. The
5483 register allocation order means that sometimes r3 might be used
5484 but earlier argument registers might not, so check them all. */
5485 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5486 if (!df_regs_ever_live_p (reg))
5487 return reg;
5488
5489 /* Before going on to check the call-saved registers we can try a couple
5490 more ways of deducing that r3 is available. The first is when we are
5491      pushing anonymous arguments onto the stack and we have fewer than 4
5492      registers' worth of fixed arguments(*).  In this case r3 will be part of
5493 the variable argument list and so we can be sure that it will be
5494 pushed right at the start of the function. Hence it will be available
5495 for the rest of the prologue.
5496      (*): i.e. crtl->args.pretend_args_size is greater than 0.  */
5497 if (cfun->machine->uses_anonymous_args
5498 && crtl->args.pretend_args_size > 0)
5499 return LAST_ARG_REGNUM;
5500
5501      /* The other case is when we have fixed arguments but fewer than 4 registers'
5502         worth.  In this case r3 might be used in the body of the function, but
5503 it is not being used to convey an argument into the function. In theory
5504 we could just check crtl->args.size to see how many bytes are
5505      being passed in argument registers, but it seems to be unreliable.
5506 Sometimes it will have the value 0 when in fact arguments are being
5507 passed. (See testcase execute/20021111-1.c for an example). So we also
5508 check the args_info.nregs field as well. The problem with this field is
5509 that it makes no allowances for arguments that are passed to the
5510 function but which are not used. Hence we could miss an opportunity
5511 when a function has an unused argument in r3. But it is better to be
5512 safe than to be sorry. */
5513 if (! cfun->machine->uses_anonymous_args
5514 && crtl->args.size >= 0
5515 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5516 && crtl->args.info.nregs < 4)
5517 return LAST_ARG_REGNUM;
5518
5519 /* Otherwise look for a call-saved register that is going to be pushed. */
5520 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5521 if (pushed_regs_mask & (1 << reg))
5522 return reg;
5523
5524 if (TARGET_THUMB2)
5525 {
5526 /* Thumb-2 can use high regs. */
5527 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5528 if (pushed_regs_mask & (1 << reg))
5529 return reg;
5530 }
5531 /* Something went wrong - thumb_compute_save_reg_mask()
5532 should have arranged for a suitable register to be pushed. */
5533 gcc_unreachable ();
5534 }
5535
5536 static GTY(()) int pic_labelno;
5537
5538 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5539 low register. */
5540
5541 void
5542 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5543 {
5544 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5545
5546 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5547 return;
5548
5549 gcc_assert (flag_pic);
5550
5551 pic_reg = cfun->machine->pic_reg;
5552 if (TARGET_VXWORKS_RTP)
5553 {
5554 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5555 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5556 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5557
5558 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5559
5560 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5561 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5562 }
5563 else
5564 {
5565 /* We use an UNSPEC rather than a LABEL_REF because this label
5566 never appears in the code stream. */
5567
5568 labelno = GEN_INT (pic_labelno++);
5569 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5570 l1 = gen_rtx_CONST (VOIDmode, l1);
5571
5572 /* On the ARM the PC register contains 'dot + 8' at the time of the
5573 addition, on the Thumb it is 'dot + 4'. */
5574 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5575 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5576 UNSPEC_GOTSYM_OFF);
5577 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5578
5579 if (TARGET_32BIT)
5580 {
5581 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5582 }
5583 else /* TARGET_THUMB1 */
5584 {
5585 if (arm_pic_register != INVALID_REGNUM
5586 && REGNO (pic_reg) > LAST_LO_REGNUM)
5587 {
5588 /* We will have pushed the pic register, so we should always be
5589 able to find a work register. */
5590 pic_tmp = gen_rtx_REG (SImode,
5591 thumb_find_work_register (saved_regs));
5592 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5593 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5594 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5595 }
5596 else
5597 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5598 }
5599 }
5600
5601 /* Need to emit this whether or not we obey regdecls,
5602 since setjmp/longjmp can cause life info to screw up. */
5603 emit_use (pic_reg);
5604 }
5605
5606 /* Generate code to load the address of a static var when flag_pic is set. */
5607 static rtx
5608 arm_pic_static_addr (rtx orig, rtx reg)
5609 {
5610 rtx l1, labelno, offset_rtx, insn;
5611
5612 gcc_assert (flag_pic);
5613
5614 /* We use an UNSPEC rather than a LABEL_REF because this label
5615 never appears in the code stream. */
5616 labelno = GEN_INT (pic_labelno++);
5617 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5618 l1 = gen_rtx_CONST (VOIDmode, l1);
5619
5620 /* On the ARM the PC register contains 'dot + 8' at the time of the
5621 addition, on the Thumb it is 'dot + 4'. */
5622 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5623 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5624 UNSPEC_SYMBOL_OFFSET);
5625 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5626
5627 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5628 return insn;
5629 }
5630
5631 /* Return nonzero if X is valid as an ARM state addressing register. */
5632 static int
5633 arm_address_register_rtx_p (rtx x, int strict_p)
5634 {
5635 int regno;
5636
5637 if (GET_CODE (x) != REG)
5638 return 0;
5639
5640 regno = REGNO (x);
5641
5642 if (strict_p)
5643 return ARM_REGNO_OK_FOR_BASE_P (regno);
5644
5645 return (regno <= LAST_ARM_REGNUM
5646 || regno >= FIRST_PSEUDO_REGISTER
5647 || regno == FRAME_POINTER_REGNUM
5648 || regno == ARG_POINTER_REGNUM);
5649 }
5650
5651 /* Return TRUE if this rtx is the difference of a symbol and a label,
5652 and will reduce to a PC-relative relocation in the object file.
5653 Expressions like this can be left alone when generating PIC, rather
5654 than forced through the GOT. */
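/* For instance (illustrative RTL only), (minus (symbol_ref "x") (label_ref
   L)) is such a difference: it reduces to a fixed link-time value, so it
   needs no GOT entry even when flag_pic is set.  */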
5655 static int
5656 pcrel_constant_p (rtx x)
5657 {
5658 if (GET_CODE (x) == MINUS)
5659 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5660
5661 return FALSE;
5662 }
5663
5664 /* Return true if X will surely end up in an index register after next
5665 splitting pass. */
5666 static bool
5667 will_be_in_index_register (const_rtx x)
5668 {
5669 /* arm.md: calculate_pic_address will split this into a register. */
5670 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5671 }
5672
5673 /* Return nonzero if X is a valid ARM state address operand. */
5674 int
5675 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5676 int strict_p)
5677 {
5678 bool use_ldrd;
5679 enum rtx_code code = GET_CODE (x);
5680
5681 if (arm_address_register_rtx_p (x, strict_p))
5682 return 1;
5683
5684 use_ldrd = (TARGET_LDRD
5685 && (mode == DImode
5686 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5687
5688 if (code == POST_INC || code == PRE_DEC
5689 || ((code == PRE_INC || code == POST_DEC)
5690 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5691 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5692
5693 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5694 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5695 && GET_CODE (XEXP (x, 1)) == PLUS
5696 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5697 {
5698 rtx addend = XEXP (XEXP (x, 1), 1);
5699
5700          /* Don't allow ldrd post-increment by register because it's hard
5701             to fix up invalid register choices.  */
5702 if (use_ldrd
5703 && GET_CODE (x) == POST_MODIFY
5704 && GET_CODE (addend) == REG)
5705 return 0;
5706
5707 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5708 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5709 }
5710
5711 /* After reload constants split into minipools will have addresses
5712 from a LABEL_REF. */
5713 else if (reload_completed
5714 && (code == LABEL_REF
5715 || (code == CONST
5716 && GET_CODE (XEXP (x, 0)) == PLUS
5717 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5718 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5719 return 1;
5720
5721 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5722 return 0;
5723
5724 else if (code == PLUS)
5725 {
5726 rtx xop0 = XEXP (x, 0);
5727 rtx xop1 = XEXP (x, 1);
5728
5729 return ((arm_address_register_rtx_p (xop0, strict_p)
5730 && ((GET_CODE(xop1) == CONST_INT
5731 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5732 || (!strict_p && will_be_in_index_register (xop1))))
5733 || (arm_address_register_rtx_p (xop1, strict_p)
5734 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5735 }
5736
5737 #if 0
5738 /* Reload currently can't handle MINUS, so disable this for now */
5739 else if (GET_CODE (x) == MINUS)
5740 {
5741 rtx xop0 = XEXP (x, 0);
5742 rtx xop1 = XEXP (x, 1);
5743
5744 return (arm_address_register_rtx_p (xop0, strict_p)
5745 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5746 }
5747 #endif
5748
5749 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5750 && code == SYMBOL_REF
5751 && CONSTANT_POOL_ADDRESS_P (x)
5752 && ! (flag_pic
5753 && symbol_mentioned_p (get_pool_constant (x))
5754 && ! pcrel_constant_p (get_pool_constant (x))))
5755 return 1;
5756
5757 return 0;
5758 }
5759
5760 /* Return nonzero if X is a valid Thumb-2 address operand. */
5761 static int
5762 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5763 {
5764 bool use_ldrd;
5765 enum rtx_code code = GET_CODE (x);
5766
5767 if (arm_address_register_rtx_p (x, strict_p))
5768 return 1;
5769
5770 use_ldrd = (TARGET_LDRD
5771 && (mode == DImode
5772 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5773
5774 if (code == POST_INC || code == PRE_DEC
5775 || ((code == PRE_INC || code == POST_DEC)
5776 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5777 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5778
5779 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5780 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5781 && GET_CODE (XEXP (x, 1)) == PLUS
5782 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5783 {
5784 /* Thumb-2 only has autoincrement by constant. */
5785 rtx addend = XEXP (XEXP (x, 1), 1);
5786 HOST_WIDE_INT offset;
5787
5788 if (GET_CODE (addend) != CONST_INT)
5789 return 0;
5790
5791 offset = INTVAL(addend);
5792 if (GET_MODE_SIZE (mode) <= 4)
5793 return (offset > -256 && offset < 256);
5794
5795 return (use_ldrd && offset > -1024 && offset < 1024
5796 && (offset & 3) == 0);
5797 }
5798
5799 /* After reload constants split into minipools will have addresses
5800 from a LABEL_REF. */
5801 else if (reload_completed
5802 && (code == LABEL_REF
5803 || (code == CONST
5804 && GET_CODE (XEXP (x, 0)) == PLUS
5805 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5806 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5807 return 1;
5808
5809 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5810 return 0;
5811
5812 else if (code == PLUS)
5813 {
5814 rtx xop0 = XEXP (x, 0);
5815 rtx xop1 = XEXP (x, 1);
5816
5817 return ((arm_address_register_rtx_p (xop0, strict_p)
5818 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5819 || (!strict_p && will_be_in_index_register (xop1))))
5820 || (arm_address_register_rtx_p (xop1, strict_p)
5821 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5822 }
5823
5824 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5825 && code == SYMBOL_REF
5826 && CONSTANT_POOL_ADDRESS_P (x)
5827 && ! (flag_pic
5828 && symbol_mentioned_p (get_pool_constant (x))
5829 && ! pcrel_constant_p (get_pool_constant (x))))
5830 return 1;
5831
5832 return 0;
5833 }
5834
5835 /* Return nonzero if INDEX is valid for an address index operand in
5836 ARM state. */
5837 static int
5838 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5839 int strict_p)
5840 {
5841 HOST_WIDE_INT range;
5842 enum rtx_code code = GET_CODE (index);
5843
5844 /* Standard coprocessor addressing modes. */
5845 if (TARGET_HARD_FLOAT
5846 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5847 && (mode == SFmode || mode == DFmode
5848 || (TARGET_MAVERICK && mode == DImode)))
5849 return (code == CONST_INT && INTVAL (index) < 1024
5850 && INTVAL (index) > -1024
5851 && (INTVAL (index) & 3) == 0);
5852
5853 /* For quad modes, we restrict the constant offset to be slightly less
5854 than what the instruction format permits. We do this because for
5855 quad mode moves, we will actually decompose them into two separate
5856 double-mode reads or writes. INDEX must therefore be a valid
5857 (double-mode) offset and so should INDEX+8. */
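  /* Concretely, the double-mode form below accepts word-aligned offsets up
     to 1020, so the largest INDEX for which both INDEX and INDEX+8 are
     valid is 1012, hence the "< 1016" test here.  */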
5858 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5859 return (code == CONST_INT
5860 && INTVAL (index) < 1016
5861 && INTVAL (index) > -1024
5862 && (INTVAL (index) & 3) == 0);
5863
5864 /* We have no such constraint on double mode offsets, so we permit the
5865 full range of the instruction format. */
5866 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5867 return (code == CONST_INT
5868 && INTVAL (index) < 1024
5869 && INTVAL (index) > -1024
5870 && (INTVAL (index) & 3) == 0);
5871
5872 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5873 return (code == CONST_INT
5874 && INTVAL (index) < 1024
5875 && INTVAL (index) > -1024
5876 && (INTVAL (index) & 3) == 0);
5877
5878 if (arm_address_register_rtx_p (index, strict_p)
5879 && (GET_MODE_SIZE (mode) <= 4))
5880 return 1;
5881
5882 if (mode == DImode || mode == DFmode)
5883 {
5884 if (code == CONST_INT)
5885 {
5886 HOST_WIDE_INT val = INTVAL (index);
5887
5888 if (TARGET_LDRD)
5889 return val > -256 && val < 256;
5890 else
5891 return val > -4096 && val < 4092;
5892 }
5893
5894 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5895 }
5896
5897 if (GET_MODE_SIZE (mode) <= 4
5898 && ! (arm_arch4
5899 && (mode == HImode
5900 || mode == HFmode
5901 || (mode == QImode && outer == SIGN_EXTEND))))
5902 {
5903 if (code == MULT)
5904 {
5905 rtx xiop0 = XEXP (index, 0);
5906 rtx xiop1 = XEXP (index, 1);
5907
5908 return ((arm_address_register_rtx_p (xiop0, strict_p)
5909 && power_of_two_operand (xiop1, SImode))
5910 || (arm_address_register_rtx_p (xiop1, strict_p)
5911 && power_of_two_operand (xiop0, SImode)));
5912 }
5913 else if (code == LSHIFTRT || code == ASHIFTRT
5914 || code == ASHIFT || code == ROTATERT)
5915 {
5916 rtx op = XEXP (index, 1);
5917
5918 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5919 && GET_CODE (op) == CONST_INT
5920 && INTVAL (op) > 0
5921 && INTVAL (op) <= 31);
5922 }
5923 }
5924
5925 /* For ARM v4 we may be doing a sign-extend operation during the
5926 load. */
5927 if (arm_arch4)
5928 {
5929 if (mode == HImode
5930 || mode == HFmode
5931 || (outer == SIGN_EXTEND && mode == QImode))
5932 range = 256;
5933 else
5934 range = 4096;
5935 }
5936 else
5937 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5938
5939 return (code == CONST_INT
5940 && INTVAL (index) < range
5941 && INTVAL (index) > -range);
5942 }
5943
5944 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5945     index operand, i.e. 1, 2, 4 or 8.  */
5946 static bool
5947 thumb2_index_mul_operand (rtx op)
5948 {
5949 HOST_WIDE_INT val;
5950
5951 if (GET_CODE(op) != CONST_INT)
5952 return false;
5953
5954 val = INTVAL(op);
5955 return (val == 1 || val == 2 || val == 4 || val == 8);
5956 }
5957
5958 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5959 static int
5960 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5961 {
5962 enum rtx_code code = GET_CODE (index);
5963
5964 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5965 /* Standard coprocessor addressing modes. */
5966 if (TARGET_HARD_FLOAT
5967 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5968 && (mode == SFmode || mode == DFmode
5969 || (TARGET_MAVERICK && mode == DImode)))
5970 return (code == CONST_INT && INTVAL (index) < 1024
5971              /* Thumb-2 allows only > -256 index range for its core register
5972 load/stores. Since we allow SF/DF in core registers, we have
5973 to use the intersection between -256~4096 (core) and -1024~1024
5974 (coprocessor). */
5975 && INTVAL (index) > -256
5976 && (INTVAL (index) & 3) == 0);
5977
5978 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5979 {
5980 /* For DImode assume values will usually live in core regs
5981 and only allow LDRD addressing modes. */
5982 if (!TARGET_LDRD || mode != DImode)
5983 return (code == CONST_INT
5984 && INTVAL (index) < 1024
5985 && INTVAL (index) > -1024
5986 && (INTVAL (index) & 3) == 0);
5987 }
5988
5989 /* For quad modes, we restrict the constant offset to be slightly less
5990 than what the instruction format permits. We do this because for
5991 quad mode moves, we will actually decompose them into two separate
5992 double-mode reads or writes. INDEX must therefore be a valid
5993 (double-mode) offset and so should INDEX+8. */
5994 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5995 return (code == CONST_INT
5996 && INTVAL (index) < 1016
5997 && INTVAL (index) > -1024
5998 && (INTVAL (index) & 3) == 0);
5999
6000 /* We have no such constraint on double mode offsets, so we permit the
6001 full range of the instruction format. */
6002 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6003 return (code == CONST_INT
6004 && INTVAL (index) < 1024
6005 && INTVAL (index) > -1024
6006 && (INTVAL (index) & 3) == 0);
6007
6008 if (arm_address_register_rtx_p (index, strict_p)
6009 && (GET_MODE_SIZE (mode) <= 4))
6010 return 1;
6011
6012 if (mode == DImode || mode == DFmode)
6013 {
6014 if (code == CONST_INT)
6015 {
6016 HOST_WIDE_INT val = INTVAL (index);
6017 /* ??? Can we assume ldrd for thumb2? */
6018 /* Thumb-2 ldrd only has reg+const addressing modes. */
6019 /* ldrd supports offsets of +-1020.
6020 However the ldr fallback does not. */
6021 return val > -256 && val < 256 && (val & 3) == 0;
6022 }
6023 else
6024 return 0;
6025 }
6026
6027 if (code == MULT)
6028 {
6029 rtx xiop0 = XEXP (index, 0);
6030 rtx xiop1 = XEXP (index, 1);
6031
6032 return ((arm_address_register_rtx_p (xiop0, strict_p)
6033 && thumb2_index_mul_operand (xiop1))
6034 || (arm_address_register_rtx_p (xiop1, strict_p)
6035 && thumb2_index_mul_operand (xiop0)));
6036 }
6037 else if (code == ASHIFT)
6038 {
6039 rtx op = XEXP (index, 1);
6040
6041 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6042 && GET_CODE (op) == CONST_INT
6043 && INTVAL (op) > 0
6044 && INTVAL (op) <= 3);
6045 }
6046
6047 return (code == CONST_INT
6048 && INTVAL (index) < 4096
6049 && INTVAL (index) > -256);
6050 }
6051
6052 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6053 static int
6054 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6055 {
6056 int regno;
6057
6058 if (GET_CODE (x) != REG)
6059 return 0;
6060
6061 regno = REGNO (x);
6062
6063 if (strict_p)
6064 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6065
6066 return (regno <= LAST_LO_REGNUM
6067 || regno > LAST_VIRTUAL_REGISTER
6068 || regno == FRAME_POINTER_REGNUM
6069 || (GET_MODE_SIZE (mode) >= 4
6070 && (regno == STACK_POINTER_REGNUM
6071 || regno >= FIRST_PSEUDO_REGISTER
6072 || x == hard_frame_pointer_rtx
6073 || x == arg_pointer_rtx)));
6074 }
6075
6076 /* Return nonzero if x is a legitimate index register. This is the case
6077 for any base register that can access a QImode object. */
6078 inline static int
6079 thumb1_index_register_rtx_p (rtx x, int strict_p)
6080 {
6081 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6082 }
6083
6084 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6085
6086 The AP may be eliminated to either the SP or the FP, so we use the
6087 least common denominator, e.g. SImode, and offsets from 0 to 64.
6088
6089 ??? Verify whether the above is the right approach.
6090
6091 ??? Also, the FP may be eliminated to the SP, so perhaps that
6092 needs special handling also.
6093
6094 ??? Look at how the mips16 port solves this problem. It probably uses
6095 better ways to solve some of these problems.
6096
6097 Although it is not incorrect, we don't accept QImode and HImode
6098 addresses based on the frame pointer or arg pointer until the
6099 reload pass starts. This is so that eliminating such addresses
6100 into stack based ones won't produce impossible code. */
6101 int
6102 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6103 {
6104 /* ??? Not clear if this is right. Experiment. */
6105 if (GET_MODE_SIZE (mode) < 4
6106 && !(reload_in_progress || reload_completed)
6107 && (reg_mentioned_p (frame_pointer_rtx, x)
6108 || reg_mentioned_p (arg_pointer_rtx, x)
6109 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6110 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6111 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6112 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6113 return 0;
6114
6115 /* Accept any base register. SP only in SImode or larger. */
6116 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6117 return 1;
6118
6119 /* This is PC relative data before arm_reorg runs. */
6120 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6121 && GET_CODE (x) == SYMBOL_REF
6122 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6123 return 1;
6124
6125 /* This is PC relative data after arm_reorg runs. */
6126 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6127 && reload_completed
6128 && (GET_CODE (x) == LABEL_REF
6129 || (GET_CODE (x) == CONST
6130 && GET_CODE (XEXP (x, 0)) == PLUS
6131 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6132 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6133 return 1;
6134
6135 /* Post-inc indexing only supported for SImode and larger. */
6136 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6137 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6138 return 1;
6139
6140 else if (GET_CODE (x) == PLUS)
6141 {
6142 /* REG+REG address can be any two index registers. */
6143 /* We disallow FRAME+REG addressing since we know that FRAME
6144 will be replaced with STACK, and SP relative addressing only
6145 permits SP+OFFSET. */
6146 if (GET_MODE_SIZE (mode) <= 4
6147 && XEXP (x, 0) != frame_pointer_rtx
6148 && XEXP (x, 1) != frame_pointer_rtx
6149 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6150 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6151 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6152 return 1;
6153
6154 /* REG+const has 5-7 bit offset for non-SP registers. */
6155 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6156 || XEXP (x, 0) == arg_pointer_rtx)
6157 && GET_CODE (XEXP (x, 1)) == CONST_INT
6158 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6159 return 1;
6160
6161 /* REG+const has 10-bit offset for SP, but only SImode and
6162            larger are supported.  */
6163 /* ??? Should probably check for DI/DFmode overflow here
6164 just like GO_IF_LEGITIMATE_OFFSET does. */
6165 else if (GET_CODE (XEXP (x, 0)) == REG
6166 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6167 && GET_MODE_SIZE (mode) >= 4
6168 && GET_CODE (XEXP (x, 1)) == CONST_INT
6169 && INTVAL (XEXP (x, 1)) >= 0
6170 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6171 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6172 return 1;
6173
6174 else if (GET_CODE (XEXP (x, 0)) == REG
6175 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6176 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6177 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6178 && REGNO (XEXP (x, 0))
6179 <= LAST_VIRTUAL_POINTER_REGISTER))
6180 && GET_MODE_SIZE (mode) >= 4
6181 && GET_CODE (XEXP (x, 1)) == CONST_INT
6182 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6183 return 1;
6184 }
6185
6186 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6187 && GET_MODE_SIZE (mode) == 4
6188 && GET_CODE (x) == SYMBOL_REF
6189 && CONSTANT_POOL_ADDRESS_P (x)
6190 && ! (flag_pic
6191 && symbol_mentioned_p (get_pool_constant (x))
6192 && ! pcrel_constant_p (get_pool_constant (x))))
6193 return 1;
6194
6195 return 0;
6196 }
6197
6198 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6199 instruction of mode MODE. */
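/* For example, byte accesses allow offsets 0..31, halfword accesses even
   offsets 0..62, and word or larger accesses multiples of 4 such that VAL
   plus the access size does not exceed 128 (so at most 124 for SImode).  */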
6200 int
6201 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6202 {
6203 switch (GET_MODE_SIZE (mode))
6204 {
6205 case 1:
6206 return val >= 0 && val < 32;
6207
6208 case 2:
6209 return val >= 0 && val < 64 && (val & 1) == 0;
6210
6211 default:
6212 return (val >= 0
6213 && (val + GET_MODE_SIZE (mode)) <= 128
6214 && (val & 3) == 0);
6215 }
6216 }
6217
6218 bool
6219 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6220 {
6221 if (TARGET_ARM)
6222 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6223 else if (TARGET_THUMB2)
6224 return thumb2_legitimate_address_p (mode, x, strict_p);
6225 else /* if (TARGET_THUMB1) */
6226 return thumb1_legitimate_address_p (mode, x, strict_p);
6227 }
6228
6229 /* Build the SYMBOL_REF for __tls_get_addr. */
6230
6231 static GTY(()) rtx tls_get_addr_libfunc;
6232
6233 static rtx
6234 get_tls_get_addr (void)
6235 {
6236 if (!tls_get_addr_libfunc)
6237 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6238 return tls_get_addr_libfunc;
6239 }
6240
6241 static rtx
6242 arm_load_tp (rtx target)
6243 {
6244 if (!target)
6245 target = gen_reg_rtx (SImode);
6246
6247 if (TARGET_HARD_TP)
6248 {
6249 /* Can return in any reg. */
6250 emit_insn (gen_load_tp_hard (target));
6251 }
6252 else
6253 {
6254 /* Always returned in r0. Immediately copy the result into a pseudo,
6255 otherwise other uses of r0 (e.g. setting up function arguments) may
6256 clobber the value. */
6257
6258 rtx tmp;
6259
6260 emit_insn (gen_load_tp_soft ());
6261
6262 tmp = gen_rtx_REG (SImode, 0);
6263 emit_move_insn (target, tmp);
6264 }
6265 return target;
6266 }
6267
6268 static rtx
6269 load_tls_operand (rtx x, rtx reg)
6270 {
6271 rtx tmp;
6272
6273 if (reg == NULL_RTX)
6274 reg = gen_reg_rtx (SImode);
6275
6276 tmp = gen_rtx_CONST (SImode, x);
6277
6278 emit_move_insn (reg, tmp);
6279
6280 return reg;
6281 }
6282
6283 static rtx
6284 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6285 {
6286 rtx insns, label, labelno, sum;
6287
6288 gcc_assert (reloc != TLS_DESCSEQ);
6289 start_sequence ();
6290
6291 labelno = GEN_INT (pic_labelno++);
6292 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6293 label = gen_rtx_CONST (VOIDmode, label);
6294
6295 sum = gen_rtx_UNSPEC (Pmode,
6296 gen_rtvec (4, x, GEN_INT (reloc), label,
6297 GEN_INT (TARGET_ARM ? 8 : 4)),
6298 UNSPEC_TLS);
6299 reg = load_tls_operand (sum, reg);
6300
6301 if (TARGET_ARM)
6302 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6303 else
6304 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6305
6306 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6307 LCT_PURE, /* LCT_CONST? */
6308 Pmode, 1, reg, Pmode);
6309
6310 insns = get_insns ();
6311 end_sequence ();
6312
6313 return insns;
6314 }
6315
6316 static rtx
6317 arm_tls_descseq_addr (rtx x, rtx reg)
6318 {
6319 rtx labelno = GEN_INT (pic_labelno++);
6320 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6321 rtx sum = gen_rtx_UNSPEC (Pmode,
6322 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6323 gen_rtx_CONST (VOIDmode, label),
6324 GEN_INT (!TARGET_ARM)),
6325 UNSPEC_TLS);
6326 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6327
6328 emit_insn (gen_tlscall (x, labelno));
6329 if (!reg)
6330 reg = gen_reg_rtx (SImode);
6331 else
6332 gcc_assert (REGNO (reg) != 0);
6333
6334 emit_move_insn (reg, reg0);
6335
6336 return reg;
6337 }
6338
6339 rtx
6340 legitimize_tls_address (rtx x, rtx reg)
6341 {
6342 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6343 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6344
6345 switch (model)
6346 {
6347 case TLS_MODEL_GLOBAL_DYNAMIC:
6348 if (TARGET_GNU2_TLS)
6349 {
6350 reg = arm_tls_descseq_addr (x, reg);
6351
6352 tp = arm_load_tp (NULL_RTX);
6353
6354 dest = gen_rtx_PLUS (Pmode, tp, reg);
6355 }
6356 else
6357 {
6358 /* Original scheme */
6359 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6360 dest = gen_reg_rtx (Pmode);
6361 emit_libcall_block (insns, dest, ret, x);
6362 }
6363 return dest;
6364
6365 case TLS_MODEL_LOCAL_DYNAMIC:
6366 if (TARGET_GNU2_TLS)
6367 {
6368 reg = arm_tls_descseq_addr (x, reg);
6369
6370 tp = arm_load_tp (NULL_RTX);
6371
6372 dest = gen_rtx_PLUS (Pmode, tp, reg);
6373 }
6374 else
6375 {
6376 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6377
6378 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6379 share the LDM result with other LD model accesses. */
6380 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6381 UNSPEC_TLS);
6382 dest = gen_reg_rtx (Pmode);
6383 emit_libcall_block (insns, dest, ret, eqv);
6384
6385 /* Load the addend. */
6386 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6387 GEN_INT (TLS_LDO32)),
6388 UNSPEC_TLS);
6389 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6390 dest = gen_rtx_PLUS (Pmode, dest, addend);
6391 }
6392 return dest;
6393
6394 case TLS_MODEL_INITIAL_EXEC:
6395 labelno = GEN_INT (pic_labelno++);
6396 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6397 label = gen_rtx_CONST (VOIDmode, label);
6398 sum = gen_rtx_UNSPEC (Pmode,
6399 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6400 GEN_INT (TARGET_ARM ? 8 : 4)),
6401 UNSPEC_TLS);
6402 reg = load_tls_operand (sum, reg);
6403
6404 if (TARGET_ARM)
6405 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6406 else if (TARGET_THUMB2)
6407 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6408 else
6409 {
6410 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6411 emit_move_insn (reg, gen_const_mem (SImode, reg));
6412 }
6413
6414 tp = arm_load_tp (NULL_RTX);
6415
6416 return gen_rtx_PLUS (Pmode, tp, reg);
6417
6418 case TLS_MODEL_LOCAL_EXEC:
6419 tp = arm_load_tp (NULL_RTX);
6420
6421 reg = gen_rtx_UNSPEC (Pmode,
6422 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6423 UNSPEC_TLS);
6424 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6425
6426 return gen_rtx_PLUS (Pmode, tp, reg);
6427
6428 default:
6429 abort ();
6430 }
6431 }
6432
6433 /* Try machine-dependent ways of modifying an illegitimate address
6434 to be legitimate. If we find one, return the new, valid address. */
6435 rtx
6436 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6437 {
6438 if (!TARGET_ARM)
6439 {
6440 /* TODO: legitimize_address for Thumb2. */
6441 if (TARGET_THUMB2)
6442 return x;
6443 return thumb_legitimize_address (x, orig_x, mode);
6444 }
6445
6446 if (arm_tls_symbol_p (x))
6447 return legitimize_tls_address (x, NULL_RTX);
6448
6449 if (GET_CODE (x) == PLUS)
6450 {
6451 rtx xop0 = XEXP (x, 0);
6452 rtx xop1 = XEXP (x, 1);
6453
6454 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6455 xop0 = force_reg (SImode, xop0);
6456
6457 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6458 xop1 = force_reg (SImode, xop1);
6459
6460 if (ARM_BASE_REGISTER_RTX_P (xop0)
6461 && GET_CODE (xop1) == CONST_INT)
6462 {
6463 HOST_WIDE_INT n, low_n;
6464 rtx base_reg, val;
6465 n = INTVAL (xop1);
6466
6467 /* VFP addressing modes actually allow greater offsets, but for
6468 now we just stick with the lowest common denominator. */
6469 if (mode == DImode
6470 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6471 {
6472 low_n = n & 0x0f;
6473 n &= ~0x0f;
6474 if (low_n > 4)
6475 {
6476 n += 16;
6477 low_n -= 16;
6478 }
6479 }
6480 else
6481 {
6482 low_n = ((mode) == TImode ? 0
6483 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6484 n -= low_n;
6485 }
6486
6487 base_reg = gen_reg_rtx (SImode);
6488 val = force_operand (plus_constant (xop0, n), NULL_RTX);
6489 emit_move_insn (base_reg, val);
6490 x = plus_constant (base_reg, low_n);
6491 }
6492 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6493 x = gen_rtx_PLUS (SImode, xop0, xop1);
6494 }
6495
6496 /* XXX We don't allow MINUS any more -- see comment in
6497 arm_legitimate_address_outer_p (). */
6498 else if (GET_CODE (x) == MINUS)
6499 {
6500 rtx xop0 = XEXP (x, 0);
6501 rtx xop1 = XEXP (x, 1);
6502
6503 if (CONSTANT_P (xop0))
6504 xop0 = force_reg (SImode, xop0);
6505
6506 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6507 xop1 = force_reg (SImode, xop1);
6508
6509 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6510 x = gen_rtx_MINUS (SImode, xop0, xop1);
6511 }
6512
6513 /* Make sure to take full advantage of the pre-indexed addressing mode
6514         with absolute addresses, which often allows the base register to
6515         be factored out across multiple adjacent memory references, and might
6516         even allow the minipool to be avoided entirely.  */
6517 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6518 {
6519 unsigned int bits;
6520 HOST_WIDE_INT mask, base, index;
6521 rtx base_reg;
6522
6523 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6524         use an 8-bit index.  So let's use a 12-bit index for SImode only and
6525 hope that arm_gen_constant will enable ldrb to use more bits. */
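      /* For instance, an SImode reference to the absolute address 0x1234 is
         split below into base 0x1000 plus index 0x234, letting nearby
         absolute references share the 0x1000 base register.  */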
6526 bits = (mode == SImode) ? 12 : 8;
6527 mask = (1 << bits) - 1;
6528 base = INTVAL (x) & ~mask;
6529 index = INTVAL (x) & mask;
6530 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6531 {
6532 /* It'll most probably be more efficient to generate the base
6533 with more bits set and use a negative index instead. */
6534 base |= mask;
6535 index -= mask;
6536 }
6537 base_reg = force_reg (SImode, GEN_INT (base));
6538 x = plus_constant (base_reg, index);
6539 }
6540
6541 if (flag_pic)
6542 {
6543 /* We need to find and carefully transform any SYMBOL and LABEL
6544 references; so go back to the original address expression. */
6545 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6546
6547 if (new_x != orig_x)
6548 x = new_x;
6549 }
6550
6551 return x;
6552 }
6553
6554
6555 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6556 to be legitimate. If we find one, return the new, valid address. */
6557 rtx
6558 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6559 {
6560 if (arm_tls_symbol_p (x))
6561 return legitimize_tls_address (x, NULL_RTX);
6562
6563 if (GET_CODE (x) == PLUS
6564 && GET_CODE (XEXP (x, 1)) == CONST_INT
6565 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6566 || INTVAL (XEXP (x, 1)) < 0))
6567 {
6568 rtx xop0 = XEXP (x, 0);
6569 rtx xop1 = XEXP (x, 1);
6570 HOST_WIDE_INT offset = INTVAL (xop1);
6571
6572 /* Try and fold the offset into a biasing of the base register and
6573 then offsetting that. Don't do this when optimizing for space
6574 since it can cause too many CSEs. */
6575 if (optimize_size && offset >= 0
6576 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6577 {
6578 HOST_WIDE_INT delta;
6579
6580 if (offset >= 256)
6581 delta = offset - (256 - GET_MODE_SIZE (mode));
6582 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6583 delta = 31 * GET_MODE_SIZE (mode);
6584 else
6585 delta = offset & (~31 * GET_MODE_SIZE (mode));
6586
6587 xop0 = force_operand (plus_constant (xop0, offset - delta),
6588 NULL_RTX);
6589 x = plus_constant (xop0, delta);
6590 }
6591 else if (offset < 0 && offset > -256)
6592 /* Small negative offsets are best done with a subtract before the
6593            dereference; forcing these into a register normally takes two
6594 instructions. */
6595 x = force_operand (x, NULL_RTX);
6596 else
6597 {
6598 /* For the remaining cases, force the constant into a register. */
6599 xop1 = force_reg (SImode, xop1);
6600 x = gen_rtx_PLUS (SImode, xop0, xop1);
6601 }
6602 }
6603 else if (GET_CODE (x) == PLUS
6604 && s_register_operand (XEXP (x, 1), SImode)
6605 && !s_register_operand (XEXP (x, 0), SImode))
6606 {
6607 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6608
6609 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6610 }
6611
6612 if (flag_pic)
6613 {
6614 /* We need to find and carefully transform any SYMBOL and LABEL
6615 references; so go back to the original address expression. */
6616 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6617
6618 if (new_x != orig_x)
6619 x = new_x;
6620 }
6621
6622 return x;
6623 }
6624
6625 bool
6626 arm_legitimize_reload_address (rtx *p,
6627 enum machine_mode mode,
6628 int opnum, int type,
6629 int ind_levels ATTRIBUTE_UNUSED)
6630 {
6631 /* We must recognize output that we have already generated ourselves. */
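  /* That is, addresses of the form (plus (plus (reg) (const_int high))
     (const_int low)), as produced by the decomposition further down.  */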
6632 if (GET_CODE (*p) == PLUS
6633 && GET_CODE (XEXP (*p, 0)) == PLUS
6634 && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
6635 && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
6636 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6637 {
6638 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6639 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6640 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6641 return true;
6642 }
6643
6644 if (GET_CODE (*p) == PLUS
6645 && GET_CODE (XEXP (*p, 0)) == REG
6646 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6647 /* If the base register is equivalent to a constant, let the generic
6648 code handle it. Otherwise we will run into problems if a future
6649 reload pass decides to rematerialize the constant. */
6650 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6651 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6652 {
6653 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6654 HOST_WIDE_INT low, high;
6655
6656 /* Detect coprocessor load/stores. */
6657 bool coproc_p = ((TARGET_HARD_FLOAT
6658 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
6659 && (mode == SFmode || mode == DFmode
6660 || (mode == DImode && TARGET_MAVERICK)))
6661 || (TARGET_REALLY_IWMMXT
6662 && VALID_IWMMXT_REG_MODE (mode))
6663 || (TARGET_NEON
6664 && (VALID_NEON_DREG_MODE (mode)
6665 || VALID_NEON_QREG_MODE (mode))));
6666
6667 /* For some conditions, bail out when lower two bits are unaligned. */
6668 if ((val & 0x3) != 0
6669 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6670 && (coproc_p
6671 /* For DI, and DF under soft-float: */
6672 || ((mode == DImode || mode == DFmode)
6673 /* Without ldrd, we use stm/ldm, which does not
6674                 fare well with unaligned bits.  */
6675 && (! TARGET_LDRD
6676 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6677 || TARGET_THUMB2))))
6678 return false;
6679
6680 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6681         where the (reg+high) part gets turned into a reload add insn,
6682 we try to decompose the index into high/low values that can often
6683 also lead to better reload CSE.
6684 For example:
6685 ldr r0, [r2, #4100] // Offset too large
6686 ldr r1, [r2, #4104] // Offset too large
6687
6688 is best reloaded as:
6689 add t1, r2, #4096
6690 ldr r0, [t1, #4]
6691 add t2, r2, #4096
6692 ldr r1, [t2, #8]
6693
6694 which post-reload CSE can simplify in most cases to eliminate the
6695 second add instruction:
6696 add t1, r2, #4096
6697 ldr r0, [t1, #4]
6698 ldr r1, [t1, #8]
6699
6700 The idea here is that we want to split out the bits of the constant
6701         as a mask, rather than by subtracting the maximum offset that the
6702 respective type of load/store used can handle.
6703
6704         Even when the overall offset is positive, we can still make use of a
6705         negative low part; sometimes this yields an immediate
6706 that can be constructed with fewer instructions.
6707 For example:
6708 ldr r0, [r2, #0x3FFFFC]
6709
6710 This is best reloaded as:
6711 add t1, r2, #0x400000
6712 ldr r0, [t1, #-4]
6713
6714 The trick for spotting this for a load insn with N bits of offset
6715      (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6716 negative offset that is going to make bit N and all the bits below
6717 it become zero in the remainder part.
6718
6719 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6720 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6721 used in most cases of ARM load/store instructions. */
6722
6723 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6724 (((VAL) & ((1 << (N)) - 1)) \
6725 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6726 : 0)
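  /* Worked example: SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 10) is -4.  The low
     10 bits are non-zero and bit 10 is set, so the low part goes negative;
     the remaining high part is then 0x400000, matching the
     "ldr r0, [t1, #-4]" case in the comment above.  */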
6727
6728 if (coproc_p)
6729 {
6730 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6731
6732 /* NEON quad-word load/stores are made of two double-word accesses,
6733 so the valid index range is reduced by 8. Treat as 9-bit range if
6734 we go over it. */
6735 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6736 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6737 }
6738 else if (GET_MODE_SIZE (mode) == 8)
6739 {
6740 if (TARGET_LDRD)
6741 low = (TARGET_THUMB2
6742 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6743 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6744 else
6745 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6746 to access doublewords. The supported load/store offsets are
6747 -8, -4, and 4, which we try to produce here. */
6748 low = ((val & 0xf) ^ 0x8) - 0x8;
6749 }
6750 else if (GET_MODE_SIZE (mode) < 8)
6751 {
6752 /* NEON element load/stores do not have an offset. */
6753 if (TARGET_NEON_FP16 && mode == HFmode)
6754 return false;
6755
6756 if (TARGET_THUMB2)
6757 {
6758 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6759 Try the wider 12-bit range first, and re-try if the result
6760 is out of range. */
6761 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6762 if (low < -255)
6763 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6764 }
6765 else
6766 {
6767 if (mode == HImode || mode == HFmode)
6768 {
6769 if (arm_arch4)
6770 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6771 else
6772 {
6773 /* The storehi/movhi_bytes fallbacks can use only
6774 [-4094,+4094] of the full ldrb/strb index range. */
6775 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6776 if (low == 4095 || low == -4095)
6777 return false;
6778 }
6779 }
6780 else
6781 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6782 }
6783 }
6784 else
6785 return false;
6786
6787 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6788 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6789 - (unsigned HOST_WIDE_INT) 0x80000000);
6790 /* Check for overflow or zero */
6791 if (low == 0 || high == 0 || (high + low != val))
6792 return false;
6793
6794 /* Reload the high part into a base reg; leave the low part
6795 in the mem. */
6796 *p = gen_rtx_PLUS (GET_MODE (*p),
6797 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6798 GEN_INT (high)),
6799 GEN_INT (low));
6800 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6801 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6802 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6803 return true;
6804 }
6805
6806 return false;
6807 }
6808
6809 rtx
6810 thumb_legitimize_reload_address (rtx *x_p,
6811 enum machine_mode mode,
6812 int opnum, int type,
6813 int ind_levels ATTRIBUTE_UNUSED)
6814 {
6815 rtx x = *x_p;
6816
6817 if (GET_CODE (x) == PLUS
6818 && GET_MODE_SIZE (mode) < 4
6819 && REG_P (XEXP (x, 0))
6820 && XEXP (x, 0) == stack_pointer_rtx
6821 && GET_CODE (XEXP (x, 1)) == CONST_INT
6822 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6823 {
6824 rtx orig_x = x;
6825
6826 x = copy_rtx (x);
6827 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6828 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6829 return x;
6830 }
6831
6832 /* If both registers are hi-regs, then it's better to reload the
6833 entire expression rather than each register individually. That
6834 only requires one reload register rather than two. */
6835 if (GET_CODE (x) == PLUS
6836 && REG_P (XEXP (x, 0))
6837 && REG_P (XEXP (x, 1))
6838 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6839 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6840 {
6841 rtx orig_x = x;
6842
6843 x = copy_rtx (x);
6844 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6845 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6846 return x;
6847 }
6848
6849 return NULL;
6850 }
6851
6852 /* Test for various thread-local symbols. */
6853
6854 /* Return TRUE if X is a thread-local symbol. */
6855
6856 static bool
6857 arm_tls_symbol_p (rtx x)
6858 {
6859 if (! TARGET_HAVE_TLS)
6860 return false;
6861
6862 if (GET_CODE (x) != SYMBOL_REF)
6863 return false;
6864
6865 return SYMBOL_REF_TLS_MODEL (x) != 0;
6866 }
6867
6868 /* Helper for arm_tls_referenced_p. */
6869
6870 static int
6871 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6872 {
6873 if (GET_CODE (*x) == SYMBOL_REF)
6874 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6875
6876 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6877 TLS offsets, not real symbol references. */
6878 if (GET_CODE (*x) == UNSPEC
6879 && XINT (*x, 1) == UNSPEC_TLS)
6880 return -1;
6881
6882 return 0;
6883 }
6884
6885 /* Return TRUE if X contains any TLS symbol references. */
6886
6887 bool
6888 arm_tls_referenced_p (rtx x)
6889 {
6890 if (! TARGET_HAVE_TLS)
6891 return false;
6892
6893 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6894 }
6895
6896 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6897
6898 On the ARM, allow any integer (invalid ones are removed later by insn
6899 patterns), nice doubles and symbol_refs which refer to the function's
6900 constant pool XXX.
6901
6902      When generating PIC, allow anything.  */
6903
6904 static bool
6905 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6906 {
6907 /* At present, we have no support for Neon structure constants, so forbid
6908 them here. It might be possible to handle simple cases like 0 and -1
6909         in the future.  */
6910 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6911 return false;
6912
6913 return flag_pic || !label_mentioned_p (x);
6914 }
6915
6916 static bool
6917 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6918 {
6919 return (GET_CODE (x) == CONST_INT
6920 || GET_CODE (x) == CONST_DOUBLE
6921 || CONSTANT_ADDRESS_P (x)
6922 || flag_pic);
6923 }
6924
6925 static bool
6926 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6927 {
6928 return (!arm_cannot_force_const_mem (mode, x)
6929 && (TARGET_32BIT
6930 ? arm_legitimate_constant_p_1 (mode, x)
6931 : thumb_legitimate_constant_p (mode, x)));
6932 }
6933
6934 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6935
6936 static bool
6937 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6938 {
6939 rtx base, offset;
6940
6941 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6942 {
6943 split_const (x, &base, &offset);
6944 if (GET_CODE (base) == SYMBOL_REF
6945 && !offset_within_block_p (base, INTVAL (offset)))
6946 return true;
6947 }
6948 return arm_tls_referenced_p (x);
6949 }
6950 \f
6951 #define REG_OR_SUBREG_REG(X) \
6952 (GET_CODE (X) == REG \
6953 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6954
6955 #define REG_OR_SUBREG_RTX(X) \
6956 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6957
6958 static inline int
6959 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6960 {
6961 enum machine_mode mode = GET_MODE (x);
6962 int total;
6963
6964 switch (code)
6965 {
6966 case ASHIFT:
6967 case ASHIFTRT:
6968 case LSHIFTRT:
6969 case ROTATERT:
6970 case PLUS:
6971 case MINUS:
6972 case COMPARE:
6973 case NEG:
6974 case NOT:
6975 return COSTS_N_INSNS (1);
6976
6977 case MULT:
6978 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6979 {
6980 int cycles = 0;
6981 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
6982
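              /* Rough cost model: charge one extra cycle for every two bits
                 of the constant multiplier.  */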
6983 while (i)
6984 {
6985 i >>= 2;
6986 cycles++;
6987 }
6988 return COSTS_N_INSNS (2) + cycles;
6989 }
6990 return COSTS_N_INSNS (1) + 16;
6991
6992 case SET:
6993 return (COSTS_N_INSNS (1)
6994 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6995                         + (GET_CODE (SET_DEST (x)) == MEM)));
6996
6997 case CONST_INT:
6998 if (outer == SET)
6999 {
7000 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7001 return 0;
7002 if (thumb_shiftable_const (INTVAL (x)))
7003 return COSTS_N_INSNS (2);
7004 return COSTS_N_INSNS (3);
7005 }
7006 else if ((outer == PLUS || outer == COMPARE)
7007 && INTVAL (x) < 256 && INTVAL (x) > -256)
7008 return 0;
7009 else if ((outer == IOR || outer == XOR || outer == AND)
7010 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7011 return COSTS_N_INSNS (1);
7012 else if (outer == AND)
7013 {
7014 int i;
7015 /* This duplicates the tests in the andsi3 expander. */
7016 for (i = 9; i <= 31; i++)
7017 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7018 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7019 return COSTS_N_INSNS (2);
7020 }
7021 else if (outer == ASHIFT || outer == ASHIFTRT
7022 || outer == LSHIFTRT)
7023 return 0;
7024 return COSTS_N_INSNS (2);
7025
7026 case CONST:
7027 case CONST_DOUBLE:
7028 case LABEL_REF:
7029 case SYMBOL_REF:
7030 return COSTS_N_INSNS (3);
7031
7032 case UDIV:
7033 case UMOD:
7034 case DIV:
7035 case MOD:
7036 return 100;
7037
7038 case TRUNCATE:
7039 return 99;
7040
7041 case AND:
7042 case XOR:
7043 case IOR:
7044 /* XXX guess. */
7045 return 8;
7046
7047 case MEM:
7048 /* XXX another guess. */
7049 /* Memory costs quite a lot for the first word, but subsequent words
7050 load at the equivalent of a single insn each. */
7051 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7052 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7053 ? 4 : 0));
7054
7055 case IF_THEN_ELSE:
7056 /* XXX a guess. */
7057 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7058 return 14;
7059 return 2;
7060
7061 case SIGN_EXTEND:
7062 case ZERO_EXTEND:
7063 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7064 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7065
7066 if (mode == SImode)
7067 return total;
7068
7069 if (arm_arch6)
7070 return total + COSTS_N_INSNS (1);
7071
7072 /* Assume a two-shift sequence. Increase the cost slightly so
7073 we prefer actual shifts over an extend operation. */
7074 return total + 1 + COSTS_N_INSNS (2);
7075
7076 default:
7077 return 99;
7078 }
7079 }
7080
7081 static inline bool
7082 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7083 {
7084 enum machine_mode mode = GET_MODE (x);
7085 enum rtx_code subcode;
7086 rtx operand;
7087 enum rtx_code code = GET_CODE (x);
7088 *total = 0;
7089
7090 switch (code)
7091 {
7092 case MEM:
7093 /* Memory costs quite a lot for the first word, but subsequent words
7094 load at the equivalent of a single insn each. */
7095 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7096 return true;
7097
7098 case DIV:
7099 case MOD:
7100 case UDIV:
7101 case UMOD:
7102 if (TARGET_HARD_FLOAT && mode == SFmode)
7103 *total = COSTS_N_INSNS (2);
7104 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7105 *total = COSTS_N_INSNS (4);
7106 else
7107 *total = COSTS_N_INSNS (20);
7108 return false;
7109
7110 case ROTATE:
7111 if (GET_CODE (XEXP (x, 1)) == REG)
7112 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7113 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7114 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7115
7116 /* Fall through */
7117 case ROTATERT:
7118 if (mode != SImode)
7119 {
7120 *total += COSTS_N_INSNS (4);
7121 return true;
7122 }
7123
7124 /* Fall through */
7125 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7126 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7127 if (mode == DImode)
7128 {
7129 *total += COSTS_N_INSNS (3);
7130 return true;
7131 }
7132
7133 *total += COSTS_N_INSNS (1);
7134 /* Increase the cost of complex shifts because they aren't any faster,
7135 and reduce dual issue opportunities. */
7136 if (arm_tune_cortex_a9
7137 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
7138 ++*total;
7139
7140 return true;
7141
7142 case MINUS:
7143 if (mode == DImode)
7144 {
7145 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7146 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7147 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7148 {
7149 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7150 return true;
7151 }
7152
7153 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7154 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7155 {
7156 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7157 return true;
7158 }
7159
7160 return false;
7161 }
7162
7163 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7164 {
7165 if (TARGET_HARD_FLOAT
7166 && (mode == SFmode
7167 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7168 {
7169 *total = COSTS_N_INSNS (1);
7170 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7171 && arm_const_double_rtx (XEXP (x, 0)))
7172 {
7173 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7174 return true;
7175 }
7176
7177 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7178 && arm_const_double_rtx (XEXP (x, 1)))
7179 {
7180 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7181 return true;
7182 }
7183
7184 return false;
7185 }
7186 *total = COSTS_N_INSNS (20);
7187 return false;
7188 }
7189
7190 *total = COSTS_N_INSNS (1);
7191 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7192 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7193 {
7194 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7195 return true;
7196 }
7197
7198 subcode = GET_CODE (XEXP (x, 1));
7199 if (subcode == ASHIFT || subcode == ASHIFTRT
7200 || subcode == LSHIFTRT
7201 || subcode == ROTATE || subcode == ROTATERT)
7202 {
7203 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7204 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7205 return true;
7206 }
7207
7208 /* A shift as a part of RSB costs no more than RSB itself. */
7209 if (GET_CODE (XEXP (x, 0)) == MULT
7210 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7211 {
7212 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7213 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7214 return true;
7215 }
7216
7217 if (subcode == MULT
7218 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7219 {
7220 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7221 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7222 return true;
7223 }
7224
7225 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7226 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7227 {
7228 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7229 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7230 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7231 *total += COSTS_N_INSNS (1);
7232
7233 return true;
7234 }
7235
7236 /* Fall through */
7237
7238 case PLUS:
7239 if (code == PLUS && arm_arch6 && mode == SImode
7240 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7241 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7242 {
7243 *total = COSTS_N_INSNS (1);
7244 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7245 0, speed);
7246 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7247 return true;
7248 }
7249
7250 /* MLA: All arguments must be registers. We filter out
7251 multiplication by a power of two, so that we fall through to
7252 the code below. */
7253 if (GET_CODE (XEXP (x, 0)) == MULT
7254 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7255 {
7256 /* The cost comes from the cost of the multiply. */
7257 return false;
7258 }
7259
7260 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7261 {
7262 if (TARGET_HARD_FLOAT
7263 && (mode == SFmode
7264 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7265 {
7266 *total = COSTS_N_INSNS (1);
7267 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7268 && arm_const_double_rtx (XEXP (x, 1)))
7269 {
7270 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7271 return true;
7272 }
7273
7274 return false;
7275 }
7276
7277 *total = COSTS_N_INSNS (20);
7278 return false;
7279 }
7280
7281 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7282 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7283 {
7284 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7285 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7286 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7287 *total += COSTS_N_INSNS (1);
7288 return true;
7289 }
7290
7291 /* Fall through */
7292
7293 case AND: case XOR: case IOR:
7294
7295 /* Normally the frame registers will be split into reg+const during
7296 reload, so it is a bad idea to combine them with other instructions,
7297 since then they might not be moved outside of loops. As a compromise
7298 we allow integration with ops that have a constant as their second
7299 operand. */
7300 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7301 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7302 && GET_CODE (XEXP (x, 1)) != CONST_INT)
7303 *total = COSTS_N_INSNS (1);
7304
7305 if (mode == DImode)
7306 {
7307 *total += COSTS_N_INSNS (2);
7308 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7309 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7310 {
7311 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7312 return true;
7313 }
7314
7315 return false;
7316 }
7317
7318 *total += COSTS_N_INSNS (1);
7319 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7320 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7321 {
7322 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7323 return true;
7324 }
7325 subcode = GET_CODE (XEXP (x, 0));
7326 if (subcode == ASHIFT || subcode == ASHIFTRT
7327 || subcode == LSHIFTRT
7328 || subcode == ROTATE || subcode == ROTATERT)
7329 {
7330 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7331 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7332 return true;
7333 }
7334
7335 if (subcode == MULT
7336 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7337 {
7338 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7339 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7340 return true;
7341 }
7342
7343 if (subcode == UMIN || subcode == UMAX
7344 || subcode == SMIN || subcode == SMAX)
7345 {
7346 *total = COSTS_N_INSNS (3);
7347 return true;
7348 }
7349
7350 return false;
7351
7352 case MULT:
7353 /* This should have been handled by the CPU specific routines. */
7354 gcc_unreachable ();
7355
7356 case TRUNCATE:
7357 if (arm_arch3m && mode == SImode
7358 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7359 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7360 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7361 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7362 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7363 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7364 {
7365 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7366 return true;
7367 }
7368 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7369 return false;
7370
7371 case NEG:
7372 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7373 {
7374 if (TARGET_HARD_FLOAT
7375 && (mode == SFmode
7376 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7377 {
7378 *total = COSTS_N_INSNS (1);
7379 return false;
7380 }
7381 *total = COSTS_N_INSNS (2);
7382 return false;
7383 }
7384
7385 /* Fall through */
7386 case NOT:
7387 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7388 if (mode == SImode && code == NOT)
7389 {
7390 subcode = GET_CODE (XEXP (x, 0));
7391 if (subcode == ASHIFT || subcode == ASHIFTRT
7392 || subcode == LSHIFTRT
7393 || subcode == ROTATE || subcode == ROTATERT
7394 || (subcode == MULT
7395 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7396 {
7397 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7398 /* Register shifts cost an extra cycle. */
7399 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7400 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7401 subcode, 1, speed);
7402 return true;
7403 }
7404 }
7405
7406 return false;
7407
7408 case IF_THEN_ELSE:
7409 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7410 {
7411 *total = COSTS_N_INSNS (4);
7412 return true;
7413 }
7414
7415 operand = XEXP (x, 0);
7416
7417 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7418 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7419 && GET_CODE (XEXP (operand, 0)) == REG
7420 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7421 *total += COSTS_N_INSNS (1);
7422 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7423 + rtx_cost (XEXP (x, 2), code, 2, speed));
7424 return true;
7425
7426 case NE:
7427 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7428 {
7429 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7430 return true;
7431 }
7432 goto scc_insn;
7433
7434 case GE:
7435 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7436 && mode == SImode && XEXP (x, 1) == const0_rtx)
7437 {
7438 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7439 return true;
7440 }
7441 goto scc_insn;
7442
7443 case LT:
7444 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7445 && mode == SImode && XEXP (x, 1) == const0_rtx)
7446 {
7447 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7448 return true;
7449 }
7450 goto scc_insn;
7451
7452 case EQ:
7453 case GT:
7454 case LE:
7455 case GEU:
7456 case LTU:
7457 case GTU:
7458 case LEU:
7459 case UNORDERED:
7460 case ORDERED:
7461 case UNEQ:
7462 case UNGE:
7463 case UNLT:
7464 case UNGT:
7465 case UNLE:
7466 scc_insn:
7467 /* SCC insns. If the comparison has already been
7468 performed, they cost 2 instructions. Otherwise they need
7469 an additional comparison before them. */
7470 *total = COSTS_N_INSNS (2);
7471 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7472 {
7473 return true;
7474 }
7475
7476 /* Fall through */
7477 case COMPARE:
7478 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7479 {
7480 *total = 0;
7481 return true;
7482 }
7483
7484 *total += COSTS_N_INSNS (1);
7485 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7486 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7487 {
7488 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7489 return true;
7490 }
7491
7492 subcode = GET_CODE (XEXP (x, 0));
7493 if (subcode == ASHIFT || subcode == ASHIFTRT
7494 || subcode == LSHIFTRT
7495 || subcode == ROTATE || subcode == ROTATERT)
7496 {
7497 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7498 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7499 return true;
7500 }
7501
7502 if (subcode == MULT
7503 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7504 {
7505 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7506 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7507 return true;
7508 }
7509
7510 return false;
7511
7512 case UMIN:
7513 case UMAX:
7514 case SMIN:
7515 case SMAX:
7516 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7517 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7518 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7519 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7520 return true;
7521
7522 case ABS:
7523 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7524 {
7525 if (TARGET_HARD_FLOAT
7526 && (mode == SFmode
7527 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7528 {
7529 *total = COSTS_N_INSNS (1);
7530 return false;
7531 }
7532 *total = COSTS_N_INSNS (20);
7533 return false;
7534 }
7535 *total = COSTS_N_INSNS (1);
7536 if (mode == DImode)
7537 *total += COSTS_N_INSNS (3);
7538 return false;
7539
7540 case SIGN_EXTEND:
7541 case ZERO_EXTEND:
7542 *total = 0;
7543 if (GET_MODE_CLASS (mode) == MODE_INT)
7544 {
7545 rtx op = XEXP (x, 0);
7546 enum machine_mode opmode = GET_MODE (op);
7547
7548 if (mode == DImode)
7549 *total += COSTS_N_INSNS (1);
7550
7551 if (opmode != SImode)
7552 {
7553 if (MEM_P (op))
7554 {
7555 /* If !arm_arch4, we use one of the extendhisi2_mem
7556 or movhi_bytes patterns for HImode. For a QImode
7557 sign extension, we first zero-extend from memory
7558 and then perform a shift sequence. */
7559 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7560 *total += COSTS_N_INSNS (2);
7561 }
7562 else if (arm_arch6)
7563 *total += COSTS_N_INSNS (1);
7564
7565 /* We don't have the necessary insn, so we need to perform some
7566 other operation. */
7567 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7568 /* An and with constant 255. */
7569 *total += COSTS_N_INSNS (1);
7570 else
7571 /* A shift sequence. Increase costs slightly to avoid
7572 combining two shifts into an extend operation. */
7573 *total += COSTS_N_INSNS (2) + 1;
7574 }
7575
7576 return false;
7577 }
7578
7579 switch (GET_MODE (XEXP (x, 0)))
7580 {
7581 case V8QImode:
7582 case V4HImode:
7583 case V2SImode:
7584 case V4QImode:
7585 case V2HImode:
7586 *total = COSTS_N_INSNS (1);
7587 return false;
7588
7589 default:
7590 gcc_unreachable ();
7591 }
7592 gcc_unreachable ();
7593
7594 case ZERO_EXTRACT:
7595 case SIGN_EXTRACT:
7596 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7597 return true;
7598
7599 case CONST_INT:
7600 if (const_ok_for_arm (INTVAL (x))
7601 || const_ok_for_arm (~INTVAL (x)))
7602 *total = COSTS_N_INSNS (1);
7603 else
7604 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7605 INTVAL (x), NULL_RTX,
7606 NULL_RTX, 0, 0));
7607 return true;
7608
7609 case CONST:
7610 case LABEL_REF:
7611 case SYMBOL_REF:
7612 *total = COSTS_N_INSNS (3);
7613 return true;
7614
7615 case HIGH:
7616 *total = COSTS_N_INSNS (1);
7617 return true;
7618
7619 case LO_SUM:
7620 *total = COSTS_N_INSNS (1);
7621 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7622 return true;
7623
7624 case CONST_DOUBLE:
7625 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7626 && (mode == SFmode || !TARGET_VFP_SINGLE))
7627 *total = COSTS_N_INSNS (1);
7628 else
7629 *total = COSTS_N_INSNS (4);
7630 return true;
7631
7632 case SET:
7633 return false;
7634
7635 case UNSPEC:
7636 /* We cost this the same as a memory access, so that it can
7637 be hoisted out of loops. */
7638 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7639 {
7640 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7641 }
7642 return true;
7643
7644 default:
7645 *total = COSTS_N_INSNS (4);
7646 return false;
7647 }
7648 }
7649
7650 /* Estimates the size cost of thumb1 instructions.
7651 For now most of the code is copied from thumb1_rtx_costs. We need
7652 finer-grained tuning when we have more related test cases. */
7653 static inline int
7654 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7655 {
7656 enum machine_mode mode = GET_MODE (x);
7657
7658 switch (code)
7659 {
7660 case ASHIFT:
7661 case ASHIFTRT:
7662 case LSHIFTRT:
7663 case ROTATERT:
7664 case PLUS:
7665 case MINUS:
7666 case COMPARE:
7667 case NEG:
7668 case NOT:
7669 return COSTS_N_INSNS (1);
7670
7671 case MULT:
7672 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7673 {
7674 /* The Thumb1 mul instruction can't operate on a constant. We must load it
7675 into a register first. */
7676 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7677 return COSTS_N_INSNS (1) + const_size;
7678 }
7679 return COSTS_N_INSNS (1);
7680
7681 case SET:
7682 return (COSTS_N_INSNS (1)
7683 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7684 + (GET_CODE (SET_DEST (x)) == MEM)));
7685
7686 case CONST_INT:
7687 if (outer == SET)
7688 {
7689 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7690 return COSTS_N_INSNS (1);
7691 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7692 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7693 return COSTS_N_INSNS (2);
7694 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7695 if (thumb_shiftable_const (INTVAL (x)))
7696 return COSTS_N_INSNS (2);
7697 return COSTS_N_INSNS (3);
7698 }
7699 else if ((outer == PLUS || outer == COMPARE)
7700 && INTVAL (x) < 256 && INTVAL (x) > -256)
7701 return 0;
7702 else if ((outer == IOR || outer == XOR || outer == AND)
7703 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7704 return COSTS_N_INSNS (1);
7705 else if (outer == AND)
7706 {
7707 int i;
7708 /* This duplicates the tests in the andsi3 expander. */
7709 for (i = 9; i <= 31; i++)
7710 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7711 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7712 return COSTS_N_INSNS (2);
7713 }
7714 else if (outer == ASHIFT || outer == ASHIFTRT
7715 || outer == LSHIFTRT)
7716 return 0;
7717 return COSTS_N_INSNS (2);
7718
7719 case CONST:
7720 case CONST_DOUBLE:
7721 case LABEL_REF:
7722 case SYMBOL_REF:
7723 return COSTS_N_INSNS (3);
7724
7725 case UDIV:
7726 case UMOD:
7727 case DIV:
7728 case MOD:
7729 return 100;
7730
7731 case TRUNCATE:
7732 return 99;
7733
7734 case AND:
7735 case XOR:
7736 case IOR:
7737 /* XXX guess. */
7738 return 8;
7739
7740 case MEM:
7741 /* XXX another guess. */
7742 /* Memory costs quite a lot for the first word, but subsequent words
7743 load at the equivalent of a single insn each. */
7744 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7745 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7746 ? 4 : 0));
7747
7748 case IF_THEN_ELSE:
7749 /* XXX a guess. */
7750 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7751 return 14;
7752 return 2;
7753
7754 case ZERO_EXTEND:
7755 /* XXX still guessing. */
7756 switch (GET_MODE (XEXP (x, 0)))
7757 {
7758 case QImode:
7759 return (1 + (mode == DImode ? 4 : 0)
7760 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7761
7762 case HImode:
7763 return (4 + (mode == DImode ? 4 : 0)
7764 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7765
7766 case SImode:
7767 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7768
7769 default:
7770 return 99;
7771 }
7772
7773 default:
7774 return 99;
7775 }
7776 }
7777
7778 /* RTX costs when optimizing for size. */
7779 static bool
7780 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7781 int *total)
7782 {
7783 enum machine_mode mode = GET_MODE (x);
7784 if (TARGET_THUMB1)
7785 {
7786 *total = thumb1_size_rtx_costs (x, code, outer_code);
7787 return true;
7788 }
7789
7790 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7791 switch (code)
7792 {
7793 case MEM:
7794 /* A memory access costs 1 insn if the mode is small or the address is
7795 a single register; otherwise it costs one insn per word. */
7796 if (REG_P (XEXP (x, 0)))
7797 *total = COSTS_N_INSNS (1);
7798 else if (flag_pic
7799 && GET_CODE (XEXP (x, 0)) == PLUS
7800 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7801 /* This will be split into two instructions.
7802 See arm.md:calculate_pic_address. */
7803 *total = COSTS_N_INSNS (2);
7804 else
7805 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7806 return true;
7807
7808 case DIV:
7809 case MOD:
7810 case UDIV:
7811 case UMOD:
7812 /* Needs a libcall, so it costs about this. */
7813 *total = COSTS_N_INSNS (2);
7814 return false;
7815
7816 case ROTATE:
7817 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7818 {
7819 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7820 return true;
7821 }
7822 /* Fall through */
7823 case ROTATERT:
7824 case ASHIFT:
7825 case LSHIFTRT:
7826 case ASHIFTRT:
7827 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7828 {
7829 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7830 return true;
7831 }
7832 else if (mode == SImode)
7833 {
7834 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7835 /* Slightly disparage register shifts, but not by much. */
7836 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7837 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7838 return true;
7839 }
7840
7841 /* Needs a libcall. */
7842 *total = COSTS_N_INSNS (2);
7843 return false;
7844
7845 case MINUS:
7846 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7847 && (mode == SFmode || !TARGET_VFP_SINGLE))
7848 {
7849 *total = COSTS_N_INSNS (1);
7850 return false;
7851 }
7852
7853 if (mode == SImode)
7854 {
7855 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7856 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7857
7858 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7859 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7860 || subcode1 == ROTATE || subcode1 == ROTATERT
7861 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7862 || subcode1 == ASHIFTRT)
7863 {
7864 /* It's just the cost of the two operands. */
7865 *total = 0;
7866 return false;
7867 }
7868
7869 *total = COSTS_N_INSNS (1);
7870 return false;
7871 }
7872
7873 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7874 return false;
7875
7876 case PLUS:
7877 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7878 && (mode == SFmode || !TARGET_VFP_SINGLE))
7879 {
7880 *total = COSTS_N_INSNS (1);
7881 return false;
7882 }
7883
7884 /* A shift as a part of ADD costs nothing. */
7885 if (GET_CODE (XEXP (x, 0)) == MULT
7886 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7887 {
7888 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7889 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7890 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7891 return true;
7892 }
7893
7894 /* Fall through */
7895 case AND: case XOR: case IOR:
7896 if (mode == SImode)
7897 {
7898 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7899
7900 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7901 || subcode == LSHIFTRT || subcode == ASHIFTRT
7902 || (code == AND && subcode == NOT))
7903 {
7904 /* It's just the cost of the two operands. */
7905 *total = 0;
7906 return false;
7907 }
7908 }
7909
7910 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7911 return false;
7912
7913 case MULT:
7914 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7915 return false;
7916
7917 case NEG:
7918 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7919 && (mode == SFmode || !TARGET_VFP_SINGLE))
7920 {
7921 *total = COSTS_N_INSNS (1);
7922 return false;
7923 }
7924
7925 /* Fall through */
7926 case NOT:
7927 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7928
7929 return false;
7930
7931 case IF_THEN_ELSE:
7932 *total = 0;
7933 return false;
7934
7935 case COMPARE:
7936 if (cc_register (XEXP (x, 0), VOIDmode))
7937 *total = 0;
7938 else
7939 *total = COSTS_N_INSNS (1);
7940 return false;
7941
7942 case ABS:
7943 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7944 && (mode == SFmode || !TARGET_VFP_SINGLE))
7945 *total = COSTS_N_INSNS (1);
7946 else
7947 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7948 return false;
7949
7950 case SIGN_EXTEND:
7951 case ZERO_EXTEND:
7952 return arm_rtx_costs_1 (x, outer_code, total, 0);
7953
7954 case CONST_INT:
7955 if (const_ok_for_arm (INTVAL (x)))
7956 /* A multiplication by a constant requires another instruction
7957 to load the constant to a register. */
7958 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
7959 ? 1 : 0);
7960 else if (const_ok_for_arm (~INTVAL (x)))
7961 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
7962 else if (const_ok_for_arm (-INTVAL (x)))
7963 {
7964 if (outer_code == COMPARE || outer_code == PLUS
7965 || outer_code == MINUS)
7966 *total = 0;
7967 else
7968 *total = COSTS_N_INSNS (1);
7969 }
7970 else
7971 *total = COSTS_N_INSNS (2);
7972 return true;
7973
7974 case CONST:
7975 case LABEL_REF:
7976 case SYMBOL_REF:
7977 *total = COSTS_N_INSNS (2);
7978 return true;
7979
7980 case CONST_DOUBLE:
7981 *total = COSTS_N_INSNS (4);
7982 return true;
7983
7984 case HIGH:
7985 case LO_SUM:
7986 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7987 cost of these slightly. */
7988 *total = COSTS_N_INSNS (1) + 1;
7989 return true;
7990
7991 case SET:
7992 return false;
7993
7994 default:
7995 if (mode != VOIDmode)
7996 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7997 else
7998 *total = COSTS_N_INSNS (4); /* Who knows? */
7999 return false;
8000 }
8001 }
8002
8003 /* RTX costs hook: dispatch to the size costs or the per-core speed costs. */
8004 static bool
8005 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8006 int *total, bool speed)
8007 {
8008 if (!speed)
8009 return arm_size_rtx_costs (x, (enum rtx_code) code,
8010 (enum rtx_code) outer_code, total);
8011 else
8012 return current_tune->rtx_costs (x, (enum rtx_code) code,
8013 (enum rtx_code) outer_code,
8014 total, speed);
8015 }
8016
8017 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8018 supported on any "slowmul" cores, so it can be ignored. */
8019
8020 static bool
8021 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8022 int *total, bool speed)
8023 {
8024 enum machine_mode mode = GET_MODE (x);
8025
8026 if (TARGET_THUMB)
8027 {
8028 *total = thumb1_rtx_costs (x, code, outer_code);
8029 return true;
8030 }
8031
8032 switch (code)
8033 {
8034 case MULT:
8035 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8036 || mode == DImode)
8037 {
8038 *total = COSTS_N_INSNS (20);
8039 return false;
8040 }
8041
8042 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8043 {
8044 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8045 & (unsigned HOST_WIDE_INT) 0xffffffff);
8046 int cost, const_ok = const_ok_for_arm (i);
8047 int j, booth_unit_size;
8048
8049 /* Tune as appropriate. */
8050 cost = const_ok ? 4 : 8;
8051 booth_unit_size = 2;
8052 for (j = 0; i && j < 32; j += booth_unit_size)
8053 {
8054 i >>= booth_unit_size;
8055 cost++;
8056 }
8057
8058 *total = COSTS_N_INSNS (cost);
8059 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8060 return true;
8061 }
8062
8063 *total = COSTS_N_INSNS (20);
8064 return false;
8065
8066 default:
8067 return arm_rtx_costs_1 (x, outer_code, total, speed);
8068 }
8069 }
8070
8071
8072 /* RTX cost for cores with a fast multiply unit (M variants). */
8073
8074 static bool
8075 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8076 int *total, bool speed)
8077 {
8078 enum machine_mode mode = GET_MODE (x);
8079
8080 if (TARGET_THUMB1)
8081 {
8082 *total = thumb1_rtx_costs (x, code, outer_code);
8083 return true;
8084 }
8085
8086 /* ??? should thumb2 use different costs? */
8087 switch (code)
8088 {
8089 case MULT:
8090 /* There is no point basing this on the tuning, since it is always the
8091 fast variant if it exists at all. */
8092 if (mode == DImode
8093 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8094 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8095 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8096 {
8097 *total = COSTS_N_INSNS (2);
8098 return false;
8099 }
8100
8101
8102 if (mode == DImode)
8103 {
8104 *total = COSTS_N_INSNS (5);
8105 return false;
8106 }
8107
8108 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8109 {
8110 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8111 & (unsigned HOST_WIDE_INT) 0xffffffff);
8112 int cost, const_ok = const_ok_for_arm (i);
8113 int j, booth_unit_size;
8114
8115 /* Tune as appropriate. */
8116 cost = const_ok ? 4 : 8;
8117 booth_unit_size = 8;
8118 for (j = 0; i && j < 32; j += booth_unit_size)
8119 {
8120 i >>= booth_unit_size;
8121 cost++;
8122 }
8123
8124 *total = COSTS_N_INSNS (cost);
8125 return false;
8126 }
8127
8128 if (mode == SImode)
8129 {
8130 *total = COSTS_N_INSNS (4);
8131 return false;
8132 }
8133
8134 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8135 {
8136 if (TARGET_HARD_FLOAT
8137 && (mode == SFmode
8138 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8139 {
8140 *total = COSTS_N_INSNS (1);
8141 return false;
8142 }
8143 }
8144
8145 /* Requires a lib call */
8146 *total = COSTS_N_INSNS (20);
8147 return false;
8148
8149 default:
8150 return arm_rtx_costs_1 (x, outer_code, total, speed);
8151 }
8152 }
8153
8154
8155 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8156 so it can be ignored. */
8157
8158 static bool
8159 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8160 int *total, bool speed)
8161 {
8162 enum machine_mode mode = GET_MODE (x);
8163
8164 if (TARGET_THUMB)
8165 {
8166 *total = thumb1_rtx_costs (x, code, outer_code);
8167 return true;
8168 }
8169
8170 switch (code)
8171 {
8172 case COMPARE:
8173 if (GET_CODE (XEXP (x, 0)) != MULT)
8174 return arm_rtx_costs_1 (x, outer_code, total, speed);
8175
8176 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8177 will stall until the multiplication is complete. */
8178 *total = COSTS_N_INSNS (3);
8179 return false;
8180
8181 case MULT:
8182 /* There is no point basing this on the tuning, since it is always the
8183 fast variant if it exists at all. */
8184 if (mode == DImode
8185 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8186 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8187 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8188 {
8189 *total = COSTS_N_INSNS (2);
8190 return false;
8191 }
8192
8193
8194 if (mode == DImode)
8195 {
8196 *total = COSTS_N_INSNS (5);
8197 return false;
8198 }
8199
8200 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8201 {
8202 /* If operand 1 is a constant we can more accurately
8203 calculate the cost of the multiply. The multiplier can
8204 retire 15 bits on the first cycle and a further 12 on the
8205 second. We do, of course, have to load the constant into
8206 a register first. */
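	  /* Worked example (derived from the masks below): a constant such as
	     0x4000 fits in the first 15 bits, so only the one-cycle overhead
	     is counted (cost 1); 0x123456 also needs the second multiplier
	     cycle (cost 2); 0x12345678 needs a third (cost 3).  Constants with
	     the top bit set are inverted first.  */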
8207 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8208 /* There's a general overhead of one cycle. */
8209 int cost = 1;
8210 unsigned HOST_WIDE_INT masked_const;
8211
8212 if (i & 0x80000000)
8213 i = ~i;
8214
8215 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8216
8217 masked_const = i & 0xffff8000;
8218 if (masked_const != 0)
8219 {
8220 cost++;
8221 masked_const = i & 0xf8000000;
8222 if (masked_const != 0)
8223 cost++;
8224 }
8225 *total = COSTS_N_INSNS (cost);
8226 return false;
8227 }
8228
8229 if (mode == SImode)
8230 {
8231 *total = COSTS_N_INSNS (3);
8232 return false;
8233 }
8234
8235 /* Requires a lib call */
8236 *total = COSTS_N_INSNS (20);
8237 return false;
8238
8239 default:
8240 return arm_rtx_costs_1 (x, outer_code, total, speed);
8241 }
8242 }
8243
8244
8245 /* RTX costs for 9e (and later) cores. */
8246
8247 static bool
8248 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8249 int *total, bool speed)
8250 {
8251 enum machine_mode mode = GET_MODE (x);
8252
8253 if (TARGET_THUMB1)
8254 {
8255 switch (code)
8256 {
8257 case MULT:
8258 *total = COSTS_N_INSNS (3);
8259 return true;
8260
8261 default:
8262 *total = thumb1_rtx_costs (x, code, outer_code);
8263 return true;
8264 }
8265 }
8266
8267 switch (code)
8268 {
8269 case MULT:
8270 /* There is no point basing this on the tuning, since it is always the
8271 fast variant if it exists at all. */
8272 if (mode == DImode
8273 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8274 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8275 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8276 {
8277 *total = COSTS_N_INSNS (2);
8278 return false;
8279 }
8280
8281
8282 if (mode == DImode)
8283 {
8284 *total = COSTS_N_INSNS (5);
8285 return false;
8286 }
8287
8288 if (mode == SImode)
8289 {
8290 *total = COSTS_N_INSNS (2);
8291 return false;
8292 }
8293
8294 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8295 {
8296 if (TARGET_HARD_FLOAT
8297 && (mode == SFmode
8298 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8299 {
8300 *total = COSTS_N_INSNS (1);
8301 return false;
8302 }
8303 }
8304
8305 *total = COSTS_N_INSNS (20);
8306 return false;
8307
8308 default:
8309 return arm_rtx_costs_1 (x, outer_code, total, speed);
8310 }
8311 }
8312 /* All address computations that can be done are free, but rtx cost returns
8313 the same for practically all of them. So we weight the different types
8314 of address here in the order (most pref first):
8315 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
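/* Illustrative weights resulting from the code below (a sketch, not an
   exact latency model): a pre/post-increment address costs 0, reg + constant
   costs 2, reg + shifted reg costs 3, reg + reg costs 4, a plain register
   costs 6, and a MEM/LABEL_REF/SYMBOL_REF address costs 10.  */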
8316 static inline int
8317 arm_arm_address_cost (rtx x)
8318 {
8319 enum rtx_code c = GET_CODE (x);
8320
8321 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8322 return 0;
8323 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8324 return 10;
8325
8326 if (c == PLUS)
8327 {
8328 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8329 return 2;
8330
8331 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8332 return 3;
8333
8334 return 4;
8335 }
8336
8337 return 6;
8338 }
8339
8340 static inline int
8341 arm_thumb_address_cost (rtx x)
8342 {
8343 enum rtx_code c = GET_CODE (x);
8344
8345 if (c == REG)
8346 return 1;
8347 if (c == PLUS
8348 && GET_CODE (XEXP (x, 0)) == REG
8349 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8350 return 1;
8351
8352 return 2;
8353 }
8354
8355 static int
8356 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8357 {
8358 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8359 }
8360
8361 /* Adjust cost hook for XScale. */
8362 static bool
8363 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8364 {
8365 /* Some true dependencies can have a higher cost depending
8366 on precisely how certain input operands are used. */
8367 if (REG_NOTE_KIND (link) == 0
8368 && recog_memoized (insn) >= 0
8369 && recog_memoized (dep) >= 0)
8370 {
8371 int shift_opnum = get_attr_shift (insn);
8372 enum attr_type attr_type = get_attr_type (dep);
8373
8374 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8375 operand for INSN. If we have a shifted input operand and the
8376 instruction we depend on is another ALU instruction, then we may
8377 have to account for an additional stall. */
8378 if (shift_opnum != 0
8379 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8380 {
8381 rtx shifted_operand;
8382 int opno;
8383
8384 /* Get the shifted operand. */
8385 extract_insn (insn);
8386 shifted_operand = recog_data.operand[shift_opnum];
8387
8388 /* Iterate over all the operands in DEP. If we write an operand
8389 that overlaps with SHIFTED_OPERAND, then we have to increase the
8390 cost of this dependency. */
8391 extract_insn (dep);
8392 preprocess_constraints ();
8393 for (opno = 0; opno < recog_data.n_operands; opno++)
8394 {
8395 /* We can ignore strict inputs. */
8396 if (recog_data.operand_type[opno] == OP_IN)
8397 continue;
8398
8399 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8400 shifted_operand))
8401 {
8402 *cost = 2;
8403 return false;
8404 }
8405 }
8406 }
8407 }
8408 return true;
8409 }
8410
8411 /* Adjust cost hook for Cortex A9. */
8412 static bool
8413 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8414 {
8415 switch (REG_NOTE_KIND (link))
8416 {
8417 case REG_DEP_ANTI:
8418 *cost = 0;
8419 return false;
8420
8421 case REG_DEP_TRUE:
8422 case REG_DEP_OUTPUT:
8423 if (recog_memoized (insn) >= 0
8424 && recog_memoized (dep) >= 0)
8425 {
8426 if (GET_CODE (PATTERN (insn)) == SET)
8427 {
8428 if (GET_MODE_CLASS
8429 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8430 || GET_MODE_CLASS
8431 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8432 {
8433 enum attr_type attr_type_insn = get_attr_type (insn);
8434 enum attr_type attr_type_dep = get_attr_type (dep);
8435
8436 /* By default all dependencies of the form
8437 s0 = s0 <op> s1
8438 s0 = s0 <op> s2
8439 have an extra latency of 1 cycle because
8440 of the input and output dependency in this
8441 case. However, this gets modeled as a true
8442 dependency and hence all these checks. */
8443 if (REG_P (SET_DEST (PATTERN (insn)))
8444 && REG_P (SET_DEST (PATTERN (dep)))
8445 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8446 SET_DEST (PATTERN (dep))))
8447 {
8448 /* FMACS is a special case where the dependent
8449 instruction can be issued 3 cycles before
8450 the normal latency in case of an output
8451 dependency. */
8452 if ((attr_type_insn == TYPE_FMACS
8453 || attr_type_insn == TYPE_FMACD)
8454 && (attr_type_dep == TYPE_FMACS
8455 || attr_type_dep == TYPE_FMACD))
8456 {
8457 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8458 *cost = insn_default_latency (dep) - 3;
8459 else
8460 *cost = insn_default_latency (dep);
8461 return false;
8462 }
8463 else
8464 {
8465 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8466 *cost = insn_default_latency (dep) + 1;
8467 else
8468 *cost = insn_default_latency (dep);
8469 }
8470 return false;
8471 }
8472 }
8473 }
8474 }
8475 break;
8476
8477 default:
8478 gcc_unreachable ();
8479 }
8480
8481 return true;
8482 }
8483
8484 /* Adjust cost hook for FA726TE. */
8485 static bool
8486 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8487 {
8488 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
8489 by a predicated one) has a penalty of 3. */
8490 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8491 && recog_memoized (insn) >= 0
8492 && recog_memoized (dep) >= 0
8493 && get_attr_conds (dep) == CONDS_SET)
8494 {
8495 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8496 if (get_attr_conds (insn) == CONDS_USE
8497 && get_attr_type (insn) != TYPE_BRANCH)
8498 {
8499 *cost = 3;
8500 return false;
8501 }
8502
8503 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8504 || get_attr_conds (insn) == CONDS_USE)
8505 {
8506 *cost = 0;
8507 return false;
8508 }
8509 }
8510
8511 return true;
8512 }
8513
8514 /* Implement TARGET_REGISTER_MOVE_COST.
8515
8516 Moves between FPA_REGS and GENERAL_REGS are two memory insns.
8517 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8518 it is typically more expensive than a single memory access. We set
8519 the cost to less than two memory accesses so that floating
8520 point to integer conversion does not go through memory. */
8521
8522 int
8523 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8524 reg_class_t from, reg_class_t to)
8525 {
8526 if (TARGET_32BIT)
8527 {
8528 if ((from == FPA_REGS && to != FPA_REGS)
8529 || (from != FPA_REGS && to == FPA_REGS))
8530 return 20;
8531 else if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8532 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8533 return 15;
8534 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8535 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8536 return 4;
8537 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8538 return 20;
8539 else if ((from == CIRRUS_REGS && to != CIRRUS_REGS)
8540 || (from != CIRRUS_REGS && to == CIRRUS_REGS))
8541 return 20;
8542 else
8543 return 2;
8544 }
8545 else
8546 {
8547 if (from == HI_REGS || to == HI_REGS)
8548 return 4;
8549 else
8550 return 2;
8551 }
8552 }
8553
8554 /* Implement TARGET_MEMORY_MOVE_COST. */
8555
8556 int
8557 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8558 bool in ATTRIBUTE_UNUSED)
8559 {
8560 if (TARGET_32BIT)
8561 return 10;
8562 else
8563 {
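      /* Illustrative note: on Thumb, e.g. an SImode (4-byte) access costs 8
	 for LO_REGS and 16 for other classes; sub-word accesses cost a flat 8
	 (see the two returns below).  */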
8564 if (GET_MODE_SIZE (mode) < 4)
8565 return 8;
8566 else
8567 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
8568 }
8569 }
8570
8571 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8572 It corrects the value of COST based on the relationship between
8573 INSN and DEP through the dependence LINK. It returns the new
8574 value. There is a per-core adjust_cost hook to adjust scheduler costs
8575 and the per-core hook can choose to completely override the generic
8576 adjust_cost function. Only put bits of code into arm_adjust_cost that
8577 are common across all cores. */
8578 static int
8579 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8580 {
8581 rtx i_pat, d_pat;
8582
8583 /* When generating Thumb-1 code, we want to place flag-setting operations
8584 close to a conditional branch which depends on them, so that we can
8585 omit the comparison. */
8586 if (TARGET_THUMB1
8587 && REG_NOTE_KIND (link) == 0
8588 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8589 && recog_memoized (dep) >= 0
8590 && get_attr_conds (dep) == CONDS_SET)
8591 return 0;
8592
8593 if (current_tune->sched_adjust_cost != NULL)
8594 {
8595 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8596 return cost;
8597 }
8598
8599 /* XXX This is not strictly true for the FPA. */
8600 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8601 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8602 return 0;
8603
8604 /* Call insns don't incur a stall, even if they follow a load. */
8605 if (REG_NOTE_KIND (link) == 0
8606 && GET_CODE (insn) == CALL_INSN)
8607 return 1;
8608
8609 if ((i_pat = single_set (insn)) != NULL
8610 && GET_CODE (SET_SRC (i_pat)) == MEM
8611 && (d_pat = single_set (dep)) != NULL
8612 && GET_CODE (SET_DEST (d_pat)) == MEM)
8613 {
8614 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8615 /* This is a load after a store; there is no conflict if the load reads
8616 from a cached area. Assume that loads from the stack and from the
8617 constant pool are cached, and that others will miss. This is a
8618 hack. */
8619
8620 if ((GET_CODE (src_mem) == SYMBOL_REF
8621 && CONSTANT_POOL_ADDRESS_P (src_mem))
8622 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8623 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8624 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8625 return 1;
8626 }
8627
8628 return cost;
8629 }
8630
8631 static int
8632 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8633 {
8634 if (TARGET_32BIT)
8635 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8636 else
8637 return (optimize > 0) ? 2 : 0;
8638 }
8639
8640 static int
8641 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8642 {
8643 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8644 }
8645
8646 static int fp_consts_inited = 0;
8647
8648 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8649 static const char * const strings_fp[8] =
8650 {
8651 "0", "1", "2", "3",
8652 "4", "5", "0.5", "10"
8653 };
8654
8655 static REAL_VALUE_TYPE values_fp[8];
8656
8657 static void
8658 init_fp_table (void)
8659 {
8660 int i;
8661 REAL_VALUE_TYPE r;
8662
8663 if (TARGET_VFP)
8664 fp_consts_inited = 1;
8665 else
8666 fp_consts_inited = 8;
8667
8668 for (i = 0; i < fp_consts_inited; i++)
8669 {
8670 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
8671 values_fp[i] = r;
8672 }
8673 }
8674
8675 /* Return TRUE if rtx X is a valid immediate FP constant. */
8676 int
8677 arm_const_double_rtx (rtx x)
8678 {
8679 REAL_VALUE_TYPE r;
8680 int i;
8681
8682 if (!fp_consts_inited)
8683 init_fp_table ();
8684
8685 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8686 if (REAL_VALUE_MINUS_ZERO (r))
8687 return 0;
8688
8689 for (i = 0; i < fp_consts_inited; i++)
8690 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8691 return 1;
8692
8693 return 0;
8694 }
8695
8696 /* Return TRUE if the negation of rtx X is a valid immediate FPA constant. */
8697 int
8698 neg_const_double_rtx_ok_for_fpa (rtx x)
8699 {
8700 REAL_VALUE_TYPE r;
8701 int i;
8702
8703 if (!fp_consts_inited)
8704 init_fp_table ();
8705
8706 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8707 r = real_value_negate (&r);
8708 if (REAL_VALUE_MINUS_ZERO (r))
8709 return 0;
8710
8711 for (i = 0; i < 8; i++)
8712 if (REAL_VALUES_EQUAL (r, values_fp[i]))
8713 return 1;
8714
8715 return 0;
8716 }
8717
8718
8719 /* VFPv3 has a fairly wide range of representable immediates, formed from
8720 "quarter-precision" floating-point values. These can be evaluated using this
8721 formula (with ^ for exponentiation):
8722
8723 (-1)^s * n * 2^-r
8724
8725 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8726 16 <= n <= 31 and 0 <= r <= 7.
8727
8728 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8729
8730 - A (most-significant) is the sign bit.
8731 - BCD are the exponent (encoded as r XOR 3).
8732 - EFGH are the mantissa (encoded as n - 16).
8733 */
8734
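/* Illustrative sketch only (a hypothetical helper, not part of GCC): decode
   an 8-bit index of the form ABCDEFGH back into its value using the formula
   above.  For example, 0x70 decodes to 1.0 (s=0, r=4, n=16) and 0x60 decodes
   to 0.5 (s=0, r=5, n=16).  Compiled out so it does not affect the build.  */
#if 0
static double
vfp3_example_decode_imm8 (unsigned char imm8)
{
  int s = (imm8 >> 7) & 1;		/* A: sign bit.  */
  int r = ((imm8 >> 4) & 7) ^ 3;	/* BCD: exponent, encoded as r XOR 3.  */
  int n = (imm8 & 0xf) + 16;		/* EFGH: mantissa, encoded as n - 16.  */
  return (s ? -1.0 : 1.0) * (double) n / (double) (1 << r);
}
#endif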
8735 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8736 fconst[sd] instruction, or -1 if X isn't suitable. */
8737 static int
8738 vfp3_const_double_index (rtx x)
8739 {
8740 REAL_VALUE_TYPE r, m;
8741 int sign, exponent;
8742 unsigned HOST_WIDE_INT mantissa, mant_hi;
8743 unsigned HOST_WIDE_INT mask;
8744 HOST_WIDE_INT m1, m2;
8745 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8746
8747 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8748 return -1;
8749
8750 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8751
8752 /* We can't represent these things, so detect them first. */
8753 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8754 return -1;
8755
8756 /* Extract sign, exponent and mantissa. */
8757 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8758 r = real_value_abs (&r);
8759 exponent = REAL_EXP (&r);
8760 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8761 highest (sign) bit, with a fixed binary point at bit point_pos.
8762 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8763 bits for the mantissa, this may fail (low bits would be lost). */
8764 real_ldexp (&m, &r, point_pos - exponent);
8765 REAL_VALUE_TO_INT (&m1, &m2, m);
8766 mantissa = m1;
8767 mant_hi = m2;
8768
8769 /* If there are bits set in the low part of the mantissa, we can't
8770 represent this value. */
8771 if (mantissa != 0)
8772 return -1;
8773
8774 /* Now make it so that mantissa contains the most-significant bits, and move
8775 the point_pos to indicate that the least-significant bits have been
8776 discarded. */
8777 point_pos -= HOST_BITS_PER_WIDE_INT;
8778 mantissa = mant_hi;
8779
8780 /* We can permit four significant bits of mantissa only, plus a high bit
8781 which is always 1. */
8782 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8783 if ((mantissa & mask) != 0)
8784 return -1;
8785
8786 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8787 mantissa >>= point_pos - 5;
8788
8789 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8790 floating-point immediate zero with Neon using an integer-zero load, but
8791 that case is handled elsewhere.) */
8792 if (mantissa == 0)
8793 return -1;
8794
8795 gcc_assert (mantissa >= 16 && mantissa <= 31);
8796
8797 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8798 normalized significands are in the range [1, 2). (Our mantissa is shifted
8799 left 4 places at this point relative to normalized IEEE754 values). GCC
8800 internally uses [0.5, 1) (see real.c), so the exponent returned from
8801 REAL_EXP must be altered. */
8802 exponent = 5 - exponent;
8803
8804 if (exponent < 0 || exponent > 7)
8805 return -1;
8806
8807 /* Sign, mantissa and exponent are now in the correct form to plug into the
8808 formula described in the comment above. */
8809 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8810 }
8811
8812 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8813 int
8814 vfp3_const_double_rtx (rtx x)
8815 {
8816 if (!TARGET_VFP3)
8817 return 0;
8818
8819 return vfp3_const_double_index (x) != -1;
8820 }
8821
8822 /* Recognize immediates which can be used in various Neon instructions. Legal
8823 immediates are described by the following table (for VMVN variants, the
8824 bitwise inverse of the constant shown is recognized. In either case, VMOV
8825 is output and the correct instruction to use for a given constant is chosen
8826 by the assembler). The constant shown is replicated across all elements of
8827 the destination vector.
8828
8829 insn elems variant constant (binary)
8830 ---- ----- ------- -----------------
8831 vmov i32 0 00000000 00000000 00000000 abcdefgh
8832 vmov i32 1 00000000 00000000 abcdefgh 00000000
8833 vmov i32 2 00000000 abcdefgh 00000000 00000000
8834 vmov i32 3 abcdefgh 00000000 00000000 00000000
8835 vmov i16 4 00000000 abcdefgh
8836 vmov i16 5 abcdefgh 00000000
8837 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8838 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8839 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8840 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8841 vmvn i16 10 00000000 abcdefgh
8842 vmvn i16 11 abcdefgh 00000000
8843 vmov i32 12 00000000 00000000 abcdefgh 11111111
8844 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8845 vmov i32 14 00000000 abcdefgh 11111111 11111111
8846 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8847 vmov i8 16 abcdefgh
8848 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8849 eeeeeeee ffffffff gggggggg hhhhhhhh
8850 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8851
8852 For case 18, B = !b. Representable values are exactly those accepted by
8853 vfp3_const_double_index, but are output as floating-point numbers rather
8854 than indices.
8855
8856 Variants 0-5 (inclusive) may also be used as immediates for the second
8857 operand of VORR/VBIC instructions.
8858
8859 The INVERSE argument causes the bitwise inverse of the given operand to be
8860 recognized instead (used for recognizing legal immediates for the VAND/VORN
8861 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8862 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8863 output, rather than the real insns vbic/vorr).
8864
8865 INVERSE makes no difference to the recognition of float vectors.
8866
8867 The return value is the variant of immediate as shown in the above table, or
8868 -1 if the given value doesn't match any of the listed patterns.
8869 */
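/* Worked example (derived from the table above and the checks below): a
   V4SImode vector with every element 0x000000ff splats to the byte pattern
   ff 00 00 00 repeated, which matches variant 0 with abcdefgh = 0xff and an
   element width of 32; a vector of 0xffffff00 elements instead matches the
   VMVN variant 6.  */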
8870 static int
8871 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8872 rtx *modconst, int *elementwidth)
8873 {
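  /* The CHECK macro below tests every STRIDE-th group of entries in the
     bytes[] array (filled in further down by splatting the vector constant)
     against TEST; if all groups match, it records the variant number CLASS
     and element size ELSIZE and breaks out of the do/while chain.  */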
8874 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8875 matches = 1; \
8876 for (i = 0; i < idx; i += (STRIDE)) \
8877 if (!(TEST)) \
8878 matches = 0; \
8879 if (matches) \
8880 { \
8881 immtype = (CLASS); \
8882 elsize = (ELSIZE); \
8883 break; \
8884 }
8885
8886 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8887 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8888 unsigned char bytes[16];
8889 int immtype = -1, matches;
8890 unsigned int invmask = inverse ? 0xff : 0;
8891
8892 /* Vectors of float constants. */
8893 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8894 {
8895 rtx el0 = CONST_VECTOR_ELT (op, 0);
8896 REAL_VALUE_TYPE r0;
8897
8898 if (!vfp3_const_double_rtx (el0))
8899 return -1;
8900
8901 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8902
8903 for (i = 1; i < n_elts; i++)
8904 {
8905 rtx elt = CONST_VECTOR_ELT (op, i);
8906 REAL_VALUE_TYPE re;
8907
8908 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8909
8910 if (!REAL_VALUES_EQUAL (r0, re))
8911 return -1;
8912 }
8913
8914 if (modconst)
8915 *modconst = CONST_VECTOR_ELT (op, 0);
8916
8917 if (elementwidth)
8918 *elementwidth = 0;
8919
8920 return 18;
8921 }
8922
8923 /* Splat vector constant out into a byte vector. */
8924 for (i = 0; i < n_elts; i++)
8925 {
8926 rtx el = CONST_VECTOR_ELT (op, i);
8927 unsigned HOST_WIDE_INT elpart;
8928 unsigned int part, parts;
8929
8930 if (GET_CODE (el) == CONST_INT)
8931 {
8932 elpart = INTVAL (el);
8933 parts = 1;
8934 }
8935 else if (GET_CODE (el) == CONST_DOUBLE)
8936 {
8937 elpart = CONST_DOUBLE_LOW (el);
8938 parts = 2;
8939 }
8940 else
8941 gcc_unreachable ();
8942
8943 for (part = 0; part < parts; part++)
8944 {
8945 unsigned int byte;
8946 for (byte = 0; byte < innersize; byte++)
8947 {
8948 bytes[idx++] = (elpart & 0xff) ^ invmask;
8949 elpart >>= BITS_PER_UNIT;
8950 }
8951 if (GET_CODE (el) == CONST_DOUBLE)
8952 elpart = CONST_DOUBLE_HIGH (el);
8953 }
8954 }
8955
8956 /* Sanity check. */
8957 gcc_assert (idx == GET_MODE_SIZE (mode));
8958
8959 do
8960 {
8961 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8962 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8963
8964 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8965 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8966
8967 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8968 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
8969
8970 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8971 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
8972
8973 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
8974
8975 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
8976
8977 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8978 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8979
8980 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8981 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8982
8983 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8984 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
8985
8986 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8987 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
8988
8989 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
8990
8991 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
8992
8993 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8994 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8995
8996 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8997 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
8998
8999 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9000 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9001
9002 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9003 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9004
9005 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9006
9007 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9008 && bytes[i] == bytes[(i + 8) % idx]);
9009 }
9010 while (0);
9011
9012 if (immtype == -1)
9013 return -1;
9014
9015 if (elementwidth)
9016 *elementwidth = elsize;
9017
9018 if (modconst)
9019 {
9020 unsigned HOST_WIDE_INT imm = 0;
9021
9022 /* Un-invert bytes of recognized vector, if necessary. */
9023 if (invmask != 0)
9024 for (i = 0; i < idx; i++)
9025 bytes[i] ^= invmask;
9026
9027 if (immtype == 17)
9028 {
9029 /* FIXME: Broken on 32-bit H_W_I hosts. */
9030 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9031
9032 for (i = 0; i < 8; i++)
9033 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9034 << (i * BITS_PER_UNIT);
9035
9036 *modconst = GEN_INT (imm);
9037 }
9038 else
9039 {
9040 unsigned HOST_WIDE_INT imm = 0;
9041
9042 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9043 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9044
9045 *modconst = GEN_INT (imm);
9046 }
9047 }
9048
9049 return immtype;
9050 #undef CHECK
9051 }
9052
9053 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9054 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9055 float elements), and a modified constant (whatever should be output for a
9056 VMOV) in *MODCONST. */
9057
9058 int
9059 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9060 rtx *modconst, int *elementwidth)
9061 {
9062 rtx tmpconst;
9063 int tmpwidth;
9064 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9065
9066 if (retval == -1)
9067 return 0;
9068
9069 if (modconst)
9070 *modconst = tmpconst;
9071
9072 if (elementwidth)
9073 *elementwidth = tmpwidth;
9074
9075 return 1;
9076 }
9077
9078 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9079 the immediate is valid, write a constant suitable for using as an operand
9080 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9081 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9082
9083 int
9084 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9085 rtx *modconst, int *elementwidth)
9086 {
9087 rtx tmpconst;
9088 int tmpwidth;
9089 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9090
9091 if (retval < 0 || retval > 5)
9092 return 0;
9093
9094 if (modconst)
9095 *modconst = tmpconst;
9096
9097 if (elementwidth)
9098 *elementwidth = tmpwidth;
9099
9100 return 1;
9101 }
9102
9103 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9104 the immediate is valid, write a constant suitable for using as an operand
9105 to VSHR/VSHL to *MODCONST and the corresponding element width to
9106    *ELEMENTWIDTH.  ISLEFTSHIFT indicates whether the shift is a left shift;
9107    left and right shifts have different immediate ranges.  */
9108
9109 int
9110 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9111 rtx *modconst, int *elementwidth,
9112 bool isleftshift)
9113 {
9114 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9115 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9116 unsigned HOST_WIDE_INT last_elt = 0;
9117 unsigned HOST_WIDE_INT maxshift;
9118
9119   /* Extract the shift amount; all elements of the vector must be identical.  */
9120 for (i = 0; i < n_elts; i++)
9121 {
9122 rtx el = CONST_VECTOR_ELT (op, i);
9123 unsigned HOST_WIDE_INT elpart;
9124
9125 if (GET_CODE (el) == CONST_INT)
9126 elpart = INTVAL (el);
9127 else if (GET_CODE (el) == CONST_DOUBLE)
9128 return 0;
9129 else
9130 gcc_unreachable ();
9131
9132 if (i != 0 && elpart != last_elt)
9133 return 0;
9134
9135 last_elt = elpart;
9136 }
9137
9138   /* The legal shift amounts are bounded by the element size.  */
9139 maxshift = innersize * 8;
9140
9141 if (isleftshift)
9142 {
9143 /* Left shift immediate value can be from 0 to <size>-1. */
9144 if (last_elt >= maxshift)
9145 return 0;
9146 }
9147 else
9148 {
9149 /* Right shift immediate value can be from 1 to <size>. */
9150 if (last_elt == 0 || last_elt > maxshift)
9151 return 0;
9152 }
9153
9154 if (elementwidth)
9155 *elementwidth = innersize * 8;
9156
9157 if (modconst)
9158 *modconst = CONST_VECTOR_ELT (op, 0);
9159
9160 return 1;
9161 }
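/* For illustration: with a V8QImode shift-count vector whose elements are
   all 3, INNERSIZE is 1 so MAXSHIFT is 8.  A left shift therefore accepts
   immediates 0..7 and a right shift accepts 1..8; the value 3 is valid for
   either direction and the function returns 1 with *ELEMENTWIDTH == 8 and
   *MODCONST set to (const_int 3).  */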
9162
9163 /* Return a string suitable for output of Neon immediate logic operation
9164 MNEM. */
9165
9166 char *
9167 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9168 int inverse, int quad)
9169 {
9170 int width, is_valid;
9171 static char templ[40];
9172
9173 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9174
9175 gcc_assert (is_valid != 0);
9176
9177 if (quad)
9178 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9179 else
9180 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9181
9182 return templ;
9183 }
9184
9185 /* Return a string suitable for output of Neon immediate shift operation
9186 (VSHR or VSHL) MNEM. */
9187
9188 char *
9189 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9190 enum machine_mode mode, int quad,
9191 bool isleftshift)
9192 {
9193 int width, is_valid;
9194 static char templ[40];
9195
9196 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9197 gcc_assert (is_valid != 0);
9198
9199 if (quad)
9200 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9201 else
9202 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9203
9204 return templ;
9205 }
9206
9207 /* Output a sequence of pairwise operations to implement a reduction.
9208 NOTE: We do "too much work" here, because pairwise operations work on two
9209    registers-worth of operands in one go.  Unfortunately we do not think those
9210    extra calculations can be exploited to do the full operation in fewer steps.
9211 Although all vector elements of the result but the first are ignored, we
9212 actually calculate the same result in each of the elements. An alternative
9213 such as initially loading a vector with zero to use as each of the second
9214 operands would use up an additional register and take an extra instruction,
9215 for no particular gain. */
9216
9217 void
9218 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9219 rtx (*reduc) (rtx, rtx, rtx))
9220 {
9221 enum machine_mode inner = GET_MODE_INNER (mode);
9222 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9223 rtx tmpsum = op1;
9224
9225 for (i = parts / 2; i >= 1; i /= 2)
9226 {
9227 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9228 emit_insn (reduc (dest, tmpsum, tmpsum));
9229 tmpsum = dest;
9230 }
9231 }
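/* For illustration: reducing a V4HImode value with a REDUC callback that
   generates a pairwise add (VPADD) starts with PARTS == 4, so the loop runs
   twice, first into a scratch register and then into OP0.  Because both
   operands of each step are the same register, every lane of OP0 ends up
   holding the sum of all four input lanes, and callers simply read lane 0.  */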
9232
9233 /* If VALS is a vector constant that can be loaded into a register
9234 using VDUP, generate instructions to do so and return an RTX to
9235 assign to the register. Otherwise return NULL_RTX. */
9236
9237 static rtx
9238 neon_vdup_constant (rtx vals)
9239 {
9240 enum machine_mode mode = GET_MODE (vals);
9241 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9242 int n_elts = GET_MODE_NUNITS (mode);
9243 bool all_same = true;
9244 rtx x;
9245 int i;
9246
9247 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9248 return NULL_RTX;
9249
9250 for (i = 0; i < n_elts; ++i)
9251 {
9252 x = XVECEXP (vals, 0, i);
9253 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9254 all_same = false;
9255 }
9256
9257 if (!all_same)
9258 /* The elements are not all the same. We could handle repeating
9259 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9260 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9261 vdup.i16). */
9262 return NULL_RTX;
9263
9264 /* We can load this constant by using VDUP and a constant in a
9265 single ARM register. This will be cheaper than a vector
9266 load. */
9267
9268 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9269 return gen_rtx_VEC_DUPLICATE (mode, x);
9270 }
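/* For illustration: the V4HImode constant {5, 5, 5, 5} has identical
   elements and a two-byte inner mode, so the function emits a move of 5
   into a core register and returns (vec_duplicate:V4HI (reg)), which the
   move patterns output as a MOV followed by VDUP.16.  A constant such as
   {1, 2, 3, 4} fails the all-same test and NULL_RTX is returned instead.  */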
9271
9272 /* Generate code to load VALS, which is a PARALLEL containing only
9273 constants (for vec_init) or CONST_VECTOR, efficiently into a
9274 register. Returns an RTX to copy into the register, or NULL_RTX
9275    for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
9276
9277 rtx
9278 neon_make_constant (rtx vals)
9279 {
9280 enum machine_mode mode = GET_MODE (vals);
9281 rtx target;
9282 rtx const_vec = NULL_RTX;
9283 int n_elts = GET_MODE_NUNITS (mode);
9284 int n_const = 0;
9285 int i;
9286
9287 if (GET_CODE (vals) == CONST_VECTOR)
9288 const_vec = vals;
9289 else if (GET_CODE (vals) == PARALLEL)
9290 {
9291 /* A CONST_VECTOR must contain only CONST_INTs and
9292 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9293 Only store valid constants in a CONST_VECTOR. */
9294 for (i = 0; i < n_elts; ++i)
9295 {
9296 rtx x = XVECEXP (vals, 0, i);
9297 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
9298 n_const++;
9299 }
9300 if (n_const == n_elts)
9301 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9302 }
9303 else
9304 gcc_unreachable ();
9305
9306 if (const_vec != NULL
9307 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9308 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9309 return const_vec;
9310 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9311 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9312 pipeline cycle; creating the constant takes one or two ARM
9313 pipeline cycles. */
9314 return target;
9315 else if (const_vec != NULL_RTX)
9316 /* Load from constant pool. On Cortex-A8 this takes two cycles
9317       (for either double or quad vectors).  We cannot take advantage
9318 of single-cycle VLD1 because we need a PC-relative addressing
9319 mode. */
9320 return const_vec;
9321 else
9322 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9323       We cannot construct an initializer.  */
9324 return NULL_RTX;
9325 }
9326
9327 /* Initialize vector TARGET to VALS. */
9328
9329 void
9330 neon_expand_vector_init (rtx target, rtx vals)
9331 {
9332 enum machine_mode mode = GET_MODE (target);
9333 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9334 int n_elts = GET_MODE_NUNITS (mode);
9335 int n_var = 0, one_var = -1;
9336 bool all_same = true;
9337 rtx x, mem;
9338 int i;
9339
9340 for (i = 0; i < n_elts; ++i)
9341 {
9342 x = XVECEXP (vals, 0, i);
9343 if (!CONSTANT_P (x))
9344 ++n_var, one_var = i;
9345
9346 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9347 all_same = false;
9348 }
9349
9350 if (n_var == 0)
9351 {
9352 rtx constant = neon_make_constant (vals);
9353 if (constant != NULL_RTX)
9354 {
9355 emit_move_insn (target, constant);
9356 return;
9357 }
9358 }
9359
9360 /* Splat a single non-constant element if we can. */
9361 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9362 {
9363 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9364 emit_insn (gen_rtx_SET (VOIDmode, target,
9365 gen_rtx_VEC_DUPLICATE (mode, x)));
9366 return;
9367 }
9368
9369 /* One field is non-constant. Load constant then overwrite varying
9370 field. This is more efficient than using the stack. */
9371 if (n_var == 1)
9372 {
9373 rtx copy = copy_rtx (vals);
9374 rtx index = GEN_INT (one_var);
9375
9376 /* Load constant part of vector, substitute neighboring value for
9377 varying element. */
9378 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9379 neon_expand_vector_init (target, copy);
9380
9381 /* Insert variable. */
9382 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9383 switch (mode)
9384 {
9385 case V8QImode:
9386 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9387 break;
9388 case V16QImode:
9389 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9390 break;
9391 case V4HImode:
9392 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9393 break;
9394 case V8HImode:
9395 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9396 break;
9397 case V2SImode:
9398 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9399 break;
9400 case V4SImode:
9401 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9402 break;
9403 case V2SFmode:
9404 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9405 break;
9406 case V4SFmode:
9407 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9408 break;
9409 case V2DImode:
9410 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9411 break;
9412 default:
9413 gcc_unreachable ();
9414 }
9415 return;
9416 }
9417
9418 /* Construct the vector in memory one field at a time
9419 and load the whole vector. */
9420 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
9421 for (i = 0; i < n_elts; i++)
9422 emit_move_insn (adjust_address_nv (mem, inner_mode,
9423 i * GET_MODE_SIZE (inner_mode)),
9424 XVECEXP (vals, 0, i));
9425 emit_move_insn (target, mem);
9426 }
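/* For illustration: initializing a V4SImode vector from {x, 1, 2, 3},
   where only X is not constant, takes the N_VAR == 1 path above.  The
   constant vector {1, 1, 2, 3} is loaded first (the neighbouring element
   is substituted for the varying slot) and lane 0 is then overwritten with
   X via gen_neon_vset_lanev4si.  Only when several elements vary does the
   code fall back to building the vector on the stack.  */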
9427
9428 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9429 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9430 reported source locations are bogus. */
9431
9432 static void
9433 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9434 const char *err)
9435 {
9436 HOST_WIDE_INT lane;
9437
9438 gcc_assert (GET_CODE (operand) == CONST_INT);
9439
9440 lane = INTVAL (operand);
9441
9442 if (lane < low || lane >= high)
9443 error (err);
9444 }
9445
9446 /* Bounds-check lanes. */
9447
9448 void
9449 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9450 {
9451 bounds_check (operand, low, high, "lane out of range");
9452 }
9453
9454 /* Bounds-check constants. */
9455
9456 void
9457 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9458 {
9459 bounds_check (operand, low, high, "constant out of range");
9460 }
9461
9462 HOST_WIDE_INT
9463 neon_element_bits (enum machine_mode mode)
9464 {
9465 if (mode == DImode)
9466 return GET_MODE_BITSIZE (mode);
9467 else
9468 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9469 }
9470
9471 \f
9472 /* Predicates for `match_operand' and `match_operator'. */
9473
9474 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
9475 int
9476 cirrus_memory_offset (rtx op)
9477 {
9478 /* Reject eliminable registers. */
9479 if (! (reload_in_progress || reload_completed)
9480 && ( reg_mentioned_p (frame_pointer_rtx, op)
9481 || reg_mentioned_p (arg_pointer_rtx, op)
9482 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9483 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9484 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9485 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9486 return 0;
9487
9488 if (GET_CODE (op) == MEM)
9489 {
9490 rtx ind;
9491
9492 ind = XEXP (op, 0);
9493
9494 /* Match: (mem (reg)). */
9495 if (GET_CODE (ind) == REG)
9496 return 1;
9497
9498 /* Match:
9499 (mem (plus (reg)
9500 (const))). */
9501 if (GET_CODE (ind) == PLUS
9502 && GET_CODE (XEXP (ind, 0)) == REG
9503 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9504 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
9505 return 1;
9506 }
9507
9508 return 0;
9509 }
9510
9511 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9512 WB is true if full writeback address modes are allowed and is false
9513 if limited writeback address modes (POST_INC and PRE_DEC) are
9514 allowed. */
9515
9516 int
9517 arm_coproc_mem_operand (rtx op, bool wb)
9518 {
9519 rtx ind;
9520
9521 /* Reject eliminable registers. */
9522 if (! (reload_in_progress || reload_completed)
9523 && ( reg_mentioned_p (frame_pointer_rtx, op)
9524 || reg_mentioned_p (arg_pointer_rtx, op)
9525 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9526 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9527 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9528 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9529 return FALSE;
9530
9531 /* Constants are converted into offsets from labels. */
9532 if (GET_CODE (op) != MEM)
9533 return FALSE;
9534
9535 ind = XEXP (op, 0);
9536
9537 if (reload_completed
9538 && (GET_CODE (ind) == LABEL_REF
9539 || (GET_CODE (ind) == CONST
9540 && GET_CODE (XEXP (ind, 0)) == PLUS
9541 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9542 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9543 return TRUE;
9544
9545 /* Match: (mem (reg)). */
9546 if (GET_CODE (ind) == REG)
9547 return arm_address_register_rtx_p (ind, 0);
9548
9549   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
9550 acceptable in any case (subject to verification by
9551 arm_address_register_rtx_p). We need WB to be true to accept
9552 PRE_INC and POST_DEC. */
9553 if (GET_CODE (ind) == POST_INC
9554 || GET_CODE (ind) == PRE_DEC
9555 || (wb
9556 && (GET_CODE (ind) == PRE_INC
9557 || GET_CODE (ind) == POST_DEC)))
9558 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9559
9560 if (wb
9561 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9562 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9563 && GET_CODE (XEXP (ind, 1)) == PLUS
9564 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9565 ind = XEXP (ind, 1);
9566
9567 /* Match:
9568 (plus (reg)
9569 (const)). */
9570 if (GET_CODE (ind) == PLUS
9571 && GET_CODE (XEXP (ind, 0)) == REG
9572 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9573 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9574 && INTVAL (XEXP (ind, 1)) > -1024
9575 && INTVAL (XEXP (ind, 1)) < 1024
9576 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9577 return TRUE;
9578
9579 return FALSE;
9580 }
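/* For illustration: under the final PLUS check above,
   (mem (plus (reg r4) (const_int 1020))) is accepted, while offsets of
   1024 (out of range) or 2 (not a multiple of four) are rejected; the
   accepted offsets are the word-aligned values in [-1020, 1020], matching
   the coprocessor load/store offset encoding.  */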
9581
9582 /* Return TRUE if OP is a memory operand which we can load or store a vector
9583 to/from. TYPE is one of the following values:
9584      0 - Vector load/store (vldr)
9585 1 - Core registers (ldm)
9586 2 - Element/structure loads (vld1)
9587 */
9588 int
9589 neon_vector_mem_operand (rtx op, int type)
9590 {
9591 rtx ind;
9592
9593 /* Reject eliminable registers. */
9594 if (! (reload_in_progress || reload_completed)
9595 && ( reg_mentioned_p (frame_pointer_rtx, op)
9596 || reg_mentioned_p (arg_pointer_rtx, op)
9597 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9598 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9599 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9600 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9601 return FALSE;
9602
9603 /* Constants are converted into offsets from labels. */
9604 if (GET_CODE (op) != MEM)
9605 return FALSE;
9606
9607 ind = XEXP (op, 0);
9608
9609 if (reload_completed
9610 && (GET_CODE (ind) == LABEL_REF
9611 || (GET_CODE (ind) == CONST
9612 && GET_CODE (XEXP (ind, 0)) == PLUS
9613 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9614 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9615 return TRUE;
9616
9617 /* Match: (mem (reg)). */
9618 if (GET_CODE (ind) == REG)
9619 return arm_address_register_rtx_p (ind, 0);
9620
9621 /* Allow post-increment with Neon registers. */
9622 if ((type != 1 && GET_CODE (ind) == POST_INC)
9623 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9624 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9625
9626 /* FIXME: vld1 allows register post-modify. */
9627
9628 /* Match:
9629 (plus (reg)
9630 (const)). */
9631 if (type == 0
9632 && GET_CODE (ind) == PLUS
9633 && GET_CODE (XEXP (ind, 0)) == REG
9634 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9635 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9636 && INTVAL (XEXP (ind, 1)) > -1024
9637 && INTVAL (XEXP (ind, 1)) < 1016
9638 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9639 return TRUE;
9640
9641 return FALSE;
9642 }
9643
9644 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9645 type. */
9646 int
9647 neon_struct_mem_operand (rtx op)
9648 {
9649 rtx ind;
9650
9651 /* Reject eliminable registers. */
9652 if (! (reload_in_progress || reload_completed)
9653 && ( reg_mentioned_p (frame_pointer_rtx, op)
9654 || reg_mentioned_p (arg_pointer_rtx, op)
9655 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9656 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9657 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9658 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9659 return FALSE;
9660
9661 /* Constants are converted into offsets from labels. */
9662 if (GET_CODE (op) != MEM)
9663 return FALSE;
9664
9665 ind = XEXP (op, 0);
9666
9667 if (reload_completed
9668 && (GET_CODE (ind) == LABEL_REF
9669 || (GET_CODE (ind) == CONST
9670 && GET_CODE (XEXP (ind, 0)) == PLUS
9671 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9672 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9673 return TRUE;
9674
9675 /* Match: (mem (reg)). */
9676 if (GET_CODE (ind) == REG)
9677 return arm_address_register_rtx_p (ind, 0);
9678
9679   /* vldm/vstm allow POST_INC (ia) and PRE_DEC (db).  */
9680 if (GET_CODE (ind) == POST_INC
9681 || GET_CODE (ind) == PRE_DEC)
9682 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9683
9684 return FALSE;
9685 }
9686
9687 /* Return true if X is a register that will be eliminated later on. */
9688 int
9689 arm_eliminable_register (rtx x)
9690 {
9691 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9692 || REGNO (x) == ARG_POINTER_REGNUM
9693 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9694 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9695 }
9696
9697 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
9698 coprocessor registers. Otherwise return NO_REGS. */
9699
9700 enum reg_class
9701 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9702 {
9703 if (mode == HFmode)
9704 {
9705 if (!TARGET_NEON_FP16)
9706 return GENERAL_REGS;
9707 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9708 return NO_REGS;
9709 return GENERAL_REGS;
9710 }
9711
9712 /* The neon move patterns handle all legitimate vector and struct
9713 addresses. */
9714 if (TARGET_NEON
9715 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9716 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9717 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9718 || VALID_NEON_STRUCT_MODE (mode)))
9719 return NO_REGS;
9720
9721 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9722 return NO_REGS;
9723
9724 return GENERAL_REGS;
9725 }
9726
9727 /* Values which must be returned in the most-significant end of the return
9728 register. */
9729
9730 static bool
9731 arm_return_in_msb (const_tree valtype)
9732 {
9733 return (TARGET_AAPCS_BASED
9734 && BYTES_BIG_ENDIAN
9735 && (AGGREGATE_TYPE_P (valtype)
9736 || TREE_CODE (valtype) == COMPLEX_TYPE
9737 || FIXED_POINT_TYPE_P (valtype)));
9738 }
9739
9740 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9741    Used by the Cirrus Maverick code, which has to work around
9742    a hardware bug triggered by such instructions.  */
9743 static bool
9744 arm_memory_load_p (rtx insn)
9745 {
9746   rtx body, lhs, rhs;
9747
9748 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
9749 return false;
9750
9751 body = PATTERN (insn);
9752
9753 if (GET_CODE (body) != SET)
9754 return false;
9755
9756 lhs = XEXP (body, 0);
9757 rhs = XEXP (body, 1);
9758
9759 lhs = REG_OR_SUBREG_RTX (lhs);
9760
9761 /* If the destination is not a general purpose
9762 register we do not have to worry. */
9763 if (GET_CODE (lhs) != REG
9764 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
9765 return false;
9766
9767 /* As well as loads from memory we also have to react
9768 to loads of invalid constants which will be turned
9769 into loads from the minipool. */
9770 return (GET_CODE (rhs) == MEM
9771 || GET_CODE (rhs) == SYMBOL_REF
9772 || note_invalid_constants (insn, -1, false));
9773 }
9774
9775 /* Return TRUE if INSN is a Cirrus instruction. */
9776 static bool
9777 arm_cirrus_insn_p (rtx insn)
9778 {
9779 enum attr_cirrus attr;
9780
9781 /* get_attr cannot accept USE or CLOBBER. */
9782 if (!insn
9783 || GET_CODE (insn) != INSN
9784 || GET_CODE (PATTERN (insn)) == USE
9785 || GET_CODE (PATTERN (insn)) == CLOBBER)
9786 return 0;
9787
9788 attr = get_attr_cirrus (insn);
9789
9790 return attr != CIRRUS_NOT;
9791 }
9792
9793 /* Cirrus reorg for invalid instruction combinations. */
9794 static void
9795 cirrus_reorg (rtx first)
9796 {
9797 enum attr_cirrus attr;
9798 rtx body = PATTERN (first);
9799 rtx t;
9800 int nops;
9801
9802 /* Any branch must be followed by 2 non Cirrus instructions. */
9803 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
9804 {
9805 nops = 0;
9806 t = next_nonnote_insn (first);
9807
9808 if (arm_cirrus_insn_p (t))
9809 ++ nops;
9810
9811 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9812 ++ nops;
9813
9814 while (nops --)
9815 emit_insn_after (gen_nop (), first);
9816
9817 return;
9818 }
9819
9820 /* (float (blah)) is in parallel with a clobber. */
9821 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
9822 body = XVECEXP (body, 0, 0);
9823
9824 if (GET_CODE (body) == SET)
9825 {
9826 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
9827
9828 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9829 be followed by a non Cirrus insn. */
9830 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
9831 {
9832 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
9833 emit_insn_after (gen_nop (), first);
9834
9835 return;
9836 }
9837 else if (arm_memory_load_p (first))
9838 {
9839 unsigned int arm_regno;
9840
9841 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9842 ldr/cfmv64hr combination where the Rd field is the same
9843 in both instructions must be split with a non Cirrus
9844 insn. Example:
9845
9846 ldr r0, blah
9847 nop
9848 cfmvsr mvf0, r0. */
9849
9850 /* Get Arm register number for ldr insn. */
9851 if (GET_CODE (lhs) == REG)
9852 arm_regno = REGNO (lhs);
9853 else
9854 {
9855 gcc_assert (GET_CODE (rhs) == REG);
9856 arm_regno = REGNO (rhs);
9857 }
9858
9859 /* Next insn. */
9860 first = next_nonnote_insn (first);
9861
9862 if (! arm_cirrus_insn_p (first))
9863 return;
9864
9865 body = PATTERN (first);
9866
9867 /* (float (blah)) is in parallel with a clobber. */
9868 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
9869 body = XVECEXP (body, 0, 0);
9870
9871 if (GET_CODE (body) == FLOAT)
9872 body = XEXP (body, 0);
9873
9874 if (get_attr_cirrus (first) == CIRRUS_MOVE
9875 && GET_CODE (XEXP (body, 1)) == REG
9876 && arm_regno == REGNO (XEXP (body, 1)))
9877 emit_insn_after (gen_nop (), first);
9878
9879 return;
9880 }
9881 }
9882
9883 /* get_attr cannot accept USE or CLOBBER. */
9884 if (!first
9885 || GET_CODE (first) != INSN
9886 || GET_CODE (PATTERN (first)) == USE
9887 || GET_CODE (PATTERN (first)) == CLOBBER)
9888 return;
9889
9890 attr = get_attr_cirrus (first);
9891
9892 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9893 must be followed by a non-coprocessor instruction. */
9894 if (attr == CIRRUS_COMPARE)
9895 {
9896 nops = 0;
9897
9898 t = next_nonnote_insn (first);
9899
9900 if (arm_cirrus_insn_p (t))
9901 ++ nops;
9902
9903 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
9904 ++ nops;
9905
9906 while (nops --)
9907 emit_insn_after (gen_nop (), first);
9908
9909 return;
9910 }
9911 }
9912
9913 /* Return TRUE if X references a SYMBOL_REF. */
9914 int
9915 symbol_mentioned_p (rtx x)
9916 {
9917 const char * fmt;
9918 int i;
9919
9920 if (GET_CODE (x) == SYMBOL_REF)
9921 return 1;
9922
9923 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9924 are constant offsets, not symbols. */
9925 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9926 return 0;
9927
9928 fmt = GET_RTX_FORMAT (GET_CODE (x));
9929
9930 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9931 {
9932 if (fmt[i] == 'E')
9933 {
9934 int j;
9935
9936 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9937 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9938 return 1;
9939 }
9940 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9941 return 1;
9942 }
9943
9944 return 0;
9945 }
9946
9947 /* Return TRUE if X references a LABEL_REF. */
9948 int
9949 label_mentioned_p (rtx x)
9950 {
9951 const char * fmt;
9952 int i;
9953
9954 if (GET_CODE (x) == LABEL_REF)
9955 return 1;
9956
9957 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9958 instruction, but they are constant offsets, not symbols. */
9959 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9960 return 0;
9961
9962 fmt = GET_RTX_FORMAT (GET_CODE (x));
9963 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9964 {
9965 if (fmt[i] == 'E')
9966 {
9967 int j;
9968
9969 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9970 if (label_mentioned_p (XVECEXP (x, i, j)))
9971 return 1;
9972 }
9973 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9974 return 1;
9975 }
9976
9977 return 0;
9978 }
9979
9980 int
9981 tls_mentioned_p (rtx x)
9982 {
9983 switch (GET_CODE (x))
9984 {
9985 case CONST:
9986 return tls_mentioned_p (XEXP (x, 0));
9987
9988 case UNSPEC:
9989 if (XINT (x, 1) == UNSPEC_TLS)
9990 return 1;
9991
9992 default:
9993 return 0;
9994 }
9995 }
9996
9997 /* Must not copy any rtx that uses a pc-relative address. */
9998
9999 static int
10000 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
10001 {
10002 if (GET_CODE (*x) == UNSPEC
10003 && (XINT (*x, 1) == UNSPEC_PIC_BASE
10004 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
10005 return 1;
10006 return 0;
10007 }
10008
10009 static bool
10010 arm_cannot_copy_insn_p (rtx insn)
10011 {
10012 /* The tls call insn cannot be copied, as it is paired with a data
10013 word. */
10014 if (recog_memoized (insn) == CODE_FOR_tlscall)
10015 return true;
10016
10017 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
10018 }
10019
10020 enum rtx_code
10021 minmax_code (rtx x)
10022 {
10023 enum rtx_code code = GET_CODE (x);
10024
10025 switch (code)
10026 {
10027 case SMAX:
10028 return GE;
10029 case SMIN:
10030 return LE;
10031 case UMIN:
10032 return LEU;
10033 case UMAX:
10034 return GEU;
10035 default:
10036 gcc_unreachable ();
10037 }
10038 }
10039
10040 /* Match pair of min/max operators that can be implemented via usat/ssat. */
10041
10042 bool
10043 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
10044 int *mask, bool *signed_sat)
10045 {
10046 /* The high bound must be a power of two minus one. */
10047 int log = exact_log2 (INTVAL (hi_bound) + 1);
10048 if (log == -1)
10049 return false;
10050
10051 /* The low bound is either zero (for usat) or one less than the
10052 negation of the high bound (for ssat). */
10053 if (INTVAL (lo_bound) == 0)
10054 {
10055 if (mask)
10056 *mask = log;
10057 if (signed_sat)
10058 *signed_sat = false;
10059
10060 return true;
10061 }
10062
10063 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
10064 {
10065 if (mask)
10066 *mask = log + 1;
10067 if (signed_sat)
10068 *signed_sat = true;
10069
10070 return true;
10071 }
10072
10073 return false;
10074 }
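/* For illustration: the bound pair (0, 255) matches the unsigned form and
   sets *MASK to 8, corresponding to a USAT to 8 bits; the pair (-128, 127)
   matches the signed form and also sets *MASK to 8, corresponding to an
   SSAT to 8 bits.  A pair such as (0, 100) is rejected because 101 is not
   a power of two.  */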
10075
10076 /* Return 1 if memory locations are adjacent. */
10077 int
10078 adjacent_mem_locations (rtx a, rtx b)
10079 {
10080 /* We don't guarantee to preserve the order of these memory refs. */
10081 if (volatile_refs_p (a) || volatile_refs_p (b))
10082 return 0;
10083
10084 if ((GET_CODE (XEXP (a, 0)) == REG
10085 || (GET_CODE (XEXP (a, 0)) == PLUS
10086 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
10087 && (GET_CODE (XEXP (b, 0)) == REG
10088 || (GET_CODE (XEXP (b, 0)) == PLUS
10089 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
10090 {
10091 HOST_WIDE_INT val0 = 0, val1 = 0;
10092 rtx reg0, reg1;
10093 int val_diff;
10094
10095 if (GET_CODE (XEXP (a, 0)) == PLUS)
10096 {
10097 reg0 = XEXP (XEXP (a, 0), 0);
10098 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
10099 }
10100 else
10101 reg0 = XEXP (a, 0);
10102
10103 if (GET_CODE (XEXP (b, 0)) == PLUS)
10104 {
10105 reg1 = XEXP (XEXP (b, 0), 0);
10106 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
10107 }
10108 else
10109 reg1 = XEXP (b, 0);
10110
10111 /* Don't accept any offset that will require multiple
10112 instructions to handle, since this would cause the
10113 arith_adjacentmem pattern to output an overlong sequence. */
10114 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
10115 return 0;
10116
10117 /* Don't allow an eliminable register: register elimination can make
10118 the offset too large. */
10119 if (arm_eliminable_register (reg0))
10120 return 0;
10121
10122 val_diff = val1 - val0;
10123
10124 if (arm_ld_sched)
10125 {
10126 /* If the target has load delay slots, then there's no benefit
10127 to using an ldm instruction unless the offset is zero and
10128 we are optimizing for size. */
10129 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
10130 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
10131 && (val_diff == 4 || val_diff == -4));
10132 }
10133
10134 return ((REGNO (reg0) == REGNO (reg1))
10135 && (val_diff == 4 || val_diff == -4));
10136 }
10137
10138 return 0;
10139 }
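/* For illustration: on a core without load delay slots, the references
   (mem (reg r4)) and (mem (plus (reg r4) (const_int 4))) are adjacent
   (same base register, offsets differing by exactly 4), whereas
   (mem (reg r4)) and (mem (plus (reg r4) (const_int 8))) are not.  */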
10140
10141 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10142 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10143 instruction. ADD_OFFSET is nonzero if the base address register needs
10144 to be modified with an add instruction before we can use it. */
10145
10146 static bool
10147 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10148 int nops, HOST_WIDE_INT add_offset)
10149 {
10150 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10151 if the offset isn't small enough. The reason 2 ldrs are faster
10152 is because these ARMs are able to do more than one cache access
10153 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10154 whilst the ARM8 has a double bandwidth cache. This means that
10155 these cores can do both an instruction fetch and a data fetch in
10156 a single cycle, so the trick of calculating the address into a
10157 scratch register (one of the result regs) and then doing a load
10158 multiple actually becomes slower (and no smaller in code size).
10159 That is the transformation
10160
10161 ldr rd1, [rbase + offset]
10162 ldr rd2, [rbase + offset + 4]
10163
10164 to
10165
10166 add rd1, rbase, offset
10167 ldmia rd1, {rd1, rd2}
10168
10169 produces worse code -- '3 cycles + any stalls on rd2' instead of
10170 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10171 access per cycle, the first sequence could never complete in less
10172 than 6 cycles, whereas the ldm sequence would only take 5 and
10173 would make better use of sequential accesses if not hitting the
10174 cache.
10175
10176 We cheat here and test 'arm_ld_sched' which we currently know to
10177 only be true for the ARM8, ARM9 and StrongARM. If this ever
10178 changes, then the test below needs to be reworked. */
10179 if (nops == 2 && arm_ld_sched && add_offset != 0)
10180 return false;
10181
10182 /* XScale has load-store double instructions, but they have stricter
10183 alignment requirements than load-store multiple, so we cannot
10184 use them.
10185
10186 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10187 the pipeline until completion.
10188
10189 NREGS CYCLES
10190 1 3
10191 2 4
10192 3 5
10193 4 6
10194
10195 An ldr instruction takes 1-3 cycles, but does not block the
10196 pipeline.
10197
10198 NREGS CYCLES
10199 1 1-3
10200 2 2-6
10201 3 3-9
10202 4 4-12
10203
10204 Best case ldr will always win. However, the more ldr instructions
10205 we issue, the less likely we are to be able to schedule them well.
10206 Using ldr instructions also increases code size.
10207
10208 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10209 for counts of 3 or 4 regs. */
10210 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10211 return false;
10212 return true;
10213 }
10214
10215 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10216 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10217 an array ORDER which describes the sequence to use when accessing the
10218 offsets that produces an ascending order. In this sequence, each
10219 offset must be larger by exactly 4 than the previous one. ORDER[0]
10220 must have been filled in with the lowest offset by the caller.
10221 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10222 we use to verify that ORDER produces an ascending order of registers.
10223 Return true if it was possible to construct such an order, false if
10224 not. */
10225
10226 static bool
10227 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10228 int *unsorted_regs)
10229 {
10230 int i;
10231 for (i = 1; i < nops; i++)
10232 {
10233 int j;
10234
10235 order[i] = order[i - 1];
10236 for (j = 0; j < nops; j++)
10237 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10238 {
10239 /* We must find exactly one offset that is higher than the
10240 previous one by 4. */
10241 if (order[i] != order[i - 1])
10242 return false;
10243 order[i] = j;
10244 }
10245 if (order[i] == order[i - 1])
10246 return false;
10247 /* The register numbers must be ascending. */
10248 if (unsorted_regs != NULL
10249 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10250 return false;
10251 }
10252 return true;
10253 }
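/* For illustration: with NOPS == 4 and UNSORTED_OFFSETS == {8, 0, 4, 12},
   the caller sets ORDER[0] to 1 (the index of the lowest offset) and the
   loop above fills in ORDER == {1, 2, 0, 3}, since the offsets 0, 4, 8, 12
   ascend in steps of exactly 4.  With offsets {8, 0, 4, 16} no offset
   equals 8 + 4, so the function returns false.  */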
10254
10255 /* Used to determine in a peephole whether a sequence of load
10256 instructions can be changed into a load-multiple instruction.
10257 NOPS is the number of separate load instructions we are examining. The
10258 first NOPS entries in OPERANDS are the destination registers, the
10259 next NOPS entries are memory operands. If this function is
10260 successful, *BASE is set to the common base register of the memory
10261 accesses; *LOAD_OFFSET is set to the first memory location's offset
10262 from that base register.
10263 REGS is an array filled in with the destination register numbers.
10264    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10265    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
10266 the sequence of registers in REGS matches the loads from ascending memory
10267 locations, and the function verifies that the register numbers are
10268 themselves ascending. If CHECK_REGS is false, the register numbers
10269 are stored in the order they are found in the operands. */
10270 static int
10271 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10272 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10273 {
10274 int unsorted_regs[MAX_LDM_STM_OPS];
10275 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10276 int order[MAX_LDM_STM_OPS];
10277 rtx base_reg_rtx = NULL;
10278 int base_reg = -1;
10279 int i, ldm_case;
10280
10281 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10282 easily extended if required. */
10283 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10284
10285 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10286
10287 /* Loop over the operands and check that the memory references are
10288 suitable (i.e. immediate offsets from the same base register). At
10289 the same time, extract the target register, and the memory
10290 offsets. */
10291 for (i = 0; i < nops; i++)
10292 {
10293 rtx reg;
10294 rtx offset;
10295
10296 /* Convert a subreg of a mem into the mem itself. */
10297 if (GET_CODE (operands[nops + i]) == SUBREG)
10298 operands[nops + i] = alter_subreg (operands + (nops + i));
10299
10300 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10301
10302 /* Don't reorder volatile memory references; it doesn't seem worth
10303 looking for the case where the order is ok anyway. */
10304 if (MEM_VOLATILE_P (operands[nops + i]))
10305 return 0;
10306
10307 offset = const0_rtx;
10308
10309 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10310 || (GET_CODE (reg) == SUBREG
10311 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10312 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10313 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10314 == REG)
10315 || (GET_CODE (reg) == SUBREG
10316 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10317 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10318 == CONST_INT)))
10319 {
10320 if (i == 0)
10321 {
10322 base_reg = REGNO (reg);
10323 base_reg_rtx = reg;
10324 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10325 return 0;
10326 }
10327 else if (base_reg != (int) REGNO (reg))
10328 /* Not addressed from the same base register. */
10329 return 0;
10330
10331 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
10332 ? REGNO (operands[i])
10333 : REGNO (SUBREG_REG (operands[i])));
10334
10335 /* If it isn't an integer register, or if it overwrites the
10336 base register but isn't the last insn in the list, then
10337 we can't do this. */
10338 if (unsorted_regs[i] < 0
10339 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10340 || unsorted_regs[i] > 14
10341 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10342 return 0;
10343
10344 unsorted_offsets[i] = INTVAL (offset);
10345 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10346 order[0] = i;
10347 }
10348 else
10349 /* Not a suitable memory address. */
10350 return 0;
10351 }
10352
10353 /* All the useful information has now been extracted from the
10354 operands into unsorted_regs and unsorted_offsets; additionally,
10355 order[0] has been set to the lowest offset in the list. Sort
10356 the offsets into order, verifying that they are adjacent, and
10357 check that the register numbers are ascending. */
10358 if (!compute_offset_order (nops, unsorted_offsets, order,
10359 check_regs ? unsorted_regs : NULL))
10360 return 0;
10361
10362 if (saved_order)
10363 memcpy (saved_order, order, sizeof order);
10364
10365 if (base)
10366 {
10367 *base = base_reg;
10368
10369 for (i = 0; i < nops; i++)
10370 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10371
10372 *load_offset = unsorted_offsets[order[0]];
10373 }
10374
10375 if (TARGET_THUMB1
10376 && !peep2_reg_dead_p (nops, base_reg_rtx))
10377 return 0;
10378
10379 if (unsorted_offsets[order[0]] == 0)
10380 ldm_case = 1; /* ldmia */
10381 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10382 ldm_case = 2; /* ldmib */
10383 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10384 ldm_case = 3; /* ldmda */
10385 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10386 ldm_case = 4; /* ldmdb */
10387 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10388 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10389 ldm_case = 5;
10390 else
10391 return 0;
10392
10393 if (!multiple_operation_profitable_p (false, nops,
10394 ldm_case == 5
10395 ? unsorted_offsets[order[0]] : 0))
10396 return 0;
10397
10398 return ldm_case;
10399 }
10400
10401 /* Used to determine in a peephole whether a sequence of store instructions can
10402 be changed into a store-multiple instruction.
10403 NOPS is the number of separate store instructions we are examining.
10404 NOPS_TOTAL is the total number of instructions recognized by the peephole
10405 pattern.
10406 The first NOPS entries in OPERANDS are the source registers, the next
10407 NOPS entries are memory operands. If this function is successful, *BASE is
10408 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10409 to the first memory location's offset from that base register. REGS is an
10410 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10411 likewise filled with the corresponding rtx's.
10412 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
10413 numbers to an ascending order of stores.
10414 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10415 from ascending memory locations, and the function verifies that the register
10416 numbers are themselves ascending. If CHECK_REGS is false, the register
10417 numbers are stored in the order they are found in the operands. */
10418 static int
10419 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10420 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10421 HOST_WIDE_INT *load_offset, bool check_regs)
10422 {
10423 int unsorted_regs[MAX_LDM_STM_OPS];
10424 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10425 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10426 int order[MAX_LDM_STM_OPS];
10427 int base_reg = -1;
10428 rtx base_reg_rtx = NULL;
10429 int i, stm_case;
10430
10431 /* Write back of base register is currently only supported for Thumb 1. */
10432 int base_writeback = TARGET_THUMB1;
10433
10434 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10435 easily extended if required. */
10436 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10437
10438 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10439
10440 /* Loop over the operands and check that the memory references are
10441 suitable (i.e. immediate offsets from the same base register). At
10442 the same time, extract the target register, and the memory
10443 offsets. */
10444 for (i = 0; i < nops; i++)
10445 {
10446 rtx reg;
10447 rtx offset;
10448
10449 /* Convert a subreg of a mem into the mem itself. */
10450 if (GET_CODE (operands[nops + i]) == SUBREG)
10451 operands[nops + i] = alter_subreg (operands + (nops + i));
10452
10453 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10454
10455 /* Don't reorder volatile memory references; it doesn't seem worth
10456 looking for the case where the order is ok anyway. */
10457 if (MEM_VOLATILE_P (operands[nops + i]))
10458 return 0;
10459
10460 offset = const0_rtx;
10461
10462 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10463 || (GET_CODE (reg) == SUBREG
10464 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10465 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10466 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10467 == REG)
10468 || (GET_CODE (reg) == SUBREG
10469 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10470 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10471 == CONST_INT)))
10472 {
10473 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10474 ? operands[i] : SUBREG_REG (operands[i]));
10475 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10476
10477 if (i == 0)
10478 {
10479 base_reg = REGNO (reg);
10480 base_reg_rtx = reg;
10481 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10482 return 0;
10483 }
10484 else if (base_reg != (int) REGNO (reg))
10485 /* Not addressed from the same base register. */
10486 return 0;
10487
10488 /* If it isn't an integer register, then we can't do this. */
10489 if (unsorted_regs[i] < 0
10490 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10491 /* The effects are unpredictable if the base register is
10492 both updated and stored. */
10493 || (base_writeback && unsorted_regs[i] == base_reg)
10494 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10495 || unsorted_regs[i] > 14)
10496 return 0;
10497
10498 unsorted_offsets[i] = INTVAL (offset);
10499 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10500 order[0] = i;
10501 }
10502 else
10503 /* Not a suitable memory address. */
10504 return 0;
10505 }
10506
10507 /* All the useful information has now been extracted from the
10508 operands into unsorted_regs and unsorted_offsets; additionally,
10509 order[0] has been set to the lowest offset in the list. Sort
10510 the offsets into order, verifying that they are adjacent, and
10511 check that the register numbers are ascending. */
10512 if (!compute_offset_order (nops, unsorted_offsets, order,
10513 check_regs ? unsorted_regs : NULL))
10514 return 0;
10515
10516 if (saved_order)
10517 memcpy (saved_order, order, sizeof order);
10518
10519 if (base)
10520 {
10521 *base = base_reg;
10522
10523 for (i = 0; i < nops; i++)
10524 {
10525 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10526 if (reg_rtxs)
10527 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10528 }
10529
10530 *load_offset = unsorted_offsets[order[0]];
10531 }
10532
10533 if (TARGET_THUMB1
10534 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10535 return 0;
10536
10537 if (unsorted_offsets[order[0]] == 0)
10538 stm_case = 1; /* stmia */
10539 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10540 stm_case = 2; /* stmib */
10541 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10542 stm_case = 3; /* stmda */
10543 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10544 stm_case = 4; /* stmdb */
10545 else
10546 return 0;
10547
10548 if (!multiple_operation_profitable_p (false, nops, 0))
10549 return 0;
10550
10551 return stm_case;
10552 }
10553 \f
10554 /* Routines for use in generating RTL. */
10555
10556 /* Generate a load-multiple instruction. COUNT is the number of loads in
10557 the instruction; REGS and MEMS are arrays containing the operands.
10558 BASEREG is the base register to be used in addressing the memory operands.
10559 WBACK_OFFSET is nonzero if the instruction should update the base
10560 register. */
10561
10562 static rtx
10563 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10564 HOST_WIDE_INT wback_offset)
10565 {
10566 int i = 0, j;
10567 rtx result;
10568
10569 if (!multiple_operation_profitable_p (false, count, 0))
10570 {
10571 rtx seq;
10572
10573 start_sequence ();
10574
10575 for (i = 0; i < count; i++)
10576 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10577
10578 if (wback_offset != 0)
10579 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10580
10581 seq = get_insns ();
10582 end_sequence ();
10583
10584 return seq;
10585 }
10586
10587 result = gen_rtx_PARALLEL (VOIDmode,
10588 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10589 if (wback_offset != 0)
10590 {
10591 XVECEXP (result, 0, 0)
10592 = gen_rtx_SET (VOIDmode, basereg,
10593 plus_constant (basereg, wback_offset));
10594 i = 1;
10595 count++;
10596 }
10597
10598 for (j = 0; i < count; i++, j++)
10599 XVECEXP (result, 0, i)
10600 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10601
10602 return result;
10603 }
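/* For illustration: with COUNT == 2, REGS == {0, 1} and WBACK_OFFSET == 8,
   the profitable case above builds a PARALLEL of the shape

     (parallel [(set BASEREG (plus BASEREG (const_int 8)))
                (set (reg:SI 0) MEMS[0])
                (set (reg:SI 1) MEMS[1])])

   which the load-multiple patterns can match; the unprofitable case emits
   the equivalent sequence of separate SImode moves instead.  */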
10604
10605 /* Generate a store-multiple instruction. COUNT is the number of stores in
10606 the instruction; REGS and MEMS are arrays containing the operands.
10607 BASEREG is the base register to be used in addressing the memory operands.
10608 WBACK_OFFSET is nonzero if the instruction should update the base
10609 register. */
10610
10611 static rtx
10612 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10613 HOST_WIDE_INT wback_offset)
10614 {
10615 int i = 0, j;
10616 rtx result;
10617
10618 if (GET_CODE (basereg) == PLUS)
10619 basereg = XEXP (basereg, 0);
10620
10621 if (!multiple_operation_profitable_p (false, count, 0))
10622 {
10623 rtx seq;
10624
10625 start_sequence ();
10626
10627 for (i = 0; i < count; i++)
10628 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10629
10630 if (wback_offset != 0)
10631 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10632
10633 seq = get_insns ();
10634 end_sequence ();
10635
10636 return seq;
10637 }
10638
10639 result = gen_rtx_PARALLEL (VOIDmode,
10640 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10641 if (wback_offset != 0)
10642 {
10643 XVECEXP (result, 0, 0)
10644 = gen_rtx_SET (VOIDmode, basereg,
10645 plus_constant (basereg, wback_offset));
10646 i = 1;
10647 count++;
10648 }
10649
10650 for (j = 0; i < count; i++, j++)
10651 XVECEXP (result, 0, i)
10652 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10653
10654 return result;
10655 }
10656
10657 /* Generate either a load-multiple or a store-multiple instruction. This
10658 function can be used in situations where we can start with a single MEM
10659 rtx and adjust its address upwards.
10660 COUNT is the number of operations in the instruction, not counting a
10661 possible update of the base register. REGS is an array containing the
10662 register operands.
10663 BASEREG is the base register to be used in addressing the memory operands,
10664 which are constructed from BASEMEM.
10665 WRITE_BACK specifies whether the generated instruction should include an
10666 update of the base register.
10667 OFFSETP is used to pass an offset to and from this function; this offset
10668 is not used when constructing the address (instead BASEMEM should have an
10669    appropriate offset in its address); it is used only for setting
10670    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
10671
10672 static rtx
10673 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10674 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10675 {
10676 rtx mems[MAX_LDM_STM_OPS];
10677 HOST_WIDE_INT offset = *offsetp;
10678 int i;
10679
10680 gcc_assert (count <= MAX_LDM_STM_OPS);
10681
10682 if (GET_CODE (basereg) == PLUS)
10683 basereg = XEXP (basereg, 0);
10684
10685 for (i = 0; i < count; i++)
10686 {
10687 rtx addr = plus_constant (basereg, i * 4);
10688 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10689 offset += 4;
10690 }
10691
10692 if (write_back)
10693 *offsetp = offset;
10694
10695 if (is_load)
10696 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10697 write_back ? 4 * count : 0);
10698 else
10699 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10700 write_back ? 4 * count : 0);
10701 }
10702
10703 rtx
10704 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10705 rtx basemem, HOST_WIDE_INT *offsetp)
10706 {
10707 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10708 offsetp);
10709 }
10710
10711 rtx
10712 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10713 rtx basemem, HOST_WIDE_INT *offsetp)
10714 {
10715 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10716 offsetp);
10717 }
10718
10719 /* Called from a peephole2 expander to turn a sequence of loads into an
10720 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10721 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10722 is true if we can reorder the registers because they are used commutatively
10723 subsequently.
10724 Returns true iff we could generate a new instruction. */
10725
10726 bool
10727 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10728 {
10729 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10730 rtx mems[MAX_LDM_STM_OPS];
10731 int i, j, base_reg;
10732 rtx base_reg_rtx;
10733 HOST_WIDE_INT offset;
10734 int write_back = FALSE;
10735 int ldm_case;
10736 rtx addr;
10737
10738 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10739 &base_reg, &offset, !sort_regs);
10740
10741 if (ldm_case == 0)
10742 return false;
10743
10744 if (sort_regs)
10745 for (i = 0; i < nops - 1; i++)
10746 for (j = i + 1; j < nops; j++)
10747 if (regs[i] > regs[j])
10748 {
10749 int t = regs[i];
10750 regs[i] = regs[j];
10751 regs[j] = t;
10752 }
10753 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10754
10755 if (TARGET_THUMB1)
10756 {
10757 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10758 gcc_assert (ldm_case == 1 || ldm_case == 5);
10759 write_back = TRUE;
10760 }
10761
10762 if (ldm_case == 5)
10763 {
10764 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10765 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10766 offset = 0;
10767 if (!TARGET_THUMB1)
10768 {
10769 base_reg = regs[0];
10770 base_reg_rtx = newbase;
10771 }
10772 }
10773
10774 for (i = 0; i < nops; i++)
10775 {
10776 addr = plus_constant (base_reg_rtx, offset + i * 4);
10777 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10778 SImode, addr, 0);
10779 }
10780 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10781 write_back ? offset + i * 4 : 0));
10782 return true;
10783 }
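/* For illustration: a peephole2 match of

     ldr r1, [r4]
     ldr r2, [r4, #4]

   reaches this function with NOPS == 2.  load_multiple_sequence reports
   ldm_case 1 with base register r4 and offset 0, and the sequence is
   replaced by a single "ldmia r4, {r1, r2}" (on Thumb-1 the base register
   must be dead and writeback of r4 is added).  */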
10784
10785 /* Called from a peephole2 expander to turn a sequence of stores into an
10786 STM instruction. OPERANDS are the operands found by the peephole matcher;
10787 NOPS indicates how many separate stores we are trying to combine.
10788 Returns true iff we could generate a new instruction. */
10789
10790 bool
10791 gen_stm_seq (rtx *operands, int nops)
10792 {
10793 int i;
10794 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10795 rtx mems[MAX_LDM_STM_OPS];
10796 int base_reg;
10797 rtx base_reg_rtx;
10798 HOST_WIDE_INT offset;
10799 int write_back = FALSE;
10800 int stm_case;
10801 rtx addr;
10802 bool base_reg_dies;
10803
10804 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10805 mem_order, &base_reg, &offset, true);
10806
10807 if (stm_case == 0)
10808 return false;
10809
10810 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10811
10812 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10813 if (TARGET_THUMB1)
10814 {
10815 gcc_assert (base_reg_dies);
10816 write_back = TRUE;
10817 }
10818
10819 if (stm_case == 5)
10820 {
10821 gcc_assert (base_reg_dies);
10822 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10823 offset = 0;
10824 }
10825
10826 addr = plus_constant (base_reg_rtx, offset);
10827
10828 for (i = 0; i < nops; i++)
10829 {
10830 addr = plus_constant (base_reg_rtx, offset + i * 4);
10831 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10832 SImode, addr, 0);
10833 }
10834 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10835 write_back ? offset + i * 4 : 0));
10836 return true;
10837 }
10838
10839 /* Called from a peephole2 expander to turn a sequence of stores that are
10840 preceded by constant loads into an STM instruction. OPERANDS are the
10841 operands found by the peephole matcher; NOPS indicates how many
10842 separate stores we are trying to combine; there are 2 * NOPS
10843 instructions in the peephole.
10844 Returns true iff we could generate a new instruction. */
10845
10846 bool
10847 gen_const_stm_seq (rtx *operands, int nops)
10848 {
10849 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10850 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10851 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10852 rtx mems[MAX_LDM_STM_OPS];
10853 int base_reg;
10854 rtx base_reg_rtx;
10855 HOST_WIDE_INT offset;
10856 int write_back = FALSE;
10857 int stm_case;
10858 rtx addr;
10859 bool base_reg_dies;
10860 int i, j;
10861 HARD_REG_SET allocated;
10862
10863 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10864 mem_order, &base_reg, &offset, false);
10865
10866 if (stm_case == 0)
10867 return false;
10868
10869 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10870
10871 /* If the same register is used more than once, try to find a free
10872 register. */
10873 CLEAR_HARD_REG_SET (allocated);
10874 for (i = 0; i < nops; i++)
10875 {
10876 for (j = i + 1; j < nops; j++)
10877 if (regs[i] == regs[j])
10878 {
10879 rtx t = peep2_find_free_register (0, nops * 2,
10880 TARGET_THUMB1 ? "l" : "r",
10881 SImode, &allocated);
10882 if (t == NULL_RTX)
10883 return false;
10884 reg_rtxs[i] = t;
10885 regs[i] = REGNO (t);
10886 }
10887 }
10888
10889 /* Compute an ordering that maps the register numbers to an ascending
10890 sequence. */
10891 reg_order[0] = 0;
10892 for (i = 0; i < nops; i++)
10893 if (regs[i] < regs[reg_order[0]])
10894 reg_order[0] = i;
10895
10896 for (i = 1; i < nops; i++)
10897 {
10898 int this_order = reg_order[i - 1];
10899 for (j = 0; j < nops; j++)
10900 if (regs[j] > regs[reg_order[i - 1]]
10901 && (this_order == reg_order[i - 1]
10902 || regs[j] < regs[this_order]))
10903 this_order = j;
10904 reg_order[i] = this_order;
10905 }
10906
10907 /* Ensure that registers that must be live after the instruction end
10908 up with the correct value. */
10909 for (i = 0; i < nops; i++)
10910 {
10911 int this_order = reg_order[i];
10912 if ((this_order != mem_order[i]
10913 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10914 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10915 return false;
10916 }
10917
10918 /* Load the constants. */
10919 for (i = 0; i < nops; i++)
10920 {
10921 rtx op = operands[2 * nops + mem_order[i]];
10922 sorted_regs[i] = regs[reg_order[i]];
10923 emit_move_insn (reg_rtxs[reg_order[i]], op);
10924 }
10925
10926 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10927
10928 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10929 if (TARGET_THUMB1)
10930 {
10931 gcc_assert (base_reg_dies);
10932 write_back = TRUE;
10933 }
10934
10935 if (stm_case == 5)
10936 {
10937 gcc_assert (base_reg_dies);
10938 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10939 offset = 0;
10940 }
10941
10942 addr = plus_constant (base_reg_rtx, offset);
10943
10944 for (i = 0; i < nops; i++)
10945 {
10946 addr = plus_constant (base_reg_rtx, offset + i * 4);
10947 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10948 SImode, addr, 0);
10949 }
10950 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10951 write_back ? offset + i * 4 : 0));
10952 return true;
10953 }
10954
10955 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10956 unaligned copies on processors which support unaligned semantics for those
10957 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10958 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10959 An interleave factor of 1 (the minimum) will perform no interleaving.
10960 Load/store multiple are used for aligned addresses where possible. */
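/* A sketch of the expected shape of one copied block, assuming an
   INTERLEAVE_FACTOR of 2 and a source and destination that are both
   unaligned (register names are illustrative):

        ldr     r0, [src]        @ unaligned load
        ldr     r1, [src, #4]
        str     r0, [dst]        @ unaligned store
        str     r1, [dst, #4]

   i.e. the loads are grouped before the stores to help hide load latency.  */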
10961
10962 static void
10963 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10964 HOST_WIDE_INT length,
10965 unsigned int interleave_factor)
10966 {
10967 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10968 int *regnos = XALLOCAVEC (int, interleave_factor);
10969 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
10970 HOST_WIDE_INT i, j;
10971 HOST_WIDE_INT remaining = length, words;
10972 rtx halfword_tmp = NULL, byte_tmp = NULL;
10973 rtx dst, src;
10974 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
10975 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
10976 HOST_WIDE_INT srcoffset, dstoffset;
10977 HOST_WIDE_INT src_autoinc, dst_autoinc;
10978 rtx mem, addr;
10979
10980 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
10981
10982 /* Use hard registers if we have aligned source or destination so we can use
10983 load/store multiple with contiguous registers. */
10984 if (dst_aligned || src_aligned)
10985 for (i = 0; i < interleave_factor; i++)
10986 regs[i] = gen_rtx_REG (SImode, i);
10987 else
10988 for (i = 0; i < interleave_factor; i++)
10989 regs[i] = gen_reg_rtx (SImode);
10990
10991 dst = copy_addr_to_reg (XEXP (dstbase, 0));
10992 src = copy_addr_to_reg (XEXP (srcbase, 0));
10993
10994 srcoffset = dstoffset = 0;
10995
10996 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
10997 For copying the last bytes we want to subtract this offset again. */
10998 src_autoinc = dst_autoinc = 0;
10999
11000 for (i = 0; i < interleave_factor; i++)
11001 regnos[i] = i;
11002
11003 /* Copy BLOCK_SIZE_BYTES chunks. */
11004
11005 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
11006 {
11007 /* Load words. */
11008 if (src_aligned && interleave_factor > 1)
11009 {
11010 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
11011 TRUE, srcbase, &srcoffset));
11012 src_autoinc += UNITS_PER_WORD * interleave_factor;
11013 }
11014 else
11015 {
11016 for (j = 0; j < interleave_factor; j++)
11017 {
11018 addr = plus_constant (src, srcoffset + j * UNITS_PER_WORD
11019 - src_autoinc);
11020 mem = adjust_automodify_address (srcbase, SImode, addr,
11021 srcoffset + j * UNITS_PER_WORD);
11022 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11023 }
11024 srcoffset += block_size_bytes;
11025 }
11026
11027 /* Store words. */
11028 if (dst_aligned && interleave_factor > 1)
11029 {
11030 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
11031 TRUE, dstbase, &dstoffset));
11032 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11033 }
11034 else
11035 {
11036 for (j = 0; j < interleave_factor; j++)
11037 {
11038 addr = plus_constant (dst, dstoffset + j * UNITS_PER_WORD
11039 - dst_autoinc);
11040 mem = adjust_automodify_address (dstbase, SImode, addr,
11041 dstoffset + j * UNITS_PER_WORD);
11042 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11043 }
11044 dstoffset += block_size_bytes;
11045 }
11046
11047 remaining -= block_size_bytes;
11048 }
11049
11050 /* Copy any whole words left (note these aren't interleaved with any
11051 subsequent halfword/byte load/stores in the interests of simplicity). */
11052
11053 words = remaining / UNITS_PER_WORD;
11054
11055 gcc_assert (words < interleave_factor);
11056
11057 if (src_aligned && words > 1)
11058 {
11059 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11060 &srcoffset));
11061 src_autoinc += UNITS_PER_WORD * words;
11062 }
11063 else
11064 {
11065 for (j = 0; j < words; j++)
11066 {
11067 addr = plus_constant (src,
11068 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11069 mem = adjust_automodify_address (srcbase, SImode, addr,
11070 srcoffset + j * UNITS_PER_WORD);
11071 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11072 }
11073 srcoffset += words * UNITS_PER_WORD;
11074 }
11075
11076 if (dst_aligned && words > 1)
11077 {
11078 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11079 &dstoffset));
11080 dst_autoinc += words * UNITS_PER_WORD;
11081 }
11082 else
11083 {
11084 for (j = 0; j < words; j++)
11085 {
11086 addr = plus_constant (dst,
11087 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11088 mem = adjust_automodify_address (dstbase, SImode, addr,
11089 dstoffset + j * UNITS_PER_WORD);
11090 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11091 }
11092 dstoffset += words * UNITS_PER_WORD;
11093 }
11094
11095 remaining -= words * UNITS_PER_WORD;
11096
11097 gcc_assert (remaining < 4);
11098
11099 /* Copy a halfword if necessary. */
11100
11101 if (remaining >= 2)
11102 {
11103 halfword_tmp = gen_reg_rtx (SImode);
11104
11105 addr = plus_constant (src, srcoffset - src_autoinc);
11106 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11107 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11108
11109 /* Either write out immediately, or delay until we've loaded the last
11110 byte, depending on interleave factor. */
11111 if (interleave_factor == 1)
11112 {
11113 addr = plus_constant (dst, dstoffset - dst_autoinc);
11114 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11115 emit_insn (gen_unaligned_storehi (mem,
11116 gen_lowpart (HImode, halfword_tmp)));
11117 halfword_tmp = NULL;
11118 dstoffset += 2;
11119 }
11120
11121 remaining -= 2;
11122 srcoffset += 2;
11123 }
11124
11125 gcc_assert (remaining < 2);
11126
11127 /* Copy last byte. */
11128
11129 if ((remaining & 1) != 0)
11130 {
11131 byte_tmp = gen_reg_rtx (SImode);
11132
11133 addr = plus_constant (src, srcoffset - src_autoinc);
11134 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11135 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11136
11137 if (interleave_factor == 1)
11138 {
11139 addr = plus_constant (dst, dstoffset - dst_autoinc);
11140 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11141 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11142 byte_tmp = NULL;
11143 dstoffset++;
11144 }
11145
11146 remaining--;
11147 srcoffset++;
11148 }
11149
11150 /* Store last halfword if we haven't done so already. */
11151
11152 if (halfword_tmp)
11153 {
11154 addr = plus_constant (dst, dstoffset - dst_autoinc);
11155 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11156 emit_insn (gen_unaligned_storehi (mem,
11157 gen_lowpart (HImode, halfword_tmp)));
11158 dstoffset += 2;
11159 }
11160
11161 /* Likewise for last byte. */
11162
11163 if (byte_tmp)
11164 {
11165 addr = plus_constant (dst, dstoffset - dst_autoinc);
11166 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11167 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11168 dstoffset++;
11169 }
11170
11171 gcc_assert (remaining == 0 && srcoffset == dstoffset);
11172 }
11173
11174 /* From mips_adjust_block_mem:
11175
11176 Helper function for doing a loop-based block operation on memory
11177 reference MEM. Each iteration of the loop will operate on LENGTH
11178 bytes of MEM.
11179
11180 Create a new base register for use within the loop and point it to
11181 the start of MEM. Create a new memory reference that uses this
11182 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11183
11184 static void
11185 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11186 rtx *loop_mem)
11187 {
11188 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11189
11190 /* Although the new mem does not refer to a known location,
11191 it does keep up to LENGTH bytes of alignment. */
11192 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11193 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11194 }
11195
11196 /* From mips_block_move_loop:
11197
11198 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11199 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11200 the memory regions do not overlap. */
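/* Worked example (illustrative): for LENGTH == 100 and BYTES_PER_ITER == 16,
   the loop below copies 96 bytes in six iterations and the remaining 4 bytes
   ("leftover") are handled by a final straight-line call to
   arm_block_move_unaligned_straight.  */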
11201
11202 static void
11203 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11204 unsigned int interleave_factor,
11205 HOST_WIDE_INT bytes_per_iter)
11206 {
11207 rtx label, src_reg, dest_reg, final_src, test;
11208 HOST_WIDE_INT leftover;
11209
11210 leftover = length % bytes_per_iter;
11211 length -= leftover;
11212
11213 /* Create registers and memory references for use within the loop. */
11214 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11215 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11216
11217 /* Calculate the value that SRC_REG should have after the last iteration of
11218 the loop. */
11219 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11220 0, 0, OPTAB_WIDEN);
11221
11222 /* Emit the start of the loop. */
11223 label = gen_label_rtx ();
11224 emit_label (label);
11225
11226 /* Emit the loop body. */
11227 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11228 interleave_factor);
11229
11230 /* Move on to the next block. */
11231 emit_move_insn (src_reg, plus_constant (src_reg, bytes_per_iter));
11232 emit_move_insn (dest_reg, plus_constant (dest_reg, bytes_per_iter));
11233
11234 /* Emit the loop condition. */
11235 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11236 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11237
11238 /* Mop up any left-over bytes. */
11239 if (leftover)
11240 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11241 }
11242
11243 /* Emit a block move when either the source or destination is unaligned (not
11244 aligned to a four-byte boundary). This may need further tuning depending on
11245 core type, optimize_size setting, etc. */
11246
11247 static int
11248 arm_movmemqi_unaligned (rtx *operands)
11249 {
11250 HOST_WIDE_INT length = INTVAL (operands[2]);
11251
11252 if (optimize_size)
11253 {
11254 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11255 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11256 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11257 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11258 or dst_aligned though: allow more interleaving in those cases since the
11259 resulting code can be smaller. */
11260 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11261 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11262
11263 if (length > 12)
11264 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11265 interleave_factor, bytes_per_iter);
11266 else
11267 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11268 interleave_factor);
11269 }
11270 else
11271 {
11272 /* Note that the loop created by arm_block_move_unaligned_loop may be
11273 subject to loop unrolling, which makes tuning this condition a little
11274 redundant. */
11275 if (length > 32)
11276 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11277 else
11278 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11279 }
11280
11281 return 1;
11282 }
11283
11284 int
11285 arm_gen_movmemqi (rtx *operands)
11286 {
11287 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11288 HOST_WIDE_INT srcoffset, dstoffset;
11289 int i;
11290 rtx src, dst, srcbase, dstbase;
11291 rtx part_bytes_reg = NULL;
11292 rtx mem;
11293
11294 if (GET_CODE (operands[2]) != CONST_INT
11295 || GET_CODE (operands[3]) != CONST_INT
11296 || INTVAL (operands[2]) > 64)
11297 return 0;
11298
11299 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11300 return arm_movmemqi_unaligned (operands);
11301
11302 if (INTVAL (operands[3]) & 3)
11303 return 0;
11304
11305 dstbase = operands[0];
11306 srcbase = operands[1];
11307
11308 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11309 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11310
11311 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11312 out_words_to_go = INTVAL (operands[2]) / 4;
11313 last_bytes = INTVAL (operands[2]) & 3;
11314 dstoffset = srcoffset = 0;
11315
11316 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11317 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11318
11319 for (i = 0; in_words_to_go >= 2; i += 4)
11320 {
11321 if (in_words_to_go > 4)
11322 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11323 TRUE, srcbase, &srcoffset));
11324 else
11325 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11326 src, FALSE, srcbase,
11327 &srcoffset));
11328
11329 if (out_words_to_go)
11330 {
11331 if (out_words_to_go > 4)
11332 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11333 TRUE, dstbase, &dstoffset));
11334 else if (out_words_to_go != 1)
11335 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11336 out_words_to_go, dst,
11337 (last_bytes == 0
11338 ? FALSE : TRUE),
11339 dstbase, &dstoffset));
11340 else
11341 {
11342 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11343 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11344 if (last_bytes != 0)
11345 {
11346 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11347 dstoffset += 4;
11348 }
11349 }
11350 }
11351
11352 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11353 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11354 }
11355
11356 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11357 if (out_words_to_go)
11358 {
11359 rtx sreg;
11360
11361 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11362 sreg = copy_to_reg (mem);
11363
11364 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11365 emit_move_insn (mem, sreg);
11366 in_words_to_go--;
11367
11368 gcc_assert (!in_words_to_go); /* Sanity check */
11369 }
11370
11371 if (in_words_to_go)
11372 {
11373 gcc_assert (in_words_to_go > 0);
11374
11375 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11376 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11377 }
11378
11379 gcc_assert (!last_bytes || part_bytes_reg);
11380
11381 if (BYTES_BIG_ENDIAN && last_bytes)
11382 {
11383 rtx tmp = gen_reg_rtx (SImode);
11384
11385 /* The bytes we want are in the top end of the word. */
11386 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11387 GEN_INT (8 * (4 - last_bytes))));
11388 part_bytes_reg = tmp;
11389
11390 while (last_bytes)
11391 {
11392 mem = adjust_automodify_address (dstbase, QImode,
11393 plus_constant (dst, last_bytes - 1),
11394 dstoffset + last_bytes - 1);
11395 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11396
11397 if (--last_bytes)
11398 {
11399 tmp = gen_reg_rtx (SImode);
11400 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11401 part_bytes_reg = tmp;
11402 }
11403 }
11404
11405 }
11406 else
11407 {
11408 if (last_bytes > 1)
11409 {
11410 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11411 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11412 last_bytes -= 2;
11413 if (last_bytes)
11414 {
11415 rtx tmp = gen_reg_rtx (SImode);
11416 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11417 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11418 part_bytes_reg = tmp;
11419 dstoffset += 2;
11420 }
11421 }
11422
11423 if (last_bytes)
11424 {
11425 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11426 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11427 }
11428 }
11429
11430 return 1;
11431 }
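/* Illustrative sketch (not part of this file): the "movmemqi" expander in
   the machine description is expected to invoke the routine above roughly as

        if (arm_gen_movmemqi (operands))
          DONE;
        FAIL;

   where operands[0] and operands[1] are the destination and source BLKmode
   MEMs, operands[2] is the length and operands[3] the alignment, matching
   the checks at the top of arm_gen_movmemqi.  */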
11432
11433 /* Select a dominance comparison mode if possible for a test of the general
11434 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11435 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11436 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11437 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11438 In all cases OP will be either EQ or NE, but we don't need to know which
11439 here. If we are unable to support a dominance comparison we return
11440 CC mode. This will then fail to match for the RTL expressions that
11441 generate this call. */
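/* For example (following the switch below): for (a == b) || (c >= d),
   i.e. COND_OR == DOM_CC_X_OR_Y with cond1 == EQ and cond2 == GE, EQ
   dominates GE, so CC_DGEmode is returned and the combined test can be
   checked with a single GE branch on the resulting flags.  */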
11442 enum machine_mode
11443 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11444 {
11445 enum rtx_code cond1, cond2;
11446 int swapped = 0;
11447
11448 /* Currently we will probably get the wrong result if the individual
11449 comparisons are not simple. This also ensures that it is safe to
11450 reverse a comparison if necessary. */
11451 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11452 != CCmode)
11453 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11454 != CCmode))
11455 return CCmode;
11456
11457 /* The if_then_else variant of this tests the second condition if the
11458 first passes, but is true if the first fails. Reverse the first
11459 condition to get a true "inclusive-or" expression. */
11460 if (cond_or == DOM_CC_NX_OR_Y)
11461 cond1 = reverse_condition (cond1);
11462
11463 /* If the comparisons are not equal, and one doesn't dominate the other,
11464 then we can't do this. */
11465 if (cond1 != cond2
11466 && !comparison_dominates_p (cond1, cond2)
11467 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11468 return CCmode;
11469
11470 if (swapped)
11471 {
11472 enum rtx_code temp = cond1;
11473 cond1 = cond2;
11474 cond2 = temp;
11475 }
11476
11477 switch (cond1)
11478 {
11479 case EQ:
11480 if (cond_or == DOM_CC_X_AND_Y)
11481 return CC_DEQmode;
11482
11483 switch (cond2)
11484 {
11485 case EQ: return CC_DEQmode;
11486 case LE: return CC_DLEmode;
11487 case LEU: return CC_DLEUmode;
11488 case GE: return CC_DGEmode;
11489 case GEU: return CC_DGEUmode;
11490 default: gcc_unreachable ();
11491 }
11492
11493 case LT:
11494 if (cond_or == DOM_CC_X_AND_Y)
11495 return CC_DLTmode;
11496
11497 switch (cond2)
11498 {
11499 case LT:
11500 return CC_DLTmode;
11501 case LE:
11502 return CC_DLEmode;
11503 case NE:
11504 return CC_DNEmode;
11505 default:
11506 gcc_unreachable ();
11507 }
11508
11509 case GT:
11510 if (cond_or == DOM_CC_X_AND_Y)
11511 return CC_DGTmode;
11512
11513 switch (cond2)
11514 {
11515 case GT:
11516 return CC_DGTmode;
11517 case GE:
11518 return CC_DGEmode;
11519 case NE:
11520 return CC_DNEmode;
11521 default:
11522 gcc_unreachable ();
11523 }
11524
11525 case LTU:
11526 if (cond_or == DOM_CC_X_AND_Y)
11527 return CC_DLTUmode;
11528
11529 switch (cond2)
11530 {
11531 case LTU:
11532 return CC_DLTUmode;
11533 case LEU:
11534 return CC_DLEUmode;
11535 case NE:
11536 return CC_DNEmode;
11537 default:
11538 gcc_unreachable ();
11539 }
11540
11541 case GTU:
11542 if (cond_or == DOM_CC_X_AND_Y)
11543 return CC_DGTUmode;
11544
11545 switch (cond2)
11546 {
11547 case GTU:
11548 return CC_DGTUmode;
11549 case GEU:
11550 return CC_DGEUmode;
11551 case NE:
11552 return CC_DNEmode;
11553 default:
11554 gcc_unreachable ();
11555 }
11556
11557 /* The remaining cases only occur when both comparisons are the
11558 same. */
11559 case NE:
11560 gcc_assert (cond1 == cond2);
11561 return CC_DNEmode;
11562
11563 case LE:
11564 gcc_assert (cond1 == cond2);
11565 return CC_DLEmode;
11566
11567 case GE:
11568 gcc_assert (cond1 == cond2);
11569 return CC_DGEmode;
11570
11571 case LEU:
11572 gcc_assert (cond1 == cond2);
11573 return CC_DLEUmode;
11574
11575 case GEU:
11576 gcc_assert (cond1 == cond2);
11577 return CC_DGEUmode;
11578
11579 default:
11580 gcc_unreachable ();
11581 }
11582 }
11583
11584 enum machine_mode
11585 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11586 {
11587 /* All floating point compares return CCFP if it is an equality
11588 comparison, and CCFPE otherwise. */
11589 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11590 {
11591 switch (op)
11592 {
11593 case EQ:
11594 case NE:
11595 case UNORDERED:
11596 case ORDERED:
11597 case UNLT:
11598 case UNLE:
11599 case UNGT:
11600 case UNGE:
11601 case UNEQ:
11602 case LTGT:
11603 return CCFPmode;
11604
11605 case LT:
11606 case LE:
11607 case GT:
11608 case GE:
11609 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
11610 return CCFPmode;
11611 return CCFPEmode;
11612
11613 default:
11614 gcc_unreachable ();
11615 }
11616 }
11617
11618 /* A compare with a shifted operand. Because of canonicalization, the
11619 comparison will have to be swapped when we emit the assembler. */
11620 if (GET_MODE (y) == SImode
11621 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11622 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11623 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11624 || GET_CODE (x) == ROTATERT))
11625 return CC_SWPmode;
11626
11627 /* This operation is performed swapped, but since we only rely on the Z
11628 flag we don't need an additional mode. */
11629 if (GET_MODE (y) == SImode
11630 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11631 && GET_CODE (x) == NEG
11632 && (op == EQ || op == NE))
11633 return CC_Zmode;
11634
11635 /* This is a special case that is used by combine to allow a
11636 comparison of a shifted byte load to be split into a zero-extend
11637 followed by a comparison of the shifted integer (only valid for
11638 equalities and unsigned inequalities). */
11639 if (GET_MODE (x) == SImode
11640 && GET_CODE (x) == ASHIFT
11641 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
11642 && GET_CODE (XEXP (x, 0)) == SUBREG
11643 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
11644 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11645 && (op == EQ || op == NE
11646 || op == GEU || op == GTU || op == LTU || op == LEU)
11647 && GET_CODE (y) == CONST_INT)
11648 return CC_Zmode;
11649
11650 /* A construct for a conditional compare. If the false arm contains
11651 0, then both conditions must be true; otherwise either condition
11652 must be true. Not all conditions are possible, so CCmode is
11653 returned if it can't be done. */
11654 if (GET_CODE (x) == IF_THEN_ELSE
11655 && (XEXP (x, 2) == const0_rtx
11656 || XEXP (x, 2) == const1_rtx)
11657 && COMPARISON_P (XEXP (x, 0))
11658 && COMPARISON_P (XEXP (x, 1)))
11659 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11660 INTVAL (XEXP (x, 2)));
11661
11662 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11663 if (GET_CODE (x) == AND
11664 && (op == EQ || op == NE)
11665 && COMPARISON_P (XEXP (x, 0))
11666 && COMPARISON_P (XEXP (x, 1)))
11667 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11668 DOM_CC_X_AND_Y);
11669
11670 if (GET_CODE (x) == IOR
11671 && (op == EQ || op == NE)
11672 && COMPARISON_P (XEXP (x, 0))
11673 && COMPARISON_P (XEXP (x, 1)))
11674 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11675 DOM_CC_X_OR_Y);
11676
11677 /* An operation (on Thumb) where we want to test for a single bit.
11678 This is done by shifting that bit up into the top bit of a
11679 scratch register; we can then branch on the sign bit. */
11680 if (TARGET_THUMB1
11681 && GET_MODE (x) == SImode
11682 && (op == EQ || op == NE)
11683 && GET_CODE (x) == ZERO_EXTRACT
11684 && XEXP (x, 1) == const1_rtx)
11685 return CC_Nmode;
11686
11687 /* An operation that sets the condition codes as a side-effect; the
11688 V flag is not set correctly, so we can only use comparisons where
11689 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11690 instead.) */
11691 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11692 if (GET_MODE (x) == SImode
11693 && y == const0_rtx
11694 && (op == EQ || op == NE || op == LT || op == GE)
11695 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11696 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11697 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11698 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11699 || GET_CODE (x) == LSHIFTRT
11700 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11701 || GET_CODE (x) == ROTATERT
11702 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11703 return CC_NOOVmode;
11704
11705 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11706 return CC_Zmode;
11707
11708 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11709 && GET_CODE (x) == PLUS
11710 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11711 return CC_Cmode;
11712
11713 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11714 {
11715 /* To keep things simple, always use the Cirrus cfcmp64 if it is
11716 available. */
11717 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
11718 return CCmode;
11719
11720 switch (op)
11721 {
11722 case EQ:
11723 case NE:
11724 /* A DImode comparison against zero can be implemented by
11725 or'ing the two halves together. */
11726 if (y == const0_rtx)
11727 return CC_Zmode;
11728
11729 /* We can do an equality test in three Thumb instructions. */
11730 if (!TARGET_32BIT)
11731 return CC_Zmode;
11732
11733 /* FALLTHROUGH */
11734
11735 case LTU:
11736 case LEU:
11737 case GTU:
11738 case GEU:
11739 /* DImode unsigned comparisons can be implemented by cmp +
11740 cmpeq without a scratch register. Not worth doing in
11741 Thumb-2. */
11742 if (TARGET_32BIT)
11743 return CC_CZmode;
11744
11745 /* FALLTHROUGH */
11746
11747 case LT:
11748 case LE:
11749 case GT:
11750 case GE:
11751 /* DImode signed and unsigned comparisons can be implemented
11752 by cmp + sbcs with a scratch register, but that does not
11753 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11754 gcc_assert (op != EQ && op != NE);
11755 return CC_NCVmode;
11756
11757 default:
11758 gcc_unreachable ();
11759 }
11760 }
11761
11762 return CCmode;
11763 }
11764
11765 /* X and Y are two things to compare using CODE. Emit the compare insn and
11766 return the rtx for the CC register in the proper mode. SCRATCH supplies
11767 the SImode scratch needed for DImode comparisons after reload. */
11768 rtx
11769 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
11770 {
11771 enum machine_mode mode;
11772 rtx cc_reg;
11773 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11774
11775 /* We might have X as a constant, Y as a register because of the predicates
11776 used for cmpdi. If so, force X to a register here. */
11777 if (dimode_comparison && !REG_P (x))
11778 x = force_reg (DImode, x);
11779
11780 mode = SELECT_CC_MODE (code, x, y);
11781 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11782
11783 if (dimode_comparison
11784 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
11785 && mode != CC_CZmode)
11786 {
11787 rtx clobber, set;
11788
11789 /* To compare two non-zero values for equality, XOR them and
11790 then compare against zero. Not used for ARM mode; there
11791 CC_CZmode is cheaper. */
11792 if (mode == CC_Zmode && y != const0_rtx)
11793 {
11794 gcc_assert (!reload_completed);
11795 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11796 y = const0_rtx;
11797 }
11798
11799 /* A scratch register is required. */
11800 if (reload_completed)
11801 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
11802 else
11803 scratch = gen_rtx_SCRATCH (SImode);
11804
11805 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11806 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11807 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11808 }
11809 else
11810 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11811
11812 return cc_reg;
11813 }
11814
11815 /* Generate a sequence of insns that will generate the correct return
11816 address mask depending on the physical architecture that the program
11817 is running on. */
11818 rtx
11819 arm_gen_return_addr_mask (void)
11820 {
11821 rtx reg = gen_reg_rtx (Pmode);
11822
11823 emit_insn (gen_return_addr_mask (reg));
11824 return reg;
11825 }
11826
11827 void
11828 arm_reload_in_hi (rtx *operands)
11829 {
11830 rtx ref = operands[1];
11831 rtx base, scratch;
11832 HOST_WIDE_INT offset = 0;
11833
11834 if (GET_CODE (ref) == SUBREG)
11835 {
11836 offset = SUBREG_BYTE (ref);
11837 ref = SUBREG_REG (ref);
11838 }
11839
11840 if (GET_CODE (ref) == REG)
11841 {
11842 /* We have a pseudo which has been spilt onto the stack; there
11843 are two cases here: the first where there is a simple
11844 stack-slot replacement and a second where the stack-slot is
11845 out of range, or is used as a subreg. */
11846 if (reg_equiv_mem (REGNO (ref)))
11847 {
11848 ref = reg_equiv_mem (REGNO (ref));
11849 base = find_replacement (&XEXP (ref, 0));
11850 }
11851 else
11852 /* The slot is out of range, or was dressed up in a SUBREG. */
11853 base = reg_equiv_address (REGNO (ref));
11854 }
11855 else
11856 base = find_replacement (&XEXP (ref, 0));
11857
11858 /* Handle the case where the address is too complex to be offset by 1. */
11859 if (GET_CODE (base) == MINUS
11860 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11861 {
11862 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11863
11864 emit_set_insn (base_plus, base);
11865 base = base_plus;
11866 }
11867 else if (GET_CODE (base) == PLUS)
11868 {
11869 /* The addend must be CONST_INT, or we would have dealt with it above. */
11870 HOST_WIDE_INT hi, lo;
11871
11872 offset += INTVAL (XEXP (base, 1));
11873 base = XEXP (base, 0);
11874
11875 /* Rework the address into a legal sequence of insns. */
11876 /* Valid range for lo is -4095 -> 4095 */
11877 lo = (offset >= 0
11878 ? (offset & 0xfff)
11879 : -((-offset) & 0xfff));
11880
11881 /* Corner case: if lo is the max offset, then we would be out of range
11882 once we have added the additional 1 below, so bump the msb into the
11883 pre-loading insn(s). */
11884 if (lo == 4095)
11885 lo &= 0x7ff;
11886
11887 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11888 ^ (HOST_WIDE_INT) 0x80000000)
11889 - (HOST_WIDE_INT) 0x80000000);
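/* Worked example (illustrative): for offset == 0x1234 this split gives
   lo == 0x234 and hi == 0x1000; for offset == -0x1234 it gives
   lo == -0x234 and hi == -0x1000. In both cases hi + lo == offset and
   lo stays within the +/-4095 immediate range.  */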
11890
11891 gcc_assert (hi + lo == offset);
11892
11893 if (hi != 0)
11894 {
11895 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11896
11897 /* Get the base address; addsi3 knows how to handle constants
11898 that require more than one insn. */
11899 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11900 base = base_plus;
11901 offset = lo;
11902 }
11903 }
11904
11905 /* Operands[2] may overlap operands[0] (though it won't overlap
11906 operands[1]); that's why we asked for a DImode reg -- so we can
11907 use the half that does not overlap. */
11908 if (REGNO (operands[2]) == REGNO (operands[0]))
11909 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11910 else
11911 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11912
11913 emit_insn (gen_zero_extendqisi2 (scratch,
11914 gen_rtx_MEM (QImode,
11915 plus_constant (base,
11916 offset))));
11917 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11918 gen_rtx_MEM (QImode,
11919 plus_constant (base,
11920 offset + 1))));
11921 if (!BYTES_BIG_ENDIAN)
11922 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11923 gen_rtx_IOR (SImode,
11924 gen_rtx_ASHIFT
11925 (SImode,
11926 gen_rtx_SUBREG (SImode, operands[0], 0),
11927 GEN_INT (8)),
11928 scratch));
11929 else
11930 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11931 gen_rtx_IOR (SImode,
11932 gen_rtx_ASHIFT (SImode, scratch,
11933 GEN_INT (8)),
11934 gen_rtx_SUBREG (SImode, operands[0], 0)));
11935 }
11936
11937 /* Handle storing a half-word to memory during reload by synthesizing it as two
11938 byte stores. Take care not to clobber the input values until after we
11939 have moved them somewhere safe. This code assumes that if the DImode
11940 scratch in operands[2] overlaps either the input value or output address
11941 in some way, then that value must die in this insn (we absolutely need
11942 two scratch registers for some corner cases). */
11943 void
11944 arm_reload_out_hi (rtx *operands)
11945 {
11946 rtx ref = operands[0];
11947 rtx outval = operands[1];
11948 rtx base, scratch;
11949 HOST_WIDE_INT offset = 0;
11950
11951 if (GET_CODE (ref) == SUBREG)
11952 {
11953 offset = SUBREG_BYTE (ref);
11954 ref = SUBREG_REG (ref);
11955 }
11956
11957 if (GET_CODE (ref) == REG)
11958 {
11959 /* We have a pseudo which has been spilt onto the stack; there
11960 are two cases here: the first where there is a simple
11961 stack-slot replacement and a second where the stack-slot is
11962 out of range, or is used as a subreg. */
11963 if (reg_equiv_mem (REGNO (ref)))
11964 {
11965 ref = reg_equiv_mem (REGNO (ref));
11966 base = find_replacement (&XEXP (ref, 0));
11967 }
11968 else
11969 /* The slot is out of range, or was dressed up in a SUBREG. */
11970 base = reg_equiv_address (REGNO (ref));
11971 }
11972 else
11973 base = find_replacement (&XEXP (ref, 0));
11974
11975 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11976
11977 /* Handle the case where the address is too complex to be offset by 1. */
11978 if (GET_CODE (base) == MINUS
11979 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11980 {
11981 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11982
11983 /* Be careful not to destroy OUTVAL. */
11984 if (reg_overlap_mentioned_p (base_plus, outval))
11985 {
11986 /* Updating base_plus might destroy outval, see if we can
11987 swap the scratch and base_plus. */
11988 if (!reg_overlap_mentioned_p (scratch, outval))
11989 {
11990 rtx tmp = scratch;
11991 scratch = base_plus;
11992 base_plus = tmp;
11993 }
11994 else
11995 {
11996 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11997
11998 /* Be conservative and copy OUTVAL into the scratch now,
11999 this should only be necessary if outval is a subreg
12000 of something larger than a word. */
12001 /* XXX Might this clobber base? I can't see how it can,
12002 since scratch is known to overlap with OUTVAL, and
12003 must be wider than a word. */
12004 emit_insn (gen_movhi (scratch_hi, outval));
12005 outval = scratch_hi;
12006 }
12007 }
12008
12009 emit_set_insn (base_plus, base);
12010 base = base_plus;
12011 }
12012 else if (GET_CODE (base) == PLUS)
12013 {
12014 /* The addend must be CONST_INT, or we would have dealt with it above. */
12015 HOST_WIDE_INT hi, lo;
12016
12017 offset += INTVAL (XEXP (base, 1));
12018 base = XEXP (base, 0);
12019
12020 /* Rework the address into a legal sequence of insns. */
12021 /* Valid range for lo is -4095 -> 4095 */
12022 lo = (offset >= 0
12023 ? (offset & 0xfff)
12024 : -((-offset) & 0xfff));
12025
12026 /* Corner case: if lo is the max offset, then we would be out of range
12027 once we have added the additional 1 below, so bump the msb into the
12028 pre-loading insn(s). */
12029 if (lo == 4095)
12030 lo &= 0x7ff;
12031
12032 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12033 ^ (HOST_WIDE_INT) 0x80000000)
12034 - (HOST_WIDE_INT) 0x80000000);
12035
12036 gcc_assert (hi + lo == offset);
12037
12038 if (hi != 0)
12039 {
12040 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12041
12042 /* Be careful not to destroy OUTVAL. */
12043 if (reg_overlap_mentioned_p (base_plus, outval))
12044 {
12045 /* Updating base_plus might destroy outval, see if we
12046 can swap the scratch and base_plus. */
12047 if (!reg_overlap_mentioned_p (scratch, outval))
12048 {
12049 rtx tmp = scratch;
12050 scratch = base_plus;
12051 base_plus = tmp;
12052 }
12053 else
12054 {
12055 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12056
12057 /* Be conservative and copy outval into scratch now,
12058 this should only be necessary if outval is a
12059 subreg of something larger than a word. */
12060 /* XXX Might this clobber base? I can't see how it
12061 can, since scratch is known to overlap with
12062 outval. */
12063 emit_insn (gen_movhi (scratch_hi, outval));
12064 outval = scratch_hi;
12065 }
12066 }
12067
12068 /* Get the base address; addsi3 knows how to handle constants
12069 that require more than one insn. */
12070 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12071 base = base_plus;
12072 offset = lo;
12073 }
12074 }
12075
12076 if (BYTES_BIG_ENDIAN)
12077 {
12078 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12079 plus_constant (base, offset + 1)),
12080 gen_lowpart (QImode, outval)));
12081 emit_insn (gen_lshrsi3 (scratch,
12082 gen_rtx_SUBREG (SImode, outval, 0),
12083 GEN_INT (8)));
12084 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
12085 gen_lowpart (QImode, scratch)));
12086 }
12087 else
12088 {
12089 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
12090 gen_lowpart (QImode, outval)));
12091 emit_insn (gen_lshrsi3 (scratch,
12092 gen_rtx_SUBREG (SImode, outval, 0),
12093 GEN_INT (8)));
12094 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12095 plus_constant (base, offset + 1)),
12096 gen_lowpart (QImode, scratch)));
12097 }
12098 }
12099
12100 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12101 (padded to the size of a word) should be passed in a register. */
12102
12103 static bool
12104 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12105 {
12106 if (TARGET_AAPCS_BASED)
12107 return must_pass_in_stack_var_size (mode, type);
12108 else
12109 return must_pass_in_stack_var_size_or_pad (mode, type);
12110 }
12111
12112
12113 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12114 Return true if an argument passed on the stack should be padded upwards,
12115 i.e. if the least-significant byte has useful data.
12116 For legacy APCS ABIs we use the default. For AAPCS-based ABIs small
12117 aggregate types are placed at the lowest memory address. */
12118
12119 bool
12120 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12121 {
12122 if (!TARGET_AAPCS_BASED)
12123 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12124
12125 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12126 return false;
12127
12128 return true;
12129 }
12130
12131
12132 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12133 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12134 register has useful data, and return the opposite if the most
12135 significant byte does. */
12136
12137 bool
12138 arm_pad_reg_upward (enum machine_mode mode,
12139 tree type, int first ATTRIBUTE_UNUSED)
12140 {
12141 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12142 {
12143 /* For AAPCS, small aggregates, small fixed-point types,
12144 and small complex types are always padded upwards. */
12145 if (type)
12146 {
12147 if ((AGGREGATE_TYPE_P (type)
12148 || TREE_CODE (type) == COMPLEX_TYPE
12149 || FIXED_POINT_TYPE_P (type))
12150 && int_size_in_bytes (type) <= 4)
12151 return true;
12152 }
12153 else
12154 {
12155 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12156 && GET_MODE_SIZE (mode) <= 4)
12157 return true;
12158 }
12159 }
12160
12161 /* Otherwise, use default padding. */
12162 return !BYTES_BIG_ENDIAN;
12163 }
12164
12165 \f
12166 /* Print a symbolic form of X to the debug file, F. */
12167 static void
12168 arm_print_value (FILE *f, rtx x)
12169 {
12170 switch (GET_CODE (x))
12171 {
12172 case CONST_INT:
12173 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12174 return;
12175
12176 case CONST_DOUBLE:
12177 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12178 return;
12179
12180 case CONST_VECTOR:
12181 {
12182 int i;
12183
12184 fprintf (f, "<");
12185 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12186 {
12187 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12188 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12189 fputc (',', f);
12190 }
12191 fprintf (f, ">");
12192 }
12193 return;
12194
12195 case CONST_STRING:
12196 fprintf (f, "\"%s\"", XSTR (x, 0));
12197 return;
12198
12199 case SYMBOL_REF:
12200 fprintf (f, "`%s'", XSTR (x, 0));
12201 return;
12202
12203 case LABEL_REF:
12204 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12205 return;
12206
12207 case CONST:
12208 arm_print_value (f, XEXP (x, 0));
12209 return;
12210
12211 case PLUS:
12212 arm_print_value (f, XEXP (x, 0));
12213 fprintf (f, "+");
12214 arm_print_value (f, XEXP (x, 1));
12215 return;
12216
12217 case PC:
12218 fprintf (f, "pc");
12219 return;
12220
12221 default:
12222 fprintf (f, "????");
12223 return;
12224 }
12225 }
12226 \f
12227 /* Routines for manipulation of the constant pool. */
12228
12229 /* Arm instructions cannot load a large constant directly into a
12230 register; they have to come from a pc relative load. The constant
12231 must therefore be placed in the addressable range of the pc
12232 relative load. Depending on the precise pc relative load
12233 instruction the range is somewhere between 256 bytes and 4k. This
12234 means that we often have to dump a constant inside a function, and
12235 generate code to branch around it.
12236
12237 It is important to minimize this, since the branches will slow
12238 things down and make the code larger.
12239
12240 Normally we can hide the table after an existing unconditional
12241 branch so that there is no interruption of the flow, but in the
12242 worst case the code looks like this:
12243
12244 ldr rn, L1
12245 ...
12246 b L2
12247 align
12248 L1: .long value
12249 L2:
12250 ...
12251
12252 ldr rn, L3
12253 ...
12254 b L4
12255 align
12256 L3: .long value
12257 L4:
12258 ...
12259
12260 We fix this by performing a scan after scheduling, which notices
12261 which instructions need to have their operands fetched from the
12262 constant table and builds the table.
12263
12264 The algorithm starts by building a table of all the constants that
12265 need fixing up and all the natural barriers in the function (places
12266 where a constant table can be dropped without breaking the flow).
12267 For each fixup we note how far the pc-relative replacement will be
12268 able to reach and the offset of the instruction into the function.
12269
12270 Having built the table we then group the fixes together to form
12271 tables that are as large as possible (subject to addressing
12272 constraints) and emit each table of constants after the last
12273 barrier that is within range of all the instructions in the group.
12274 If a group does not contain a barrier, then we forcibly create one
12275 by inserting a jump instruction into the flow. Once the table has
12276 been inserted, the insns are then modified to reference the
12277 relevant entry in the pool.
12278
12279 Possible enhancements to the algorithm (not implemented) are:
12280
12281 1) For some processors and object formats, there may be benefit in
12282 aligning the pools to the start of cache lines; this alignment
12283 would need to be taken into account when calculating addressability
12284 of a pool. */
12285
12286 /* These typedefs are located at the start of this file, so that
12287 they can be used in the prototypes there. This comment is to
12288 remind readers of that fact so that the following structures
12289 can be understood more easily.
12290
12291 typedef struct minipool_node Mnode;
12292 typedef struct minipool_fixup Mfix; */
12293
12294 struct minipool_node
12295 {
12296 /* Doubly linked chain of entries. */
12297 Mnode * next;
12298 Mnode * prev;
12299 /* The maximum offset into the code at which this entry can be placed. While
12300 pushing fixes for forward references, all entries are sorted in order
12301 of increasing max_address. */
12302 HOST_WIDE_INT max_address;
12303 /* Similarly for an entry inserted for a backwards ref. */
12304 HOST_WIDE_INT min_address;
12305 /* The number of fixes referencing this entry. This can become zero
12306 if we "unpush" an entry. In this case we ignore the entry when we
12307 come to emit the code. */
12308 int refcount;
12309 /* The offset from the start of the minipool. */
12310 HOST_WIDE_INT offset;
12311 /* The value in the table. */
12312 rtx value;
12313 /* The mode of value. */
12314 enum machine_mode mode;
12315 /* The size of the value. With iWMMXt enabled,
12316 sizes > 4 also imply an alignment of 8 bytes. */
12317 int fix_size;
12318 };
12319
12320 struct minipool_fixup
12321 {
12322 Mfix * next;
12323 rtx insn;
12324 HOST_WIDE_INT address;
12325 rtx * loc;
12326 enum machine_mode mode;
12327 int fix_size;
12328 rtx value;
12329 Mnode * minipool;
12330 HOST_WIDE_INT forwards;
12331 HOST_WIDE_INT backwards;
12332 };
12333
12334 /* Fixes less than a word need padding out to a word boundary. */
12335 #define MINIPOOL_FIX_SIZE(mode) \
12336 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
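/* For example, a QImode or HImode constant still occupies 4 bytes in the
   pool, while a DImode or DFmode constant occupies 8 (and, with iWMMXt,
   also forces 8-byte alignment, as noted above).  */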
12337
12338 static Mnode * minipool_vector_head;
12339 static Mnode * minipool_vector_tail;
12340 static rtx minipool_vector_label;
12341 static int minipool_pad;
12342
12343 /* The linked list of all minipool fixes required for this function. */
12344 Mfix * minipool_fix_head;
12345 Mfix * minipool_fix_tail;
12346 /* The fix entry for the current minipool, once it has been placed. */
12347 Mfix * minipool_barrier;
12348
12349 /* Determines if INSN is the start of a jump table. Returns the end
12350 of the TABLE or NULL_RTX. */
12351 static rtx
12352 is_jump_table (rtx insn)
12353 {
12354 rtx table;
12355
12356 if (jump_to_label_p (insn)
12357 && ((table = next_real_insn (JUMP_LABEL (insn)))
12358 == next_real_insn (insn))
12359 && table != NULL
12360 && GET_CODE (table) == JUMP_INSN
12361 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12362 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12363 return table;
12364
12365 return NULL_RTX;
12366 }
12367
12368 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12369 #define JUMP_TABLES_IN_TEXT_SECTION 0
12370 #endif
12371
12372 static HOST_WIDE_INT
12373 get_jump_table_size (rtx insn)
12374 {
12375 /* ADDR_VECs only take room if read-only data goes into the text
12376 section. */
12377 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12378 {
12379 rtx body = PATTERN (insn);
12380 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12381 HOST_WIDE_INT size;
12382 HOST_WIDE_INT modesize;
12383
12384 modesize = GET_MODE_SIZE (GET_MODE (body));
12385 size = modesize * XVECLEN (body, elt);
12386 switch (modesize)
12387 {
12388 case 1:
12389 /* Round up size of TBB table to a halfword boundary. */
12390 size = (size + 1) & ~(HOST_WIDE_INT)1;
12391 break;
12392 case 2:
12393 /* No padding necessary for TBH. */
12394 break;
12395 case 4:
12396 /* Add two bytes for alignment on Thumb. */
12397 if (TARGET_THUMB)
12398 size += 2;
12399 break;
12400 default:
12401 gcc_unreachable ();
12402 }
12403 return size;
12404 }
12405
12406 return 0;
12407 }
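/* Worked example (illustrative): a TBB-style table (QImode entries) with
   five cases occupies 5 bytes and is counted as 6 after rounding up to a
   halfword boundary; a Thumb ADDR_VEC with SImode entries and three cases
   is counted as 3 * 4 + 2 == 14 bytes because of the alignment padding.  */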
12408
12409 /* Return the maximum amount of padding that will be inserted before
12410 label LABEL. */
12411
12412 static HOST_WIDE_INT
12413 get_label_padding (rtx label)
12414 {
12415 HOST_WIDE_INT align, min_insn_size;
12416
12417 align = 1 << label_to_alignment (label);
12418 min_insn_size = TARGET_THUMB ? 2 : 4;
12419 return align > min_insn_size ? align - min_insn_size : 0;
12420 }
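/* For example (illustrative): if LABEL is aligned to 8 bytes
   (label_to_alignment == 3) on a Thumb target, up to 8 - 2 == 6 bytes of
   padding may be inserted before it, since the smallest Thumb insn is
   2 bytes.  */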
12421
12422 /* Move a minipool fix MP from its current location to before MAX_MP.
12423 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12424 constraints may need updating. */
12425 static Mnode *
12426 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12427 HOST_WIDE_INT max_address)
12428 {
12429 /* The code below assumes these are different. */
12430 gcc_assert (mp != max_mp);
12431
12432 if (max_mp == NULL)
12433 {
12434 if (max_address < mp->max_address)
12435 mp->max_address = max_address;
12436 }
12437 else
12438 {
12439 if (max_address > max_mp->max_address - mp->fix_size)
12440 mp->max_address = max_mp->max_address - mp->fix_size;
12441 else
12442 mp->max_address = max_address;
12443
12444 /* Unlink MP from its current position. Since max_mp is non-null,
12445 mp->prev must be non-null. */
12446 mp->prev->next = mp->next;
12447 if (mp->next != NULL)
12448 mp->next->prev = mp->prev;
12449 else
12450 minipool_vector_tail = mp->prev;
12451
12452 /* Re-insert it before MAX_MP. */
12453 mp->next = max_mp;
12454 mp->prev = max_mp->prev;
12455 max_mp->prev = mp;
12456
12457 if (mp->prev != NULL)
12458 mp->prev->next = mp;
12459 else
12460 minipool_vector_head = mp;
12461 }
12462
12463 /* Save the new entry. */
12464 max_mp = mp;
12465
12466 /* Scan over the preceding entries and adjust their addresses as
12467 required. */
12468 while (mp->prev != NULL
12469 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12470 {
12471 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12472 mp = mp->prev;
12473 }
12474
12475 return max_mp;
12476 }
12477
12478 /* Add a constant to the minipool for a forward reference. Returns the
12479 node added or NULL if the constant will not fit in this pool. */
12480 static Mnode *
12481 add_minipool_forward_ref (Mfix *fix)
12482 {
12483 /* If set, max_mp is the first pool_entry that has a lower
12484 constraint than the one we are trying to add. */
12485 Mnode * max_mp = NULL;
12486 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12487 Mnode * mp;
12488
12489 /* If the minipool starts before the end of FIX->INSN then this FIX
12490 cannot be placed into the current pool. Furthermore, adding the
12491 new constant pool entry may cause the pool to start FIX_SIZE bytes
12492 earlier. */
12493 if (minipool_vector_head
12494 && (fix->address + get_attr_length (fix->insn)
12495 >= minipool_vector_head->max_address - fix->fix_size))
12496 return NULL;
12497
12498 /* Scan the pool to see if a constant with the same value has
12499 already been added. While we are doing this, also note the
12500 location where we must insert the constant if it doesn't already
12501 exist. */
12502 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12503 {
12504 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12505 && fix->mode == mp->mode
12506 && (GET_CODE (fix->value) != CODE_LABEL
12507 || (CODE_LABEL_NUMBER (fix->value)
12508 == CODE_LABEL_NUMBER (mp->value)))
12509 && rtx_equal_p (fix->value, mp->value))
12510 {
12511 /* More than one fix references this entry. */
12512 mp->refcount++;
12513 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12514 }
12515
12516 /* Note the insertion point if necessary. */
12517 if (max_mp == NULL
12518 && mp->max_address > max_address)
12519 max_mp = mp;
12520
12521 /* If we are inserting an 8-byte aligned quantity and
12522 we have not already found an insertion point, then
12523 make sure that all such 8-byte aligned quantities are
12524 placed at the start of the pool. */
12525 if (ARM_DOUBLEWORD_ALIGN
12526 && max_mp == NULL
12527 && fix->fix_size >= 8
12528 && mp->fix_size < 8)
12529 {
12530 max_mp = mp;
12531 max_address = mp->max_address;
12532 }
12533 }
12534
12535 /* The value is not currently in the minipool, so we need to create
12536 a new entry for it. If MAX_MP is NULL, the entry will be put on
12537 the end of the list since the placement is less constrained than
12538 any existing entry. Otherwise, we insert the new fix before
12539 MAX_MP and, if necessary, adjust the constraints on the other
12540 entries. */
12541 mp = XNEW (Mnode);
12542 mp->fix_size = fix->fix_size;
12543 mp->mode = fix->mode;
12544 mp->value = fix->value;
12545 mp->refcount = 1;
12546 /* Not yet required for a backwards ref. */
12547 mp->min_address = -65536;
12548
12549 if (max_mp == NULL)
12550 {
12551 mp->max_address = max_address;
12552 mp->next = NULL;
12553 mp->prev = minipool_vector_tail;
12554
12555 if (mp->prev == NULL)
12556 {
12557 minipool_vector_head = mp;
12558 minipool_vector_label = gen_label_rtx ();
12559 }
12560 else
12561 mp->prev->next = mp;
12562
12563 minipool_vector_tail = mp;
12564 }
12565 else
12566 {
12567 if (max_address > max_mp->max_address - mp->fix_size)
12568 mp->max_address = max_mp->max_address - mp->fix_size;
12569 else
12570 mp->max_address = max_address;
12571
12572 mp->next = max_mp;
12573 mp->prev = max_mp->prev;
12574 max_mp->prev = mp;
12575 if (mp->prev != NULL)
12576 mp->prev->next = mp;
12577 else
12578 minipool_vector_head = mp;
12579 }
12580
12581 /* Save the new entry. */
12582 max_mp = mp;
12583
12584 /* Scan over the preceding entries and adjust their addresses as
12585 required. */
12586 while (mp->prev != NULL
12587 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12588 {
12589 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12590 mp = mp->prev;
12591 }
12592
12593 return max_mp;
12594 }
12595
12596 static Mnode *
12597 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12598 HOST_WIDE_INT min_address)
12599 {
12600 HOST_WIDE_INT offset;
12601
12602 /* The code below assumes these are different. */
12603 gcc_assert (mp != min_mp);
12604
12605 if (min_mp == NULL)
12606 {
12607 if (min_address > mp->min_address)
12608 mp->min_address = min_address;
12609 }
12610 else
12611 {
12612 /* We will adjust this below if it is too loose. */
12613 mp->min_address = min_address;
12614
12615 /* Unlink MP from its current position. Since min_mp is non-null,
12616 mp->next must be non-null. */
12617 mp->next->prev = mp->prev;
12618 if (mp->prev != NULL)
12619 mp->prev->next = mp->next;
12620 else
12621 minipool_vector_head = mp->next;
12622
12623 /* Reinsert it after MIN_MP. */
12624 mp->prev = min_mp;
12625 mp->next = min_mp->next;
12626 min_mp->next = mp;
12627 if (mp->next != NULL)
12628 mp->next->prev = mp;
12629 else
12630 minipool_vector_tail = mp;
12631 }
12632
12633 min_mp = mp;
12634
12635 offset = 0;
12636 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12637 {
12638 mp->offset = offset;
12639 if (mp->refcount > 0)
12640 offset += mp->fix_size;
12641
12642 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12643 mp->next->min_address = mp->min_address + mp->fix_size;
12644 }
12645
12646 return min_mp;
12647 }
12648
12649 /* Add a constant to the minipool for a backward reference. Returns the
12650 node added or NULL if the constant will not fit in this pool.
12651
12652 Note that the code for insertion for a backwards reference can be
12653 somewhat confusing because the calculated offsets for each fix do
12654 not take into account the size of the pool (which is still under
12655 construction). */
12656 static Mnode *
12657 add_minipool_backward_ref (Mfix *fix)
12658 {
12659 /* If set, min_mp is the last pool_entry that has a lower constraint
12660 than the one we are trying to add. */
12661 Mnode *min_mp = NULL;
12662 /* This can be negative, since it is only a constraint. */
12663 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12664 Mnode *mp;
12665
12666 /* If we can't reach the current pool from this insn, or if we can't
12667 insert this entry at the end of the pool without pushing other
12668 fixes out of range, then we don't try. This ensures that we
12669 can't fail later on. */
12670 if (min_address >= minipool_barrier->address
12671 || (minipool_vector_tail->min_address + fix->fix_size
12672 >= minipool_barrier->address))
12673 return NULL;
12674
12675 /* Scan the pool to see if a constant with the same value has
12676 already been added. While we are doing this, also note the
12677 location where we must insert the constant if it doesn't already
12678 exist. */
12679 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12680 {
12681 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12682 && fix->mode == mp->mode
12683 && (GET_CODE (fix->value) != CODE_LABEL
12684 || (CODE_LABEL_NUMBER (fix->value)
12685 == CODE_LABEL_NUMBER (mp->value)))
12686 && rtx_equal_p (fix->value, mp->value)
12687 /* Check that there is enough slack to move this entry to the
12688 end of the table (this is conservative). */
12689 && (mp->max_address
12690 > (minipool_barrier->address
12691 + minipool_vector_tail->offset
12692 + minipool_vector_tail->fix_size)))
12693 {
12694 mp->refcount++;
12695 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12696 }
12697
12698 if (min_mp != NULL)
12699 mp->min_address += fix->fix_size;
12700 else
12701 {
12702 /* Note the insertion point if necessary. */
12703 if (mp->min_address < min_address)
12704 {
12705 /* For now, we do not allow the insertion of 8-byte alignment
12706 requiring nodes anywhere but at the start of the pool. */
12707 if (ARM_DOUBLEWORD_ALIGN
12708 && fix->fix_size >= 8 && mp->fix_size < 8)
12709 return NULL;
12710 else
12711 min_mp = mp;
12712 }
12713 else if (mp->max_address
12714 < minipool_barrier->address + mp->offset + fix->fix_size)
12715 {
12716 /* Inserting before this entry would push the fix beyond
12717 its maximum address (which can happen if we have
12718 re-located a forwards fix); force the new fix to come
12719 after it. */
12720 if (ARM_DOUBLEWORD_ALIGN
12721 && fix->fix_size >= 8 && mp->fix_size < 8)
12722 return NULL;
12723 else
12724 {
12725 min_mp = mp;
12726 min_address = mp->min_address + fix->fix_size;
12727 }
12728 }
12729 /* Do not insert a non-8-byte aligned quantity before 8-byte
12730 aligned quantities. */
12731 else if (ARM_DOUBLEWORD_ALIGN
12732 && fix->fix_size < 8
12733 && mp->fix_size >= 8)
12734 {
12735 min_mp = mp;
12736 min_address = mp->min_address + fix->fix_size;
12737 }
12738 }
12739 }
12740
12741 /* We need to create a new entry. */
12742 mp = XNEW (Mnode);
12743 mp->fix_size = fix->fix_size;
12744 mp->mode = fix->mode;
12745 mp->value = fix->value;
12746 mp->refcount = 1;
12747 mp->max_address = minipool_barrier->address + 65536;
12748
12749 mp->min_address = min_address;
12750
12751 if (min_mp == NULL)
12752 {
12753 mp->prev = NULL;
12754 mp->next = minipool_vector_head;
12755
12756 if (mp->next == NULL)
12757 {
12758 minipool_vector_tail = mp;
12759 minipool_vector_label = gen_label_rtx ();
12760 }
12761 else
12762 mp->next->prev = mp;
12763
12764 minipool_vector_head = mp;
12765 }
12766 else
12767 {
12768 mp->next = min_mp->next;
12769 mp->prev = min_mp;
12770 min_mp->next = mp;
12771
12772 if (mp->next != NULL)
12773 mp->next->prev = mp;
12774 else
12775 minipool_vector_tail = mp;
12776 }
12777
12778 /* Save the new entry. */
12779 min_mp = mp;
12780
12781 if (mp->prev)
12782 mp = mp->prev;
12783 else
12784 mp->offset = 0;
12785
12786 /* Scan over the following entries and adjust their offsets. */
12787 while (mp->next != NULL)
12788 {
12789 if (mp->next->min_address < mp->min_address + mp->fix_size)
12790 mp->next->min_address = mp->min_address + mp->fix_size;
12791
12792 if (mp->refcount)
12793 mp->next->offset = mp->offset + mp->fix_size;
12794 else
12795 mp->next->offset = mp->offset;
12796
12797 mp = mp->next;
12798 }
12799
12800 return min_mp;
12801 }
12802
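/* Record BARRIER as the place where the current minipool will be
   emitted and assign each pool entry its byte offset from the start
   of the pool; entries whose refcount has dropped to zero take no
   space.  */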
12803 static void
12804 assign_minipool_offsets (Mfix *barrier)
12805 {
12806 HOST_WIDE_INT offset = 0;
12807 Mnode *mp;
12808
12809 minipool_barrier = barrier;
12810
12811 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12812 {
12813 mp->offset = offset;
12814
12815 if (mp->refcount > 0)
12816 offset += mp->fix_size;
12817 }
12818 }
12819
12820 /* Output the literal table.  */
12821 static void
12822 dump_minipool (rtx scan)
12823 {
12824 Mnode * mp;
12825 Mnode * nmp;
12826 int align64 = 0;
12827
12828 if (ARM_DOUBLEWORD_ALIGN)
12829 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12830 if (mp->refcount > 0 && mp->fix_size >= 8)
12831 {
12832 align64 = 1;
12833 break;
12834 }
12835
12836 if (dump_file)
12837 fprintf (dump_file,
12838 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12839 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12840
12841 scan = emit_label_after (gen_label_rtx (), scan);
12842 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12843 scan = emit_label_after (minipool_vector_label, scan);
12844
12845 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12846 {
12847 if (mp->refcount > 0)
12848 {
12849 if (dump_file)
12850 {
12851 fprintf (dump_file,
12852 ";; Offset %u, min %ld, max %ld ",
12853 (unsigned) mp->offset, (unsigned long) mp->min_address,
12854 (unsigned long) mp->max_address);
12855 arm_print_value (dump_file, mp->value);
12856 fputc ('\n', dump_file);
12857 }
12858
12859 switch (mp->fix_size)
12860 {
12861 #ifdef HAVE_consttable_1
12862 case 1:
12863 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12864 break;
12865
12866 #endif
12867 #ifdef HAVE_consttable_2
12868 case 2:
12869 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12870 break;
12871
12872 #endif
12873 #ifdef HAVE_consttable_4
12874 case 4:
12875 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12876 break;
12877
12878 #endif
12879 #ifdef HAVE_consttable_8
12880 case 8:
12881 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12882 break;
12883
12884 #endif
12885 #ifdef HAVE_consttable_16
12886 case 16:
12887 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12888 break;
12889
12890 #endif
12891 default:
12892 gcc_unreachable ();
12893 }
12894 }
12895
12896 nmp = mp->next;
12897 free (mp);
12898 }
12899
12900 minipool_vector_head = minipool_vector_tail = NULL;
12901 scan = emit_insn_after (gen_consttable_end (), scan);
12902 scan = emit_barrier_after (scan);
12903 }
12904
12905 /* Return the cost of forcibly inserting a barrier after INSN. */
12906 static int
12907 arm_barrier_cost (rtx insn)
12908 {
12909 /* Basing the location of the pool on the loop depth is preferable,
12910 but at the moment, the basic block information seems to be
12911 corrupt by this stage of the compilation. */
12912 int base_cost = 50;
12913 rtx next = next_nonnote_insn (insn);
12914
12915 if (next != NULL && GET_CODE (next) == CODE_LABEL)
12916 base_cost -= 20;
12917
12918 switch (GET_CODE (insn))
12919 {
12920 case CODE_LABEL:
12921 /* It will always be better to place the table before the label, rather
12922 than after it. */
12923 return 50;
12924
12925 case INSN:
12926 case CALL_INSN:
12927 return base_cost;
12928
12929 case JUMP_INSN:
12930 return base_cost - 10;
12931
12932 default:
12933 return base_cost + 10;
12934 }
12935 }
12936
12937 /* Find the best place in the insn stream in the range
12938 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12939 Create the barrier by inserting a jump and add a new fix entry for
12940 it. */
12941 static Mfix *
12942 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12943 {
12944 HOST_WIDE_INT count = 0;
12945 rtx barrier;
12946 rtx from = fix->insn;
12947 /* The instruction after which we will insert the jump. */
12948 rtx selected = NULL;
12949 int selected_cost;
12950 /* The address at which the jump instruction will be placed. */
12951 HOST_WIDE_INT selected_address;
12952 Mfix * new_fix;
12953 HOST_WIDE_INT max_count = max_address - fix->address;
12954 rtx label = gen_label_rtx ();
12955
12956 selected_cost = arm_barrier_cost (from);
12957 selected_address = fix->address;
12958
12959 while (from && count < max_count)
12960 {
12961 rtx tmp;
12962 int new_cost;
12963
12964 /* This code shouldn't have been called if there was a natural barrier
12965 within range. */
12966 gcc_assert (GET_CODE (from) != BARRIER);
12967
12968 /* Count the length of this insn. This must stay in sync with the
12969 code that pushes minipool fixes. */
12970 if (LABEL_P (from))
12971 count += get_label_padding (from);
12972 else
12973 count += get_attr_length (from);
12974
12975 /* If there is a jump table, add its length. */
12976 tmp = is_jump_table (from);
12977 if (tmp != NULL)
12978 {
12979 count += get_jump_table_size (tmp);
12980
12981 /* Jump tables aren't in a basic block, so base the cost on
12982 the dispatch insn. If we select this location, we will
12983 still put the pool after the table. */
12984 new_cost = arm_barrier_cost (from);
12985
12986 if (count < max_count
12987 && (!selected || new_cost <= selected_cost))
12988 {
12989 selected = tmp;
12990 selected_cost = new_cost;
12991 selected_address = fix->address + count;
12992 }
12993
12994 /* Continue after the dispatch table. */
12995 from = NEXT_INSN (tmp);
12996 continue;
12997 }
12998
12999 new_cost = arm_barrier_cost (from);
13000
13001 if (count < max_count
13002 && (!selected || new_cost <= selected_cost))
13003 {
13004 selected = from;
13005 selected_cost = new_cost;
13006 selected_address = fix->address + count;
13007 }
13008
13009 from = NEXT_INSN (from);
13010 }
13011
13012 /* Make sure that we found a place to insert the jump. */
13013 gcc_assert (selected);
13014
13015 /* Make sure we do not split a call and its corresponding
13016 CALL_ARG_LOCATION note. */
13017 if (CALL_P (selected))
13018 {
13019 rtx next = NEXT_INSN (selected);
13020 if (next && NOTE_P (next)
13021 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
13022 selected = next;
13023 }
13024
13025 /* Create a new JUMP_INSN that branches around a barrier. */
13026 from = emit_jump_insn_after (gen_jump (label), selected);
13027 JUMP_LABEL (from) = label;
13028 barrier = emit_barrier_after (from);
13029 emit_label_after (label, barrier);
13030
13031 /* Create a minipool barrier entry for the new barrier. */
13032 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
13033 new_fix->insn = barrier;
13034 new_fix->address = selected_address;
13035 new_fix->next = fix->next;
13036 fix->next = new_fix;
13037
13038 return new_fix;
13039 }
13040
13041 /* Record that there is a natural barrier in the insn stream at
13042 ADDRESS. */
13043 static void
13044 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13045 {
13046 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13047
13048 fix->insn = insn;
13049 fix->address = address;
13050
13051 fix->next = NULL;
13052 if (minipool_fix_head != NULL)
13053 minipool_fix_tail->next = fix;
13054 else
13055 minipool_fix_head = fix;
13056
13057 minipool_fix_tail = fix;
13058 }
13059
13060 /* Record INSN, which will need fixing up to load a value from the
13061 minipool.  ADDRESS is the offset of the insn from the start of the
13062 function; LOC is a pointer to the part of the insn which requires
13063 fixing; VALUE is the constant that must be loaded, which is of type
13064 MODE. */
13065 static void
13066 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13067 enum machine_mode mode, rtx value)
13068 {
13069 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13070
13071 fix->insn = insn;
13072 fix->address = address;
13073 fix->loc = loc;
13074 fix->mode = mode;
13075 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13076 fix->value = value;
13077 fix->forwards = get_attr_pool_range (insn);
13078 fix->backwards = get_attr_neg_pool_range (insn);
13079 fix->minipool = NULL;
13080
13081 /* If an insn doesn't have a range defined for it, then it isn't
13082 expecting to be reworked by this code. Better to stop now than
13083 to generate duff assembly code. */
13084 gcc_assert (fix->forwards || fix->backwards);
13085
13086 /* If an entry requires 8-byte alignment then assume all constant pools
13087 require 4 bytes of padding. Trying to do this later on a per-pool
13088 basis is awkward because existing pool entries have to be modified. */
13089 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13090 minipool_pad = 4;
13091
13092 if (dump_file)
13093 {
13094 fprintf (dump_file,
13095 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13096 GET_MODE_NAME (mode),
13097 INSN_UID (insn), (unsigned long) address,
13098 -1 * (long)fix->backwards, (long)fix->forwards);
13099 arm_print_value (dump_file, fix->value);
13100 fprintf (dump_file, "\n");
13101 }
13102
13103 /* Add it to the chain of fixes. */
13104 fix->next = NULL;
13105
13106 if (minipool_fix_head != NULL)
13107 minipool_fix_tail->next = fix;
13108 else
13109 minipool_fix_head = fix;
13110
13111 minipool_fix_tail = fix;
13112 }
13113
13114 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13115 Returns the number of insns needed, or 99 if we don't know how to
13116 do it. */
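/* For example, a value such as 0x0000000100000001 splits into two
   SImode halves of 1; each half is a valid ARM immediate, so the
   total cost would be 2.  */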
13117 int
13118 arm_const_double_inline_cost (rtx val)
13119 {
13120 rtx lowpart, highpart;
13121 enum machine_mode mode;
13122
13123 mode = GET_MODE (val);
13124
13125 if (mode == VOIDmode)
13126 mode = DImode;
13127
13128 gcc_assert (GET_MODE_SIZE (mode) == 8);
13129
13130 lowpart = gen_lowpart (SImode, val);
13131 highpart = gen_highpart_mode (SImode, mode, val);
13132
13133 gcc_assert (GET_CODE (lowpart) == CONST_INT);
13134 gcc_assert (GET_CODE (highpart) == CONST_INT);
13135
13136 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13137 NULL_RTX, NULL_RTX, 0, 0)
13138 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13139 NULL_RTX, NULL_RTX, 0, 0));
13140 }
13141
13142 /* Return true if it is worthwhile to split a 64-bit constant into two
13143 32-bit operations. This is the case if optimizing for size, or
13144 if we have load delay slots, or if one 32-bit part can be done with
13145 a single data operation. */
13146 bool
13147 arm_const_double_by_parts (rtx val)
13148 {
13149 enum machine_mode mode = GET_MODE (val);
13150 rtx part;
13151
13152 if (optimize_size || arm_ld_sched)
13153 return true;
13154
13155 if (mode == VOIDmode)
13156 mode = DImode;
13157
13158 part = gen_highpart_mode (SImode, mode, val);
13159
13160 gcc_assert (GET_CODE (part) == CONST_INT);
13161
13162 if (const_ok_for_arm (INTVAL (part))
13163 || const_ok_for_arm (~INTVAL (part)))
13164 return true;
13165
13166 part = gen_lowpart (SImode, val);
13167
13168 gcc_assert (GET_CODE (part) == CONST_INT);
13169
13170 if (const_ok_for_arm (INTVAL (part))
13171 || const_ok_for_arm (~INTVAL (part)))
13172 return true;
13173
13174 return false;
13175 }
13176
13177 /* Return true if it is possible to inline both the high and low parts
13178 of a 64-bit constant into 32-bit data processing instructions. */
13179 bool
13180 arm_const_double_by_immediates (rtx val)
13181 {
13182 enum machine_mode mode = GET_MODE (val);
13183 rtx part;
13184
13185 if (mode == VOIDmode)
13186 mode = DImode;
13187
13188 part = gen_highpart_mode (SImode, mode, val);
13189
13190 gcc_assert (GET_CODE (part) == CONST_INT);
13191
13192 if (!const_ok_for_arm (INTVAL (part)))
13193 return false;
13194
13195 part = gen_lowpart (SImode, val);
13196
13197 gcc_assert (GET_CODE (part) == CONST_INT);
13198
13199 if (!const_ok_for_arm (INTVAL (part)))
13200 return false;
13201
13202 return true;
13203 }
13204
13205 /* Scan INSN and note any of its operands that need fixing.
13206 If DO_PUSHES is false we do not actually push any of the fixups
13207 needed. The function returns TRUE if any fixups were needed/pushed.
13208 This is used by arm_memory_load_p() which needs to know about loads
13209 of constants that will be converted into minipool loads. */
13210 static bool
13211 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13212 {
13213 bool result = false;
13214 int opno;
13215
13216 extract_insn (insn);
13217
13218 if (!constrain_operands (1))
13219 fatal_insn_not_found (insn);
13220
13221 if (recog_data.n_alternatives == 0)
13222 return false;
13223
13224 /* Fill in recog_op_alt with information about the constraints of
13225 this insn. */
13226 preprocess_constraints ();
13227
13228 for (opno = 0; opno < recog_data.n_operands; opno++)
13229 {
13230 /* Things we need to fix can only occur in inputs. */
13231 if (recog_data.operand_type[opno] != OP_IN)
13232 continue;
13233
13234 /* If this alternative is a memory reference, then any mention
13235 of constants in this alternative is really to fool reload
13236 into allowing us to accept one there. We need to fix them up
13237 now so that we output the right code. */
13238 if (recog_op_alt[opno][which_alternative].memory_ok)
13239 {
13240 rtx op = recog_data.operand[opno];
13241
13242 if (CONSTANT_P (op))
13243 {
13244 if (do_pushes)
13245 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13246 recog_data.operand_mode[opno], op);
13247 result = true;
13248 }
13249 else if (GET_CODE (op) == MEM
13250 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13251 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13252 {
13253 if (do_pushes)
13254 {
13255 rtx cop = avoid_constant_pool_reference (op);
13256
13257 /* Casting the address of something to a mode narrower
13258 than a word can cause avoid_constant_pool_reference()
13259 to return the pool reference itself. That's no good to
13260 us here. Let's just hope that we can use the
13261 constant pool value directly. */
13262 if (op == cop)
13263 cop = get_pool_constant (XEXP (op, 0));
13264
13265 push_minipool_fix (insn, address,
13266 recog_data.operand_loc[opno],
13267 recog_data.operand_mode[opno], cop);
13268 }
13269
13270 result = true;
13271 }
13272 }
13273 }
13274
13275 return result;
13276 }
13277
13278 /* Convert instructions to their cc-clobbering variant if possible, since
13279 that allows us to use smaller encodings. */
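/* For example, once the condition codes are known to be dead at a
   plain "add r0, r0, r1", the insn can be converted to the
   flag-setting "adds r0, r0, r1", which has a 16-bit encoding.  */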
13280
13281 static void
13282 thumb2_reorg (void)
13283 {
13284 basic_block bb;
13285 regset_head live;
13286
13287 INIT_REG_SET (&live);
13288
13289 /* We are freeing block_for_insn in the toplev to keep compatibility
13290 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13291 compute_bb_for_insn ();
13292 df_analyze ();
13293
13294 FOR_EACH_BB (bb)
13295 {
13296 rtx insn;
13297
13298 COPY_REG_SET (&live, DF_LR_OUT (bb));
13299 df_simulate_initialize_backwards (bb, &live);
13300 FOR_BB_INSNS_REVERSE (bb, insn)
13301 {
13302 if (NONJUMP_INSN_P (insn)
13303 && !REGNO_REG_SET_P (&live, CC_REGNUM)
13304 && GET_CODE (PATTERN (insn)) == SET)
13305 {
13306 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
13307 rtx pat = PATTERN (insn);
13308 rtx dst = XEXP (pat, 0);
13309 rtx src = XEXP (pat, 1);
13310 rtx op0 = NULL_RTX, op1 = NULL_RTX;
13311
13312 if (!OBJECT_P (src))
13313 op0 = XEXP (src, 0);
13314
13315 if (BINARY_P (src))
13316 op1 = XEXP (src, 1);
13317
13318 if (low_register_operand (dst, SImode))
13319 {
13320 switch (GET_CODE (src))
13321 {
13322 case PLUS:
13323 if (low_register_operand (op0, SImode))
13324 {
13325 /* ADDS <Rd>,<Rn>,<Rm> */
13326 if (low_register_operand (op1, SImode))
13327 action = CONV;
13328 /* ADDS <Rdn>,#<imm8> */
13329 /* SUBS <Rdn>,#<imm8> */
13330 else if (rtx_equal_p (dst, op0)
13331 && CONST_INT_P (op1)
13332 && IN_RANGE (INTVAL (op1), -255, 255))
13333 action = CONV;
13334 /* ADDS <Rd>,<Rn>,#<imm3> */
13335 /* SUBS <Rd>,<Rn>,#<imm3> */
13336 else if (CONST_INT_P (op1)
13337 && IN_RANGE (INTVAL (op1), -7, 7))
13338 action = CONV;
13339 }
13340 break;
13341
13342 case MINUS:
13343 /* RSBS <Rd>,<Rn>,#0
13344 Not handled here: see NEG below. */
13345 /* SUBS <Rd>,<Rn>,#<imm3>
13346 SUBS <Rdn>,#<imm8>
13347 Not handled here: see PLUS above. */
13348 /* SUBS <Rd>,<Rn>,<Rm> */
13349 if (low_register_operand (op0, SImode)
13350 && low_register_operand (op1, SImode))
13351 action = CONV;
13352 break;
13353
13354 case MULT:
13355 /* MULS <Rdm>,<Rn>,<Rdm>
13356 As an exception to the rule, this is only used
13357 when optimizing for size since MULS is slow on all
13358 known implementations. We do not even want to use
13359 MULS in cold code, if optimizing for speed, so we
13360 test the global flag here. */
13361 if (!optimize_size)
13362 break;
13363 /* else fall through. */
13364 case AND:
13365 case IOR:
13366 case XOR:
13367 /* ANDS <Rdn>,<Rm> */
13368 if (rtx_equal_p (dst, op0)
13369 && low_register_operand (op1, SImode))
13370 action = CONV;
13371 else if (rtx_equal_p (dst, op1)
13372 && low_register_operand (op0, SImode))
13373 action = SWAP_CONV;
13374 break;
13375
13376 case ASHIFTRT:
13377 case ASHIFT:
13378 case LSHIFTRT:
13379 /* ASRS <Rdn>,<Rm> */
13380 /* LSRS <Rdn>,<Rm> */
13381 /* LSLS <Rdn>,<Rm> */
13382 if (rtx_equal_p (dst, op0)
13383 && low_register_operand (op1, SImode))
13384 action = CONV;
13385 /* ASRS <Rd>,<Rm>,#<imm5> */
13386 /* LSRS <Rd>,<Rm>,#<imm5> */
13387 /* LSLS <Rd>,<Rm>,#<imm5> */
13388 else if (low_register_operand (op0, SImode)
13389 && CONST_INT_P (op1)
13390 && IN_RANGE (INTVAL (op1), 0, 31))
13391 action = CONV;
13392 break;
13393
13394 case ROTATERT:
13395 /* RORS <Rdn>,<Rm> */
13396 if (rtx_equal_p (dst, op0)
13397 && low_register_operand (op1, SImode))
13398 action = CONV;
13399 break;
13400
13401 case NOT:
13402 case NEG:
13403 /* MVNS <Rd>,<Rm> */
13404 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
13405 if (low_register_operand (op0, SImode))
13406 action = CONV;
13407 break;
13408
13409 case CONST_INT:
13410 /* MOVS <Rd>,#<imm8> */
13411 if (CONST_INT_P (src)
13412 && IN_RANGE (INTVAL (src), 0, 255))
13413 action = CONV;
13414 break;
13415
13416 case REG:
13417 /* MOVS and MOV<c> with registers have different
13418 encodings, so are not relevant here. */
13419 break;
13420
13421 default:
13422 break;
13423 }
13424 }
13425
13426 if (action != SKIP)
13427 {
13428 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13429 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13430 rtvec vec;
13431
13432 if (action == SWAP_CONV)
13433 {
13434 src = copy_rtx (src);
13435 XEXP (src, 0) = op1;
13436 XEXP (src, 1) = op0;
13437 pat = gen_rtx_SET (VOIDmode, dst, src);
13438 vec = gen_rtvec (2, pat, clobber);
13439 }
13440 else /* action == CONV */
13441 vec = gen_rtvec (2, pat, clobber);
13442
13443 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13444 INSN_CODE (insn) = -1;
13445 }
13446 }
13447
13448 if (NONDEBUG_INSN_P (insn))
13449 df_simulate_one_insn_backwards (bb, insn, &live);
13450 }
13451 }
13452
13453 CLEAR_REG_SET (&live);
13454 }
13455
13456 /* GCC puts the pool in the wrong place for ARM, since we can only
13457 load addresses a limited distance around the pc. We do some
13458 special munging to move the constant pool values to the correct
13459 point in the code. */
13460 static void
13461 arm_reorg (void)
13462 {
13463 rtx insn;
13464 HOST_WIDE_INT address = 0;
13465 Mfix * fix;
13466
13467 if (TARGET_THUMB2)
13468 thumb2_reorg ();
13469
13470 minipool_fix_head = minipool_fix_tail = NULL;
13471
13472 /* The first insn must always be a note, or the code below won't
13473 scan it properly. */
13474 insn = get_insns ();
13475 gcc_assert (GET_CODE (insn) == NOTE);
13476 minipool_pad = 0;
13477
13478 /* Scan all the insns and record the operands that will need fixing. */
13479 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13480 {
13481 if (TARGET_CIRRUS_FIX_INVALID_INSNS
13482 && (arm_cirrus_insn_p (insn)
13483 || GET_CODE (insn) == JUMP_INSN
13484 || arm_memory_load_p (insn)))
13485 cirrus_reorg (insn);
13486
13487 if (GET_CODE (insn) == BARRIER)
13488 push_minipool_barrier (insn, address);
13489 else if (INSN_P (insn))
13490 {
13491 rtx table;
13492
13493 note_invalid_constants (insn, address, true);
13494 address += get_attr_length (insn);
13495
13496 /* If the insn is a vector jump, add the size of the table
13497 and skip the table. */
13498 if ((table = is_jump_table (insn)) != NULL)
13499 {
13500 address += get_jump_table_size (table);
13501 insn = table;
13502 }
13503 }
13504 else if (LABEL_P (insn))
13505 /* Add the worst-case padding due to alignment. We don't add
13506 the _current_ padding because the minipool insertions
13507 themselves might change it. */
13508 address += get_label_padding (insn);
13509 }
13510
13511 fix = minipool_fix_head;
13512
13513 /* Now scan the fixups and perform the required changes. */
13514 while (fix)
13515 {
13516 Mfix * ftmp;
13517 Mfix * fdel;
13518 Mfix * last_added_fix;
13519 Mfix * last_barrier = NULL;
13520 Mfix * this_fix;
13521
13522 /* Skip any further barriers before the next fix. */
13523 while (fix && GET_CODE (fix->insn) == BARRIER)
13524 fix = fix->next;
13525
13526 /* No more fixes. */
13527 if (fix == NULL)
13528 break;
13529
13530 last_added_fix = NULL;
13531
13532 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13533 {
13534 if (GET_CODE (ftmp->insn) == BARRIER)
13535 {
13536 if (ftmp->address >= minipool_vector_head->max_address)
13537 break;
13538
13539 last_barrier = ftmp;
13540 }
13541 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13542 break;
13543
13544 last_added_fix = ftmp; /* Keep track of the last fix added. */
13545 }
13546
13547 /* If we found a barrier, drop back to that; any fixes that we
13548 could have reached but come after the barrier will now go in
13549 the next mini-pool. */
13550 if (last_barrier != NULL)
13551 {
13552 /* Reduce the refcount for those fixes that won't go into this
13553 pool after all. */
13554 for (fdel = last_barrier->next;
13555 fdel && fdel != ftmp;
13556 fdel = fdel->next)
13557 {
13558 fdel->minipool->refcount--;
13559 fdel->minipool = NULL;
13560 }
13561
13562 ftmp = last_barrier;
13563 }
13564 else
13565 {
13566 /* ftmp is the first fix that we can't fit into this pool and
13567 there are no natural barriers that we could use.  Insert a
13568 new barrier in the code somewhere between the previous
13569 fix and this one, and arrange to jump around it. */
13570 HOST_WIDE_INT max_address;
13571
13572 /* The last item on the list of fixes must be a barrier, so
13573 we can never run off the end of the list of fixes without
13574 last_barrier being set. */
13575 gcc_assert (ftmp);
13576
13577 max_address = minipool_vector_head->max_address;
13578 /* Check that there isn't another fix that is in range that
13579 we couldn't fit into this pool because the pool was
13580 already too large: we need to put the pool before such an
13581 instruction. The pool itself may come just after the
13582 fix because create_fix_barrier also allows space for a
13583 jump instruction. */
13584 if (ftmp->address < max_address)
13585 max_address = ftmp->address + 1;
13586
13587 last_barrier = create_fix_barrier (last_added_fix, max_address);
13588 }
13589
13590 assign_minipool_offsets (last_barrier);
13591
13592 while (ftmp)
13593 {
13594 if (GET_CODE (ftmp->insn) != BARRIER
13595 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13596 == NULL))
13597 break;
13598
13599 ftmp = ftmp->next;
13600 }
13601
13602 /* Scan over the fixes we have identified for this pool, fixing them
13603 up and adding the constants to the pool itself. */
13604 for (this_fix = fix; this_fix && ftmp != this_fix;
13605 this_fix = this_fix->next)
13606 if (GET_CODE (this_fix->insn) != BARRIER)
13607 {
13608 rtx addr
13609 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
13610 minipool_vector_label),
13611 this_fix->minipool->offset);
13612 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13613 }
13614
13615 dump_minipool (last_barrier->insn);
13616 fix = ftmp;
13617 }
13618
13619 /* From now on we must synthesize any constants that we can't handle
13620 directly. This can happen if the RTL gets split during final
13621 instruction generation. */
13622 after_arm_reorg = 1;
13623
13624 /* Free the minipool memory. */
13625 obstack_free (&minipool_obstack, minipool_startobj);
13626 }
13627 \f
13628 /* Routines to output assembly language. */
13629
13630 /* If the rtx is the correct value, then return the string representation of
13631 the number.  In this way we can ensure that valid double constants are
13632 generated even when cross-compiling. */
13633 const char *
13634 fp_immediate_constant (rtx x)
13635 {
13636 REAL_VALUE_TYPE r;
13637 int i;
13638
13639 if (!fp_consts_inited)
13640 init_fp_table ();
13641
13642 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13643 for (i = 0; i < 8; i++)
13644 if (REAL_VALUES_EQUAL (r, values_fp[i]))
13645 return strings_fp[i];
13646
13647 gcc_unreachable ();
13648 }
13649
13650 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13651 static const char *
13652 fp_const_from_val (REAL_VALUE_TYPE *r)
13653 {
13654 int i;
13655
13656 if (!fp_consts_inited)
13657 init_fp_table ();
13658
13659 for (i = 0; i < 8; i++)
13660 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
13661 return strings_fp[i];
13662
13663 gcc_unreachable ();
13664 }
13665
13666 /* Output the operands of a LDM/STM instruction to STREAM.
13667 MASK is the ARM register set mask of which only bits 0-15 are important.
13668 REG is the base register, either the frame pointer or the stack pointer,
13669 INSTR is the possibly suffixed load or store instruction.
13670 RFE is nonzero if the instruction should also copy spsr to cpsr. */
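/* For instance, with a (hypothetical) INSTR of "ldmfd\t%r!, ", REG of
   SP_REGNUM and a MASK covering r4-r6 and pc, the output would be:
	ldmfd	sp!, {r4, r5, r6, pc}  */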
13671
13672 static void
13673 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
13674 unsigned long mask, int rfe)
13675 {
13676 unsigned i;
13677 bool not_first = FALSE;
13678
13679 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
13680 fputc ('\t', stream);
13681 asm_fprintf (stream, instr, reg);
13682 fputc ('{', stream);
13683
13684 for (i = 0; i <= LAST_ARM_REGNUM; i++)
13685 if (mask & (1 << i))
13686 {
13687 if (not_first)
13688 fprintf (stream, ", ");
13689
13690 asm_fprintf (stream, "%r", i);
13691 not_first = TRUE;
13692 }
13693
13694 if (rfe)
13695 fprintf (stream, "}^\n");
13696 else
13697 fprintf (stream, "}\n");
13698 }
13699
13700
13701 /* Output a FLDMD instruction to STREAM.
13702 BASE is the register containing the address.
13703 REG and COUNT specify the register range.
13704 Extra registers may be added to avoid hardware bugs.
13705
13706 We output FLDMD even for ARMv5 VFP implementations. Although
13707 FLDMD is technically not supported until ARMv6, it is believed
13708 that all VFP implementations support its use in this context. */
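/* For instance, with BASE naming the stack pointer, REG = 8 and
   COUNT = 3 (and no workaround applying), the emitted instruction
   would be roughly:
	fldmfdd	sp!, {d8, d9, d10}  */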
13709
13710 static void
13711 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
13712 {
13713 int i;
13714
13715 /* Workaround ARM10 VFPr1 bug. */
13716 if (count == 2 && !arm_arch6)
13717 {
13718 if (reg == 15)
13719 reg--;
13720 count++;
13721 }
13722
13723 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
13724 load into multiple parts if we have to handle more than 16 registers. */
13725 if (count > 16)
13726 {
13727 vfp_output_fldmd (stream, base, reg, 16);
13728 vfp_output_fldmd (stream, base, reg + 16, count - 16);
13729 return;
13730 }
13731
13732 fputc ('\t', stream);
13733 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
13734
13735 for (i = reg; i < reg + count; i++)
13736 {
13737 if (i > reg)
13738 fputs (", ", stream);
13739 asm_fprintf (stream, "d%d", i);
13740 }
13741 fputs ("}\n", stream);
13742
13743 }
13744
13745
13746 /* Output the assembly for a store multiple. */
13747
13748 const char *
13749 vfp_output_fstmd (rtx * operands)
13750 {
13751 char pattern[100];
13752 int p;
13753 int base;
13754 int i;
13755
13756 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13757 p = strlen (pattern);
13758
13759 gcc_assert (GET_CODE (operands[1]) == REG);
13760
13761 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13762 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13763 {
13764 p += sprintf (&pattern[p], ", d%d", base + i);
13765 }
13766 strcpy (&pattern[p], "}");
13767
13768 output_asm_insn (pattern, operands);
13769 return "";
13770 }
13771
13772
13773 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13774 number of bytes pushed. */
13775
13776 static int
13777 vfp_emit_fstmd (int base_reg, int count)
13778 {
13779 rtx par;
13780 rtx dwarf;
13781 rtx tmp, reg;
13782 int i;
13783
13784 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
13785 register pairs are stored by a store multiple insn. We avoid this
13786 by pushing an extra pair. */
13787 if (count == 2 && !arm_arch6)
13788 {
13789 if (base_reg == LAST_VFP_REGNUM - 3)
13790 base_reg -= 2;
13791 count++;
13792 }
13793
13794 /* FSTMD may not store more than 16 doubleword registers at once. Split
13795 larger stores into multiple parts (up to a maximum of two, in
13796 practice). */
13797 if (count > 16)
13798 {
13799 int saved;
13800 /* NOTE: base_reg is an internal register number, so each D register
13801 counts as 2. */
13802 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13803 saved += vfp_emit_fstmd (base_reg, 16);
13804 return saved;
13805 }
13806
13807 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13808 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13809
13810 reg = gen_rtx_REG (DFmode, base_reg);
13811 base_reg += 2;
13812
13813 XVECEXP (par, 0, 0)
13814 = gen_rtx_SET (VOIDmode,
13815 gen_frame_mem
13816 (BLKmode,
13817 gen_rtx_PRE_MODIFY (Pmode,
13818 stack_pointer_rtx,
13819 plus_constant
13820 (stack_pointer_rtx,
13821 - (count * 8)))
13822 ),
13823 gen_rtx_UNSPEC (BLKmode,
13824 gen_rtvec (1, reg),
13825 UNSPEC_PUSH_MULT));
13826
13827 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13828 plus_constant (stack_pointer_rtx, -(count * 8)));
13829 RTX_FRAME_RELATED_P (tmp) = 1;
13830 XVECEXP (dwarf, 0, 0) = tmp;
13831
13832 tmp = gen_rtx_SET (VOIDmode,
13833 gen_frame_mem (DFmode, stack_pointer_rtx),
13834 reg);
13835 RTX_FRAME_RELATED_P (tmp) = 1;
13836 XVECEXP (dwarf, 0, 1) = tmp;
13837
13838 for (i = 1; i < count; i++)
13839 {
13840 reg = gen_rtx_REG (DFmode, base_reg);
13841 base_reg += 2;
13842 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13843
13844 tmp = gen_rtx_SET (VOIDmode,
13845 gen_frame_mem (DFmode,
13846 plus_constant (stack_pointer_rtx,
13847 i * 8)),
13848 reg);
13849 RTX_FRAME_RELATED_P (tmp) = 1;
13850 XVECEXP (dwarf, 0, i + 1) = tmp;
13851 }
13852
13853 par = emit_insn (par);
13854 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13855 RTX_FRAME_RELATED_P (par) = 1;
13856
13857 return count * 8;
13858 }
13859
13860 /* Emit a call instruction with pattern PAT. ADDR is the address of
13861 the call target. */
13862
13863 void
13864 arm_emit_call_insn (rtx pat, rtx addr)
13865 {
13866 rtx insn;
13867
13868 insn = emit_call_insn (pat);
13869
13870 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13871 If the call might use such an entry, add a use of the PIC register
13872 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13873 if (TARGET_VXWORKS_RTP
13874 && flag_pic
13875 && GET_CODE (addr) == SYMBOL_REF
13876 && (SYMBOL_REF_DECL (addr)
13877 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13878 : !SYMBOL_REF_LOCAL_P (addr)))
13879 {
13880 require_pic_register ();
13881 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13882 }
13883 }
13884
13885 /* Output a 'call' insn. */
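/* A sketch of the sequence emitted below (the first move only when the
   call target is lr):
	mov	ip, lr
	mov	lr, pc
	bx	rN	@ or "mov pc, rN" without interworking/ARMv4T  */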
13886 const char *
13887 output_call (rtx *operands)
13888 {
13889 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13890
13891 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13892 if (REGNO (operands[0]) == LR_REGNUM)
13893 {
13894 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13895 output_asm_insn ("mov%?\t%0, %|lr", operands);
13896 }
13897
13898 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13899
13900 if (TARGET_INTERWORK || arm_arch4t)
13901 output_asm_insn ("bx%?\t%0", operands);
13902 else
13903 output_asm_insn ("mov%?\t%|pc, %0", operands);
13904
13905 return "";
13906 }
13907
13908 /* Output a 'call' insn that is a reference in memory. This is
13909 disabled for ARMv5 and we prefer a blx instead because otherwise
13910 there's a significant performance overhead. */
13911 const char *
13912 output_call_mem (rtx *operands)
13913 {
13914 gcc_assert (!arm_arch5);
13915 if (TARGET_INTERWORK)
13916 {
13917 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13918 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13919 output_asm_insn ("bx%?\t%|ip", operands);
13920 }
13921 else if (regno_use_in (LR_REGNUM, operands[0]))
13922 {
13923 /* LR is used in the memory address. We load the address in the
13924 first instruction. It's safe to use IP as the target of the
13925 load since the call will kill it anyway. */
13926 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13927 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13928 if (arm_arch4t)
13929 output_asm_insn ("bx%?\t%|ip", operands);
13930 else
13931 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13932 }
13933 else
13934 {
13935 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13936 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13937 }
13938
13939 return "";
13940 }
13941
13942
13943 /* Output a move from arm registers to an fpa register.
13944 OPERANDS[0] is an fpa register.
13945 OPERANDS[1] is the first register of an arm register pair. */
13946 const char *
13947 output_mov_long_double_fpa_from_arm (rtx *operands)
13948 {
13949 int arm_reg0 = REGNO (operands[1]);
13950 rtx ops[3];
13951
13952 gcc_assert (arm_reg0 != IP_REGNUM);
13953
13954 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13955 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13956 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13957
13958 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13959 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
13960
13961 return "";
13962 }
13963
13964 /* Output a move from an fpa register to arm registers.
13965 OPERANDS[0] is the first register of an arm register pair.
13966 OPERANDS[1] is an fpa register. */
13967 const char *
13968 output_mov_long_double_arm_from_fpa (rtx *operands)
13969 {
13970 int arm_reg0 = REGNO (operands[0]);
13971 rtx ops[3];
13972
13973 gcc_assert (arm_reg0 != IP_REGNUM);
13974
13975 ops[0] = gen_rtx_REG (SImode, arm_reg0);
13976 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
13977 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
13978
13979 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
13980 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
13981 return "";
13982 }
13983
13984 /* Output a move from arm registers to arm registers of a long double.
13985 OPERANDS[0] is the destination.
13986 OPERANDS[1] is the source. */
13987 const char *
13988 output_mov_long_double_arm_from_arm (rtx *operands)
13989 {
13990 /* We have to be careful here because the two might overlap. */
13991 int dest_start = REGNO (operands[0]);
13992 int src_start = REGNO (operands[1]);
13993 rtx ops[2];
13994 int i;
13995
13996 if (dest_start < src_start)
13997 {
13998 for (i = 0; i < 3; i++)
13999 {
14000 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14001 ops[1] = gen_rtx_REG (SImode, src_start + i);
14002 output_asm_insn ("mov%?\t%0, %1", ops);
14003 }
14004 }
14005 else
14006 {
14007 for (i = 2; i >= 0; i--)
14008 {
14009 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14010 ops[1] = gen_rtx_REG (SImode, src_start + i);
14011 output_asm_insn ("mov%?\t%0, %1", ops);
14012 }
14013 }
14014
14015 return "";
14016 }
14017
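/* Emit RTL to load SRC into DEST as two 16-bit halves.  For an
   immediate, set the low 16 bits and then, if the high half is
   nonzero, insert it with a ZERO_EXTRACT; for symbolic values emit a
   HIGH/LO_SUM pair (typically assembled as a movw/movt pair).  */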
14018 void
14019 arm_emit_movpair (rtx dest, rtx src)
14020 {
14021 /* If the src is an immediate, simplify it. */
14022 if (CONST_INT_P (src))
14023 {
14024 HOST_WIDE_INT val = INTVAL (src);
14025 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
14026 if ((val >> 16) & 0x0000ffff)
14027 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
14028 GEN_INT (16)),
14029 GEN_INT ((val >> 16) & 0x0000ffff));
14030 return;
14031 }
14032 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
14033 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
14034 }
14035
14036 /* Output a move from arm registers to an fpa register.
14037 OPERANDS[0] is an fpa register.
14038 OPERANDS[1] is the first register of an arm register pair. */
14039 const char *
14040 output_mov_double_fpa_from_arm (rtx *operands)
14041 {
14042 int arm_reg0 = REGNO (operands[1]);
14043 rtx ops[2];
14044
14045 gcc_assert (arm_reg0 != IP_REGNUM);
14046
14047 ops[0] = gen_rtx_REG (SImode, arm_reg0);
14048 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
14049 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
14050 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
14051 return "";
14052 }
14053
14054 /* Output a move from an fpa register to arm registers.
14055 OPERANDS[0] is the first register of an arm register pair.
14056 OPERANDS[1] is an fpa register. */
14057 const char *
14058 output_mov_double_arm_from_fpa (rtx *operands)
14059 {
14060 int arm_reg0 = REGNO (operands[0]);
14061 rtx ops[2];
14062
14063 gcc_assert (arm_reg0 != IP_REGNUM);
14064
14065 ops[0] = gen_rtx_REG (SImode, arm_reg0);
14066 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
14067 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
14068 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
14069 return "";
14070 }
14071
14072 /* Output a move between double words. It must be REG<-MEM
14073 or MEM<-REG. */
14074 const char *
14075 output_move_double (rtx *operands, bool emit, int *count)
14076 {
14077 enum rtx_code code0 = GET_CODE (operands[0]);
14078 enum rtx_code code1 = GET_CODE (operands[1]);
14079 rtx otherops[3];
14080 if (count)
14081 *count = 1;
14082
14083 /* The only case when this might happen is when
14084 you are looking at the length of a DImode instruction
14085 that has an invalid constant in it. */
14086 if (code0 == REG && code1 != MEM)
14087 {
14088 gcc_assert (!emit);
14089 *count = 2;
14090 return "";
14091 }
14092
14093 if (code0 == REG)
14094 {
14095 unsigned int reg0 = REGNO (operands[0]);
14096
14097 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
14098
14099 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
14100
14101 switch (GET_CODE (XEXP (operands[1], 0)))
14102 {
14103 case REG:
14104
14105 if (emit)
14106 {
14107 if (TARGET_LDRD
14108 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
14109 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
14110 else
14111 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14112 }
14113 break;
14114
14115 case PRE_INC:
14116 gcc_assert (TARGET_LDRD);
14117 if (emit)
14118 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
14119 break;
14120
14121 case PRE_DEC:
14122 if (emit)
14123 {
14124 if (TARGET_LDRD)
14125 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
14126 else
14127 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
14128 }
14129 break;
14130
14131 case POST_INC:
14132 if (emit)
14133 {
14134 if (TARGET_LDRD)
14135 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
14136 else
14137 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14138 }
14139 break;
14140
14141 case POST_DEC:
14142 gcc_assert (TARGET_LDRD);
14143 if (emit)
14144 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14145 break;
14146
14147 case PRE_MODIFY:
14148 case POST_MODIFY:
14149 /* Autoincrement addressing modes should never have overlapping
14150 base and destination registers, and overlapping index registers
14151 are already prohibited, so this doesn't need to worry about
14152 fix_cm3_ldrd. */
14153 otherops[0] = operands[0];
14154 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14155 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14156
14157 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14158 {
14159 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14160 {
14161 /* Registers overlap so split out the increment. */
14162 if (emit)
14163 {
14164 output_asm_insn ("add%?\t%1, %1, %2", otherops);
14165 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14166 }
14167 if (count)
14168 *count = 2;
14169 }
14170 else
14171 {
14172 /* Use a single insn if we can.
14173 FIXME: IWMMXT allows offsets larger than ldrd can
14174 handle, fix these up with a pair of ldr. */
14175 if (TARGET_THUMB2
14176 || GET_CODE (otherops[2]) != CONST_INT
14177 || (INTVAL (otherops[2]) > -256
14178 && INTVAL (otherops[2]) < 256))
14179 {
14180 if (emit)
14181 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14182 }
14183 else
14184 {
14185 if (emit)
14186 {
14187 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14188 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14189 }
14190 if (count)
14191 *count = 2;
14192
14193 }
14194 }
14195 }
14196 else
14197 {
14198 /* Use a single insn if we can.
14199 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14200 fix these up with a pair of ldr. */
14201 if (TARGET_THUMB2
14202 || GET_CODE (otherops[2]) != CONST_INT
14203 || (INTVAL (otherops[2]) > -256
14204 && INTVAL (otherops[2]) < 256))
14205 {
14206 if (emit)
14207 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14208 }
14209 else
14210 {
14211 if (emit)
14212 {
14213 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14214 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14215 }
14216 if (count)
14217 *count = 2;
14218 }
14219 }
14220 break;
14221
14222 case LABEL_REF:
14223 case CONST:
14224 /* We might be able to use ldrd %0, %1 here. However the range is
14225 different to ldr/adr, and it is broken on some ARMv7-M
14226 implementations. */
14227 /* Use the second register of the pair to avoid problematic
14228 overlap. */
14229 otherops[1] = operands[1];
14230 if (emit)
14231 output_asm_insn ("adr%?\t%0, %1", otherops);
14232 operands[1] = otherops[0];
14233 if (emit)
14234 {
14235 if (TARGET_LDRD)
14236 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14237 else
14238 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14239 }
14240
14241 if (count)
14242 *count = 2;
14243 break;
14244
14245 /* ??? This needs checking for thumb2. */
14246 default:
14247 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14248 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14249 {
14250 otherops[0] = operands[0];
14251 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14252 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14253
14254 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14255 {
14256 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14257 {
14258 switch ((int) INTVAL (otherops[2]))
14259 {
14260 case -8:
14261 if (emit)
14262 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14263 return "";
14264 case -4:
14265 if (TARGET_THUMB2)
14266 break;
14267 if (emit)
14268 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14269 return "";
14270 case 4:
14271 if (TARGET_THUMB2)
14272 break;
14273 if (emit)
14274 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14275 return "";
14276 }
14277 }
14278 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
14279 operands[1] = otherops[0];
14280 if (TARGET_LDRD
14281 && (GET_CODE (otherops[2]) == REG
14282 || TARGET_THUMB2
14283 || (GET_CODE (otherops[2]) == CONST_INT
14284 && INTVAL (otherops[2]) > -256
14285 && INTVAL (otherops[2]) < 256)))
14286 {
14287 if (reg_overlap_mentioned_p (operands[0],
14288 otherops[2]))
14289 {
14290 rtx tmp;
14291 /* Swap base and index registers over to
14292 avoid a conflict. */
14293 tmp = otherops[1];
14294 otherops[1] = otherops[2];
14295 otherops[2] = tmp;
14296 }
14297 /* If both registers conflict, it will usually
14298 have been fixed by a splitter. */
14299 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14300 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14301 {
14302 if (emit)
14303 {
14304 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14305 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14306 }
14307 if (count)
14308 *count = 2;
14309 }
14310 else
14311 {
14312 otherops[0] = operands[0];
14313 if (emit)
14314 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14315 }
14316 return "";
14317 }
14318
14319 if (GET_CODE (otherops[2]) == CONST_INT)
14320 {
14321 if (emit)
14322 {
14323 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14324 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14325 else
14326 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14327 }
14328 }
14329 else
14330 {
14331 if (emit)
14332 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14333 }
14334 }
14335 else
14336 {
14337 if (emit)
14338 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14339 }
14340
14341 if (count)
14342 *count = 2;
14343
14344 if (TARGET_LDRD)
14345 return "ldr%(d%)\t%0, [%1]";
14346
14347 return "ldm%(ia%)\t%1, %M0";
14348 }
14349 else
14350 {
14351 otherops[1] = adjust_address (operands[1], SImode, 4);
14352 /* Take care of overlapping base/data reg. */
14353 if (reg_mentioned_p (operands[0], operands[1]))
14354 {
14355 if (emit)
14356 {
14357 output_asm_insn ("ldr%?\t%0, %1", otherops);
14358 output_asm_insn ("ldr%?\t%0, %1", operands);
14359 }
14360 if (count)
14361 *count = 2;
14362
14363 }
14364 else
14365 {
14366 if (emit)
14367 {
14368 output_asm_insn ("ldr%?\t%0, %1", operands);
14369 output_asm_insn ("ldr%?\t%0, %1", otherops);
14370 }
14371 if (count)
14372 *count = 2;
14373 }
14374 }
14375 }
14376 }
14377 else
14378 {
14379 /* Constraints should ensure this. */
14380 gcc_assert (code0 == MEM && code1 == REG);
14381 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14382
14383 switch (GET_CODE (XEXP (operands[0], 0)))
14384 {
14385 case REG:
14386 if (emit)
14387 {
14388 if (TARGET_LDRD)
14389 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14390 else
14391 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14392 }
14393 break;
14394
14395 case PRE_INC:
14396 gcc_assert (TARGET_LDRD);
14397 if (emit)
14398 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14399 break;
14400
14401 case PRE_DEC:
14402 if (emit)
14403 {
14404 if (TARGET_LDRD)
14405 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14406 else
14407 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14408 }
14409 break;
14410
14411 case POST_INC:
14412 if (emit)
14413 {
14414 if (TARGET_LDRD)
14415 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14416 else
14417 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14418 }
14419 break;
14420
14421 case POST_DEC:
14422 gcc_assert (TARGET_LDRD);
14423 if (emit)
14424 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14425 break;
14426
14427 case PRE_MODIFY:
14428 case POST_MODIFY:
14429 otherops[0] = operands[1];
14430 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14431 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14432
14433 /* IWMMXT allows offsets larger than ldrd can handle,
14434 fix these up with a pair of ldr. */
14435 if (!TARGET_THUMB2
14436 && GET_CODE (otherops[2]) == CONST_INT
14437 && (INTVAL(otherops[2]) <= -256
14438 || INTVAL(otherops[2]) >= 256))
14439 {
14440 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14441 {
14442 if (emit)
14443 {
14444 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14445 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14446 }
14447 if (count)
14448 *count = 2;
14449 }
14450 else
14451 {
14452 if (emit)
14453 {
14454 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14455 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14456 }
14457 if (count)
14458 *count = 2;
14459 }
14460 }
14461 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14462 {
14463 if (emit)
14464 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14465 }
14466 else
14467 {
14468 if (emit)
14469 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14470 }
14471 break;
14472
14473 case PLUS:
14474 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14475 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14476 {
14477 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14478 {
14479 case -8:
14480 if (emit)
14481 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14482 return "";
14483
14484 case -4:
14485 if (TARGET_THUMB2)
14486 break;
14487 if (emit)
14488 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14489 return "";
14490
14491 case 4:
14492 if (TARGET_THUMB2)
14493 break;
14494 if (emit)
14495 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14496 return "";
14497 }
14498 }
14499 if (TARGET_LDRD
14500 && (GET_CODE (otherops[2]) == REG
14501 || TARGET_THUMB2
14502 || (GET_CODE (otherops[2]) == CONST_INT
14503 && INTVAL (otherops[2]) > -256
14504 && INTVAL (otherops[2]) < 256)))
14505 {
14506 otherops[0] = operands[1];
14507 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14508 if (emit)
14509 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14510 return "";
14511 }
14512 /* Fall through */
14513
14514 default:
14515 otherops[0] = adjust_address (operands[0], SImode, 4);
14516 otherops[1] = operands[1];
14517 if (emit)
14518 {
14519 output_asm_insn ("str%?\t%1, %0", operands);
14520 output_asm_insn ("str%?\t%H1, %0", otherops);
14521 }
14522 if (count)
14523 *count = 2;
14524 }
14525 }
14526
14527 return "";
14528 }
14529
14530 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14531 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14532
14533 const char *
14534 output_move_quad (rtx *operands)
14535 {
14536 if (REG_P (operands[0]))
14537 {
14538 /* Load, or reg->reg move. */
14539
14540 if (MEM_P (operands[1]))
14541 {
14542 switch (GET_CODE (XEXP (operands[1], 0)))
14543 {
14544 case REG:
14545 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14546 break;
14547
14548 case LABEL_REF:
14549 case CONST:
14550 output_asm_insn ("adr%?\t%0, %1", operands);
14551 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14552 break;
14553
14554 default:
14555 gcc_unreachable ();
14556 }
14557 }
14558 else
14559 {
14560 rtx ops[2];
14561 int dest, src, i;
14562
14563 gcc_assert (REG_P (operands[1]));
14564
14565 dest = REGNO (operands[0]);
14566 src = REGNO (operands[1]);
14567
14568 /* This seems pretty dumb, but hopefully GCC won't try to do it
14569 very often. */
14570 if (dest < src)
14571 for (i = 0; i < 4; i++)
14572 {
14573 ops[0] = gen_rtx_REG (SImode, dest + i);
14574 ops[1] = gen_rtx_REG (SImode, src + i);
14575 output_asm_insn ("mov%?\t%0, %1", ops);
14576 }
14577 else
14578 for (i = 3; i >= 0; i--)
14579 {
14580 ops[0] = gen_rtx_REG (SImode, dest + i);
14581 ops[1] = gen_rtx_REG (SImode, src + i);
14582 output_asm_insn ("mov%?\t%0, %1", ops);
14583 }
14584 }
14585 }
14586 else
14587 {
14588 gcc_assert (MEM_P (operands[0]));
14589 gcc_assert (REG_P (operands[1]));
14590 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14591
14592 switch (GET_CODE (XEXP (operands[0], 0)))
14593 {
14594 case REG:
14595 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14596 break;
14597
14598 default:
14599 gcc_unreachable ();
14600 }
14601 }
14602
14603 return "";
14604 }
14605
14606 /* Output a VFP load or store instruction. */
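/* The mnemonic is assembled from the templates below; typical results
   are roughly "flds s0, [rN]" or "fldd d0, [rN]" for a plain load, and
   writeback forms such as "fstmdbd rN!, {d8}" or "fldmias rN!, {s8}".  */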
14607
14608 const char *
14609 output_move_vfp (rtx *operands)
14610 {
14611 rtx reg, mem, addr, ops[2];
14612 int load = REG_P (operands[0]);
14613 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14614 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14615 const char *templ;
14616 char buff[50];
14617 enum machine_mode mode;
14618
14619 reg = operands[!load];
14620 mem = operands[load];
14621
14622 mode = GET_MODE (reg);
14623
14624 gcc_assert (REG_P (reg));
14625 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14626 gcc_assert (mode == SFmode
14627 || mode == DFmode
14628 || mode == SImode
14629 || mode == DImode
14630 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14631 gcc_assert (MEM_P (mem));
14632
14633 addr = XEXP (mem, 0);
14634
14635 switch (GET_CODE (addr))
14636 {
14637 case PRE_DEC:
14638 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14639 ops[0] = XEXP (addr, 0);
14640 ops[1] = reg;
14641 break;
14642
14643 case POST_INC:
14644 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14645 ops[0] = XEXP (addr, 0);
14646 ops[1] = reg;
14647 break;
14648
14649 default:
14650 templ = "f%s%c%%?\t%%%s0, %%1%s";
14651 ops[0] = reg;
14652 ops[1] = mem;
14653 break;
14654 }
14655
14656 sprintf (buff, templ,
14657 load ? "ld" : "st",
14658 dp ? 'd' : 's',
14659 dp ? "P" : "",
14660 integer_p ? "\t%@ int" : "");
14661 output_asm_insn (buff, ops);
14662
14663 return "";
14664 }
14665
14666 /* Output a Neon quad-word load or store, or a load or store for
14667 larger structure modes.
14668
14669 WARNING: The ordering of elements is weird in big-endian mode,
14670 because we use VSTM, as required by the EABI. GCC RTL defines
14671 element ordering based on in-memory order. This can differ
14672 from the architectural ordering of elements within a NEON register.
14673 The intrinsics defined in arm_neon.h use the NEON register element
14674 ordering, not the GCC RTL element ordering.
14675
14676 For example, the in-memory ordering of a big-endian quadword
14677 vector with 16-bit elements when stored from register pair {d0,d1}
14678 will be (lowest address first, d0[N] is NEON register element N):
14679
14680 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14681
14682 When necessary, quadword registers (dN, dN+1) are moved to ARM
14683 registers from rN in the order:
14684
14685 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14686
14687 So that STM/LDM can be used on vectors in ARM registers, and the
14688 same memory layout will result as if VSTM/VLDM were used. */
14689
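/* Continuing the example above (an illustrative restatement of the rule
   stated there): the quadword held in {d0, d1} would be transferred as
   d0 -> (r1, r0) and d1 -> (r3, r2), so that a subsequent STM of r0-r3
   lays the bytes out in memory exactly as a VSTM of {d0, d1} would.  */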
14690 const char *
14691 output_move_neon (rtx *operands)
14692 {
14693 rtx reg, mem, addr, ops[2];
14694 int regno, load = REG_P (operands[0]);
14695 const char *templ;
14696 char buff[50];
14697 enum machine_mode mode;
14698
14699 reg = operands[!load];
14700 mem = operands[load];
14701
14702 mode = GET_MODE (reg);
14703
14704 gcc_assert (REG_P (reg));
14705 regno = REGNO (reg);
14706 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14707 || NEON_REGNO_OK_FOR_QUAD (regno));
14708 gcc_assert (VALID_NEON_DREG_MODE (mode)
14709 || VALID_NEON_QREG_MODE (mode)
14710 || VALID_NEON_STRUCT_MODE (mode));
14711 gcc_assert (MEM_P (mem));
14712
14713 addr = XEXP (mem, 0);
14714
14715 /* Strip off const from addresses like (const (plus (...))). */
14716 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14717 addr = XEXP (addr, 0);
14718
14719 switch (GET_CODE (addr))
14720 {
14721 case POST_INC:
14722 templ = "v%smia%%?\t%%0!, %%h1";
14723 ops[0] = XEXP (addr, 0);
14724 ops[1] = reg;
14725 break;
14726
14727 case PRE_DEC:
14728 /* FIXME: We should be using vld1/vst1 here in BE mode? */
14729 templ = "v%smdb%%?\t%%0!, %%h1";
14730 ops[0] = XEXP (addr, 0);
14731 ops[1] = reg;
14732 break;
14733
14734 case POST_MODIFY:
14735 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14736 gcc_unreachable ();
14737
14738 case LABEL_REF:
14739 case PLUS:
14740 {
14741 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14742 int i;
14743 int overlap = -1;
14744 for (i = 0; i < nregs; i++)
14745 {
14746 /* We're only using DImode here because it's a convenient size. */
14747 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14748 ops[1] = adjust_address (mem, DImode, 8 * i);
14749 if (reg_overlap_mentioned_p (ops[0], mem))
14750 {
14751 gcc_assert (overlap == -1);
14752 overlap = i;
14753 }
14754 else
14755 {
14756 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14757 output_asm_insn (buff, ops);
14758 }
14759 }
14760 if (overlap != -1)
14761 {
14762 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14763 ops[1] = adjust_address (mem, DImode, 8 * overlap);
14764 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14765 output_asm_insn (buff, ops);
14766 }
14767
14768 return "";
14769 }
14770
14771 default:
14772 templ = "v%smia%%?\t%%m0, %%h1";
14773 ops[0] = mem;
14774 ops[1] = reg;
14775 }
14776
14777 sprintf (buff, templ, load ? "ld" : "st");
14778 output_asm_insn (buff, ops);
14779
14780 return "";
14781 }
14782
14783 /* Compute and return the length of neon_mov<mode>, where <mode> is
14784 one of VSTRUCT modes: EI, OI, CI or XI. */
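/* For instance (illustrative only): a CImode register-to-register move is
   reported as 12 bytes (three instructions), while a CImode load whose
   address is a PLUS is emitted as one vldr per D register, i.e.
   HARD_REGNO_NREGS / 2 = 6 instructions, or 24 bytes.  */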
14785 int
14786 arm_attr_length_move_neon (rtx insn)
14787 {
14788 rtx reg, mem, addr;
14789 int load;
14790 enum machine_mode mode;
14791
14792 extract_insn_cached (insn);
14793
14794 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14795 {
14796 mode = GET_MODE (recog_data.operand[0]);
14797 switch (mode)
14798 {
14799 case EImode:
14800 case OImode:
14801 return 8;
14802 case CImode:
14803 return 12;
14804 case XImode:
14805 return 16;
14806 default:
14807 gcc_unreachable ();
14808 }
14809 }
14810
14811 load = REG_P (recog_data.operand[0]);
14812 reg = recog_data.operand[!load];
14813 mem = recog_data.operand[load];
14814
14815 gcc_assert (MEM_P (mem));
14816
14817 mode = GET_MODE (reg);
14818 addr = XEXP (mem, 0);
14819
14820 /* Strip off const from addresses like (const (plus (...))). */
14821 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14822 addr = XEXP (addr, 0);
14823
14824 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14825 {
14826 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14827 return insns * 4;
14828 }
14829 else
14830 return 4;
14831 }
14832
14833 /* Return nonzero if the offset in the address is an immediate. Otherwise,
14834 return zero. */
14835
14836 int
14837 arm_address_offset_is_imm (rtx insn)
14838 {
14839 rtx mem, addr;
14840
14841 extract_insn_cached (insn);
14842
14843 if (REG_P (recog_data.operand[0]))
14844 return 0;
14845
14846 mem = recog_data.operand[0];
14847
14848 gcc_assert (MEM_P (mem));
14849
14850 addr = XEXP (mem, 0);
14851
14852 if (GET_CODE (addr) == REG
14853 || (GET_CODE (addr) == PLUS
14854 && GET_CODE (XEXP (addr, 0)) == REG
14855 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14856 return 1;
14857 else
14858 return 0;
14859 }
14860
14861 /* Output an ADD r, s, #n where n may be too big for one instruction.
14862 If n is zero and the destination equals the source register, output nothing. */
14863 const char *
14864 output_add_immediate (rtx *operands)
14865 {
14866 HOST_WIDE_INT n = INTVAL (operands[2]);
14867
14868 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14869 {
14870 if (n < 0)
14871 output_multi_immediate (operands,
14872 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14873 -n);
14874 else
14875 output_multi_immediate (operands,
14876 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14877 n);
14878 }
14879
14880 return "";
14881 }
14882
14883 /* Output a multiple immediate operation.
14884 OPERANDS is the vector of operands referred to in the output patterns.
14885 INSTR1 is the output pattern to use for the first constant.
14886 INSTR2 is the output pattern to use for subsequent constants.
14887 IMMED_OP is the index of the constant slot in OPERANDS.
14888 N is the constant value. */
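/* A worked example (the register numbers are hypothetical): splitting
   n = 0x10203 produces the chunks 0x3, 0x200 and 0x10000, each a valid
   rotated 8-bit immediate, so output_add_immediate would emit roughly:

	add	r0, r1, #3
	add	r0, r0, #512
	add	r0, r0, #65536  */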
14889 static const char *
14890 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14891 int immed_op, HOST_WIDE_INT n)
14892 {
14893 #if HOST_BITS_PER_WIDE_INT > 32
14894 n &= 0xffffffff;
14895 #endif
14896
14897 if (n == 0)
14898 {
14899 /* Quick and easy output. */
14900 operands[immed_op] = const0_rtx;
14901 output_asm_insn (instr1, operands);
14902 }
14903 else
14904 {
14905 int i;
14906 const char * instr = instr1;
14907
14908 /* Note that n is never zero here (which would give no output). */
14909 for (i = 0; i < 32; i += 2)
14910 {
14911 if (n & (3 << i))
14912 {
14913 operands[immed_op] = GEN_INT (n & (255 << i));
14914 output_asm_insn (instr, operands);
14915 instr = instr2;
14916 i += 6;
14917 }
14918 }
14919 }
14920
14921 return "";
14922 }
14923
14924 /* Return the name of a shifter operation. */
14925 static const char *
14926 arm_shift_nmem (enum rtx_code code)
14927 {
14928 switch (code)
14929 {
14930 case ASHIFT:
14931 return ARM_LSL_NAME;
14932
14933 case ASHIFTRT:
14934 return "asr";
14935
14936 case LSHIFTRT:
14937 return "lsr";
14938
14939 case ROTATERT:
14940 return "ror";
14941
14942 default:
14943 gcc_unreachable ();
14944 }
14945 }
14946
14947 /* Return the appropriate ARM instruction for the operation code.
14948 The returned result should not be overwritten. OP is the rtx of the
14949 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14950 was shifted. */
14951 const char *
14952 arithmetic_instr (rtx op, int shift_first_arg)
14953 {
14954 switch (GET_CODE (op))
14955 {
14956 case PLUS:
14957 return "add";
14958
14959 case MINUS:
14960 return shift_first_arg ? "rsb" : "sub";
14961
14962 case IOR:
14963 return "orr";
14964
14965 case XOR:
14966 return "eor";
14967
14968 case AND:
14969 return "and";
14970
14971 case ASHIFT:
14972 case ASHIFTRT:
14973 case LSHIFTRT:
14974 case ROTATERT:
14975 return arm_shift_nmem (GET_CODE (op));
14976
14977 default:
14978 gcc_unreachable ();
14979 }
14980 }
14981
14982 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14983 for the operation code. The returned result should not be overwritten.
14984 OP is the rtx of the shift.
14985 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
14986 constant shift amount otherwise. */
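/* For example (illustrative only): given (ashiftrt:SI (reg) (const_int 3))
   this returns "asr" with *AMOUNTP set to 3; given (mult:SI (reg)
   (const_int 8)) it returns ARM_LSL_NAME with *AMOUNTP set to 3; and a
   shift by a register returns the mnemonic with *AMOUNTP set to -1.  */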
14987 static const char *
14988 shift_op (rtx op, HOST_WIDE_INT *amountp)
14989 {
14990 const char * mnem;
14991 enum rtx_code code = GET_CODE (op);
14992
14993 switch (GET_CODE (XEXP (op, 1)))
14994 {
14995 case REG:
14996 case SUBREG:
14997 *amountp = -1;
14998 break;
14999
15000 case CONST_INT:
15001 *amountp = INTVAL (XEXP (op, 1));
15002 break;
15003
15004 default:
15005 gcc_unreachable ();
15006 }
15007
15008 switch (code)
15009 {
15010 case ROTATE:
15011 gcc_assert (*amountp != -1);
15012 *amountp = 32 - *amountp;
15013 code = ROTATERT;
15014
15015 /* Fall through. */
15016
15017 case ASHIFT:
15018 case ASHIFTRT:
15019 case LSHIFTRT:
15020 case ROTATERT:
15021 mnem = arm_shift_nmem (code);
15022 break;
15023
15024 case MULT:
15025 /* We never have to worry about the amount being other than a
15026 power of 2, since this case can never be reloaded from a reg. */
15027 gcc_assert (*amountp != -1);
15028 *amountp = int_log2 (*amountp);
15029 return ARM_LSL_NAME;
15030
15031 default:
15032 gcc_unreachable ();
15033 }
15034
15035 if (*amountp != -1)
15036 {
15037 /* This is not 100% correct, but follows from the desire to merge
15038 multiplication by a power of 2 with the recognizer for a
15039 shift. >=32 is not a valid shift for "lsl", so we must try and
15040 output a shift that produces the correct arithmetical result.
15041 Using lsr #32 is identical except for the fact that the carry bit
15042 is not set correctly if we set the flags; but we never use the
15043 carry bit from such an operation, so we can ignore that. */
15044 if (code == ROTATERT)
15045 /* Rotate is just modulo 32. */
15046 *amountp &= 31;
15047 else if (*amountp != (*amountp & 31))
15048 {
15049 if (code == ASHIFT)
15050 mnem = "lsr";
15051 *amountp = 32;
15052 }
15053
15054 /* Shifts of 0 are no-ops. */
15055 if (*amountp == 0)
15056 return NULL;
15057 }
15058
15059 return mnem;
15060 }
15061
15062 /* Return log2 of POWER, which must be an exact power of two. */
15063
15064 static HOST_WIDE_INT
15065 int_log2 (HOST_WIDE_INT power)
15066 {
15067 HOST_WIDE_INT shift = 0;
15068
15069 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
15070 {
15071 gcc_assert (shift <= 31);
15072 shift++;
15073 }
15074
15075 return shift;
15076 }
15077
15078 /* Output a .ascii pseudo-op, keeping track of lengths. This is
15079 because /bin/as is horribly restrictive. The judgement about
15080 whether or not each character is 'printable' (and can be output as
15081 is) or not (and must be printed with an octal escape) must be made
15082 with reference to the *host* character set -- the situation is
15083 similar to that discussed in the comments above pp_c_char in
15084 c-pretty-print.c. */
15085
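/* For instance (illustrative only), the four input bytes 'O', 'K', '"'
   and '\n' are emitted as:

	.ascii	"OK\"\012"

   and a fresh .ascii directive is started once MAX_ASCII_LEN output
   characters have been produced.  */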
15086 #define MAX_ASCII_LEN 51
15087
15088 void
15089 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
15090 {
15091 int i;
15092 int len_so_far = 0;
15093
15094 fputs ("\t.ascii\t\"", stream);
15095
15096 for (i = 0; i < len; i++)
15097 {
15098 int c = p[i];
15099
15100 if (len_so_far >= MAX_ASCII_LEN)
15101 {
15102 fputs ("\"\n\t.ascii\t\"", stream);
15103 len_so_far = 0;
15104 }
15105
15106 if (ISPRINT (c))
15107 {
15108 if (c == '\\' || c == '\"')
15109 {
15110 putc ('\\', stream);
15111 len_so_far++;
15112 }
15113 putc (c, stream);
15114 len_so_far++;
15115 }
15116 else
15117 {
15118 fprintf (stream, "\\%03o", c);
15119 len_so_far += 4;
15120 }
15121 }
15122
15123 fputs ("\"\n", stream);
15124 }
15125 \f
15126 /* Compute the register save mask for registers 0 through 12
15127 inclusive. This code is used by arm_compute_save_reg_mask. */
15128
15129 static unsigned long
15130 arm_compute_save_reg0_reg12_mask (void)
15131 {
15132 unsigned long func_type = arm_current_func_type ();
15133 unsigned long save_reg_mask = 0;
15134 unsigned int reg;
15135
15136 if (IS_INTERRUPT (func_type))
15137 {
15138 unsigned int max_reg;
15139 /* Interrupt functions must not corrupt any registers,
15140 even call clobbered ones. If this is a leaf function
15141 we can just examine the registers used by the RTL, but
15142 otherwise we have to assume that whatever function is
15143 called might clobber anything, and so we have to save
15144 all the call-clobbered registers as well. */
15145 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15146 /* FIQ handlers have registers r8 - r12 banked, so
15147 we only need to check r0 - r7. Normal ISRs only
15148 bank r13 and r14, so we must check up to r12.
15149 r13 is the stack pointer which is always preserved,
15150 so we do not need to consider it here. */
15151 max_reg = 7;
15152 else
15153 max_reg = 12;
15154
15155 for (reg = 0; reg <= max_reg; reg++)
15156 if (df_regs_ever_live_p (reg)
15157 || (! current_function_is_leaf && call_used_regs[reg]))
15158 save_reg_mask |= (1 << reg);
15159
15160 /* Also save the pic base register if necessary. */
15161 if (flag_pic
15162 && !TARGET_SINGLE_PIC_BASE
15163 && arm_pic_register != INVALID_REGNUM
15164 && crtl->uses_pic_offset_table)
15165 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15166 }
15167 else if (IS_VOLATILE(func_type))
15168 {
15169 /* For noreturn functions we historically omitted register saves
15170 altogether. However this really messes up debugging. As a
15171 compromise save just the frame pointers. Combined with the link
15172 register saved elsewhere this should be sufficient to get
15173 a backtrace. */
15174 if (frame_pointer_needed)
15175 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15176 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15177 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15178 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15179 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15180 }
15181 else
15182 {
15183 /* In the normal case we only need to save those registers
15184 which are call saved and which are used by this function. */
15185 for (reg = 0; reg <= 11; reg++)
15186 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15187 save_reg_mask |= (1 << reg);
15188
15189 /* Handle the frame pointer as a special case. */
15190 if (frame_pointer_needed)
15191 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15192
15193 /* If we aren't loading the PIC register,
15194 don't stack it even though it may be live. */
15195 if (flag_pic
15196 && !TARGET_SINGLE_PIC_BASE
15197 && arm_pic_register != INVALID_REGNUM
15198 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15199 || crtl->uses_pic_offset_table))
15200 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15201
15202 /* The prologue will copy SP into R0, so save it. */
15203 if (IS_STACKALIGN (func_type))
15204 save_reg_mask |= 1;
15205 }
15206
15207 /* Save registers so the exception handler can modify them. */
15208 if (crtl->calls_eh_return)
15209 {
15210 unsigned int i;
15211
15212 for (i = 0; ; i++)
15213 {
15214 reg = EH_RETURN_DATA_REGNO (i);
15215 if (reg == INVALID_REGNUM)
15216 break;
15217 save_reg_mask |= 1 << reg;
15218 }
15219 }
15220
15221 return save_reg_mask;
15222 }
15223
15224
15225 /* Compute the number of bytes used to store the static chain register on the
15226 stack, above the stack frame. We need to know this accurately to get the
15227 alignment of the rest of the stack frame correct. */
15228
15229 static int arm_compute_static_chain_stack_bytes (void)
15230 {
15231 unsigned long func_type = arm_current_func_type ();
15232 int static_chain_stack_bytes = 0;
15233
15234 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
15235 && IS_NESTED (func_type)
15236 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15237 static_chain_stack_bytes = 4;
15238
15239 return static_chain_stack_bytes;
15240 }
15241
15242
15243 /* Compute a bit mask of which registers need to be
15244 saved on the stack for the current function.
15245 This is used by arm_get_frame_offsets, which may add extra registers. */
15246
15247 static unsigned long
15248 arm_compute_save_reg_mask (void)
15249 {
15250 unsigned int save_reg_mask = 0;
15251 unsigned long func_type = arm_current_func_type ();
15252 unsigned int reg;
15253
15254 if (IS_NAKED (func_type))
15255 /* This should never really happen. */
15256 return 0;
15257
15258 /* If we are creating a stack frame, then we must save the frame pointer,
15259 IP (which will hold the old stack pointer), LR and the PC. */
15260 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15261 save_reg_mask |=
15262 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15263 | (1 << IP_REGNUM)
15264 | (1 << LR_REGNUM)
15265 | (1 << PC_REGNUM);
15266
15267 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15268
15269 /* Decide if we need to save the link register.
15270 Interrupt routines have their own banked link register,
15271 so they never need to save it.
15272 Otherwise if we do not use the link register we do not need to save
15273 it. If we are pushing other registers onto the stack however, we
15274 can save an instruction in the epilogue by pushing the link register
15275 now and then popping it back into the PC. This incurs extra memory
15276 accesses though, so we only do it when optimizing for size, and only
15277 if we know that we will not need a fancy return sequence. */
15278 if (df_regs_ever_live_p (LR_REGNUM)
15279 || (save_reg_mask
15280 && optimize_size
15281 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15282 && !crtl->calls_eh_return))
15283 save_reg_mask |= 1 << LR_REGNUM;
15284
15285 if (cfun->machine->lr_save_eliminated)
15286 save_reg_mask &= ~ (1 << LR_REGNUM);
15287
15288 if (TARGET_REALLY_IWMMXT
15289 && ((bit_count (save_reg_mask)
15290 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15291 arm_compute_static_chain_stack_bytes())
15292 ) % 2) != 0)
15293 {
15294 /* The total number of registers that are going to be pushed
15295 onto the stack is odd. We need to ensure that the stack
15296 is 64-bit aligned before we start to save iWMMXt registers,
15297 and also before we start to create locals. (A local variable
15298 might be a double or long long which we will load/store using
15299 an iWMMXt instruction). Therefore we need to push another
15300 ARM register, so that the stack will be 64-bit aligned. We
15301 try to avoid using the arg registers (r0 - r3) as they might be
15302 used to pass values in a tail call. */
15303 for (reg = 4; reg <= 12; reg++)
15304 if ((save_reg_mask & (1 << reg)) == 0)
15305 break;
15306
15307 if (reg <= 12)
15308 save_reg_mask |= (1 << reg);
15309 else
15310 {
15311 cfun->machine->sibcall_blocked = 1;
15312 save_reg_mask |= (1 << 3);
15313 }
15314 }
15315
15316 /* We may need to push an additional register for use initializing the
15317 PIC base register. */
15318 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15319 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15320 {
15321 reg = thumb_find_work_register (1 << 4);
15322 if (!call_used_regs[reg])
15323 save_reg_mask |= (1 << reg);
15324 }
15325
15326 return save_reg_mask;
15327 }
15328
15329
15330 /* Compute a bit mask of which registers need to be
15331 saved on the stack for the current function. */
15332 static unsigned long
15333 thumb1_compute_save_reg_mask (void)
15334 {
15335 unsigned long mask;
15336 unsigned reg;
15337
15338 mask = 0;
15339 for (reg = 0; reg < 12; reg ++)
15340 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15341 mask |= 1 << reg;
15342
15343 if (flag_pic
15344 && !TARGET_SINGLE_PIC_BASE
15345 && arm_pic_register != INVALID_REGNUM
15346 && crtl->uses_pic_offset_table)
15347 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15348
15349 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15350 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15351 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15352
15353 /* LR will also be pushed if any lo regs are pushed. */
15354 if (mask & 0xff || thumb_force_lr_save ())
15355 mask |= (1 << LR_REGNUM);
15356
15357 /* Make sure we have a low work register if we need one.
15358 We will need one if we are going to push a high register,
15359 but we are not currently intending to push a low register. */
15360 if ((mask & 0xff) == 0
15361 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15362 {
15363 /* Use thumb_find_work_register to choose which register
15364 we will use. If the register is live then we will
15365 have to push it. Use LAST_LO_REGNUM as our fallback
15366 choice for the register to select. */
15367 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15368 /* Make sure the register returned by thumb_find_work_register is
15369 not part of the return value. */
15370 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15371 reg = LAST_LO_REGNUM;
15372
15373 if (! call_used_regs[reg])
15374 mask |= 1 << reg;
15375 }
15376
15377 /* The 504 below is 8 bytes less than 512 because there are two possible
15378 alignment words. We can't tell here if they will be present or not so we
15379 have to play it safe and assume that they are. */
15380 if ((CALLER_INTERWORKING_SLOT_SIZE +
15381 ROUND_UP_WORD (get_frame_size ()) +
15382 crtl->outgoing_args_size) >= 504)
15383 {
15384 /* This is the same as the code in thumb1_expand_prologue() which
15385 determines which register to use for stack decrement. */
15386 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15387 if (mask & (1 << reg))
15388 break;
15389
15390 if (reg > LAST_LO_REGNUM)
15391 {
15392 /* Make sure we have a register available for stack decrement. */
15393 mask |= 1 << LAST_LO_REGNUM;
15394 }
15395 }
15396
15397 return mask;
15398 }
15399
15400
15401 /* Return the number of bytes required to save VFP registers. */
15402 static int
15403 arm_get_vfp_saved_size (void)
15404 {
15405 unsigned int regno;
15406 int count;
15407 int saved;
15408
15409 saved = 0;
15410 /* Space for saved VFP registers. */
15411 if (TARGET_HARD_FLOAT && TARGET_VFP)
15412 {
15413 count = 0;
15414 for (regno = FIRST_VFP_REGNUM;
15415 regno < LAST_VFP_REGNUM;
15416 regno += 2)
15417 {
15418 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15419 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15420 {
15421 if (count > 0)
15422 {
15423 /* Workaround ARM10 VFPr1 bug. */
15424 if (count == 2 && !arm_arch6)
15425 count++;
15426 saved += count * 8;
15427 }
15428 count = 0;
15429 }
15430 else
15431 count++;
15432 }
15433 if (count > 0)
15434 {
15435 if (count == 2 && !arm_arch6)
15436 count++;
15437 saved += count * 8;
15438 }
15439 }
15440 return saved;
15441 }
15442
15443
15444 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15445 everything bar the final return instruction. */
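/* As a rough illustration (not taken from the sources): for a function
   that saved {r4, r5, lr}, needs no interworking and is really returning,
   the whole exit sequence is typically a single "ldmfd sp!, {r4, r5, pc}"
   (or "pop" under unified syntax); for an interworked function the
   registers are popped into lr instead and the load is followed by a
   "bx lr".  */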
15446 const char *
15447 output_return_instruction (rtx operand, int really_return, int reverse)
15448 {
15449 char conditional[10];
15450 char instr[100];
15451 unsigned reg;
15452 unsigned long live_regs_mask;
15453 unsigned long func_type;
15454 arm_stack_offsets *offsets;
15455
15456 func_type = arm_current_func_type ();
15457
15458 if (IS_NAKED (func_type))
15459 return "";
15460
15461 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15462 {
15463 /* If this function was declared non-returning, and we have
15464 found a tail call, then we have to trust that the called
15465 function won't return. */
15466 if (really_return)
15467 {
15468 rtx ops[2];
15469
15470 /* Otherwise, trap an attempted return by aborting. */
15471 ops[0] = operand;
15472 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15473 : "abort");
15474 assemble_external_libcall (ops[1]);
15475 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15476 }
15477
15478 return "";
15479 }
15480
15481 gcc_assert (!cfun->calls_alloca || really_return);
15482
15483 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15484
15485 cfun->machine->return_used_this_function = 1;
15486
15487 offsets = arm_get_frame_offsets ();
15488 live_regs_mask = offsets->saved_regs_mask;
15489
15490 if (live_regs_mask)
15491 {
15492 const char * return_reg;
15493
15494 /* If we do not have any special requirements for function exit
15495 (e.g. interworking) then we can load the return address
15496 directly into the PC. Otherwise we must load it into LR. */
15497 if (really_return
15498 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15499 return_reg = reg_names[PC_REGNUM];
15500 else
15501 return_reg = reg_names[LR_REGNUM];
15502
15503 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15504 {
15505 /* There are three possible reasons for the IP register
15506 being saved. 1) a stack frame was created, in which case
15507 IP contains the old stack pointer, or 2) an ISR routine
15508 corrupted it, or 3) it was saved to align the stack on
15509 iWMMXt. In case 1, restore IP into SP, otherwise just
15510 restore IP. */
15511 if (frame_pointer_needed)
15512 {
15513 live_regs_mask &= ~ (1 << IP_REGNUM);
15514 live_regs_mask |= (1 << SP_REGNUM);
15515 }
15516 else
15517 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15518 }
15519
15520 /* On some ARM architectures it is faster to use LDR rather than
15521 LDM to load a single register. On other architectures, the
15522 cost is the same. In 26 bit mode, or for exception handlers,
15523 we have to use LDM to load the PC so that the CPSR is also
15524 restored. */
15525 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15526 if (live_regs_mask == (1U << reg))
15527 break;
15528
15529 if (reg <= LAST_ARM_REGNUM
15530 && (reg != LR_REGNUM
15531 || ! really_return
15532 || ! IS_INTERRUPT (func_type)))
15533 {
15534 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15535 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15536 }
15537 else
15538 {
15539 char *p;
15540 int first = 1;
15541
15542 /* Generate the load multiple instruction to restore the
15543 registers. Note we can get here, even if
15544 frame_pointer_needed is true, but only if sp already
15545 points to the base of the saved core registers. */
15546 if (live_regs_mask & (1 << SP_REGNUM))
15547 {
15548 unsigned HOST_WIDE_INT stack_adjust;
15549
15550 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15551 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15552
15553 if (stack_adjust && arm_arch5 && TARGET_ARM)
15554 if (TARGET_UNIFIED_ASM)
15555 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15556 else
15557 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15558 else
15559 {
15560 /* If we can't use ldmib (SA110 bug),
15561 then try to pop r3 instead. */
15562 if (stack_adjust)
15563 live_regs_mask |= 1 << 3;
15564
15565 if (TARGET_UNIFIED_ASM)
15566 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15567 else
15568 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15569 }
15570 }
15571 else
15572 if (TARGET_UNIFIED_ASM)
15573 sprintf (instr, "pop%s\t{", conditional);
15574 else
15575 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15576
15577 p = instr + strlen (instr);
15578
15579 for (reg = 0; reg <= SP_REGNUM; reg++)
15580 if (live_regs_mask & (1 << reg))
15581 {
15582 int l = strlen (reg_names[reg]);
15583
15584 if (first)
15585 first = 0;
15586 else
15587 {
15588 memcpy (p, ", ", 2);
15589 p += 2;
15590 }
15591
15592 memcpy (p, "%|", 2);
15593 memcpy (p + 2, reg_names[reg], l);
15594 p += l + 2;
15595 }
15596
15597 if (live_regs_mask & (1 << LR_REGNUM))
15598 {
15599 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15600 /* If returning from an interrupt, restore the CPSR. */
15601 if (IS_INTERRUPT (func_type))
15602 strcat (p, "^");
15603 }
15604 else
15605 strcpy (p, "}");
15606 }
15607
15608 output_asm_insn (instr, & operand);
15609
15610 /* See if we need to generate an extra instruction to
15611 perform the actual function return. */
15612 if (really_return
15613 && func_type != ARM_FT_INTERWORKED
15614 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15615 {
15616 /* The return has already been handled
15617 by loading the LR into the PC. */
15618 really_return = 0;
15619 }
15620 }
15621
15622 if (really_return)
15623 {
15624 switch ((int) ARM_FUNC_TYPE (func_type))
15625 {
15626 case ARM_FT_ISR:
15627 case ARM_FT_FIQ:
15628 /* ??? This is wrong for unified assembly syntax. */
15629 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15630 break;
15631
15632 case ARM_FT_INTERWORKED:
15633 sprintf (instr, "bx%s\t%%|lr", conditional);
15634 break;
15635
15636 case ARM_FT_EXCEPTION:
15637 /* ??? This is wrong for unified assembly syntax. */
15638 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15639 break;
15640
15641 default:
15642 /* Use bx if it's available. */
15643 if (arm_arch5 || arm_arch4t)
15644 sprintf (instr, "bx%s\t%%|lr", conditional);
15645 else
15646 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15647 break;
15648 }
15649
15650 output_asm_insn (instr, & operand);
15651 }
15652
15653 return "";
15654 }
15655
15656 /* Write the function name into the code section, directly preceding
15657 the function prologue.
15658
15659 Code will be output similar to this:
15660 t0
15661 .ascii "arm_poke_function_name", 0
15662 .align
15663 t1
15664 .word 0xff000000 + (t1 - t0)
15665 arm_poke_function_name
15666 mov ip, sp
15667 stmfd sp!, {fp, ip, lr, pc}
15668 sub fp, ip, #4
15669
15670 When performing a stack backtrace, code can inspect the value
15671 of 'pc' stored at 'fp' + 0. If the trace function then looks
15672 at location pc - 12 and the top 8 bits are set, then we know
15673 that there is a function name embedded immediately preceding this
15674 location, whose length is ((pc[-3]) & 0x00ffffff).
15675
15676 We assume that pc is declared as a pointer to an unsigned long.
15677
15678 It is of no benefit to output the function name if we are assembling
15679 a leaf function. These function types will not contain a stack
15680 backtrace structure, so it is not possible to determine the
15681 function name. */
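/* A minimal sketch (not part of GCC) of how a backtracer might recover the
   embedded name, assuming, as above, that pc is an unsigned long * holding
   the value stored at fp + 0:

     unsigned long marker = pc[-3];        (the word at pc - 12)
     if ((marker & 0xff000000) == 0xff000000)
       {
	 unsigned long len = marker & 0x00ffffff;
	 const char *name = (const char *) (pc - 3) - len;
	 ...
       }  */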
15682 void
15683 arm_poke_function_name (FILE *stream, const char *name)
15684 {
15685 unsigned long alignlength;
15686 unsigned long length;
15687 rtx x;
15688
15689 length = strlen (name) + 1;
15690 alignlength = ROUND_UP_WORD (length);
15691
15692 ASM_OUTPUT_ASCII (stream, name, length);
15693 ASM_OUTPUT_ALIGN (stream, 2);
15694 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15695 assemble_aligned_integer (UNITS_PER_WORD, x);
15696 }
15697
15698 /* Place some comments into the assembler stream
15699 describing the current function. */
15700 static void
15701 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15702 {
15703 unsigned long func_type;
15704
15705 /* ??? Do we want to print some of the below anyway? */
15706 if (TARGET_THUMB1)
15707 return;
15708
15709 /* Sanity check. */
15710 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15711
15712 func_type = arm_current_func_type ();
15713
15714 switch ((int) ARM_FUNC_TYPE (func_type))
15715 {
15716 default:
15717 case ARM_FT_NORMAL:
15718 break;
15719 case ARM_FT_INTERWORKED:
15720 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15721 break;
15722 case ARM_FT_ISR:
15723 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15724 break;
15725 case ARM_FT_FIQ:
15726 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15727 break;
15728 case ARM_FT_EXCEPTION:
15729 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15730 break;
15731 }
15732
15733 if (IS_NAKED (func_type))
15734 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15735
15736 if (IS_VOLATILE (func_type))
15737 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15738
15739 if (IS_NESTED (func_type))
15740 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15741 if (IS_STACKALIGN (func_type))
15742 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15743
15744 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15745 crtl->args.size,
15746 crtl->args.pretend_args_size, frame_size);
15747
15748 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15749 frame_pointer_needed,
15750 cfun->machine->uses_anonymous_args);
15751
15752 if (cfun->machine->lr_save_eliminated)
15753 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15754
15755 if (crtl->calls_eh_return)
15756 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15757
15758 }
15759
15760 const char *
15761 arm_output_epilogue (rtx sibling)
15762 {
15763 int reg;
15764 unsigned long saved_regs_mask;
15765 unsigned long func_type;
15766 /* Floats_offset is the offset from the "virtual" frame. In an APCS
15767 frame that is $fp + 4 for a non-variadic function. */
15768 int floats_offset = 0;
15769 rtx operands[3];
15770 FILE * f = asm_out_file;
15771 unsigned int lrm_count = 0;
15772 int really_return = (sibling == NULL);
15773 int start_reg;
15774 arm_stack_offsets *offsets;
15775
15776 /* If we have already generated the return instruction
15777 then it is futile to generate anything else. */
15778 if (use_return_insn (FALSE, sibling) &&
15779 (cfun->machine->return_used_this_function != 0))
15780 return "";
15781
15782 func_type = arm_current_func_type ();
15783
15784 if (IS_NAKED (func_type))
15785 /* Naked functions don't have epilogues. */
15786 return "";
15787
15788 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15789 {
15790 rtx op;
15791
15792 /* A volatile function should never return. Call abort. */
15793 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
15794 assemble_external_libcall (op);
15795 output_asm_insn ("bl\t%a0", &op);
15796
15797 return "";
15798 }
15799
15800 /* If we are throwing an exception, then we really must be doing a
15801 return, so we can't tail-call. */
15802 gcc_assert (!crtl->calls_eh_return || really_return);
15803
15804 offsets = arm_get_frame_offsets ();
15805 saved_regs_mask = offsets->saved_regs_mask;
15806
15807 if (TARGET_IWMMXT)
15808 lrm_count = bit_count (saved_regs_mask);
15809
15810 floats_offset = offsets->saved_args;
15811 /* Compute how far away the floats will be. */
15812 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15813 if (saved_regs_mask & (1 << reg))
15814 floats_offset += 4;
15815
15816 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15817 {
15818 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
15819 int vfp_offset = offsets->frame;
15820
15821 if (TARGET_FPA_EMU2)
15822 {
15823 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15824 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15825 {
15826 floats_offset += 12;
15827 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
15828 reg, FP_REGNUM, floats_offset - vfp_offset);
15829 }
15830 }
15831 else
15832 {
15833 start_reg = LAST_FPA_REGNUM;
15834
15835 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
15836 {
15837 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15838 {
15839 floats_offset += 12;
15840
15841 /* We can't unstack more than four registers at once. */
15842 if (start_reg - reg == 3)
15843 {
15844 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
15845 reg, FP_REGNUM, floats_offset - vfp_offset);
15846 start_reg = reg - 1;
15847 }
15848 }
15849 else
15850 {
15851 if (reg != start_reg)
15852 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15853 reg + 1, start_reg - reg,
15854 FP_REGNUM, floats_offset - vfp_offset);
15855 start_reg = reg - 1;
15856 }
15857 }
15858
15859 /* Just in case the last register checked also needs unstacking. */
15860 if (reg != start_reg)
15861 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
15862 reg + 1, start_reg - reg,
15863 FP_REGNUM, floats_offset - vfp_offset);
15864 }
15865
15866 if (TARGET_HARD_FLOAT && TARGET_VFP)
15867 {
15868 int saved_size;
15869
15870 /* The fldmd insns do not have base+offset addressing
15871 modes, so we use IP to hold the address. */
15872 saved_size = arm_get_vfp_saved_size ();
15873
15874 if (saved_size > 0)
15875 {
15876 floats_offset += saved_size;
15877 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
15878 FP_REGNUM, floats_offset - vfp_offset);
15879 }
15880 start_reg = FIRST_VFP_REGNUM;
15881 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
15882 {
15883 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
15884 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
15885 {
15886 if (start_reg != reg)
15887 vfp_output_fldmd (f, IP_REGNUM,
15888 (start_reg - FIRST_VFP_REGNUM) / 2,
15889 (reg - start_reg) / 2);
15890 start_reg = reg + 2;
15891 }
15892 }
15893 if (start_reg != reg)
15894 vfp_output_fldmd (f, IP_REGNUM,
15895 (start_reg - FIRST_VFP_REGNUM) / 2,
15896 (reg - start_reg) / 2);
15897 }
15898
15899 if (TARGET_IWMMXT)
15900 {
15901 /* The frame pointer is guaranteed to be non-double-word aligned.
15902 This is because it is set to (old_stack_pointer - 4) and the
15903 old_stack_pointer was double word aligned. Thus the offset to
15904 the iWMMXt registers to be loaded must also be non-double-word
15905 sized, so that the resultant address *is* double-word aligned.
15906 We can ignore floats_offset since that was already included in
15907 the live_regs_mask. */
15908 lrm_count += (lrm_count % 2 ? 2 : 1);
15909
15910 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
15911 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15912 {
15913 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
15914 reg, FP_REGNUM, lrm_count * 4);
15915 lrm_count += 2;
15916 }
15917 }
15918
15919 /* saved_regs_mask should contain the IP, which at the time of stack
15920 frame generation actually contains the old stack pointer. So a
15921 quick way to unwind the stack is just pop the IP register directly
15922 into the stack pointer. */
15923 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
15924 saved_regs_mask &= ~ (1 << IP_REGNUM);
15925 saved_regs_mask |= (1 << SP_REGNUM);
15926
15927 /* There are two registers left in saved_regs_mask - LR and PC. We
15928 only need to restore the LR register (the return address), but to
15929 save time we can load it directly into the PC, unless we need a
15930 special function exit sequence, or we are not really returning. */
15931 if (really_return
15932 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15933 && !crtl->calls_eh_return)
15934 /* Delete the LR from the register mask, so that the LR on
15935 the stack is loaded into the PC in the register mask. */
15936 saved_regs_mask &= ~ (1 << LR_REGNUM);
15937 else
15938 saved_regs_mask &= ~ (1 << PC_REGNUM);
15939
15940 /* We must use SP as the base register, because SP is one of the
15941 registers being restored. If an interrupt or page fault
15942 happens in the ldm instruction, the SP might or might not
15943 have been restored. That would be bad, as then SP will no
15944 longer indicate the safe area of stack, and we can get stack
15945 corruption. Using SP as the base register means that it will
15946 be reset correctly to the original value, should an interrupt
15947 occur. If the stack pointer already points at the right
15948 place, then omit the subtraction. */
15949 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
15950 || cfun->calls_alloca)
15951 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
15952 4 * bit_count (saved_regs_mask));
15953 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
15954
15955 if (IS_INTERRUPT (func_type))
15956 /* Interrupt handlers will have pushed the
15957 IP onto the stack, so restore it now. */
15958 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
15959 }
15960 else
15961 {
15962 /* This branch is executed for ARM mode (non-apcs frames) and
15963 Thumb-2 mode. Frame layout is essentially the same for those
15964 cases, except that in ARM mode the frame pointer points to the
15965 first saved register, while in Thumb-2 mode the frame pointer points
15966 to the last saved register.
15967
15968 It is possible to make the frame pointer point to the last saved
15969 register in both cases, and remove some conditionals below.
15970 That means that fp setup in prologue would be just "mov fp, sp"
15971 and sp restore in epilogue would be just "mov sp, fp", whereas
15972 now we have to use add/sub in those cases. However, the value
15973 of that would be marginal, as both mov and add/sub are 32-bit
15974 in ARM mode, and it would require extra conditionals
15975 in arm_expand_prologue to distinguish the ARM-apcs-frame case
15976 (where the frame pointer is required to point at the first saved register)
15977 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
15978 until a real need arises. */
15979 unsigned HOST_WIDE_INT amount;
15980 int rfe;
15981 /* Restore stack pointer if necessary. */
15982 if (TARGET_ARM && frame_pointer_needed)
15983 {
15984 operands[0] = stack_pointer_rtx;
15985 operands[1] = hard_frame_pointer_rtx;
15986
15987 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
15988 output_add_immediate (operands);
15989 }
15990 else
15991 {
15992 if (frame_pointer_needed)
15993 {
15994 /* For Thumb-2 restore sp from the frame pointer.
15995 Operand restrictions mean we have to increment FP, then copy
15996 to SP. */
15997 amount = offsets->locals_base - offsets->saved_regs;
15998 operands[0] = hard_frame_pointer_rtx;
15999 }
16000 else
16001 {
16002 unsigned long count;
16003 operands[0] = stack_pointer_rtx;
16004 amount = offsets->outgoing_args - offsets->saved_regs;
16005 /* Pop call-clobbered registers if doing so avoids a
16006 separate stack adjustment. */
16007 count = offsets->saved_regs - offsets->saved_args;
16008 if (optimize_size
16009 && count != 0
16010 && !crtl->calls_eh_return
16011 && bit_count (saved_regs_mask) * 4 == count
16012 && !IS_INTERRUPT (func_type)
16013 && !IS_STACKALIGN (func_type)
16014 && !crtl->tail_call_emit)
16015 {
16016 unsigned long mask;
16017 /* Preserve return values, of any size. */
16018 mask = (1 << ((arm_size_return_regs () + 3) / 4)) - 1;
16019 mask ^= 0xf;
16020 mask &= ~saved_regs_mask;
16021 reg = 0;
16022 while (bit_count (mask) * 4 > amount)
16023 {
16024 while ((mask & (1 << reg)) == 0)
16025 reg++;
16026 mask &= ~(1 << reg);
16027 }
16028 if (bit_count (mask) * 4 == amount) {
16029 amount = 0;
16030 saved_regs_mask |= mask;
16031 }
16032 }
16033 }
16034
16035 if (amount)
16036 {
16037 operands[1] = operands[0];
16038 operands[2] = GEN_INT (amount);
16039 output_add_immediate (operands);
16040 }
16041 if (frame_pointer_needed)
16042 asm_fprintf (f, "\tmov\t%r, %r\n",
16043 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
16044 }
16045
16046 if (TARGET_FPA_EMU2)
16047 {
16048 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
16049 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16050 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
16051 reg, SP_REGNUM);
16052 }
16053 else
16054 {
16055 start_reg = FIRST_FPA_REGNUM;
16056
16057 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
16058 {
16059 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16060 {
16061 if (reg - start_reg == 3)
16062 {
16063 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
16064 start_reg, SP_REGNUM);
16065 start_reg = reg + 1;
16066 }
16067 }
16068 else
16069 {
16070 if (reg != start_reg)
16071 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
16072 start_reg, reg - start_reg,
16073 SP_REGNUM);
16074
16075 start_reg = reg + 1;
16076 }
16077 }
16078
16079 /* Just in case the last register checked also needs unstacking. */
16080 if (reg != start_reg)
16081 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
16082 start_reg, reg - start_reg, SP_REGNUM);
16083 }
16084
16085 if (TARGET_HARD_FLOAT && TARGET_VFP)
16086 {
16087 int end_reg = LAST_VFP_REGNUM + 1;
16088
16089 /* Scan the registers in reverse order. We need to match
16090 any groupings made in the prologue and generate matching
16091 pop operations. */
16092 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
16093 {
16094 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16095 && (!df_regs_ever_live_p (reg + 1)
16096 || call_used_regs[reg + 1]))
16097 {
16098 if (end_reg > reg + 2)
16099 vfp_output_fldmd (f, SP_REGNUM,
16100 (reg + 2 - FIRST_VFP_REGNUM) / 2,
16101 (end_reg - (reg + 2)) / 2);
16102 end_reg = reg;
16103 }
16104 }
16105 if (end_reg > reg + 2)
16106 vfp_output_fldmd (f, SP_REGNUM, 0,
16107 (end_reg - (reg + 2)) / 2);
16108 }
16109
16110 if (TARGET_IWMMXT)
16111 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
16112 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16113 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
16114
16115 /* If we can, restore the LR into the PC. */
16116 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
16117 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
16118 && !IS_STACKALIGN (func_type)
16119 && really_return
16120 && crtl->args.pretend_args_size == 0
16121 && saved_regs_mask & (1 << LR_REGNUM)
16122 && !crtl->calls_eh_return)
16123 {
16124 saved_regs_mask &= ~ (1 << LR_REGNUM);
16125 saved_regs_mask |= (1 << PC_REGNUM);
16126 rfe = IS_INTERRUPT (func_type);
16127 }
16128 else
16129 rfe = 0;
16130
16131 /* Load the registers off the stack. If we only have one register
16132 to load use the LDR instruction - it is faster. For Thumb-2
16133 always use pop and the assembler will pick the best instruction. */
16134 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
16135 && !IS_INTERRUPT(func_type))
16136 {
16137 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
16138 }
16139 else if (saved_regs_mask)
16140 {
16141 if (saved_regs_mask & (1 << SP_REGNUM))
16142 /* Note - write back to the stack register is not enabled
16143 (i.e. "ldmfd sp!..."). We know that the stack pointer is
16144 in the list of registers and if we add writeback the
16145 instruction becomes UNPREDICTABLE. */
16146 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
16147 rfe);
16148 else if (TARGET_ARM)
16149 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
16150 rfe);
16151 else
16152 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
16153 }
16154
16155 if (crtl->args.pretend_args_size)
16156 {
16157 /* Unwind the pre-pushed regs. */
16158 operands[0] = operands[1] = stack_pointer_rtx;
16159 operands[2] = GEN_INT (crtl->args.pretend_args_size);
16160 output_add_immediate (operands);
16161 }
16162 }
16163
16164 /* We may have already restored PC directly from the stack. */
16165 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
16166 return "";
16167
16168 /* Stack adjustment for exception handler. */
16169 if (crtl->calls_eh_return)
16170 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
16171 ARM_EH_STACKADJ_REGNUM);
16172
16173 /* Generate the return instruction. */
16174 switch ((int) ARM_FUNC_TYPE (func_type))
16175 {
16176 case ARM_FT_ISR:
16177 case ARM_FT_FIQ:
16178 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
16179 break;
16180
16181 case ARM_FT_EXCEPTION:
16182 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
16183 break;
16184
16185 case ARM_FT_INTERWORKED:
16186 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
16187 break;
16188
16189 default:
16190 if (IS_STACKALIGN (func_type))
16191 {
16192 /* See comment in arm_expand_prologue. */
16193 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
16194 }
16195 if (arm_arch5 || arm_arch4t)
16196 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
16197 else
16198 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
16199 break;
16200 }
16201
16202 return "";
16203 }
16204
16205 static void
16206 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
16207 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
16208 {
16209 arm_stack_offsets *offsets;
16210
16211 if (TARGET_THUMB1)
16212 {
16213 int regno;
16214
16215 /* Emit any call-via-reg trampolines that are needed for v4t support
16216 of call_reg and call_value_reg type insns. */
16217 for (regno = 0; regno < LR_REGNUM; regno++)
16218 {
16219 rtx label = cfun->machine->call_via[regno];
16220
16221 if (label != NULL)
16222 {
16223 switch_to_section (function_section (current_function_decl));
16224 targetm.asm_out.internal_label (asm_out_file, "L",
16225 CODE_LABEL_NUMBER (label));
16226 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
16227 }
16228 }
16229
16230 /* ??? Probably not safe to set this here, since it assumes that a
16231 function will be emitted as assembly immediately after we generate
16232 RTL for it. This does not happen for inline functions. */
16233 cfun->machine->return_used_this_function = 0;
16234 }
16235 else /* TARGET_32BIT */
16236 {
16237 /* We need to take into account any stack-frame rounding. */
16238 offsets = arm_get_frame_offsets ();
16239
16240 gcc_assert (!use_return_insn (FALSE, NULL)
16241 || (cfun->machine->return_used_this_function != 0)
16242 || offsets->saved_regs == offsets->outgoing_args
16243 || frame_pointer_needed);
16244
16245 /* Reset the ARM-specific per-function variables. */
16246 after_arm_reorg = 0;
16247 }
16248 }
16249
16250 /* Generate and emit an insn that we will recognize as a push_multi.
16251 Unfortunately, since this insn does not reflect very well the actual
16252 semantics of the operation, we need to annotate the insn for the benefit
16253 of DWARF2 frame unwind information. */
16254 static rtx
16255 emit_multi_reg_push (unsigned long mask)
16256 {
16257 int num_regs = 0;
16258 int num_dwarf_regs;
16259 int i, j;
16260 rtx par;
16261 rtx dwarf;
16262 int dwarf_par_index;
16263 rtx tmp, reg;
16264
16265 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16266 if (mask & (1 << i))
16267 num_regs++;
16268
16269 gcc_assert (num_regs && num_regs <= 16);
16270
16271 /* We don't record the PC in the dwarf frame information. */
16272 num_dwarf_regs = num_regs;
16273 if (mask & (1 << PC_REGNUM))
16274 num_dwarf_regs--;
16275
16276 /* For the body of the insn we are going to generate an UNSPEC in
16277 parallel with several USEs. This allows the insn to be recognized
16278 by the push_multi pattern in the arm.md file.
16279
16280 The body of the insn looks something like this:
16281
16282 (parallel [
16283 (set (mem:BLK (pre_modify:SI (reg:SI sp)
16284 (const_int:SI <num>)))
16285 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
16286 (use (reg:SI XX))
16287 (use (reg:SI YY))
16288 ...
16289 ])
16290
16291 For the frame note however, we try to be more explicit and actually
16292 show each register being stored into the stack frame, plus a (single)
16293 decrement of the stack pointer. We do it this way in order to be
16294 friendly to the stack unwinding code, which only wants to see a single
16295 stack decrement per instruction. The RTL we generate for the note looks
16296 something like this:
16297
16298 (sequence [
16299 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
16300 (set (mem:SI (reg:SI sp)) (reg:SI r4))
16301 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
16302 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
16303 ...
16304 ])
16305
16306 FIXME: In an ideal world the PRE_MODIFY would not exist and
16307 instead we'd have a parallel expression detailing all
16308 the stores to the various memory addresses so that debug
16309 information is more up-to-date. Remember however while writing
16310 this to take care of the constraints with the push instruction.
16311
16312 Note also that this has to be taken care of for the VFP registers.
16313
16314 For more see PR43399. */
16315
16316 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
16317 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
16318 dwarf_par_index = 1;
16319
16320 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16321 {
16322 if (mask & (1 << i))
16323 {
16324 reg = gen_rtx_REG (SImode, i);
16325
16326 XVECEXP (par, 0, 0)
16327 = gen_rtx_SET (VOIDmode,
16328 gen_frame_mem
16329 (BLKmode,
16330 gen_rtx_PRE_MODIFY (Pmode,
16331 stack_pointer_rtx,
16332 plus_constant
16333 (stack_pointer_rtx,
16334 -4 * num_regs))
16335 ),
16336 gen_rtx_UNSPEC (BLKmode,
16337 gen_rtvec (1, reg),
16338 UNSPEC_PUSH_MULT));
16339
16340 if (i != PC_REGNUM)
16341 {
16342 tmp = gen_rtx_SET (VOIDmode,
16343 gen_frame_mem (SImode, stack_pointer_rtx),
16344 reg);
16345 RTX_FRAME_RELATED_P (tmp) = 1;
16346 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
16347 dwarf_par_index++;
16348 }
16349
16350 break;
16351 }
16352 }
16353
16354 for (j = 1, i++; j < num_regs; i++)
16355 {
16356 if (mask & (1 << i))
16357 {
16358 reg = gen_rtx_REG (SImode, i);
16359
16360 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
16361
16362 if (i != PC_REGNUM)
16363 {
16364 tmp
16365 = gen_rtx_SET (VOIDmode,
16366 gen_frame_mem
16367 (SImode,
16368 plus_constant (stack_pointer_rtx,
16369 4 * j)),
16370 reg);
16371 RTX_FRAME_RELATED_P (tmp) = 1;
16372 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
16373 }
16374
16375 j++;
16376 }
16377 }
16378
16379 par = emit_insn (par);
16380
16381 tmp = gen_rtx_SET (VOIDmode,
16382 stack_pointer_rtx,
16383 plus_constant (stack_pointer_rtx, -4 * num_regs));
16384 RTX_FRAME_RELATED_P (tmp) = 1;
16385 XVECEXP (dwarf, 0, 0) = tmp;
16386
16387 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16388
16389 return par;
16390 }
16391
16392 /* Calculate the size of the return value that is passed in registers. */
16393 static unsigned
16394 arm_size_return_regs (void)
16395 {
16396 enum machine_mode mode;
16397
16398 if (crtl->return_rtx != 0)
16399 mode = GET_MODE (crtl->return_rtx);
16400 else
16401 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16402
16403 return GET_MODE_SIZE (mode);
16404 }
16405
16406 static rtx
16407 emit_sfm (int base_reg, int count)
16408 {
16409 rtx par;
16410 rtx dwarf;
16411 rtx tmp, reg;
16412 int i;
16413
16414 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
16415 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
16416
16417 reg = gen_rtx_REG (XFmode, base_reg++);
16418
16419 XVECEXP (par, 0, 0)
16420 = gen_rtx_SET (VOIDmode,
16421 gen_frame_mem
16422 (BLKmode,
16423 gen_rtx_PRE_MODIFY (Pmode,
16424 stack_pointer_rtx,
16425 plus_constant
16426 (stack_pointer_rtx,
16427 -12 * count))
16428 ),
16429 gen_rtx_UNSPEC (BLKmode,
16430 gen_rtvec (1, reg),
16431 UNSPEC_PUSH_MULT));
16432 tmp = gen_rtx_SET (VOIDmode,
16433 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
16434 RTX_FRAME_RELATED_P (tmp) = 1;
16435 XVECEXP (dwarf, 0, 1) = tmp;
16436
16437 for (i = 1; i < count; i++)
16438 {
16439 reg = gen_rtx_REG (XFmode, base_reg++);
16440 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
16441
16442 tmp = gen_rtx_SET (VOIDmode,
16443 gen_frame_mem (XFmode,
16444 plus_constant (stack_pointer_rtx,
16445 i * 12)),
16446 reg);
16447 RTX_FRAME_RELATED_P (tmp) = 1;
16448 XVECEXP (dwarf, 0, i + 1) = tmp;
16449 }
16450
16451 tmp = gen_rtx_SET (VOIDmode,
16452 stack_pointer_rtx,
16453 plus_constant (stack_pointer_rtx, -12 * count));
16454
16455 RTX_FRAME_RELATED_P (tmp) = 1;
16456 XVECEXP (dwarf, 0, 0) = tmp;
16457
16458 par = emit_insn (par);
16459 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16460
16461 return par;
16462 }
16463
16464
16465 /* Return true if the current function needs to save/restore LR. */
16466
16467 static bool
16468 thumb_force_lr_save (void)
16469 {
16470 return !cfun->machine->lr_save_eliminated
16471 && (!leaf_function_p ()
16472 || thumb_far_jump_used_p ()
16473 || df_regs_ever_live_p (LR_REGNUM));
16474 }
16475
16476
16477 /* Return true if r3 is used by any of the tail call insns in the
16478 current function. */
16479
16480 static bool
16481 any_sibcall_uses_r3 (void)
16482 {
16483 edge_iterator ei;
16484 edge e;
16485
16486 if (!crtl->tail_call_emit)
16487 return false;
16488 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16489 if (e->flags & EDGE_SIBCALL)
16490 {
16491 rtx call = BB_END (e->src);
16492 if (!CALL_P (call))
16493 call = prev_nonnote_nondebug_insn (call);
16494 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16495 if (find_regno_fusage (call, USE, 3))
16496 return true;
16497 }
16498 return false;
16499 }
16500
16501
16502 /* Compute the distance from register FROM to register TO.
16503 These can be the arg pointer (26), the soft frame pointer (25),
16504 the stack pointer (13) or the hard frame pointer (11).
16505 In thumb mode r7 is used as the soft frame pointer, if needed.
16506 Typical stack layout looks like this:
16507
16508 old stack pointer -> | |
16509 ----
16510 | | \
16511 | | saved arguments for
16512 | | vararg functions
16513 | | /
16514 --
16515 hard FP & arg pointer -> | | \
16516 | | stack
16517 | | frame
16518 | | /
16519 --
16520 | | \
16521 | | call saved
16522 | | registers
16523 soft frame pointer -> | | /
16524 --
16525 | | \
16526 | | local
16527 | | variables
16528 locals base pointer -> | | /
16529 --
16530 | | \
16531 | | outgoing
16532 | | arguments
16533 current stack pointer -> | | /
16534 --
16535
16536 For a given function some or all of these stack components
16537 may not be needed, giving rise to the possibility of
16538 eliminating some of the registers.
16539
16540 The values returned by this function must reflect the behavior
16541 of arm_expand_prologue() and arm_compute_save_reg_mask().
16542
16543 The sign of the number returned reflects the direction of stack
16544 growth, so the values are positive for all eliminations except
16545 from the soft frame pointer to the hard frame pointer.
16546
16547 SFP may point just inside the local variables block to ensure correct
16548 alignment. */
16549
16550
16551 /* Calculate stack offsets. These are used to calculate register elimination
16552 offsets and in prologue/epilogue code. Also calculates which registers
16553 should be saved. */
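/* A rough sketch of how the fields computed here relate to the layout
 diagram above (each is a byte distance from the incoming stack pointer,
 measured in the direction of stack growth):

 saved_args    - end of the pushed varargs save area,
 saved_regs    - end of the core/coprocessor register saves,
 soft_frame    - position of the soft frame pointer,
 locals_base   - end of the local variables,
 outgoing_args - end of the outgoing argument area, i.e. the total
 amount by which the prologue drops the stack pointer.  */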
16554
16555 static arm_stack_offsets *
16556 arm_get_frame_offsets (void)
16557 {
16558 struct arm_stack_offsets *offsets;
16559 unsigned long func_type;
16560 int leaf;
16561 int saved;
16562 int core_saved;
16563 HOST_WIDE_INT frame_size;
16564 int i;
16565
16566 offsets = &cfun->machine->stack_offsets;
16567
16568 /* We need to know if we are a leaf function. Unfortunately, it
16569 is possible to be called after start_sequence has been called,
16570 which causes get_insns to return the insns for the sequence,
16571 not the function, which will cause leaf_function_p to return
16572 the incorrect result.
16573
16574 Fortunately, there is no need to recompute this once reload has
16575 completed: the frame size cannot be changed after that time, so we
16576 can safely use the cached value. */
16577
16578 if (reload_completed)
16579 return offsets;
16580
16581 /* Initially this is the size of the local variables. It will be translated
16582 into an offset once we have determined the size of preceding data. */
16583 frame_size = ROUND_UP_WORD (get_frame_size ());
16584
16585 leaf = leaf_function_p ();
16586
16587 /* Space for variadic functions. */
16588 offsets->saved_args = crtl->args.pretend_args_size;
16589
16590 /* In Thumb mode this is incorrect, but never used. */
16591 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16592 arm_compute_static_chain_stack_bytes();
16593
16594 if (TARGET_32BIT)
16595 {
16596 unsigned int regno;
16597
16598 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16599 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16600 saved = core_saved;
16601
16602 /* We know that SP will be doubleword aligned on entry, and we must
16603 preserve that condition at any subroutine call. We also require the
16604 soft frame pointer to be doubleword aligned. */
16605
16606 if (TARGET_REALLY_IWMMXT)
16607 {
16608 /* Check for the call-saved iWMMXt registers. */
16609 for (regno = FIRST_IWMMXT_REGNUM;
16610 regno <= LAST_IWMMXT_REGNUM;
16611 regno++)
16612 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16613 saved += 8;
16614 }
16615
16616 func_type = arm_current_func_type ();
16617 if (! IS_VOLATILE (func_type))
16618 {
16619 /* Space for saved FPA registers. */
16620 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
16621 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16622 saved += 12;
16623
16624 /* Space for saved VFP registers. */
16625 if (TARGET_HARD_FLOAT && TARGET_VFP)
16626 saved += arm_get_vfp_saved_size ();
16627 }
16628 }
16629 else /* TARGET_THUMB1 */
16630 {
16631 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16632 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16633 saved = core_saved;
16634 if (TARGET_BACKTRACE)
16635 saved += 16;
16636 }
16637
16638 /* Saved registers include the stack frame. */
16639 offsets->saved_regs = offsets->saved_args + saved +
16640 arm_compute_static_chain_stack_bytes();
16641 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
16642 /* A leaf function does not need any stack alignment if it has nothing
16643 on the stack. */
16644 if (leaf && frame_size == 0
16645 /* However if it calls alloca(), we have a dynamically allocated
16646 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16647 && ! cfun->calls_alloca)
16648 {
16649 offsets->outgoing_args = offsets->soft_frame;
16650 offsets->locals_base = offsets->soft_frame;
16651 return offsets;
16652 }
16653
16654 /* Ensure SFP has the correct alignment. */
16655 if (ARM_DOUBLEWORD_ALIGN
16656 && (offsets->soft_frame & 7))
16657 {
16658 offsets->soft_frame += 4;
16659 /* Try to align the stack by pushing an extra reg. Don't bother doing this
16660 when there is a stack frame as the alignment will be rolled into
16661 the normal stack adjustment. */
16662 if (frame_size + crtl->outgoing_args_size == 0)
16663 {
16664 int reg = -1;
16665
16666 /* If it is safe to use r3, then do so. This sometimes
16667 generates better code on Thumb-2 by avoiding the need to
16668 use 32-bit push/pop instructions. */
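/* (Only the low registers and LR can appear in the 16-bit PUSH/POP
 encodings, so saving an extra low register such as r3 is essentially
 free in code-size terms, whereas a high register would force the
 32-bit encoding.)  */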
16669 if (! any_sibcall_uses_r3 ()
16670 && arm_size_return_regs () <= 12
16671 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16672 {
16673 reg = 3;
16674 }
16675 else
16676 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16677 {
16678 if ((offsets->saved_regs_mask & (1 << i)) == 0)
16679 {
16680 reg = i;
16681 break;
16682 }
16683 }
16684
16685 if (reg != -1)
16686 {
16687 offsets->saved_regs += 4;
16688 offsets->saved_regs_mask |= (1 << reg);
16689 }
16690 }
16691 }
16692
16693 offsets->locals_base = offsets->soft_frame + frame_size;
16694 offsets->outgoing_args = (offsets->locals_base
16695 + crtl->outgoing_args_size);
16696
16697 if (ARM_DOUBLEWORD_ALIGN)
16698 {
16699 /* Ensure SP remains doubleword aligned. */
16700 if (offsets->outgoing_args & 7)
16701 offsets->outgoing_args += 4;
16702 gcc_assert (!(offsets->outgoing_args & 7));
16703 }
16704
16705 return offsets;
16706 }
16707
16708
16709 /* Calculate the relative offsets for the different stack pointers. Positive
16710 offsets are in the direction of stack growth. */
16711
16712 HOST_WIDE_INT
16713 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16714 {
16715 arm_stack_offsets *offsets;
16716
16717 offsets = arm_get_frame_offsets ();
16718
16719 /* OK, now we have enough information to compute the distances.
16720 There must be an entry in these switch tables for each pair
16721 of registers in ELIMINABLE_REGS, even if some of the entries
16722 seem to be redundant or useless. */
16723 switch (from)
16724 {
16725 case ARG_POINTER_REGNUM:
16726 switch (to)
16727 {
16728 case THUMB_HARD_FRAME_POINTER_REGNUM:
16729 return 0;
16730
16731 case FRAME_POINTER_REGNUM:
16732 /* This is the reverse of the soft frame pointer
16733 to hard frame pointer elimination below. */
16734 return offsets->soft_frame - offsets->saved_args;
16735
16736 case ARM_HARD_FRAME_POINTER_REGNUM:
16737 /* This is only non-zero in the case where the static chain register
16738 is stored above the frame. */
16739 return offsets->frame - offsets->saved_args - 4;
16740
16741 case STACK_POINTER_REGNUM:
16742 /* If nothing has been pushed on the stack at all
16743 then this will return -4. This *is* correct! */
16744 return offsets->outgoing_args - (offsets->saved_args + 4);
16745
16746 default:
16747 gcc_unreachable ();
16748 }
16749 gcc_unreachable ();
16750
16751 case FRAME_POINTER_REGNUM:
16752 switch (to)
16753 {
16754 case THUMB_HARD_FRAME_POINTER_REGNUM:
16755 return 0;
16756
16757 case ARM_HARD_FRAME_POINTER_REGNUM:
16758 /* The hard frame pointer points to the top entry in the
16759 stack frame. The soft frame pointer to the bottom entry
16760 in the stack frame. If there is no stack frame at all,
16761 then they are identical. */
16762
16763 return offsets->frame - offsets->soft_frame;
16764
16765 case STACK_POINTER_REGNUM:
16766 return offsets->outgoing_args - offsets->soft_frame;
16767
16768 default:
16769 gcc_unreachable ();
16770 }
16771 gcc_unreachable ();
16772
16773 default:
16774 /* You cannot eliminate from the stack pointer.
16775 In theory you could eliminate from the hard frame
16776 pointer to the stack pointer, but this will never
16777 happen, since if a stack frame is not needed the
16778 hard frame pointer will never be used. */
16779 gcc_unreachable ();
16780 }
16781 }
16782
16783 /* Given FROM and TO register numbers, say whether this elimination is
16784 allowed. Frame pointer elimination is automatically handled.
16785
16786 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16787 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16788 pointer, we must eliminate FRAME_POINTER_REGNUM into
16789 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16790 ARG_POINTER_REGNUM. */
16791
16792 bool
16793 arm_can_eliminate (const int from, const int to)
16794 {
16795 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16796 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16797 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16798 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
16799 true);
16800 }
16801
16802 /* Emit RTL to save coprocessor registers on function entry. Returns the
16803 number of bytes pushed. */
16804
16805 static int
16806 arm_save_coproc_regs(void)
16807 {
16808 int saved_size = 0;
16809 unsigned reg;
16810 unsigned start_reg;
16811 rtx insn;
16812
16813 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16814 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16815 {
16816 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16817 insn = gen_rtx_MEM (V2SImode, insn);
16818 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16819 RTX_FRAME_RELATED_P (insn) = 1;
16820 saved_size += 8;
16821 }
16822
16823 /* Save any floating point call-saved registers used by this
16824 function. */
16825 if (TARGET_FPA_EMU2)
16826 {
16827 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16828 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16829 {
16830 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16831 insn = gen_rtx_MEM (XFmode, insn);
16832 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
16833 RTX_FRAME_RELATED_P (insn) = 1;
16834 saved_size += 12;
16835 }
16836 }
16837 else
16838 {
16839 start_reg = LAST_FPA_REGNUM;
16840
16841 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
16842 {
16843 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
16844 {
16845 if (start_reg - reg == 3)
16846 {
16847 insn = emit_sfm (reg, 4);
16848 RTX_FRAME_RELATED_P (insn) = 1;
16849 saved_size += 48;
16850 start_reg = reg - 1;
16851 }
16852 }
16853 else
16854 {
16855 if (start_reg != reg)
16856 {
16857 insn = emit_sfm (reg + 1, start_reg - reg);
16858 RTX_FRAME_RELATED_P (insn) = 1;
16859 saved_size += (start_reg - reg) * 12;
16860 }
16861 start_reg = reg - 1;
16862 }
16863 }
16864
16865 if (start_reg != reg)
16866 {
16867 insn = emit_sfm (reg + 1, start_reg - reg);
16868 saved_size += (start_reg - reg) * 12;
16869 RTX_FRAME_RELATED_P (insn) = 1;
16870 }
16871 }
16872 if (TARGET_HARD_FLOAT && TARGET_VFP)
16873 {
16874 start_reg = FIRST_VFP_REGNUM;
16875
16876 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16877 {
16878 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16879 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16880 {
16881 if (start_reg != reg)
16882 saved_size += vfp_emit_fstmd (start_reg,
16883 (reg - start_reg) / 2);
16884 start_reg = reg + 2;
16885 }
16886 }
16887 if (start_reg != reg)
16888 saved_size += vfp_emit_fstmd (start_reg,
16889 (reg - start_reg) / 2);
16890 }
16891 return saved_size;
16892 }
16893
16894
16895 /* Set the Thumb frame pointer from the stack pointer. */
16896
16897 static void
16898 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16899 {
16900 HOST_WIDE_INT amount;
16901 rtx insn, dwarf;
16902
16903 amount = offsets->outgoing_args - offsets->locals_base;
16904 if (amount < 1024)
16905 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16906 stack_pointer_rtx, GEN_INT (amount)));
16907 else
16908 {
16909 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16910 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16911 expects the first two operands to be the same. */
16912 if (TARGET_THUMB2)
16913 {
16914 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16915 stack_pointer_rtx,
16916 hard_frame_pointer_rtx));
16917 }
16918 else
16919 {
16920 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16921 hard_frame_pointer_rtx,
16922 stack_pointer_rtx));
16923 }
16924 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16925 plus_constant (stack_pointer_rtx, amount));
16926 RTX_FRAME_RELATED_P (dwarf) = 1;
16927 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16928 }
16929
16930 RTX_FRAME_RELATED_P (insn) = 1;
16931 }
16932
16933 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16934 function. */
16935 void
16936 arm_expand_prologue (void)
16937 {
16938 rtx amount;
16939 rtx insn;
16940 rtx ip_rtx;
16941 unsigned long live_regs_mask;
16942 unsigned long func_type;
16943 int fp_offset = 0;
16944 int saved_pretend_args = 0;
16945 int saved_regs = 0;
16946 unsigned HOST_WIDE_INT args_to_push;
16947 arm_stack_offsets *offsets;
16948
16949 func_type = arm_current_func_type ();
16950
16951 /* Naked functions don't have prologues. */
16952 if (IS_NAKED (func_type))
16953 return;
16954
16955 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16956 args_to_push = crtl->args.pretend_args_size;
16957
16958 /* Compute which registers we will have to save onto the stack. */
16959 offsets = arm_get_frame_offsets ();
16960 live_regs_mask = offsets->saved_regs_mask;
16961
16962 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16963
16964 if (IS_STACKALIGN (func_type))
16965 {
16966 rtx r0, r1;
16967
16968 /* Handle a word-aligned stack pointer. We generate the following:
16969
16970 mov r0, sp
16971 bic r1, r0, #7
16972 mov sp, r1
16973 <save and restore r0 in normal prologue/epilogue>
16974 mov sp, r0
16975 bx lr
16976
16977 The unwinder doesn't need to know about the stack realignment.
16978 Just tell it we saved SP in r0. */
16979 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16980
16981 r0 = gen_rtx_REG (SImode, 0);
16982 r1 = gen_rtx_REG (SImode, 1);
16983
16984 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16985 RTX_FRAME_RELATED_P (insn) = 1;
16986 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16987
16988 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16989
16990 /* ??? The CFA changes here, which may cause GDB to conclude that it
16991 has entered a different function. That said, the unwind info is
16992 correct, individually, before and after this instruction because
16993 we've described the save of SP, which will override the default
16994 handling of SP as restoring from the CFA. */
16995 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16996 }
16997
16998 /* For APCS frames, if IP register is clobbered
16999 when creating frame, save that register in a special
17000 way. */
17001 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
17002 {
17003 if (IS_INTERRUPT (func_type))
17004 {
17005 /* Interrupt functions must not corrupt any registers.
17006 Creating a frame pointer however, corrupts the IP
17007 register, so we must push it first. */
17008 emit_multi_reg_push (1 << IP_REGNUM);
17009
17010 /* Do not set RTX_FRAME_RELATED_P on this insn.
17011 The dwarf stack unwinding code only wants to see one
17012 stack decrement per function, and this is not it. If
17013 this instruction is labeled as being part of the frame
17014 creation sequence then dwarf2out_frame_debug_expr will
17015 die when it encounters the assignment of IP to FP
17016 later on, since the use of SP here establishes SP as
17017 the CFA register and not IP.
17018
17019 Anyway this instruction is not really part of the stack
17020 frame creation although it is part of the prologue. */
17021 }
17022 else if (IS_NESTED (func_type))
17023 {
17024 /* The static chain register is the same as the IP register, which is
17025 used as a scratch register during stack frame creation.
17026 To get around this we need to find somewhere to store IP
17027 whilst the frame is being created. We try the following
17028 places in order:
17029
17030 1. The last argument register.
17031 2. A slot on the stack above the frame. (This only
17032 works if the function is not a varargs function).
17033 3. Register r3, after pushing the argument registers
17034 onto the stack.
17035
17036 Note - we only need to tell the dwarf2 backend about the SP
17037 adjustment in the second variant; the static chain register
17038 doesn't need to be unwound, as it doesn't contain a value
17039 inherited from the caller. */
17040
17041 if (df_regs_ever_live_p (3) == false)
17042 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
17043 else if (args_to_push == 0)
17044 {
17045 rtx dwarf;
17046
17047 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
17048 saved_regs += 4;
17049
17050 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
17051 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
17052 fp_offset = 4;
17053
17054 /* Just tell the dwarf backend that we adjusted SP. */
17055 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17056 plus_constant (stack_pointer_rtx,
17057 -fp_offset));
17058 RTX_FRAME_RELATED_P (insn) = 1;
17059 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
17060 }
17061 else
17062 {
17063 /* Store the args on the stack. */
17064 if (cfun->machine->uses_anonymous_args)
17065 insn = emit_multi_reg_push
17066 ((0xf0 >> (args_to_push / 4)) & 0xf);
17067 else
17068 insn = emit_insn
17069 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17070 GEN_INT (- args_to_push)));
17071
17072 RTX_FRAME_RELATED_P (insn) = 1;
17073
17074 saved_pretend_args = 1;
17075 fp_offset = args_to_push;
17076 args_to_push = 0;
17077
17078 /* Now reuse r3 to preserve IP. */
17079 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
17080 }
17081 }
17082
17083 insn = emit_set_insn (ip_rtx,
17084 plus_constant (stack_pointer_rtx, fp_offset));
17085 RTX_FRAME_RELATED_P (insn) = 1;
17086 }
17087
17088 if (args_to_push)
17089 {
17090 /* Push the argument registers, or reserve space for them. */
17091 if (cfun->machine->uses_anonymous_args)
17092 insn = emit_multi_reg_push
17093 ((0xf0 >> (args_to_push / 4)) & 0xf);
17094 else
17095 insn = emit_insn
17096 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17097 GEN_INT (- args_to_push)));
17098 RTX_FRAME_RELATED_P (insn) = 1;
17099 }
17100
17101 /* If this is an interrupt service routine, and the link register
17102 is going to be pushed, and we're not generating an extra
17103 push of IP (needed when a frame is needed and the frame layout is APCS),
17104 subtracting four from LR now will mean that the function return
17105 can be done with a single instruction. */
17106 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
17107 && (live_regs_mask & (1 << LR_REGNUM)) != 0
17108 && !(frame_pointer_needed && TARGET_APCS_FRAME)
17109 && TARGET_ARM)
17110 {
17111 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
17112
17113 emit_set_insn (lr, plus_constant (lr, -4));
17114 }
17115
17116 if (live_regs_mask)
17117 {
17118 saved_regs += bit_count (live_regs_mask) * 4;
17119 if (optimize_size && !frame_pointer_needed
17120 && saved_regs == offsets->saved_regs - offsets->saved_args)
17121 {
17122 /* If no coprocessor registers are being pushed and we don't have
17123 to worry about a frame pointer then push extra registers to
17124 create the stack frame. This is done in a way that does not
17125 alter the frame layout, so is independent of the epilogue. */
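/* For example (purely illustrative): if the frame needs another 8 bytes
 and r0/r1 are not live, extending the push to include {r0, r1} absorbs
 those 8 bytes and saves a separate "sub sp, sp, #8" later on.  */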
17126 int n;
17127 int frame;
17128 n = 0;
17129 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
17130 n++;
17131 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
17132 if (frame && n * 4 >= frame)
17133 {
17134 n = frame / 4;
17135 live_regs_mask |= (1 << n) - 1;
17136 saved_regs += frame;
17137 }
17138 }
17139 insn = emit_multi_reg_push (live_regs_mask);
17140 RTX_FRAME_RELATED_P (insn) = 1;
17141 }
17142
17143 if (! IS_VOLATILE (func_type))
17144 saved_regs += arm_save_coproc_regs ();
17145
17146 if (frame_pointer_needed && TARGET_ARM)
17147 {
17148 /* Create the new frame pointer. */
17149 if (TARGET_APCS_FRAME)
17150 {
17151 insn = GEN_INT (-(4 + args_to_push + fp_offset));
17152 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
17153 RTX_FRAME_RELATED_P (insn) = 1;
17154
17155 if (IS_NESTED (func_type))
17156 {
17157 /* Recover the static chain register. */
17158 if (!df_regs_ever_live_p (3)
17159 || saved_pretend_args)
17160 insn = gen_rtx_REG (SImode, 3);
17161 else /* if (crtl->args.pretend_args_size == 0) */
17162 {
17163 insn = plus_constant (hard_frame_pointer_rtx, 4);
17164 insn = gen_frame_mem (SImode, insn);
17165 }
17166 emit_set_insn (ip_rtx, insn);
17167 /* Add a USE to stop propagate_one_insn() from barfing. */
17168 emit_insn (gen_prologue_use (ip_rtx));
17169 }
17170 }
17171 else
17172 {
17173 insn = GEN_INT (saved_regs - 4);
17174 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17175 stack_pointer_rtx, insn));
17176 RTX_FRAME_RELATED_P (insn) = 1;
17177 }
17178 }
17179
17180 if (flag_stack_usage_info)
17181 current_function_static_stack_size
17182 = offsets->outgoing_args - offsets->saved_args;
17183
17184 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
17185 {
17186 /* This add can produce multiple insns for a large constant, so we
17187 need to get tricky. */
17188 rtx last = get_last_insn ();
17189
17190 amount = GEN_INT (offsets->saved_args + saved_regs
17191 - offsets->outgoing_args);
17192
17193 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17194 amount));
17195 do
17196 {
17197 last = last ? NEXT_INSN (last) : get_insns ();
17198 RTX_FRAME_RELATED_P (last) = 1;
17199 }
17200 while (last != insn);
17201
17202 /* If the frame pointer is needed, emit a special barrier that
17203 will prevent the scheduler from moving stores to the frame
17204 before the stack adjustment. */
17205 if (frame_pointer_needed)
17206 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
17207 hard_frame_pointer_rtx));
17208 }
17209
17210
17211 if (frame_pointer_needed && TARGET_THUMB2)
17212 thumb_set_frame_pointer (offsets);
17213
17214 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17215 {
17216 unsigned long mask;
17217
17218 mask = live_regs_mask;
17219 mask &= THUMB2_WORK_REGS;
17220 if (!IS_NESTED (func_type))
17221 mask |= (1 << IP_REGNUM);
17222 arm_load_pic_register (mask);
17223 }
17224
17225 /* If we are profiling, make sure no instructions are scheduled before
17226 the call to mcount. Similarly if the user has requested no
17227 scheduling in the prolog. Similarly if we want non-call exceptions
17228 using the EABI unwinder, to prevent faulting instructions from being
17229 swapped with a stack adjustment. */
17230 if (crtl->profile || !TARGET_SCHED_PROLOG
17231 || (arm_except_unwind_info (&global_options) == UI_TARGET
17232 && cfun->can_throw_non_call_exceptions))
17233 emit_insn (gen_blockage ());
17234
17235 /* If the link register is being kept alive, with the return address in it,
17236 then make sure that it does not get reused by the ce2 pass. */
17237 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
17238 cfun->machine->lr_save_eliminated = 1;
17239 }
17240 \f
17241 /* Print condition code to STREAM. Helper function for arm_print_operand. */
17242 static void
17243 arm_print_condition (FILE *stream)
17244 {
17245 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
17246 {
17247 /* Branch conversion is not implemented for Thumb-2. */
17248 if (TARGET_THUMB)
17249 {
17250 output_operand_lossage ("predicated Thumb instruction");
17251 return;
17252 }
17253 if (current_insn_predicate != NULL)
17254 {
17255 output_operand_lossage
17256 ("predicated instruction in conditional sequence");
17257 return;
17258 }
17259
17260 fputs (arm_condition_codes[arm_current_cc], stream);
17261 }
17262 else if (current_insn_predicate)
17263 {
17264 enum arm_cond_code code;
17265
17266 if (TARGET_THUMB1)
17267 {
17268 output_operand_lossage ("predicated Thumb instruction");
17269 return;
17270 }
17271
17272 code = get_arm_condition_code (current_insn_predicate);
17273 fputs (arm_condition_codes[code], stream);
17274 }
17275 }
17276
17277
17278 /* If CODE is 'd', then X is a condition operand and the instruction
17279 should only be executed if the condition is true.
17280 If CODE is 'D', then X is a condition operand and the instruction
17281 should only be executed if the condition is false: however, if the mode
17282 of the comparison is CCFPEmode, then always execute the instruction -- we
17283 do this because in these circumstances !GE does not necessarily imply LT;
17284 in these cases the instruction pattern will take care to make sure that
17285 an instruction containing %d will follow, thereby undoing the effects of
17286 doing this instruction unconditionally.
17287 If CODE is 'N' then X is a floating point operand that must be negated
17288 before output.
17289 If CODE is 'B' then output a bitwise inverted value of X (a const int).
17290 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
17291 static void
17292 arm_print_operand (FILE *stream, rtx x, int code)
17293 {
17294 switch (code)
17295 {
17296 case '@':
17297 fputs (ASM_COMMENT_START, stream);
17298 return;
17299
17300 case '_':
17301 fputs (user_label_prefix, stream);
17302 return;
17303
17304 case '|':
17305 fputs (REGISTER_PREFIX, stream);
17306 return;
17307
17308 case '?':
17309 arm_print_condition (stream);
17310 return;
17311
17312 case '(':
17313 /* Nothing in unified syntax, otherwise the current condition code. */
17314 if (!TARGET_UNIFIED_ASM)
17315 arm_print_condition (stream);
17316 break;
17317
17318 case ')':
17319 /* The current condition code in unified syntax, otherwise nothing. */
17320 if (TARGET_UNIFIED_ASM)
17321 arm_print_condition (stream);
17322 break;
17323
17324 case '.':
17325 /* The current condition code for a condition code setting instruction.
17326 Preceded by 's' in unified syntax, otherwise followed by 's'. */
17327 if (TARGET_UNIFIED_ASM)
17328 {
17329 fputc('s', stream);
17330 arm_print_condition (stream);
17331 }
17332 else
17333 {
17334 arm_print_condition (stream);
17335 fputc('s', stream);
17336 }
17337 return;
17338
17339 case '!':
17340 /* If the instruction is conditionally executed then print
17341 the current condition code, otherwise print 's'. */
17342 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
17343 if (current_insn_predicate)
17344 arm_print_condition (stream);
17345 else
17346 fputc('s', stream);
17347 break;
17348
17349 /* %# is a "break" sequence. It doesn't output anything, but is used to
17350 separate e.g. operand numbers from following text, if that text consists
17351 of further digits which we don't want to be part of the operand
17352 number. */
17353 case '#':
17354 return;
17355
17356 case 'N':
17357 {
17358 REAL_VALUE_TYPE r;
17359 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17360 r = real_value_negate (&r);
17361 fprintf (stream, "%s", fp_const_from_val (&r));
17362 }
17363 return;
17364
17365 /* An integer or symbol address without a preceding # sign. */
17366 case 'c':
17367 switch (GET_CODE (x))
17368 {
17369 case CONST_INT:
17370 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17371 break;
17372
17373 case SYMBOL_REF:
17374 output_addr_const (stream, x);
17375 break;
17376
17377 case CONST:
17378 if (GET_CODE (XEXP (x, 0)) == PLUS
17379 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
17380 {
17381 output_addr_const (stream, x);
17382 break;
17383 }
17384 /* Fall through. */
17385
17386 default:
17387 output_operand_lossage ("unsupported operand for code '%c'", code);
17388 }
17389 return;
17390
17391 case 'B':
17392 if (GET_CODE (x) == CONST_INT)
17393 {
17394 HOST_WIDE_INT val;
17395 val = ARM_SIGN_EXTEND (~INTVAL (x));
17396 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
17397 }
17398 else
17399 {
17400 putc ('~', stream);
17401 output_addr_const (stream, x);
17402 }
17403 return;
17404
17405 case 'L':
17406 /* The low 16 bits of an immediate constant. */
17407 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
17408 return;
17409
17410 case 'i':
17411 fprintf (stream, "%s", arithmetic_instr (x, 1));
17412 return;
17413
17414 /* Truncate Cirrus shift counts. */
17415 case 's':
17416 if (GET_CODE (x) == CONST_INT)
17417 {
17418 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
17419 return;
17420 }
17421 arm_print_operand (stream, x, 0);
17422 return;
17423
17424 case 'I':
17425 fprintf (stream, "%s", arithmetic_instr (x, 0));
17426 return;
17427
17428 case 'S':
17429 {
17430 HOST_WIDE_INT val;
17431 const char *shift;
17432
17433 if (!shift_operator (x, SImode))
17434 {
17435 output_operand_lossage ("invalid shift operand");
17436 break;
17437 }
17438
17439 shift = shift_op (x, &val);
17440
17441 if (shift)
17442 {
17443 fprintf (stream, ", %s ", shift);
17444 if (val == -1)
17445 arm_print_operand (stream, XEXP (x, 1), 0);
17446 else
17447 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
17448 }
17449 }
17450 return;
17451
17452 /* An explanation of the 'Q', 'R' and 'H' register operands:
17453
17454 In a pair of registers containing a DI or DF value the 'Q'
17455 operand returns the register number of the register containing
17456 the least significant part of the value. The 'R' operand returns
17457 the register number of the register containing the most
17458 significant part of the value.
17459
17460 The 'H' operand returns the higher of the two register numbers.
17461 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17462 same as the 'Q' operand, since the most significant part of the
17463 value is held in the lower number register. The reverse is true
17464 on systems where WORDS_BIG_ENDIAN is false.
17465
17466 The purpose of these operands is to distinguish between cases
17467 where the endian-ness of the values is important (for example
17468 when they are added together), and cases where the endian-ness
17469 is irrelevant, but the order of register operations is important.
17470 For example when loading a value from memory into a register
17471 pair, the endian-ness does not matter. Provided that the value
17472 from the lower memory address is put into the lower numbered
17473 register, and the value from the higher address is put into the
17474 higher numbered register, the load will work regardless of whether
17475 the value being loaded is big-wordian or little-wordian. The
17476 order of the two register loads can matter however, if the address
17477 of the memory location is actually held in one of the registers
17478 being overwritten by the load.
17479
17480 The 'Q' and 'R' constraints are also available for 64-bit
17481 constants. */
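/* As a purely illustrative example, a DImode addition pattern might use
 an output template along the lines of

 "adds\t%Q0, %Q1, %Q2\;adc\t%R0, %R1, %R2"

 so that the carry is generated by the least significant word and
 consumed by the most significant word, whichever register of the pair
 each happens to be.  */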
17482 case 'Q':
17483 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17484 {
17485 rtx part = gen_lowpart (SImode, x);
17486 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17487 return;
17488 }
17489
17490 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17491 {
17492 output_operand_lossage ("invalid operand for code '%c'", code);
17493 return;
17494 }
17495
17496 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17497 return;
17498
17499 case 'R':
17500 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17501 {
17502 enum machine_mode mode = GET_MODE (x);
17503 rtx part;
17504
17505 if (mode == VOIDmode)
17506 mode = DImode;
17507 part = gen_highpart_mode (SImode, mode, x);
17508 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17509 return;
17510 }
17511
17512 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17513 {
17514 output_operand_lossage ("invalid operand for code '%c'", code);
17515 return;
17516 }
17517
17518 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17519 return;
17520
17521 case 'H':
17522 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17523 {
17524 output_operand_lossage ("invalid operand for code '%c'", code);
17525 return;
17526 }
17527
17528 asm_fprintf (stream, "%r", REGNO (x) + 1);
17529 return;
17530
17531 case 'J':
17532 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17533 {
17534 output_operand_lossage ("invalid operand for code '%c'", code);
17535 return;
17536 }
17537
17538 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17539 return;
17540
17541 case 'K':
17542 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17543 {
17544 output_operand_lossage ("invalid operand for code '%c'", code);
17545 return;
17546 }
17547
17548 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17549 return;
17550
17551 case 'm':
17552 asm_fprintf (stream, "%r",
17553 GET_CODE (XEXP (x, 0)) == REG
17554 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17555 return;
17556
17557 case 'M':
17558 asm_fprintf (stream, "{%r-%r}",
17559 REGNO (x),
17560 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17561 return;
17562
17563 /* Like 'M', but writing doubleword vector registers, for use by Neon
17564 insns. */
17565 case 'h':
17566 {
17567 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17568 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17569 if (numregs == 1)
17570 asm_fprintf (stream, "{d%d}", regno);
17571 else
17572 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17573 }
17574 return;
17575
17576 case 'd':
17577 /* CONST_TRUE_RTX means always -- that's the default. */
17578 if (x == const_true_rtx)
17579 return;
17580
17581 if (!COMPARISON_P (x))
17582 {
17583 output_operand_lossage ("invalid operand for code '%c'", code);
17584 return;
17585 }
17586
17587 fputs (arm_condition_codes[get_arm_condition_code (x)],
17588 stream);
17589 return;
17590
17591 case 'D':
17592 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17593 want to do that. */
17594 if (x == const_true_rtx)
17595 {
17596 output_operand_lossage ("instruction never executed");
17597 return;
17598 }
17599 if (!COMPARISON_P (x))
17600 {
17601 output_operand_lossage ("invalid operand for code '%c'", code);
17602 return;
17603 }
17604
17605 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17606 (get_arm_condition_code (x))],
17607 stream);
17608 return;
17609
17610 /* Cirrus registers can be accessed in a variety of ways:
17611 single floating point (f)
17612 double floating point (d)
17613 32bit integer (fx)
17614 64bit integer (dx). */
17615 case 'W': /* Cirrus register in F mode. */
17616 case 'X': /* Cirrus register in D mode. */
17617 case 'Y': /* Cirrus register in FX mode. */
17618 case 'Z': /* Cirrus register in DX mode. */
17619 gcc_assert (GET_CODE (x) == REG
17620 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
17621
17622 fprintf (stream, "mv%s%s",
17623 code == 'W' ? "f"
17624 : code == 'X' ? "d"
17625 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
17626
17627 return;
17628
17629 /* Print cirrus register in the mode specified by the register's mode. */
17630 case 'V':
17631 {
17632 int mode = GET_MODE (x);
17633
17634 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
17635 {
17636 output_operand_lossage ("invalid operand for code '%c'", code);
17637 return;
17638 }
17639
17640 fprintf (stream, "mv%s%s",
17641 mode == DFmode ? "d"
17642 : mode == SImode ? "fx"
17643 : mode == DImode ? "dx"
17644 : "f", reg_names[REGNO (x)] + 2);
17645
17646 return;
17647 }
17648
17649 case 'U':
17650 if (GET_CODE (x) != REG
17651 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17652 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17653 /* Bad value for wCG register number. */
17654 {
17655 output_operand_lossage ("invalid operand for code '%c'", code);
17656 return;
17657 }
17658
17659 else
17660 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17661 return;
17662
17663 /* Print an iWMMXt control register name. */
17664 case 'w':
17665 if (GET_CODE (x) != CONST_INT
17666 || INTVAL (x) < 0
17667 || INTVAL (x) >= 16)
17668 /* Bad value for wC register number. */
17669 {
17670 output_operand_lossage ("invalid operand for code '%c'", code);
17671 return;
17672 }
17673
17674 else
17675 {
17676 static const char * wc_reg_names [16] =
17677 {
17678 "wCID", "wCon", "wCSSF", "wCASF",
17679 "wC4", "wC5", "wC6", "wC7",
17680 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17681 "wC12", "wC13", "wC14", "wC15"
17682 };
17683
17684 fputs (wc_reg_names [INTVAL (x)], stream);
17685 }
17686 return;
17687
17688 /* Print the high single-precision register of a VFP double-precision
17689 register. */
17690 case 'p':
17691 {
17692 int mode = GET_MODE (x);
17693 int regno;
17694
17695 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17696 {
17697 output_operand_lossage ("invalid operand for code '%c'", code);
17698 return;
17699 }
17700
17701 regno = REGNO (x);
17702 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17703 {
17704 output_operand_lossage ("invalid operand for code '%c'", code);
17705 return;
17706 }
17707
17708 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17709 }
17710 return;
17711
17712 /* Print a VFP/Neon double precision or quad precision register name. */
17713 case 'P':
17714 case 'q':
17715 {
17716 int mode = GET_MODE (x);
17717 int is_quad = (code == 'q');
17718 int regno;
17719
17720 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17721 {
17722 output_operand_lossage ("invalid operand for code '%c'", code);
17723 return;
17724 }
17725
17726 if (GET_CODE (x) != REG
17727 || !IS_VFP_REGNUM (REGNO (x)))
17728 {
17729 output_operand_lossage ("invalid operand for code '%c'", code);
17730 return;
17731 }
17732
17733 regno = REGNO (x);
17734 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17735 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17736 {
17737 output_operand_lossage ("invalid operand for code '%c'", code);
17738 return;
17739 }
17740
17741 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17742 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17743 }
17744 return;
17745
17746 /* These two codes print the low/high doubleword register of a Neon quad
17747 register, respectively. For pair-structure types, can also print
17748 low/high quadword registers. */
17749 case 'e':
17750 case 'f':
17751 {
17752 int mode = GET_MODE (x);
17753 int regno;
17754
17755 if ((GET_MODE_SIZE (mode) != 16
17756 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17757 {
17758 output_operand_lossage ("invalid operand for code '%c'", code);
17759 return;
17760 }
17761
17762 regno = REGNO (x);
17763 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17764 {
17765 output_operand_lossage ("invalid operand for code '%c'", code);
17766 return;
17767 }
17768
17769 if (GET_MODE_SIZE (mode) == 16)
17770 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17771 + (code == 'f' ? 1 : 0));
17772 else
17773 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17774 + (code == 'f' ? 1 : 0));
17775 }
17776 return;
17777
17778 /* Print a VFPv3 floating-point constant, represented as an integer
17779 index. */
17780 case 'G':
17781 {
17782 int index = vfp3_const_double_index (x);
17783 gcc_assert (index != -1);
17784 fprintf (stream, "%d", index);
17785 }
17786 return;
17787
17788 /* Print bits representing opcode features for Neon.
17789
17790 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17791 and polynomials as unsigned.
17792
17793 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17794
17795 Bit 2 is 1 for rounding functions, 0 otherwise. */
17796
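/* For example (purely illustrative), an operand value of 5 -- binary 101
 -- would make 'T' print 's', 'F' print 'i', 't' print 's' and 'O'
 print "r".  */
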
17797 /* Identify the type as 's', 'u', 'p' or 'f'. */
17798 case 'T':
17799 {
17800 HOST_WIDE_INT bits = INTVAL (x);
17801 fputc ("uspf"[bits & 3], stream);
17802 }
17803 return;
17804
17805 /* Likewise, but signed and unsigned integers are both 'i'. */
17806 case 'F':
17807 {
17808 HOST_WIDE_INT bits = INTVAL (x);
17809 fputc ("iipf"[bits & 3], stream);
17810 }
17811 return;
17812
17813 /* As for 'T', but emit 'u' instead of 'p'. */
17814 case 't':
17815 {
17816 HOST_WIDE_INT bits = INTVAL (x);
17817 fputc ("usuf"[bits & 3], stream);
17818 }
17819 return;
17820
17821 /* Bit 2: rounding (vs none). */
17822 case 'O':
17823 {
17824 HOST_WIDE_INT bits = INTVAL (x);
17825 fputs ((bits & 4) != 0 ? "r" : "", stream);
17826 }
17827 return;
17828
17829 /* Memory operand for vld1/vst1 instruction. */
17830 case 'A':
17831 {
17832 rtx addr;
17833 bool postinc = FALSE;
17834 unsigned align, memsize, align_bits;
17835
17836 gcc_assert (GET_CODE (x) == MEM);
17837 addr = XEXP (x, 0);
17838 if (GET_CODE (addr) == POST_INC)
17839 {
17840 postinc = 1;
17841 addr = XEXP (addr, 0);
17842 }
17843 asm_fprintf (stream, "[%r", REGNO (addr));
17844
17845 /* We know the alignment of this access, so we can emit a hint in the
17846 instruction (for some alignments) as an aid to the memory subsystem
17847 of the target. */
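/* For example (illustrative only): a 16-byte access known to be 128-bit
 aligned would be printed as "[r0:128]", assuming the address register
 is r0.  */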
17848 align = MEM_ALIGN (x) >> 3;
17849 memsize = MEM_SIZE (x);
17850
17851 /* Only certain alignment specifiers are supported by the hardware. */
17852 if (memsize == 16 && (align % 32) == 0)
17853 align_bits = 256;
17854 else if (memsize == 16 && (align % 16) == 0)
17855 align_bits = 128;
17856 else if (memsize >= 8 && (align % 8) == 0)
17857 align_bits = 64;
17858 else
17859 align_bits = 0;
17860
17861 if (align_bits != 0)
17862 asm_fprintf (stream, ":%d", align_bits);
17863
17864 asm_fprintf (stream, "]");
17865
17866 if (postinc)
17867 fputs("!", stream);
17868 }
17869 return;
17870
17871 case 'C':
17872 {
17873 rtx addr;
17874
17875 gcc_assert (GET_CODE (x) == MEM);
17876 addr = XEXP (x, 0);
17877 gcc_assert (GET_CODE (addr) == REG);
17878 asm_fprintf (stream, "[%r]", REGNO (addr));
17879 }
17880 return;
17881
17882 /* Translate an S register number into a D register number and element index. */
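/* For example, s5 would be printed as "d2[1]".  */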
17883 case 'y':
17884 {
17885 int mode = GET_MODE (x);
17886 int regno;
17887
17888 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17889 {
17890 output_operand_lossage ("invalid operand for code '%c'", code);
17891 return;
17892 }
17893
17894 regno = REGNO (x);
17895 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17896 {
17897 output_operand_lossage ("invalid operand for code '%c'", code);
17898 return;
17899 }
17900
17901 regno = regno - FIRST_VFP_REGNUM;
17902 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17903 }
17904 return;
17905
17906 case 'v':
17907 gcc_assert (GET_CODE (x) == CONST_DOUBLE);
17908 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17909 return;
17910
17911 /* Register specifier for vld1.16/vst1.16. Translate the S register
17912 number into a D register number and element index. */
17913 case 'z':
17914 {
17915 int mode = GET_MODE (x);
17916 int regno;
17917
17918 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17919 {
17920 output_operand_lossage ("invalid operand for code '%c'", code);
17921 return;
17922 }
17923
17924 regno = REGNO (x);
17925 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17926 {
17927 output_operand_lossage ("invalid operand for code '%c'", code);
17928 return;
17929 }
17930
17931 regno = regno - FIRST_VFP_REGNUM;
17932 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17933 }
17934 return;
17935
17936 default:
17937 if (x == 0)
17938 {
17939 output_operand_lossage ("missing operand");
17940 return;
17941 }
17942
17943 switch (GET_CODE (x))
17944 {
17945 case REG:
17946 asm_fprintf (stream, "%r", REGNO (x));
17947 break;
17948
17949 case MEM:
17950 output_memory_reference_mode = GET_MODE (x);
17951 output_address (XEXP (x, 0));
17952 break;
17953
17954 case CONST_DOUBLE:
17955 if (TARGET_NEON)
17956 {
17957 char fpstr[20];
17958 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17959 sizeof (fpstr), 0, 1);
17960 fprintf (stream, "#%s", fpstr);
17961 }
17962 else
17963 fprintf (stream, "#%s", fp_immediate_constant (x));
17964 break;
17965
17966 default:
17967 gcc_assert (GET_CODE (x) != NEG);
17968 fputc ('#', stream);
17969 if (GET_CODE (x) == HIGH)
17970 {
17971 fputs (":lower16:", stream);
17972 x = XEXP (x, 0);
17973 }
17974
17975 output_addr_const (stream, x);
17976 break;
17977 }
17978 }
17979 }
17980 \f
17981 /* Target hook for printing a memory address. */
17982 static void
17983 arm_print_operand_address (FILE *stream, rtx x)
17984 {
17985 if (TARGET_32BIT)
17986 {
17987 int is_minus = GET_CODE (x) == MINUS;
17988
17989 if (GET_CODE (x) == REG)
17990 asm_fprintf (stream, "[%r, #0]", REGNO (x));
17991 else if (GET_CODE (x) == PLUS || is_minus)
17992 {
17993 rtx base = XEXP (x, 0);
17994 rtx index = XEXP (x, 1);
17995 HOST_WIDE_INT offset = 0;
17996 if (GET_CODE (base) != REG
17997 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
17998 {
17999 /* Ensure that BASE is a register. */
18000 /* (one of them must be). */
18001 /* Also ensure the SP is not used as an index register. */
18002 rtx temp = base;
18003 base = index;
18004 index = temp;
18005 }
18006 switch (GET_CODE (index))
18007 {
18008 case CONST_INT:
18009 offset = INTVAL (index);
18010 if (is_minus)
18011 offset = -offset;
18012 asm_fprintf (stream, "[%r, #%wd]",
18013 REGNO (base), offset);
18014 break;
18015
18016 case REG:
18017 asm_fprintf (stream, "[%r, %s%r]",
18018 REGNO (base), is_minus ? "-" : "",
18019 REGNO (index));
18020 break;
18021
18022 case MULT:
18023 case ASHIFTRT:
18024 case LSHIFTRT:
18025 case ASHIFT:
18026 case ROTATERT:
18027 {
18028 asm_fprintf (stream, "[%r, %s%r",
18029 REGNO (base), is_minus ? "-" : "",
18030 REGNO (XEXP (index, 0)));
18031 arm_print_operand (stream, index, 'S');
18032 fputs ("]", stream);
18033 break;
18034 }
18035
18036 default:
18037 gcc_unreachable ();
18038 }
18039 }
18040 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
18041 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
18042 {
18043 extern enum machine_mode output_memory_reference_mode;
18044
18045 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
18046
18047 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
18048 asm_fprintf (stream, "[%r, #%s%d]!",
18049 REGNO (XEXP (x, 0)),
18050 GET_CODE (x) == PRE_DEC ? "-" : "",
18051 GET_MODE_SIZE (output_memory_reference_mode));
18052 else
18053 asm_fprintf (stream, "[%r], #%s%d",
18054 REGNO (XEXP (x, 0)),
18055 GET_CODE (x) == POST_DEC ? "-" : "",
18056 GET_MODE_SIZE (output_memory_reference_mode));
18057 }
18058 else if (GET_CODE (x) == PRE_MODIFY)
18059 {
18060 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
18061 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
18062 asm_fprintf (stream, "#%wd]!",
18063 INTVAL (XEXP (XEXP (x, 1), 1)));
18064 else
18065 asm_fprintf (stream, "%r]!",
18066 REGNO (XEXP (XEXP (x, 1), 1)));
18067 }
18068 else if (GET_CODE (x) == POST_MODIFY)
18069 {
18070 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
18071 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
18072 asm_fprintf (stream, "#%wd",
18073 INTVAL (XEXP (XEXP (x, 1), 1)));
18074 else
18075 asm_fprintf (stream, "%r",
18076 REGNO (XEXP (XEXP (x, 1), 1)));
18077 }
18078 else output_addr_const (stream, x);
18079 }
18080 else
18081 {
18082 if (GET_CODE (x) == REG)
18083 asm_fprintf (stream, "[%r]", REGNO (x));
18084 else if (GET_CODE (x) == POST_INC)
18085 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
18086 else if (GET_CODE (x) == PLUS)
18087 {
18088 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
18089 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
18090 asm_fprintf (stream, "[%r, #%wd]",
18091 REGNO (XEXP (x, 0)),
18092 INTVAL (XEXP (x, 1)));
18093 else
18094 asm_fprintf (stream, "[%r, %r]",
18095 REGNO (XEXP (x, 0)),
18096 REGNO (XEXP (x, 1)));
18097 }
18098 else
18099 output_addr_const (stream, x);
18100 }
18101 }
18102 \f
18103 /* Target hook for indicating whether a punctuation character for
18104 TARGET_PRINT_OPERAND is valid. */
18105 static bool
18106 arm_print_operand_punct_valid_p (unsigned char code)
18107 {
18108 return (code == '@' || code == '|' || code == '.'
18109 || code == '(' || code == ')' || code == '#'
18110 || (TARGET_32BIT && (code == '?'))
18111 || (TARGET_THUMB2 && (code == '!'))
18112 || (TARGET_THUMB && (code == '_')));
18113 }
18114 \f
18115 /* Target hook for assembling integer objects. The ARM version needs to
18116 handle word-sized values specially. */
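/* For example, in PIC code a constant-pool reference to a global symbol
 is emitted as "\t.word\tfoo(GOT)", while a reference to a local symbol
 uses "(GOTOFF)" instead (the symbol name is purely illustrative).  */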
18117 static bool
18118 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
18119 {
18120 enum machine_mode mode;
18121
18122 if (size == UNITS_PER_WORD && aligned_p)
18123 {
18124 fputs ("\t.word\t", asm_out_file);
18125 output_addr_const (asm_out_file, x);
18126
18127 /* Mark symbols as position independent. We only do this in the
18128 .text segment, not in the .data segment. */
18129 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
18130 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
18131 {
18132 /* See legitimize_pic_address for an explanation of the
18133 TARGET_VXWORKS_RTP check. */
18134 if (TARGET_VXWORKS_RTP
18135 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
18136 fputs ("(GOT)", asm_out_file);
18137 else
18138 fputs ("(GOTOFF)", asm_out_file);
18139 }
18140 fputc ('\n', asm_out_file);
18141 return true;
18142 }
18143
18144 mode = GET_MODE (x);
18145
18146 if (arm_vector_mode_supported_p (mode))
18147 {
18148 int i, units;
18149
18150 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18151
18152 units = CONST_VECTOR_NUNITS (x);
18153 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
18154
18155 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
18156 for (i = 0; i < units; i++)
18157 {
18158 rtx elt = CONST_VECTOR_ELT (x, i);
18159 assemble_integer
18160 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
18161 }
18162 else
18163 for (i = 0; i < units; i++)
18164 {
18165 rtx elt = CONST_VECTOR_ELT (x, i);
18166 REAL_VALUE_TYPE rval;
18167
18168 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
18169
18170 assemble_real
18171 (rval, GET_MODE_INNER (mode),
18172 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
18173 }
18174
18175 return true;
18176 }
18177
18178 return default_assemble_integer (x, size, aligned_p);
18179 }
18180
18181 static void
18182 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
18183 {
18184 section *s;
18185
18186 if (!TARGET_AAPCS_BASED)
18187 {
18188 (is_ctor ?
18189 default_named_section_asm_out_constructor
18190 : default_named_section_asm_out_destructor) (symbol, priority);
18191 return;
18192 }
18193
18194 /* Put these in the .init_array section, using a special relocation. */
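/* For example, a constructor with priority 300 is placed in a section
 named ".init_array.00300", and the entry itself is emitted as
 "\t.word\tsymbol(target1)".  */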
18195 if (priority != DEFAULT_INIT_PRIORITY)
18196 {
18197 char buf[18];
18198 sprintf (buf, "%s.%.5u",
18199 is_ctor ? ".init_array" : ".fini_array",
18200 priority);
18201 s = get_section (buf, SECTION_WRITE, NULL_TREE);
18202 }
18203 else if (is_ctor)
18204 s = ctors_section;
18205 else
18206 s = dtors_section;
18207
18208 switch_to_section (s);
18209 assemble_align (POINTER_SIZE);
18210 fputs ("\t.word\t", asm_out_file);
18211 output_addr_const (asm_out_file, symbol);
18212 fputs ("(target1)\n", asm_out_file);
18213 }
18214
18215 /* Add a function to the list of static constructors. */
18216
18217 static void
18218 arm_elf_asm_constructor (rtx symbol, int priority)
18219 {
18220 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
18221 }
18222
18223 /* Add a function to the list of static destructors. */
18224
18225 static void
18226 arm_elf_asm_destructor (rtx symbol, int priority)
18227 {
18228 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
18229 }
18230 \f
18231 /* A finite state machine takes care of noticing whether or not instructions
18232 can be conditionally executed, and thus decrease execution time and code
18233 size by deleting branch instructions. The fsm is controlled by
18234 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
18235
18236 /* The states of the fsm controlling condition codes are:
18237 0: normal, do nothing special
18238 1: make ASM_OUTPUT_OPCODE not output this instruction
18239 2: make ASM_OUTPUT_OPCODE not output this instruction
18240 3: make instructions conditional
18241 4: make instructions conditional
18242
18243 State transitions (state->state by whom under condition):
18244 0 -> 1 final_prescan_insn if the `target' is a label
18245 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
18246 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
18247 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
18248 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
18249 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
18250 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
18251 (the target insn is arm_target_insn).
18252
18253 If the jump clobbers the conditions then we use states 2 and 4.
18254
18255 A similar thing can be done with conditional return insns.
18256
18257 XXX In case the `target' is an unconditional branch, this conditionalising
18258 of the instructions always reduces code size, but not always execution
18259 time. But then, I want to reduce the code size to somewhere near what
18260 /bin/cc produces. */
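/* As a purely illustrative example, a source sequence that would otherwise
 be output as

 cmp	r0, #0
 beq	.L1
 add	r1, r1, #1
 .L1:

 can instead be output as

 cmp	r0, #0
 addne	r1, r1, #1

 when the branch only skips a small number of instructions.  */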
18261
18262 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
18263 instructions. When a COND_EXEC instruction is seen the subsequent
18264 instructions are scanned so that multiple conditional instructions can be
18265 combined into a single IT block. arm_condexec_count and arm_condexec_mask
18266 specify the length and true/false mask for the IT block. These will be
18267 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
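/* For example (illustrative), two consecutive COND_EXEC insns predicated
 on EQ and NE respectively can be output as a single IT block:

 ite	eq
 moveq	r0, #1
 movne	r0, #0

 with arm_condexec_mask recording which instructions use the inverse
 condition.  */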
18268
18269 /* Returns the index of the ARM condition code string in
18270 `arm_condition_codes', or ARM_NV if the comparison is invalid.
18271 COMPARISON should be an rtx like `(eq (...) (...))'. */
18272
18273 enum arm_cond_code
18274 maybe_get_arm_condition_code (rtx comparison)
18275 {
18276 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
18277 enum arm_cond_code code;
18278 enum rtx_code comp_code = GET_CODE (comparison);
18279
18280 if (GET_MODE_CLASS (mode) != MODE_CC)
18281 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
18282 XEXP (comparison, 1));
18283
18284 switch (mode)
18285 {
18286 case CC_DNEmode: code = ARM_NE; goto dominance;
18287 case CC_DEQmode: code = ARM_EQ; goto dominance;
18288 case CC_DGEmode: code = ARM_GE; goto dominance;
18289 case CC_DGTmode: code = ARM_GT; goto dominance;
18290 case CC_DLEmode: code = ARM_LE; goto dominance;
18291 case CC_DLTmode: code = ARM_LT; goto dominance;
18292 case CC_DGEUmode: code = ARM_CS; goto dominance;
18293 case CC_DGTUmode: code = ARM_HI; goto dominance;
18294 case CC_DLEUmode: code = ARM_LS; goto dominance;
18295 case CC_DLTUmode: code = ARM_CC;
18296
18297 dominance:
18298 if (comp_code == EQ)
18299 return ARM_INVERSE_CONDITION_CODE (code);
18300 if (comp_code == NE)
18301 return code;
18302 return ARM_NV;
18303
18304 case CC_NOOVmode:
18305 switch (comp_code)
18306 {
18307 case NE: return ARM_NE;
18308 case EQ: return ARM_EQ;
18309 case GE: return ARM_PL;
18310 case LT: return ARM_MI;
18311 default: return ARM_NV;
18312 }
18313
18314 case CC_Zmode:
18315 switch (comp_code)
18316 {
18317 case NE: return ARM_NE;
18318 case EQ: return ARM_EQ;
18319 default: return ARM_NV;
18320 }
18321
18322 case CC_Nmode:
18323 switch (comp_code)
18324 {
18325 case NE: return ARM_MI;
18326 case EQ: return ARM_PL;
18327 default: return ARM_NV;
18328 }
18329
18330 case CCFPEmode:
18331 case CCFPmode:
18332 /* These encodings assume that AC=1 in the FPA system control
18333 byte. This allows us to handle all cases except UNEQ and
18334 LTGT. */
18335 switch (comp_code)
18336 {
18337 case GE: return ARM_GE;
18338 case GT: return ARM_GT;
18339 case LE: return ARM_LS;
18340 case LT: return ARM_MI;
18341 case NE: return ARM_NE;
18342 case EQ: return ARM_EQ;
18343 case ORDERED: return ARM_VC;
18344 case UNORDERED: return ARM_VS;
18345 case UNLT: return ARM_LT;
18346 case UNLE: return ARM_LE;
18347 case UNGT: return ARM_HI;
18348 case UNGE: return ARM_PL;
18349 /* UNEQ and LTGT do not have a representation. */
18350 case UNEQ: /* Fall through. */
18351 case LTGT: /* Fall through. */
18352 default: return ARM_NV;
18353 }
18354
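/* CC_SWPmode means the comparison was generated with its operands
 swapped, so each condition maps to its swapped counterpart.  */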
18355 case CC_SWPmode:
18356 switch (comp_code)
18357 {
18358 case NE: return ARM_NE;
18359 case EQ: return ARM_EQ;
18360 case GE: return ARM_LE;
18361 case GT: return ARM_LT;
18362 case LE: return ARM_GE;
18363 case LT: return ARM_GT;
18364 case GEU: return ARM_LS;
18365 case GTU: return ARM_CC;
18366 case LEU: return ARM_CS;
18367 case LTU: return ARM_HI;
18368 default: return ARM_NV;
18369 }
18370
18371 case CC_Cmode:
18372 switch (comp_code)
18373 {
18374 case LTU: return ARM_CS;
18375 case GEU: return ARM_CC;
18376 default: return ARM_NV;
18377 }
18378
18379 case CC_CZmode:
18380 switch (comp_code)
18381 {
18382 case NE: return ARM_NE;
18383 case EQ: return ARM_EQ;
18384 case GEU: return ARM_CS;
18385 case GTU: return ARM_HI;
18386 case LEU: return ARM_LS;
18387 case LTU: return ARM_CC;
18388 default: return ARM_NV;
18389 }
18390
18391 case CC_NCVmode:
18392 switch (comp_code)
18393 {
18394 case GE: return ARM_GE;
18395 case LT: return ARM_LT;
18396 case GEU: return ARM_CS;
18397 case LTU: return ARM_CC;
18398 default: return ARM_NV;
18399 }
18400
18401 case CCmode:
18402 switch (comp_code)
18403 {
18404 case NE: return ARM_NE;
18405 case EQ: return ARM_EQ;
18406 case GE: return ARM_GE;
18407 case GT: return ARM_GT;
18408 case LE: return ARM_LE;
18409 case LT: return ARM_LT;
18410 case GEU: return ARM_CS;
18411 case GTU: return ARM_HI;
18412 case LEU: return ARM_LS;
18413 case LTU: return ARM_CC;
18414 default: return ARM_NV;
18415 }
18416
18417 default: gcc_unreachable ();
18418 }
18419 }
18420
18421 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
18422 static enum arm_cond_code
18423 get_arm_condition_code (rtx comparison)
18424 {
18425 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
18426 gcc_assert (code != ARM_NV);
18427 return code;
18428 }
18429
18430 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
18431 instructions. */
18432 void
18433 thumb2_final_prescan_insn (rtx insn)
18434 {
18435 rtx first_insn = insn;
18436 rtx body = PATTERN (insn);
18437 rtx predicate;
18438 enum arm_cond_code code;
18439 int n;
18440 int mask;
18441
18442 /* Remove the previous insn from the count of insns to be output. */
18443 if (arm_condexec_count)
18444 arm_condexec_count--;
18445
18446 /* Nothing to do if we are already inside a conditional block. */
18447 if (arm_condexec_count)
18448 return;
18449
18450 if (GET_CODE (body) != COND_EXEC)
18451 return;
18452
18453 /* Conditional jumps are implemented directly. */
18454 if (GET_CODE (insn) == JUMP_INSN)
18455 return;
18456
18457 predicate = COND_EXEC_TEST (body);
18458 arm_current_cc = get_arm_condition_code (predicate);
18459
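/* Each bit of arm_condexec_mask describes one machine instruction of
   the IT block: set means it executes under ARM_CURRENT_CC, clear
   means it executes under the inverse condition.  The mask is turned
   into the 't'/'e' suffixes by thumb2_asm_output_opcode below.  */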
18460 n = get_attr_ce_count (insn);
18461 arm_condexec_count = 1;
18462 arm_condexec_mask = (1 << n) - 1;
18463 arm_condexec_masklen = n;
18464 /* See if subsequent instructions can be combined into the same block. */
18465 for (;;)
18466 {
18467 insn = next_nonnote_insn (insn);
18468
18469 /* Jumping into the middle of an IT block is illegal, so a label or
18470 barrier terminates the block. */
18471 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
18472 break;
18473
18474 body = PATTERN (insn);
18475 /* USE and CLOBBER aren't really insns, so just skip them. */
18476 if (GET_CODE (body) == USE
18477 || GET_CODE (body) == CLOBBER)
18478 continue;
18479
18480 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18481 if (GET_CODE (body) != COND_EXEC)
18482 break;
18483 /* Allow up to 4 conditionally executed instructions in a block. */
18484 n = get_attr_ce_count (insn);
18485 if (arm_condexec_masklen + n > 4)
18486 break;
18487
18488 predicate = COND_EXEC_TEST (body);
18489 code = get_arm_condition_code (predicate);
18490 mask = (1 << n) - 1;
18491 if (arm_current_cc == code)
18492 arm_condexec_mask |= (mask << arm_condexec_masklen);
18493 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
18494 break;
18495
18496 arm_condexec_count++;
18497 arm_condexec_masklen += n;
18498
18499 /* A jump must be the last instruction in a conditional block. */
18500 if (GET_CODE (insn) == JUMP_INSN)
18501 break;
18502 }
18503 /* Restore recog_data (getting the attributes of other insns can
18504 destroy this array, but final.c assumes that it remains intact
18505 across this call). */
18506 extract_constrain_insn_cached (first_insn);
18507 }
18508
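/* Called by final just before INSN is output.  Look for a short
   forward branch whose skipped instructions can instead be emitted as
   conditionally executed instructions; arm_ccfsm_state is non-zero
   while such a replacement is in progress and is cleared again once
   the branch target has been reached.  */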
18509 void
18510 arm_final_prescan_insn (rtx insn)
18511 {
18512 /* BODY will hold the body of INSN. */
18513 rtx body = PATTERN (insn);
18514
18515 /* This will be 1 if trying to repeat the trick, and things need to be
18516 reversed if it appears to fail. */
18517 int reverse = 0;
18518
18519 /* If we start with a return insn, we only succeed if we find another one. */
18520 int seeking_return = 0;
18521 enum rtx_code return_code = UNKNOWN;
18522
18523 /* START_INSN will hold the insn from where we start looking. This is the
18524 first insn after the following code_label if REVERSE is true. */
18525 rtx start_insn = insn;
18526
18527 /* If in state 4, check whether the target of the branch has been reached,
18528 in order to change back to state 0. */
18529 if (arm_ccfsm_state == 4)
18530 {
18531 if (insn == arm_target_insn)
18532 {
18533 arm_target_insn = NULL;
18534 arm_ccfsm_state = 0;
18535 }
18536 return;
18537 }
18538
18539 /* If in state 3, it is possible to repeat the trick: this insn must be an
18540 unconditional branch to a label, the previous target label (which must be
18541 used only once) must immediately follow this branch, and the label this
18542 branch jumps to must not be too far off. */
18543 if (arm_ccfsm_state == 3)
18544 {
18545 if (simplejump_p (insn))
18546 {
18547 start_insn = next_nonnote_insn (start_insn);
18548 if (GET_CODE (start_insn) == BARRIER)
18549 {
18550 /* XXX Isn't this always a barrier? */
18551 start_insn = next_nonnote_insn (start_insn);
18552 }
18553 if (GET_CODE (start_insn) == CODE_LABEL
18554 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18555 && LABEL_NUSES (start_insn) == 1)
18556 reverse = TRUE;
18557 else
18558 return;
18559 }
18560 else if (ANY_RETURN_P (body))
18561 {
18562 start_insn = next_nonnote_insn (start_insn);
18563 if (GET_CODE (start_insn) == BARRIER)
18564 start_insn = next_nonnote_insn (start_insn);
18565 if (GET_CODE (start_insn) == CODE_LABEL
18566 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18567 && LABEL_NUSES (start_insn) == 1)
18568 {
18569 reverse = TRUE;
18570 seeking_return = 1;
18571 return_code = GET_CODE (body);
18572 }
18573 else
18574 return;
18575 }
18576 else
18577 return;
18578 }
18579
18580 gcc_assert (!arm_ccfsm_state || reverse);
18581 if (GET_CODE (insn) != JUMP_INSN)
18582 return;
18583
18584 /* This jump might be paralleled with a clobber of the condition codes;
18585 the jump should always come first. */
18586 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18587 body = XVECEXP (body, 0, 0);
18588
18589 if (reverse
18590 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18591 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18592 {
18593 int insns_skipped;
18594 int fail = FALSE, succeed = FALSE;
18595 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18596 int then_not_else = TRUE;
18597 rtx this_insn = start_insn, label = 0;
18598
18599 /* Register the insn jumped to. */
18600 if (reverse)
18601 {
18602 if (!seeking_return)
18603 label = XEXP (SET_SRC (body), 0);
18604 }
18605 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18606 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18607 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18608 {
18609 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18610 then_not_else = FALSE;
18611 }
18612 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18613 {
18614 seeking_return = 1;
18615 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18616 }
18617 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18618 {
18619 seeking_return = 1;
18620 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18621 then_not_else = FALSE;
18622 }
18623 else
18624 gcc_unreachable ();
18625
18626 /* See how many insns this branch skips, and what kind of insns. If all
18627 insns are okay, and the label or unconditional branch to the same
18628 label is not too far away, succeed. */
18629 for (insns_skipped = 0;
18630 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18631 {
18632 rtx scanbody;
18633
18634 this_insn = next_nonnote_insn (this_insn);
18635 if (!this_insn)
18636 break;
18637
18638 switch (GET_CODE (this_insn))
18639 {
18640 case CODE_LABEL:
18641 /* Succeed if it is the target label, otherwise fail since
18642 control falls in from somewhere else. */
18643 if (this_insn == label)
18644 {
18645 arm_ccfsm_state = 1;
18646 succeed = TRUE;
18647 }
18648 else
18649 fail = TRUE;
18650 break;
18651
18652 case BARRIER:
18653 /* Succeed if the following insn is the target label.
18654 Otherwise fail.
18655 If return insns are used then the last insn in a function
18656 will be a barrier. */
18657 this_insn = next_nonnote_insn (this_insn);
18658 if (this_insn && this_insn == label)
18659 {
18660 arm_ccfsm_state = 1;
18661 succeed = TRUE;
18662 }
18663 else
18664 fail = TRUE;
18665 break;
18666
18667 case CALL_INSN:
18668 /* The AAPCS says that conditional calls should not be
18669 used since they make interworking inefficient (the
18670 linker can't transform BL<cond> into BLX). That's
18671 only a problem if the machine has BLX. */
18672 if (arm_arch5)
18673 {
18674 fail = TRUE;
18675 break;
18676 }
18677
18678 /* Succeed if the following insn is the target label, or
18679 if the following two insns are a barrier and the
18680 target label. */
18681 this_insn = next_nonnote_insn (this_insn);
18682 if (this_insn && GET_CODE (this_insn) == BARRIER)
18683 this_insn = next_nonnote_insn (this_insn);
18684
18685 if (this_insn && this_insn == label
18686 && insns_skipped < max_insns_skipped)
18687 {
18688 arm_ccfsm_state = 1;
18689 succeed = TRUE;
18690 }
18691 else
18692 fail = TRUE;
18693 break;
18694
18695 case JUMP_INSN:
18696 /* If this is an unconditional branch to the same label, succeed.
18697 If it is to another label, do nothing. If it is conditional,
18698 fail. */
18699 /* XXX Probably, the tests for SET and the PC are
18700 unnecessary. */
18701
18702 scanbody = PATTERN (this_insn);
18703 if (GET_CODE (scanbody) == SET
18704 && GET_CODE (SET_DEST (scanbody)) == PC)
18705 {
18706 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18707 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18708 {
18709 arm_ccfsm_state = 2;
18710 succeed = TRUE;
18711 }
18712 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18713 fail = TRUE;
18714 }
18715 /* Fail if a conditional return is undesirable (e.g. on a
18716 StrongARM), but still allow this if optimizing for size. */
18717 else if (GET_CODE (scanbody) == return_code
18718 && !use_return_insn (TRUE, NULL)
18719 && !optimize_size)
18720 fail = TRUE;
18721 else if (GET_CODE (scanbody) == return_code)
18722 {
18723 arm_ccfsm_state = 2;
18724 succeed = TRUE;
18725 }
18726 else if (GET_CODE (scanbody) == PARALLEL)
18727 {
18728 switch (get_attr_conds (this_insn))
18729 {
18730 case CONDS_NOCOND:
18731 break;
18732 default:
18733 fail = TRUE;
18734 break;
18735 }
18736 }
18737 else
18738 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18739
18740 break;
18741
18742 case INSN:
18743 /* Instructions using or affecting the condition codes make it
18744 fail. */
18745 scanbody = PATTERN (this_insn);
18746 if (!(GET_CODE (scanbody) == SET
18747 || GET_CODE (scanbody) == PARALLEL)
18748 || get_attr_conds (this_insn) != CONDS_NOCOND)
18749 fail = TRUE;
18750
18751 /* A conditional Cirrus instruction must be followed by
18752 a non-Cirrus instruction.  However, we conditionalize
18753 instructions in this function, and by the time we get
18754 here we cannot add instructions (nops) because
18755 shorten_branches() has already been called, so to be
18756 safe we disable conditionalizing Cirrus instructions
18757 altogether. */
18758 if (GET_CODE (scanbody) != USE
18759 && GET_CODE (scanbody) != CLOBBER
18760 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
18761 fail = TRUE;
18762 break;
18763
18764 default:
18765 break;
18766 }
18767 }
18768 if (succeed)
18769 {
18770 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18771 arm_target_label = CODE_LABEL_NUMBER (label);
18772 else
18773 {
18774 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18775
18776 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18777 {
18778 this_insn = next_nonnote_insn (this_insn);
18779 gcc_assert (!this_insn
18780 || (GET_CODE (this_insn) != BARRIER
18781 && GET_CODE (this_insn) != CODE_LABEL));
18782 }
18783 if (!this_insn)
18784 {
18785 /* Oh dear!  We ran off the end; give up. */
18786 extract_constrain_insn_cached (insn);
18787 arm_ccfsm_state = 0;
18788 arm_target_insn = NULL;
18789 return;
18790 }
18791 arm_target_insn = this_insn;
18792 }
18793
18794 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18795 what it was. */
18796 if (!reverse)
18797 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18798
18799 if (reverse || then_not_else)
18800 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18801 }
18802
18803 /* Restore recog_data (getting the attributes of other insns can
18804 destroy this array, but final.c assumes that it remains intact
18805 across this call). */
18806 extract_constrain_insn_cached (insn);
18807 }
18808 }
18809
18810 /* Output IT instructions. */
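/* The mask built by thumb2_final_prescan_insn maps directly onto the
   IT suffix string: each set bit becomes 't' and each clear bit 'e'.
   For example, a two-instruction block whose second instruction uses
   the inverse condition (mask == 0b01) is emitted as "ite <cond>";
   the first bit is always set, so the output always begins "it".  */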
18811 void
18812 thumb2_asm_output_opcode (FILE * stream)
18813 {
18814 char buff[5];
18815 int n;
18816
18817 if (arm_condexec_mask)
18818 {
18819 for (n = 0; n < arm_condexec_masklen; n++)
18820 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18821 buff[n] = 0;
18822 asm_fprintf (stream, "i%s\t%s\n\t", buff,
18823 arm_condition_codes[arm_current_cc]);
18824 arm_condexec_mask = 0;
18825 }
18826 }
18827
18828 /* Returns true if REGNO is a valid register
18829 for holding a quantity of mode MODE. */
18830 int
18831 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18832 {
18833 if (GET_MODE_CLASS (mode) == MODE_CC)
18834 return (regno == CC_REGNUM
18835 || (TARGET_HARD_FLOAT && TARGET_VFP
18836 && regno == VFPCC_REGNUM));
18837
18838 if (TARGET_THUMB1)
18839 /* For the Thumb we only allow values bigger than SImode in
18840 registers 0 - 6, so that there is always a second low
18841 register available to hold the upper part of the value.
18842 We probably ought to ensure that the register is the
18843 start of an even-numbered register pair. */
18844 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18845
18846 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
18847 && IS_CIRRUS_REGNUM (regno))
18848 /* We have outlawed SI values in Cirrus registers because they
18849 reside in the lower 32 bits, but SF values reside in the
18850 upper 32 bits. This causes gcc all sorts of grief. We can't
18851 even split the registers into pairs because Cirrus SI values
18852 get sign extended to 64 bits -- aldyh. */
18853 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
18854
18855 if (TARGET_HARD_FLOAT && TARGET_VFP
18856 && IS_VFP_REGNUM (regno))
18857 {
18858 if (mode == SFmode || mode == SImode)
18859 return VFP_REGNO_OK_FOR_SINGLE (regno);
18860
18861 if (mode == DFmode)
18862 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18863
18864 /* VFP registers can hold HFmode values, but there is no point in
18865 putting them there unless we have hardware conversion insns. */
18866 if (mode == HFmode)
18867 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18868
18869 if (TARGET_NEON)
18870 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18871 || (VALID_NEON_QREG_MODE (mode)
18872 && NEON_REGNO_OK_FOR_QUAD (regno))
18873 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18874 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18875 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18876 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18877 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18878
18879 return FALSE;
18880 }
18881
18882 if (TARGET_REALLY_IWMMXT)
18883 {
18884 if (IS_IWMMXT_GR_REGNUM (regno))
18885 return mode == SImode;
18886
18887 if (IS_IWMMXT_REGNUM (regno))
18888 return VALID_IWMMXT_REG_MODE (mode);
18889 }
18890
18891 /* We allow almost any value to be stored in the general registers.
18892 Restrict doubleword quantities to even register pairs so that we can
18893 use ldrd. Do not allow very large Neon structure opaque modes in
18894 general registers; they would use too many. */
18895 if (regno <= LAST_ARM_REGNUM)
18896 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18897 && ARM_NUM_REGS (mode) <= 4;
18898
18899 if (regno == FRAME_POINTER_REGNUM
18900 || regno == ARG_POINTER_REGNUM)
18901 /* We only allow integers in the fake hard registers. */
18902 return GET_MODE_CLASS (mode) == MODE_INT;
18903
18904 /* The only registers left are the FPA registers
18905 which we only allow to hold FP values. */
18906 return (TARGET_HARD_FLOAT && TARGET_FPA
18907 && GET_MODE_CLASS (mode) == MODE_FLOAT
18908 && regno >= FIRST_FPA_REGNUM
18909 && regno <= LAST_FPA_REGNUM);
18910 }
18911
18912 /* Implement MODES_TIEABLE_P. */
18913
18914 bool
18915 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18916 {
18917 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18918 return true;
18919
18920 /* We specifically want to allow elements of "structure" modes to
18921 be tieable to the structure. This more general condition allows
18922 other rarer situations too. */
18923 if (TARGET_NEON
18924 && (VALID_NEON_DREG_MODE (mode1)
18925 || VALID_NEON_QREG_MODE (mode1)
18926 || VALID_NEON_STRUCT_MODE (mode1))
18927 && (VALID_NEON_DREG_MODE (mode2)
18928 || VALID_NEON_QREG_MODE (mode2)
18929 || VALID_NEON_STRUCT_MODE (mode2)))
18930 return true;
18931
18932 return false;
18933 }
18934
18935 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
18936 not used in ARM mode. */
18937
18938 enum reg_class
18939 arm_regno_class (int regno)
18940 {
18941 if (TARGET_THUMB1)
18942 {
18943 if (regno == STACK_POINTER_REGNUM)
18944 return STACK_REG;
18945 if (regno == CC_REGNUM)
18946 return CC_REG;
18947 if (regno < 8)
18948 return LO_REGS;
18949 return HI_REGS;
18950 }
18951
18952 if (TARGET_THUMB2 && regno < 8)
18953 return LO_REGS;
18954
18955 if ( regno <= LAST_ARM_REGNUM
18956 || regno == FRAME_POINTER_REGNUM
18957 || regno == ARG_POINTER_REGNUM)
18958 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18959
18960 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18961 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18962
18963 if (IS_CIRRUS_REGNUM (regno))
18964 return CIRRUS_REGS;
18965
18966 if (IS_VFP_REGNUM (regno))
18967 {
18968 if (regno <= D7_VFP_REGNUM)
18969 return VFP_D0_D7_REGS;
18970 else if (regno <= LAST_LO_VFP_REGNUM)
18971 return VFP_LO_REGS;
18972 else
18973 return VFP_HI_REGS;
18974 }
18975
18976 if (IS_IWMMXT_REGNUM (regno))
18977 return IWMMXT_REGS;
18978
18979 if (IS_IWMMXT_GR_REGNUM (regno))
18980 return IWMMXT_GR_REGS;
18981
18982 return FPA_REGS;
18983 }
18984
18985 /* Handle a special case when computing the offset
18986 of an argument from the frame pointer. */
18987 int
18988 arm_debugger_arg_offset (int value, rtx addr)
18989 {
18990 rtx insn;
18991
18992 /* We are only interested if dbxout_parms() failed to compute the offset. */
18993 if (value != 0)
18994 return 0;
18995
18996 /* We can only cope with the case where the address is held in a register. */
18997 if (GET_CODE (addr) != REG)
18998 return 0;
18999
19000 /* If we are using the frame pointer to point at the argument, then
19001 an offset of 0 is correct. */
19002 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
19003 return 0;
19004
19005 /* If we are using the stack pointer to point at the
19006 argument, then an offset of 0 is correct. */
19007 /* ??? Check this is consistent with thumb2 frame layout. */
19008 if ((TARGET_THUMB || !frame_pointer_needed)
19009 && REGNO (addr) == SP_REGNUM)
19010 return 0;
19011
19012 /* Oh dear. The argument is pointed to by a register rather
19013 than being held in a register, or being stored at a known
19014 offset from the frame pointer. Since GDB only understands
19015 those two kinds of argument we must translate the address
19016 held in the register into an offset from the frame pointer.
19017 We do this by searching through the insns for the function
19018 looking to see where this register gets its value. If the
19019 register is initialized from the frame pointer plus an offset
19020 then we are in luck and we can continue, otherwise we give up.
19021
19022 This code is exercised by producing debugging information
19023 for a function with arguments like this:
19024
19025 double func (double a, double b, int c, double d) {return d;}
19026
19027 Without this code the stab for parameter 'd' will be set to
19028 an offset of 0 from the frame pointer, rather than 8. */
19029
19030 /* The if() statement says:
19031
19032 If the insn is a normal instruction
19033 and if the insn is setting the value in a register
19034 and if the register being set is the register holding the address of the argument
19035 and if the address is computed by an addition
19036 that involves adding to a register
19037 which is the frame pointer
19038 a constant integer
19039
19040 then... */
19041
19042 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19043 {
19044 if ( GET_CODE (insn) == INSN
19045 && GET_CODE (PATTERN (insn)) == SET
19046 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
19047 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
19048 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
19049 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
19050 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
19051 )
19052 {
19053 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
19054
19055 break;
19056 }
19057 }
19058
19059 if (value == 0)
19060 {
19061 debug_rtx (addr);
19062 warning (0, "unable to compute real location of stacked parameter");
19063 value = 8; /* XXX magic hack */
19064 }
19065
19066 return value;
19067 }
19068 \f
19069 typedef enum {
19070 T_V8QI,
19071 T_V4HI,
19072 T_V2SI,
19073 T_V2SF,
19074 T_DI,
19075 T_V16QI,
19076 T_V8HI,
19077 T_V4SI,
19078 T_V4SF,
19079 T_V2DI,
19080 T_TI,
19081 T_EI,
19082 T_OI,
19083 T_MAX /* Size of enum. Keep last. */
19084 } neon_builtin_type_mode;
19085
19086 #define TYPE_MODE_BIT(X) (1 << (X))
19087
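/* Bitmasks grouping the 64-bit (doubleword) and 128-bit (quadword)
   vector modes respectively, for testing neon_builtin_type_mode
   values.  */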
19088 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
19089 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
19090 | TYPE_MODE_BIT (T_DI))
19091 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
19092 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
19093 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
19094
19095 #define v8qi_UP T_V8QI
19096 #define v4hi_UP T_V4HI
19097 #define v2si_UP T_V2SI
19098 #define v2sf_UP T_V2SF
19099 #define di_UP T_DI
19100 #define v16qi_UP T_V16QI
19101 #define v8hi_UP T_V8HI
19102 #define v4si_UP T_V4SI
19103 #define v4sf_UP T_V4SF
19104 #define v2di_UP T_V2DI
19105 #define ti_UP T_TI
19106 #define ei_UP T_EI
19107 #define oi_UP T_OI
19108
19109 #define UP(X) X##_UP
19110
19111 typedef enum {
19112 NEON_BINOP,
19113 NEON_TERNOP,
19114 NEON_UNOP,
19115 NEON_GETLANE,
19116 NEON_SETLANE,
19117 NEON_CREATE,
19118 NEON_DUP,
19119 NEON_DUPLANE,
19120 NEON_COMBINE,
19121 NEON_SPLIT,
19122 NEON_LANEMUL,
19123 NEON_LANEMULL,
19124 NEON_LANEMULH,
19125 NEON_LANEMAC,
19126 NEON_SCALARMUL,
19127 NEON_SCALARMULL,
19128 NEON_SCALARMULH,
19129 NEON_SCALARMAC,
19130 NEON_CONVERT,
19131 NEON_FIXCONV,
19132 NEON_SELECT,
19133 NEON_RESULTPAIR,
19134 NEON_REINTERP,
19135 NEON_VTBL,
19136 NEON_VTBX,
19137 NEON_LOAD1,
19138 NEON_LOAD1LANE,
19139 NEON_STORE1,
19140 NEON_STORE1LANE,
19141 NEON_LOADSTRUCT,
19142 NEON_LOADSTRUCTLANE,
19143 NEON_STORESTRUCT,
19144 NEON_STORESTRUCTLANE,
19145 NEON_LOGICBINOP,
19146 NEON_SHIFTINSERT,
19147 NEON_SHIFTIMM,
19148 NEON_SHIFTACC
19149 } neon_itype;
19150
19151 typedef struct {
19152 const char *name;
19153 const neon_itype itype;
19154 const neon_builtin_type_mode mode;
19155 const enum insn_code code;
19156 unsigned int fcode;
19157 } neon_builtin_datum;
19158
19159 #define CF(N,X) CODE_FOR_neon_##N##X
19160
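/* Each VARn macro expands to n entries of the builtin table, one per
   listed mode.  The fcode field is left as 0 here and is filled in by
   arm_init_neon_builtins.  */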
19161 #define VAR1(T, N, A) \
19162 {#N, NEON_##T, UP (A), CF (N, A), 0}
19163 #define VAR2(T, N, A, B) \
19164 VAR1 (T, N, A), \
19165 {#N, NEON_##T, UP (B), CF (N, B), 0}
19166 #define VAR3(T, N, A, B, C) \
19167 VAR2 (T, N, A, B), \
19168 {#N, NEON_##T, UP (C), CF (N, C), 0}
19169 #define VAR4(T, N, A, B, C, D) \
19170 VAR3 (T, N, A, B, C), \
19171 {#N, NEON_##T, UP (D), CF (N, D), 0}
19172 #define VAR5(T, N, A, B, C, D, E) \
19173 VAR4 (T, N, A, B, C, D), \
19174 {#N, NEON_##T, UP (E), CF (N, E), 0}
19175 #define VAR6(T, N, A, B, C, D, E, F) \
19176 VAR5 (T, N, A, B, C, D, E), \
19177 {#N, NEON_##T, UP (F), CF (N, F), 0}
19178 #define VAR7(T, N, A, B, C, D, E, F, G) \
19179 VAR6 (T, N, A, B, C, D, E, F), \
19180 {#N, NEON_##T, UP (G), CF (N, G), 0}
19181 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
19182 VAR7 (T, N, A, B, C, D, E, F, G), \
19183 {#N, NEON_##T, UP (H), CF (N, H), 0}
19184 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
19185 VAR8 (T, N, A, B, C, D, E, F, G, H), \
19186 {#N, NEON_##T, UP (I), CF (N, I), 0}
19187 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
19188 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
19189 {#N, NEON_##T, UP (J), CF (N, J), 0}
19190
19191 /* The mode entries in the following table correspond to the "key" type of the
19192 instruction variant, i.e. equivalent to that which would be specified after
19193 the assembler mnemonic, which usually refers to the last vector operand.
19194 (Signed, unsigned and polynomial types are not differentiated, though, and
19195 are all mapped onto the same mode for a given element size.) The modes
19196 listed per instruction should be the same as those defined for that
19197 instruction's pattern in neon.md. */
19198
19199 static neon_builtin_datum neon_builtin_data[] =
19200 {
19201 VAR10 (BINOP, vadd,
19202 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19203 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
19204 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
19205 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19206 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19207 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
19208 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19209 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19210 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
19211 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19212 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
19213 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
19214 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
19215 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
19216 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
19217 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
19218 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
19219 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
19220 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
19221 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
19222 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
19223 VAR2 (BINOP, vqdmull, v4hi, v2si),
19224 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19225 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19226 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19227 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
19228 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
19229 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
19230 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19231 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19232 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19233 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
19234 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19235 VAR10 (BINOP, vsub,
19236 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19237 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
19238 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
19239 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19240 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19241 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
19242 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19243 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19244 VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19245 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19246 VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19247 VAR2 (BINOP, vcage, v2sf, v4sf),
19248 VAR2 (BINOP, vcagt, v2sf, v4sf),
19249 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19250 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19251 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
19252 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19253 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
19254 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19255 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19256 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
19257 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19258 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19259 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
19260 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
19261 VAR2 (BINOP, vrecps, v2sf, v4sf),
19262 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
19263 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19264 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19265 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19266 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19267 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19268 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19269 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19270 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19271 VAR2 (UNOP, vcnt, v8qi, v16qi),
19272 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
19273 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
19274 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19275 /* FIXME: vget_lane supports more variants than this! */
19276 VAR10 (GETLANE, vget_lane,
19277 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19278 VAR10 (SETLANE, vset_lane,
19279 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19280 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
19281 VAR10 (DUP, vdup_n,
19282 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19283 VAR10 (DUPLANE, vdup_lane,
19284 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19285 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
19286 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
19287 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
19288 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
19289 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
19290 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
19291 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
19292 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19293 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19294 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
19295 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
19296 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19297 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
19298 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
19299 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19300 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19301 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
19302 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
19303 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19304 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
19305 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
19306 VAR10 (BINOP, vext,
19307 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19308 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19309 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
19310 VAR2 (UNOP, vrev16, v8qi, v16qi),
19311 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
19312 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
19313 VAR10 (SELECT, vbsl,
19314 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19315 VAR1 (VTBL, vtbl1, v8qi),
19316 VAR1 (VTBL, vtbl2, v8qi),
19317 VAR1 (VTBL, vtbl3, v8qi),
19318 VAR1 (VTBL, vtbl4, v8qi),
19319 VAR1 (VTBX, vtbx1, v8qi),
19320 VAR1 (VTBX, vtbx2, v8qi),
19321 VAR1 (VTBX, vtbx3, v8qi),
19322 VAR1 (VTBX, vtbx4, v8qi),
19323 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19324 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19325 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19326 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
19327 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
19328 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
19329 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
19330 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
19331 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
19332 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
19333 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
19334 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
19335 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
19336 VAR10 (LOAD1, vld1,
19337 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19338 VAR10 (LOAD1LANE, vld1_lane,
19339 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19340 VAR10 (LOAD1, vld1_dup,
19341 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19342 VAR10 (STORE1, vst1,
19343 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19344 VAR10 (STORE1LANE, vst1_lane,
19345 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19346 VAR9 (LOADSTRUCT,
19347 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19348 VAR7 (LOADSTRUCTLANE, vld2_lane,
19349 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19350 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
19351 VAR9 (STORESTRUCT, vst2,
19352 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19353 VAR7 (STORESTRUCTLANE, vst2_lane,
19354 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19355 VAR9 (LOADSTRUCT,
19356 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19357 VAR7 (LOADSTRUCTLANE, vld3_lane,
19358 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19359 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
19360 VAR9 (STORESTRUCT, vst3,
19361 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19362 VAR7 (STORESTRUCTLANE, vst3_lane,
19363 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19364 VAR9 (LOADSTRUCT, vld4,
19365 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19366 VAR7 (LOADSTRUCTLANE, vld4_lane,
19367 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19368 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
19369 VAR9 (STORESTRUCT, vst4,
19370 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19371 VAR7 (STORESTRUCTLANE, vst4_lane,
19372 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19373 VAR10 (LOGICBINOP, vand,
19374 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19375 VAR10 (LOGICBINOP, vorr,
19376 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19377 VAR10 (BINOP, veor,
19378 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19379 VAR10 (LOGICBINOP, vbic,
19380 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19381 VAR10 (LOGICBINOP, vorn,
19382 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
19383 };
19384
19385 #undef CF
19386 #undef VAR1
19387 #undef VAR2
19388 #undef VAR3
19389 #undef VAR4
19390 #undef VAR5
19391 #undef VAR6
19392 #undef VAR7
19393 #undef VAR8
19394 #undef VAR9
19395 #undef VAR10
19396
19397 /* Neon defines builtins from ARM_BUILTIN_NEON_BASE upwards, though they don't have
19398 symbolic names defined here (which would require too much duplication).
19399 FIXME? */
19400 enum arm_builtins
19401 {
19402 ARM_BUILTIN_GETWCX,
19403 ARM_BUILTIN_SETWCX,
19404
19405 ARM_BUILTIN_WZERO,
19406
19407 ARM_BUILTIN_WAVG2BR,
19408 ARM_BUILTIN_WAVG2HR,
19409 ARM_BUILTIN_WAVG2B,
19410 ARM_BUILTIN_WAVG2H,
19411
19412 ARM_BUILTIN_WACCB,
19413 ARM_BUILTIN_WACCH,
19414 ARM_BUILTIN_WACCW,
19415
19416 ARM_BUILTIN_WMACS,
19417 ARM_BUILTIN_WMACSZ,
19418 ARM_BUILTIN_WMACU,
19419 ARM_BUILTIN_WMACUZ,
19420
19421 ARM_BUILTIN_WSADB,
19422 ARM_BUILTIN_WSADBZ,
19423 ARM_BUILTIN_WSADH,
19424 ARM_BUILTIN_WSADHZ,
19425
19426 ARM_BUILTIN_WALIGN,
19427
19428 ARM_BUILTIN_TMIA,
19429 ARM_BUILTIN_TMIAPH,
19430 ARM_BUILTIN_TMIABB,
19431 ARM_BUILTIN_TMIABT,
19432 ARM_BUILTIN_TMIATB,
19433 ARM_BUILTIN_TMIATT,
19434
19435 ARM_BUILTIN_TMOVMSKB,
19436 ARM_BUILTIN_TMOVMSKH,
19437 ARM_BUILTIN_TMOVMSKW,
19438
19439 ARM_BUILTIN_TBCSTB,
19440 ARM_BUILTIN_TBCSTH,
19441 ARM_BUILTIN_TBCSTW,
19442
19443 ARM_BUILTIN_WMADDS,
19444 ARM_BUILTIN_WMADDU,
19445
19446 ARM_BUILTIN_WPACKHSS,
19447 ARM_BUILTIN_WPACKWSS,
19448 ARM_BUILTIN_WPACKDSS,
19449 ARM_BUILTIN_WPACKHUS,
19450 ARM_BUILTIN_WPACKWUS,
19451 ARM_BUILTIN_WPACKDUS,
19452
19453 ARM_BUILTIN_WADDB,
19454 ARM_BUILTIN_WADDH,
19455 ARM_BUILTIN_WADDW,
19456 ARM_BUILTIN_WADDSSB,
19457 ARM_BUILTIN_WADDSSH,
19458 ARM_BUILTIN_WADDSSW,
19459 ARM_BUILTIN_WADDUSB,
19460 ARM_BUILTIN_WADDUSH,
19461 ARM_BUILTIN_WADDUSW,
19462 ARM_BUILTIN_WSUBB,
19463 ARM_BUILTIN_WSUBH,
19464 ARM_BUILTIN_WSUBW,
19465 ARM_BUILTIN_WSUBSSB,
19466 ARM_BUILTIN_WSUBSSH,
19467 ARM_BUILTIN_WSUBSSW,
19468 ARM_BUILTIN_WSUBUSB,
19469 ARM_BUILTIN_WSUBUSH,
19470 ARM_BUILTIN_WSUBUSW,
19471
19472 ARM_BUILTIN_WAND,
19473 ARM_BUILTIN_WANDN,
19474 ARM_BUILTIN_WOR,
19475 ARM_BUILTIN_WXOR,
19476
19477 ARM_BUILTIN_WCMPEQB,
19478 ARM_BUILTIN_WCMPEQH,
19479 ARM_BUILTIN_WCMPEQW,
19480 ARM_BUILTIN_WCMPGTUB,
19481 ARM_BUILTIN_WCMPGTUH,
19482 ARM_BUILTIN_WCMPGTUW,
19483 ARM_BUILTIN_WCMPGTSB,
19484 ARM_BUILTIN_WCMPGTSH,
19485 ARM_BUILTIN_WCMPGTSW,
19486
19487 ARM_BUILTIN_TEXTRMSB,
19488 ARM_BUILTIN_TEXTRMSH,
19489 ARM_BUILTIN_TEXTRMSW,
19490 ARM_BUILTIN_TEXTRMUB,
19491 ARM_BUILTIN_TEXTRMUH,
19492 ARM_BUILTIN_TEXTRMUW,
19493 ARM_BUILTIN_TINSRB,
19494 ARM_BUILTIN_TINSRH,
19495 ARM_BUILTIN_TINSRW,
19496
19497 ARM_BUILTIN_WMAXSW,
19498 ARM_BUILTIN_WMAXSH,
19499 ARM_BUILTIN_WMAXSB,
19500 ARM_BUILTIN_WMAXUW,
19501 ARM_BUILTIN_WMAXUH,
19502 ARM_BUILTIN_WMAXUB,
19503 ARM_BUILTIN_WMINSW,
19504 ARM_BUILTIN_WMINSH,
19505 ARM_BUILTIN_WMINSB,
19506 ARM_BUILTIN_WMINUW,
19507 ARM_BUILTIN_WMINUH,
19508 ARM_BUILTIN_WMINUB,
19509
19510 ARM_BUILTIN_WMULUM,
19511 ARM_BUILTIN_WMULSM,
19512 ARM_BUILTIN_WMULUL,
19513
19514 ARM_BUILTIN_PSADBH,
19515 ARM_BUILTIN_WSHUFH,
19516
19517 ARM_BUILTIN_WSLLH,
19518 ARM_BUILTIN_WSLLW,
19519 ARM_BUILTIN_WSLLD,
19520 ARM_BUILTIN_WSRAH,
19521 ARM_BUILTIN_WSRAW,
19522 ARM_BUILTIN_WSRAD,
19523 ARM_BUILTIN_WSRLH,
19524 ARM_BUILTIN_WSRLW,
19525 ARM_BUILTIN_WSRLD,
19526 ARM_BUILTIN_WRORH,
19527 ARM_BUILTIN_WRORW,
19528 ARM_BUILTIN_WRORD,
19529 ARM_BUILTIN_WSLLHI,
19530 ARM_BUILTIN_WSLLWI,
19531 ARM_BUILTIN_WSLLDI,
19532 ARM_BUILTIN_WSRAHI,
19533 ARM_BUILTIN_WSRAWI,
19534 ARM_BUILTIN_WSRADI,
19535 ARM_BUILTIN_WSRLHI,
19536 ARM_BUILTIN_WSRLWI,
19537 ARM_BUILTIN_WSRLDI,
19538 ARM_BUILTIN_WRORHI,
19539 ARM_BUILTIN_WRORWI,
19540 ARM_BUILTIN_WRORDI,
19541
19542 ARM_BUILTIN_WUNPCKIHB,
19543 ARM_BUILTIN_WUNPCKIHH,
19544 ARM_BUILTIN_WUNPCKIHW,
19545 ARM_BUILTIN_WUNPCKILB,
19546 ARM_BUILTIN_WUNPCKILH,
19547 ARM_BUILTIN_WUNPCKILW,
19548
19549 ARM_BUILTIN_WUNPCKEHSB,
19550 ARM_BUILTIN_WUNPCKEHSH,
19551 ARM_BUILTIN_WUNPCKEHSW,
19552 ARM_BUILTIN_WUNPCKEHUB,
19553 ARM_BUILTIN_WUNPCKEHUH,
19554 ARM_BUILTIN_WUNPCKEHUW,
19555 ARM_BUILTIN_WUNPCKELSB,
19556 ARM_BUILTIN_WUNPCKELSH,
19557 ARM_BUILTIN_WUNPCKELSW,
19558 ARM_BUILTIN_WUNPCKELUB,
19559 ARM_BUILTIN_WUNPCKELUH,
19560 ARM_BUILTIN_WUNPCKELUW,
19561
19562 ARM_BUILTIN_THREAD_POINTER,
19563
19564 ARM_BUILTIN_NEON_BASE,
19565
19566 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19567 };
19568
19569 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19570
19571 static void
19572 arm_init_neon_builtins (void)
19573 {
19574 unsigned int i, fcode;
19575 tree decl;
19576
19577 tree neon_intQI_type_node;
19578 tree neon_intHI_type_node;
19579 tree neon_polyQI_type_node;
19580 tree neon_polyHI_type_node;
19581 tree neon_intSI_type_node;
19582 tree neon_intDI_type_node;
19583 tree neon_float_type_node;
19584
19585 tree intQI_pointer_node;
19586 tree intHI_pointer_node;
19587 tree intSI_pointer_node;
19588 tree intDI_pointer_node;
19589 tree float_pointer_node;
19590
19591 tree const_intQI_node;
19592 tree const_intHI_node;
19593 tree const_intSI_node;
19594 tree const_intDI_node;
19595 tree const_float_node;
19596
19597 tree const_intQI_pointer_node;
19598 tree const_intHI_pointer_node;
19599 tree const_intSI_pointer_node;
19600 tree const_intDI_pointer_node;
19601 tree const_float_pointer_node;
19602
19603 tree V8QI_type_node;
19604 tree V4HI_type_node;
19605 tree V2SI_type_node;
19606 tree V2SF_type_node;
19607 tree V16QI_type_node;
19608 tree V8HI_type_node;
19609 tree V4SI_type_node;
19610 tree V4SF_type_node;
19611 tree V2DI_type_node;
19612
19613 tree intUQI_type_node;
19614 tree intUHI_type_node;
19615 tree intUSI_type_node;
19616 tree intUDI_type_node;
19617
19618 tree intEI_type_node;
19619 tree intOI_type_node;
19620 tree intCI_type_node;
19621 tree intXI_type_node;
19622
19623 tree V8QI_pointer_node;
19624 tree V4HI_pointer_node;
19625 tree V2SI_pointer_node;
19626 tree V2SF_pointer_node;
19627 tree V16QI_pointer_node;
19628 tree V8HI_pointer_node;
19629 tree V4SI_pointer_node;
19630 tree V4SF_pointer_node;
19631 tree V2DI_pointer_node;
19632
19633 tree void_ftype_pv8qi_v8qi_v8qi;
19634 tree void_ftype_pv4hi_v4hi_v4hi;
19635 tree void_ftype_pv2si_v2si_v2si;
19636 tree void_ftype_pv2sf_v2sf_v2sf;
19637 tree void_ftype_pdi_di_di;
19638 tree void_ftype_pv16qi_v16qi_v16qi;
19639 tree void_ftype_pv8hi_v8hi_v8hi;
19640 tree void_ftype_pv4si_v4si_v4si;
19641 tree void_ftype_pv4sf_v4sf_v4sf;
19642 tree void_ftype_pv2di_v2di_v2di;
19643
19644 tree reinterp_ftype_dreg[5][5];
19645 tree reinterp_ftype_qreg[5][5];
19646 tree dreg_types[5], qreg_types[5];
19647
19648 /* Create distinguished type nodes for NEON vector element types,
19649 and pointers to values of such types, so we can detect them later. */
19650 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19651 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19652 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19653 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19654 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19655 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19656 neon_float_type_node = make_node (REAL_TYPE);
19657 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19658 layout_type (neon_float_type_node);
19659
19660 /* Define typedefs which exactly correspond to the modes we are basing vector
19661 types on. If you change these names you'll need to change
19662 the table used by arm_mangle_type too. */
19663 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19664 "__builtin_neon_qi");
19665 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19666 "__builtin_neon_hi");
19667 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19668 "__builtin_neon_si");
19669 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19670 "__builtin_neon_sf");
19671 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19672 "__builtin_neon_di");
19673 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19674 "__builtin_neon_poly8");
19675 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19676 "__builtin_neon_poly16");
19677
19678 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19679 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19680 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19681 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19682 float_pointer_node = build_pointer_type (neon_float_type_node);
19683
19684 /* Next create constant-qualified versions of the above types. */
19685 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19686 TYPE_QUAL_CONST);
19687 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19688 TYPE_QUAL_CONST);
19689 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19690 TYPE_QUAL_CONST);
19691 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19692 TYPE_QUAL_CONST);
19693 const_float_node = build_qualified_type (neon_float_type_node,
19694 TYPE_QUAL_CONST);
19695
19696 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19697 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19698 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19699 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19700 const_float_pointer_node = build_pointer_type (const_float_node);
19701
19702 /* Now create vector types based on our NEON element types. */
19703 /* 64-bit vectors. */
19704 V8QI_type_node =
19705 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19706 V4HI_type_node =
19707 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19708 V2SI_type_node =
19709 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19710 V2SF_type_node =
19711 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19712 /* 128-bit vectors. */
19713 V16QI_type_node =
19714 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19715 V8HI_type_node =
19716 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19717 V4SI_type_node =
19718 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19719 V4SF_type_node =
19720 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19721 V2DI_type_node =
19722 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19723
19724 /* Unsigned integer types for various mode sizes. */
19725 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19726 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19727 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19728 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19729
19730 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19731 "__builtin_neon_uqi");
19732 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19733 "__builtin_neon_uhi");
19734 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19735 "__builtin_neon_usi");
19736 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19737 "__builtin_neon_udi");
19738
19739 /* Opaque integer types for structures of vectors. */
19740 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19741 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19742 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19743 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19744
19745 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19746 "__builtin_neon_ti");
19747 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19748 "__builtin_neon_ei");
19749 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19750 "__builtin_neon_oi");
19751 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19752 "__builtin_neon_ci");
19753 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19754 "__builtin_neon_xi");
19755
19756 /* Pointers to vector types. */
19757 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19758 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19759 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19760 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19761 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19762 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19763 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19764 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19765 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19766
19767 /* Operations which return results as pairs. */
19768 void_ftype_pv8qi_v8qi_v8qi =
19769 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19770 V8QI_type_node, NULL);
19771 void_ftype_pv4hi_v4hi_v4hi =
19772 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19773 V4HI_type_node, NULL);
19774 void_ftype_pv2si_v2si_v2si =
19775 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19776 V2SI_type_node, NULL);
19777 void_ftype_pv2sf_v2sf_v2sf =
19778 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19779 V2SF_type_node, NULL);
19780 void_ftype_pdi_di_di =
19781 build_function_type_list (void_type_node, intDI_pointer_node,
19782 neon_intDI_type_node, neon_intDI_type_node, NULL);
19783 void_ftype_pv16qi_v16qi_v16qi =
19784 build_function_type_list (void_type_node, V16QI_pointer_node,
19785 V16QI_type_node, V16QI_type_node, NULL);
19786 void_ftype_pv8hi_v8hi_v8hi =
19787 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19788 V8HI_type_node, NULL);
19789 void_ftype_pv4si_v4si_v4si =
19790 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19791 V4SI_type_node, NULL);
19792 void_ftype_pv4sf_v4sf_v4sf =
19793 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19794 V4SF_type_node, NULL);
19795 void_ftype_pv2di_v2di_v2di =
19796 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19797 V2DI_type_node, NULL);
19798
19799 dreg_types[0] = V8QI_type_node;
19800 dreg_types[1] = V4HI_type_node;
19801 dreg_types[2] = V2SI_type_node;
19802 dreg_types[3] = V2SF_type_node;
19803 dreg_types[4] = neon_intDI_type_node;
19804
19805 qreg_types[0] = V16QI_type_node;
19806 qreg_types[1] = V8HI_type_node;
19807 qreg_types[2] = V4SI_type_node;
19808 qreg_types[3] = V4SF_type_node;
19809 qreg_types[4] = V2DI_type_node;
19810
19811 for (i = 0; i < 5; i++)
19812 {
19813 int j;
19814 for (j = 0; j < 5; j++)
19815 {
19816 reinterp_ftype_dreg[i][j]
19817 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19818 reinterp_ftype_qreg[i][j]
19819 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19820 }
19821 }
19822
19823 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19824 i < ARRAY_SIZE (neon_builtin_data);
19825 i++, fcode++)
19826 {
19827 neon_builtin_datum *d = &neon_builtin_data[i];
19828
19829 const char* const modenames[] = {
19830 "v8qi", "v4hi", "v2si", "v2sf", "di",
19831 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19832 "ti", "ei", "oi"
19833 };
19834 char namebuf[60];
19835 tree ftype = NULL;
19836 int is_load = 0, is_store = 0;
19837
19838 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19839
19840 d->fcode = fcode;
19841
19842 switch (d->itype)
19843 {
19844 case NEON_LOAD1:
19845 case NEON_LOAD1LANE:
19846 case NEON_LOADSTRUCT:
19847 case NEON_LOADSTRUCTLANE:
19848 is_load = 1;
19849 /* Fall through. */
19850 case NEON_STORE1:
19851 case NEON_STORE1LANE:
19852 case NEON_STORESTRUCT:
19853 case NEON_STORESTRUCTLANE:
19854 if (!is_load)
19855 is_store = 1;
19856 /* Fall through. */
19857 case NEON_UNOP:
19858 case NEON_BINOP:
19859 case NEON_LOGICBINOP:
19860 case NEON_SHIFTINSERT:
19861 case NEON_TERNOP:
19862 case NEON_GETLANE:
19863 case NEON_SETLANE:
19864 case NEON_CREATE:
19865 case NEON_DUP:
19866 case NEON_DUPLANE:
19867 case NEON_SHIFTIMM:
19868 case NEON_SHIFTACC:
19869 case NEON_COMBINE:
19870 case NEON_SPLIT:
19871 case NEON_CONVERT:
19872 case NEON_FIXCONV:
19873 case NEON_LANEMUL:
19874 case NEON_LANEMULL:
19875 case NEON_LANEMULH:
19876 case NEON_LANEMAC:
19877 case NEON_SCALARMUL:
19878 case NEON_SCALARMULL:
19879 case NEON_SCALARMULH:
19880 case NEON_SCALARMAC:
19881 case NEON_SELECT:
19882 case NEON_VTBL:
19883 case NEON_VTBX:
19884 {
19885 int k;
19886 tree return_type = void_type_node, args = void_list_node;
19887
19888 /* Build a function type directly from the insn_data for
19889 this builtin. The build_function_type() function takes
19890 care of removing duplicates for us. */
19891 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19892 {
19893 tree eltype;
19894
19895 if (is_load && k == 1)
19896 {
19897 /* Neon load patterns always have the memory
19898 operand in the operand 1 position. */
19899 gcc_assert (insn_data[d->code].operand[k].predicate
19900 == neon_struct_operand);
19901
19902 switch (d->mode)
19903 {
19904 case T_V8QI:
19905 case T_V16QI:
19906 eltype = const_intQI_pointer_node;
19907 break;
19908
19909 case T_V4HI:
19910 case T_V8HI:
19911 eltype = const_intHI_pointer_node;
19912 break;
19913
19914 case T_V2SI:
19915 case T_V4SI:
19916 eltype = const_intSI_pointer_node;
19917 break;
19918
19919 case T_V2SF:
19920 case T_V4SF:
19921 eltype = const_float_pointer_node;
19922 break;
19923
19924 case T_DI:
19925 case T_V2DI:
19926 eltype = const_intDI_pointer_node;
19927 break;
19928
19929 default: gcc_unreachable ();
19930 }
19931 }
19932 else if (is_store && k == 0)
19933 {
19934 /* Similarly, Neon store patterns use operand 0 as
19935 the memory location to store to. */
19936 gcc_assert (insn_data[d->code].operand[k].predicate
19937 == neon_struct_operand);
19938
19939 switch (d->mode)
19940 {
19941 case T_V8QI:
19942 case T_V16QI:
19943 eltype = intQI_pointer_node;
19944 break;
19945
19946 case T_V4HI:
19947 case T_V8HI:
19948 eltype = intHI_pointer_node;
19949 break;
19950
19951 case T_V2SI:
19952 case T_V4SI:
19953 eltype = intSI_pointer_node;
19954 break;
19955
19956 case T_V2SF:
19957 case T_V4SF:
19958 eltype = float_pointer_node;
19959 break;
19960
19961 case T_DI:
19962 case T_V2DI:
19963 eltype = intDI_pointer_node;
19964 break;
19965
19966 default: gcc_unreachable ();
19967 }
19968 }
19969 else
19970 {
19971 switch (insn_data[d->code].operand[k].mode)
19972 {
19973 case VOIDmode: eltype = void_type_node; break;
19974 /* Scalars. */
19975 case QImode: eltype = neon_intQI_type_node; break;
19976 case HImode: eltype = neon_intHI_type_node; break;
19977 case SImode: eltype = neon_intSI_type_node; break;
19978 case SFmode: eltype = neon_float_type_node; break;
19979 case DImode: eltype = neon_intDI_type_node; break;
19980 case TImode: eltype = intTI_type_node; break;
19981 case EImode: eltype = intEI_type_node; break;
19982 case OImode: eltype = intOI_type_node; break;
19983 case CImode: eltype = intCI_type_node; break;
19984 case XImode: eltype = intXI_type_node; break;
19985 /* 64-bit vectors. */
19986 case V8QImode: eltype = V8QI_type_node; break;
19987 case V4HImode: eltype = V4HI_type_node; break;
19988 case V2SImode: eltype = V2SI_type_node; break;
19989 case V2SFmode: eltype = V2SF_type_node; break;
19990 /* 128-bit vectors. */
19991 case V16QImode: eltype = V16QI_type_node; break;
19992 case V8HImode: eltype = V8HI_type_node; break;
19993 case V4SImode: eltype = V4SI_type_node; break;
19994 case V4SFmode: eltype = V4SF_type_node; break;
19995 case V2DImode: eltype = V2DI_type_node; break;
19996 default: gcc_unreachable ();
19997 }
19998 }
19999
20000 if (k == 0 && !is_store)
20001 return_type = eltype;
20002 else
20003 args = tree_cons (NULL_TREE, eltype, args);
20004 }
20005
20006 ftype = build_function_type (return_type, args);
20007 }
20008 break;
20009
20010 case NEON_RESULTPAIR:
20011 {
20012 switch (insn_data[d->code].operand[1].mode)
20013 {
20014 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
20015 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
20016 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
20017 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
20018 case DImode: ftype = void_ftype_pdi_di_di; break;
20019 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
20020 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
20021 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
20022 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
20023 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
20024 default: gcc_unreachable ();
20025 }
20026 }
20027 break;
20028
20029 case NEON_REINTERP:
20030 {
20031 /* We iterate over 5 doubleword types, then 5 quadword
20032 types. */
20033 int rhs = d->mode % 5;
20034 switch (insn_data[d->code].operand[0].mode)
20035 {
20036 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
20037 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
20038 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
20039 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
20040 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
20041 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
20042 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
20043 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
20044 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
20045 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
20046 default: gcc_unreachable ();
20047 }
20048 }
20049 break;
20050
20051 default:
20052 gcc_unreachable ();
20053 }
20054
20055 gcc_assert (ftype != NULL);
20056
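/* Builtin names are the instruction name suffixed with the key mode,
   e.g. "__builtin_neon_vaddv8qi" for the V8QI variant of vadd.  */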
20057 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
20058
20059 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
20060 NULL_TREE);
20061 arm_builtin_decls[fcode] = decl;
20062 }
20063 }
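
/* Illustration (not part of GCC itself): each table entry becomes a
   machine-dependent builtin named after d->name plus its mode string,
   so a "vadd" entry in V8QImode would be registered as something like
   __builtin_neon_vaddv8qi; the exact spelling depends on the
   neon_builtin_data table and modenames[].  arm_neon.h then wraps such
   builtins in the user-visible intrinsics, roughly:

     #include <arm_neon.h>

     int8x8_t
     add_bytes (int8x8_t a, int8x8_t b)
     {
       return vadd_s8 (a, b);
     }

   where vadd_s8 is assumed to expand to the corresponding
   __builtin_neon_vadd variant.  */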
20064
20065 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
20066 do \
20067 { \
20068 if ((MASK) & insn_flags) \
20069 { \
20070 tree bdecl; \
20071 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
20072 BUILT_IN_MD, NULL, NULL_TREE); \
20073 arm_builtin_decls[CODE] = bdecl; \
20074 } \
20075 } \
20076 while (0)
20077
20078 struct builtin_description
20079 {
20080 const unsigned int mask;
20081 const enum insn_code icode;
20082 const char * const name;
20083 const enum arm_builtins code;
20084 const enum rtx_code comparison;
20085 const unsigned int flag;
20086 };
20087
20088 static const struct builtin_description bdesc_2arg[] =
20089 {
20090 #define IWMMXT_BUILTIN(code, string, builtin) \
20091 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
20092 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20093
20094 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
20095 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
20096 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
20097 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
20098 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
20099 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
20100 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
20101 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
20102 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
20103 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
20104 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
20105 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
20106 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
20107 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
20108 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
20109 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
20110 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
20111 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
20112 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
20113 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
20114 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
20115 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
20116 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
20117 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
20118 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
20119 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
20120 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
20121 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
20122 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
20123 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
20124 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
20125 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
20126 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
20127 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
20128 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
20129 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
20130 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
20131 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
20132 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
20133 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
20134 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
20135 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
20136 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
20137 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
20138 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
20139 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
20140 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
20141 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
20142 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
20143 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
20144 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
20145 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
20146 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
20147 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
20148 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
20149 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
20150 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
20151 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
20152
20153 #define IWMMXT_BUILTIN2(code, builtin) \
20154 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20155
20156 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
20157 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
20158 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
20159 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
20160 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
20161 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
20162 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
20163 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
20164 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
20165 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
20166 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
20167 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
20168 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
20169 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
20170 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
20171 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
20172 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
20173 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
20174 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
20175 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
20176 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
20177 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
20178 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
20179 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
20180 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
20181 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
20182 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
20183 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
20184 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
20185 IWMMXT_BUILTIN2 (rordi3, WRORDI)
20186 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
20187 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
20188 };
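
/* For reference (derived mechanically from the macro definition above):
   the first entry, IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB), expands to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   i.e. the builtin is registered only when FL_IWMMXT is present in
   insn_flags and is expanded through the addv8qi3 insn pattern.  */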
20189
20190 static const struct builtin_description bdesc_1arg[] =
20191 {
20192 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
20193 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
20194 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
20195 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
20196 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
20197 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
20198 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
20199 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
20200 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
20201 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
20202 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
20203 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
20204 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
20205 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
20206 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
20207 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
20208 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
20209 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
20210 };
20211
20212 /* Set up all the iWMMXt builtins. This is not called if
20213 TARGET_IWMMXT is zero. */
20214
20215 static void
20216 arm_init_iwmmxt_builtins (void)
20217 {
20218 const struct builtin_description * d;
20219 size_t i;
20220
20221 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
20222 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
20223 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
20224
20225 tree int_ftype_int
20226 = build_function_type_list (integer_type_node,
20227 integer_type_node, NULL_TREE);
20228 tree v8qi_ftype_v8qi_v8qi_int
20229 = build_function_type_list (V8QI_type_node,
20230 V8QI_type_node, V8QI_type_node,
20231 integer_type_node, NULL_TREE);
20232 tree v4hi_ftype_v4hi_int
20233 = build_function_type_list (V4HI_type_node,
20234 V4HI_type_node, integer_type_node, NULL_TREE);
20235 tree v2si_ftype_v2si_int
20236 = build_function_type_list (V2SI_type_node,
20237 V2SI_type_node, integer_type_node, NULL_TREE);
20238 tree v2si_ftype_di_di
20239 = build_function_type_list (V2SI_type_node,
20240 long_long_integer_type_node,
20241 long_long_integer_type_node,
20242 NULL_TREE);
20243 tree di_ftype_di_int
20244 = build_function_type_list (long_long_integer_type_node,
20245 long_long_integer_type_node,
20246 integer_type_node, NULL_TREE);
20247 tree di_ftype_di_int_int
20248 = build_function_type_list (long_long_integer_type_node,
20249 long_long_integer_type_node,
20250 integer_type_node,
20251 integer_type_node, NULL_TREE);
20252 tree int_ftype_v8qi
20253 = build_function_type_list (integer_type_node,
20254 V8QI_type_node, NULL_TREE);
20255 tree int_ftype_v4hi
20256 = build_function_type_list (integer_type_node,
20257 V4HI_type_node, NULL_TREE);
20258 tree int_ftype_v2si
20259 = build_function_type_list (integer_type_node,
20260 V2SI_type_node, NULL_TREE);
20261 tree int_ftype_v8qi_int
20262 = build_function_type_list (integer_type_node,
20263 V8QI_type_node, integer_type_node, NULL_TREE);
20264 tree int_ftype_v4hi_int
20265 = build_function_type_list (integer_type_node,
20266 V4HI_type_node, integer_type_node, NULL_TREE);
20267 tree int_ftype_v2si_int
20268 = build_function_type_list (integer_type_node,
20269 V2SI_type_node, integer_type_node, NULL_TREE);
20270 tree v8qi_ftype_v8qi_int_int
20271 = build_function_type_list (V8QI_type_node,
20272 V8QI_type_node, integer_type_node,
20273 integer_type_node, NULL_TREE);
20274 tree v4hi_ftype_v4hi_int_int
20275 = build_function_type_list (V4HI_type_node,
20276 V4HI_type_node, integer_type_node,
20277 integer_type_node, NULL_TREE);
20278 tree v2si_ftype_v2si_int_int
20279 = build_function_type_list (V2SI_type_node,
20280 V2SI_type_node, integer_type_node,
20281 integer_type_node, NULL_TREE);
20282 /* Miscellaneous. */
20283 tree v8qi_ftype_v4hi_v4hi
20284 = build_function_type_list (V8QI_type_node,
20285 V4HI_type_node, V4HI_type_node, NULL_TREE);
20286 tree v4hi_ftype_v2si_v2si
20287 = build_function_type_list (V4HI_type_node,
20288 V2SI_type_node, V2SI_type_node, NULL_TREE);
20289 tree v2si_ftype_v4hi_v4hi
20290 = build_function_type_list (V2SI_type_node,
20291 V4HI_type_node, V4HI_type_node, NULL_TREE);
20292 tree v2si_ftype_v8qi_v8qi
20293 = build_function_type_list (V2SI_type_node,
20294 V8QI_type_node, V8QI_type_node, NULL_TREE);
20295 tree v4hi_ftype_v4hi_di
20296 = build_function_type_list (V4HI_type_node,
20297 V4HI_type_node, long_long_integer_type_node,
20298 NULL_TREE);
20299 tree v2si_ftype_v2si_di
20300 = build_function_type_list (V2SI_type_node,
20301 V2SI_type_node, long_long_integer_type_node,
20302 NULL_TREE);
20303 tree void_ftype_int_int
20304 = build_function_type_list (void_type_node,
20305 integer_type_node, integer_type_node,
20306 NULL_TREE);
20307 tree di_ftype_void
20308 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
20309 tree di_ftype_v8qi
20310 = build_function_type_list (long_long_integer_type_node,
20311 V8QI_type_node, NULL_TREE);
20312 tree di_ftype_v4hi
20313 = build_function_type_list (long_long_integer_type_node,
20314 V4HI_type_node, NULL_TREE);
20315 tree di_ftype_v2si
20316 = build_function_type_list (long_long_integer_type_node,
20317 V2SI_type_node, NULL_TREE);
20318 tree v2si_ftype_v4hi
20319 = build_function_type_list (V2SI_type_node,
20320 V4HI_type_node, NULL_TREE);
20321 tree v4hi_ftype_v8qi
20322 = build_function_type_list (V4HI_type_node,
20323 V8QI_type_node, NULL_TREE);
20324
20325 tree di_ftype_di_v4hi_v4hi
20326 = build_function_type_list (long_long_unsigned_type_node,
20327 long_long_unsigned_type_node,
20328 V4HI_type_node, V4HI_type_node,
20329 NULL_TREE);
20330
20331 tree di_ftype_v4hi_v4hi
20332 = build_function_type_list (long_long_unsigned_type_node,
20333 V4HI_type_node,V4HI_type_node,
20334 NULL_TREE);
20335
20336 /* Normal vector binops. */
20337 tree v8qi_ftype_v8qi_v8qi
20338 = build_function_type_list (V8QI_type_node,
20339 V8QI_type_node, V8QI_type_node, NULL_TREE);
20340 tree v4hi_ftype_v4hi_v4hi
20341 = build_function_type_list (V4HI_type_node,
20342 V4HI_type_node,V4HI_type_node, NULL_TREE);
20343 tree v2si_ftype_v2si_v2si
20344 = build_function_type_list (V2SI_type_node,
20345 V2SI_type_node, V2SI_type_node, NULL_TREE);
20346 tree di_ftype_di_di
20347 = build_function_type_list (long_long_unsigned_type_node,
20348 long_long_unsigned_type_node,
20349 long_long_unsigned_type_node,
20350 NULL_TREE);
20351
20352 /* Add all builtins that are more or less simple operations on two
20353 operands. */
20354 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20355 {
20356 /* Use one of the operands; the target can have a different mode for
20357 mask-generating compares. */
20358 enum machine_mode mode;
20359 tree type;
20360
20361 if (d->name == 0)
20362 continue;
20363
20364 mode = insn_data[d->icode].operand[1].mode;
20365
20366 switch (mode)
20367 {
20368 case V8QImode:
20369 type = v8qi_ftype_v8qi_v8qi;
20370 break;
20371 case V4HImode:
20372 type = v4hi_ftype_v4hi_v4hi;
20373 break;
20374 case V2SImode:
20375 type = v2si_ftype_v2si_v2si;
20376 break;
20377 case DImode:
20378 type = di_ftype_di_di;
20379 break;
20380
20381 default:
20382 gcc_unreachable ();
20383 }
20384
20385 def_mbuiltin (d->mask, d->name, type, d->code);
20386 }
20387
20388   /* Add the remaining iWMMXt insns with somewhat more complicated types.  */
20389 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20390 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20391 ARM_BUILTIN_ ## CODE)
20392
20393 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20394 iwmmx_mbuiltin ("setwcx", void_ftype_int_int, SETWCX);
20395 iwmmx_mbuiltin ("getwcx", int_ftype_int, GETWCX);
20396
20397 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20398 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20399 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20400 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20401 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20402 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20403
20404 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20405 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20406 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20407 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20408 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20409 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20410
20411 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20412 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20413 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20414 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20415 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20416 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20417
20418 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20419 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20420 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20421 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20422 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20423 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20424
20425 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20426
20427 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi, WSADB);
20428 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi, WSADH);
20429 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20430 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20431
20432 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20433 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20434 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20435 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20436 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20437 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20438 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20439 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20440 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20441
20442 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20443 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20444 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20445
20446 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20447 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20448 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20449
20450 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20451 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20452 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20453 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20454 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20455 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20456
20457 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20458 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20459 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20460 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20461 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20462 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20463 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20464 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20465 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20466 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20467 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20468 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20469
20470 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20471 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20472 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20473 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20474
20475 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGN);
20476 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20477 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20478 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20479 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20480 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20481 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20482
20483 #undef iwmmx_mbuiltin
20484 }
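
/* A rough usage sketch (illustrative only, assuming a target with iWMMXt
   enabled); user code normally reaches these builtins through mmintrin.h,
   but they can also be called directly:

     typedef signed char v8qi __attribute__ ((vector_size (8)));

     v8qi
     add_bytes (v8qi a, v8qi b)
     {
       return __builtin_arm_waddb (a, b);
     }
*/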
20485
20486 static void
20487 arm_init_tls_builtins (void)
20488 {
20489 tree ftype, decl;
20490
20491 ftype = build_function_type (ptr_type_node, void_list_node);
20492 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
20493 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
20494 NULL, NULL_TREE);
20495 TREE_NOTHROW (decl) = 1;
20496 TREE_READONLY (decl) = 1;
20497 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
20498 }
20499
20500 static void
20501 arm_init_fp16_builtins (void)
20502 {
20503 tree fp16_type = make_node (REAL_TYPE);
20504 TYPE_PRECISION (fp16_type) = 16;
20505 layout_type (fp16_type);
20506 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
20507 }
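
/* Illustration only: with an __fp16 format selected (for instance
   -mfp16-format=ieee, which makes arm_fp16_format nonzero), the
   registration above lets user code declare half-precision objects:

     static __fp16 h;

     void
     set_half (float x)
     {
       h = x;
     }

   The value is stored as 16 bits; arithmetic on it promotes to float.
   The hooks that follow enforce the language rules for the type (no
   __fp16 parameters or return values, promotion to float, conversions
   to or from double via float).  */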
20508
20509 static void
20510 arm_init_builtins (void)
20511 {
20512 arm_init_tls_builtins ();
20513
20514 if (TARGET_REALLY_IWMMXT)
20515 arm_init_iwmmxt_builtins ();
20516
20517 if (TARGET_NEON)
20518 arm_init_neon_builtins ();
20519
20520 if (arm_fp16_format)
20521 arm_init_fp16_builtins ();
20522 }
20523
20524 /* Return the ARM builtin for CODE. */
20525
20526 static tree
20527 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20528 {
20529 if (code >= ARM_BUILTIN_MAX)
20530 return error_mark_node;
20531
20532 return arm_builtin_decls[code];
20533 }
20534
20535 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20536
20537 static const char *
20538 arm_invalid_parameter_type (const_tree t)
20539 {
20540 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20541 return N_("function parameters cannot have __fp16 type");
20542 return NULL;
20543 }
20544
20545 /* Implement TARGET_INVALID_RETURN_TYPE.  */
20546
20547 static const char *
20548 arm_invalid_return_type (const_tree t)
20549 {
20550 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20551 return N_("functions cannot return __fp16 type");
20552 return NULL;
20553 }
20554
20555 /* Implement TARGET_PROMOTED_TYPE. */
20556
20557 static tree
20558 arm_promoted_type (const_tree t)
20559 {
20560 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20561 return float_type_node;
20562 return NULL_TREE;
20563 }
20564
20565 /* Implement TARGET_CONVERT_TO_TYPE.
20566    Specifically, this hook implements the peculiarity of the ARM
20567    half-precision floating-point C semantics that requires conversions of
20568    __fp16 to or from double to go through an intermediate conversion to float.  */
20569
20570 static tree
20571 arm_convert_to_type (tree type, tree expr)
20572 {
20573 tree fromtype = TREE_TYPE (expr);
20574 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20575 return NULL_TREE;
20576 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20577 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20578 return convert (type, convert (float_type_node, expr));
20579 return NULL_TREE;
20580 }
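
/* Worked illustration of the hook above (the behaviour itself comes from
   the ARM __fp16 semantics, not from this sketch): given

     static __fp16 h;
     double widen (void) { return h; }

   the 16-to-64-bit conversion is not done directly; arm_convert_to_type
   rewrites it as (double) (float) h, inserting the intermediate
   single-precision step.  The same applies in the narrowing direction.  */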
20581
20582 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20583 This simply adds HFmode as a supported mode; even though we don't
20584 implement arithmetic on this type directly, it's supported by
20585 optabs conversions, much the way the double-word arithmetic is
20586 special-cased in the default hook. */
20587
20588 static bool
20589 arm_scalar_mode_supported_p (enum machine_mode mode)
20590 {
20591 if (mode == HFmode)
20592 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
20593 else if (ALL_FIXED_POINT_MODE_P (mode))
20594 return true;
20595 else
20596 return default_scalar_mode_supported_p (mode);
20597 }
20598
20599 /* Errors in the source file can cause expand_expr to return const0_rtx
20600 where we expect a vector. To avoid crashing, use one of the vector
20601 clear instructions. */
20602
20603 static rtx
20604 safe_vector_operand (rtx x, enum machine_mode mode)
20605 {
20606 if (x != const0_rtx)
20607 return x;
20608 x = gen_reg_rtx (mode);
20609
20610 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20611 : gen_rtx_SUBREG (DImode, x, 0)));
20612 return x;
20613 }
20614
20615 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20616
20617 static rtx
20618 arm_expand_binop_builtin (enum insn_code icode,
20619 tree exp, rtx target)
20620 {
20621 rtx pat;
20622 tree arg0 = CALL_EXPR_ARG (exp, 0);
20623 tree arg1 = CALL_EXPR_ARG (exp, 1);
20624 rtx op0 = expand_normal (arg0);
20625 rtx op1 = expand_normal (arg1);
20626 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20627 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20628 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20629
20630 if (VECTOR_MODE_P (mode0))
20631 op0 = safe_vector_operand (op0, mode0);
20632 if (VECTOR_MODE_P (mode1))
20633 op1 = safe_vector_operand (op1, mode1);
20634
20635 if (! target
20636 || GET_MODE (target) != tmode
20637 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20638 target = gen_reg_rtx (tmode);
20639
20640 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
20641
20642 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20643 op0 = copy_to_mode_reg (mode0, op0);
20644 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20645 op1 = copy_to_mode_reg (mode1, op1);
20646
20647 pat = GEN_FCN (icode) (target, op0, op1);
20648 if (! pat)
20649 return 0;
20650 emit_insn (pat);
20651 return target;
20652 }
20653
20654 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20655
20656 static rtx
20657 arm_expand_unop_builtin (enum insn_code icode,
20658 tree exp, rtx target, int do_load)
20659 {
20660 rtx pat;
20661 tree arg0 = CALL_EXPR_ARG (exp, 0);
20662 rtx op0 = expand_normal (arg0);
20663 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20664 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20665
20666 if (! target
20667 || GET_MODE (target) != tmode
20668 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20669 target = gen_reg_rtx (tmode);
20670 if (do_load)
20671 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20672 else
20673 {
20674 if (VECTOR_MODE_P (mode0))
20675 op0 = safe_vector_operand (op0, mode0);
20676
20677 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20678 op0 = copy_to_mode_reg (mode0, op0);
20679 }
20680
20681 pat = GEN_FCN (icode) (target, op0);
20682 if (! pat)
20683 return 0;
20684 emit_insn (pat);
20685 return target;
20686 }
20687
20688 typedef enum {
20689 NEON_ARG_COPY_TO_REG,
20690 NEON_ARG_CONSTANT,
20691 NEON_ARG_MEMORY,
20692 NEON_ARG_STOP
20693 } builtin_arg;
20694
20695 #define NEON_MAX_BUILTIN_ARGS 5
20696
20697 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20698 and return an expression for the accessed memory.
20699
20700 The intrinsic function operates on a block of registers that has
20701 mode REG_MODE. This block contains vectors of type TYPE_MODE.
20702 The function references the memory at EXP in mode MEM_MODE;
20703 this mode may be BLKmode if no more suitable mode is available. */
20704
20705 static tree
20706 neon_dereference_pointer (tree exp, enum machine_mode mem_mode,
20707 enum machine_mode reg_mode,
20708 neon_builtin_type_mode type_mode)
20709 {
20710 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20711 tree elem_type, upper_bound, array_type;
20712
20713 /* Work out the size of the register block in bytes. */
20714 reg_size = GET_MODE_SIZE (reg_mode);
20715
20716 /* Work out the size of each vector in bytes. */
20717 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20718 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20719
20720 /* Work out how many vectors there are. */
20721 gcc_assert (reg_size % vector_size == 0);
20722 nvectors = reg_size / vector_size;
20723
20724 /* Work out how many elements are being loaded or stored.
20725 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20726 and memory elements; anything else implies a lane load or store. */
20727 if (mem_mode == reg_mode)
20728 nelems = vector_size * nvectors;
20729 else
20730 nelems = nvectors;
20731
20732 /* Work out the type of each element. */
20733 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp)));
20734 elem_type = TREE_TYPE (TREE_TYPE (exp));
20735
20736 /* Create a type that describes the full access. */
20737 upper_bound = build_int_cst (size_type_node, nelems - 1);
20738 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20739
20740 /* Dereference EXP using that type. */
20741 return fold_build2 (MEM_REF, array_type, exp,
20742 build_int_cst (build_pointer_type (array_type), 0));
20743 }
20744
20745 /* Expand a Neon builtin. */
20746 static rtx
20747 arm_expand_neon_args (rtx target, int icode, int have_retval,
20748 neon_builtin_type_mode type_mode,
20749 tree exp, ...)
20750 {
20751 va_list ap;
20752 rtx pat;
20753 tree arg[NEON_MAX_BUILTIN_ARGS];
20754 rtx op[NEON_MAX_BUILTIN_ARGS];
20755 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20756 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20757 enum machine_mode other_mode;
20758 int argc = 0;
20759 int opno;
20760
20761 if (have_retval
20762 && (!target
20763 || GET_MODE (target) != tmode
20764 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20765 target = gen_reg_rtx (tmode);
20766
20767 va_start (ap, exp);
20768
20769 for (;;)
20770 {
20771 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20772
20773 if (thisarg == NEON_ARG_STOP)
20774 break;
20775 else
20776 {
20777 opno = argc + have_retval;
20778 mode[argc] = insn_data[icode].operand[opno].mode;
20779 arg[argc] = CALL_EXPR_ARG (exp, argc);
20780 if (thisarg == NEON_ARG_MEMORY)
20781 {
20782 other_mode = insn_data[icode].operand[1 - opno].mode;
20783 arg[argc] = neon_dereference_pointer (arg[argc], mode[argc],
20784 other_mode, type_mode);
20785 }
20786 op[argc] = expand_normal (arg[argc]);
20787
20788 switch (thisarg)
20789 {
20790 case NEON_ARG_COPY_TO_REG:
20791 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20792 if (!(*insn_data[icode].operand[opno].predicate)
20793 (op[argc], mode[argc]))
20794 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20795 break;
20796
20797 case NEON_ARG_CONSTANT:
20798 /* FIXME: This error message is somewhat unhelpful. */
20799 if (!(*insn_data[icode].operand[opno].predicate)
20800 (op[argc], mode[argc]))
20801 error ("argument must be a constant");
20802 break;
20803
20804 case NEON_ARG_MEMORY:
20805 gcc_assert (MEM_P (op[argc]));
20806 PUT_MODE (op[argc], mode[argc]);
20807 /* ??? arm_neon.h uses the same built-in functions for signed
20808 and unsigned accesses, casting where necessary. This isn't
20809 alias safe. */
20810 set_mem_alias_set (op[argc], 0);
20811 if (!(*insn_data[icode].operand[opno].predicate)
20812 (op[argc], mode[argc]))
20813 op[argc] = (replace_equiv_address
20814 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20815 break;
20816
20817 case NEON_ARG_STOP:
20818 gcc_unreachable ();
20819 }
20820
20821 argc++;
20822 }
20823 }
20824
20825 va_end (ap);
20826
20827 if (have_retval)
20828 switch (argc)
20829 {
20830 case 1:
20831 pat = GEN_FCN (icode) (target, op[0]);
20832 break;
20833
20834 case 2:
20835 pat = GEN_FCN (icode) (target, op[0], op[1]);
20836 break;
20837
20838 case 3:
20839 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20840 break;
20841
20842 case 4:
20843 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20844 break;
20845
20846 case 5:
20847 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20848 break;
20849
20850 default:
20851 gcc_unreachable ();
20852 }
20853 else
20854 switch (argc)
20855 {
20856 case 1:
20857 pat = GEN_FCN (icode) (op[0]);
20858 break;
20859
20860 case 2:
20861 pat = GEN_FCN (icode) (op[0], op[1]);
20862 break;
20863
20864 case 3:
20865 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20866 break;
20867
20868 case 4:
20869 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20870 break;
20871
20872 case 5:
20873 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20874 break;
20875
20876 default:
20877 gcc_unreachable ();
20878 }
20879
20880 if (!pat)
20881 return 0;
20882
20883 emit_insn (pat);
20884
20885 return target;
20886 }
20887
20888 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20889 constants defined per-instruction or per instruction-variant. Instead, the
20890 required info is looked up in the table neon_builtin_data. */
20891 static rtx
20892 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20893 {
20894 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20895 neon_itype itype = d->itype;
20896 enum insn_code icode = d->code;
20897 neon_builtin_type_mode type_mode = d->mode;
20898
20899 switch (itype)
20900 {
20901 case NEON_UNOP:
20902 case NEON_CONVERT:
20903 case NEON_DUPLANE:
20904 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20905 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20906
20907 case NEON_BINOP:
20908 case NEON_SETLANE:
20909 case NEON_SCALARMUL:
20910 case NEON_SCALARMULL:
20911 case NEON_SCALARMULH:
20912 case NEON_SHIFTINSERT:
20913 case NEON_LOGICBINOP:
20914 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20915 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20916 NEON_ARG_STOP);
20917
20918 case NEON_TERNOP:
20919 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20920 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20921 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20922
20923 case NEON_GETLANE:
20924 case NEON_FIXCONV:
20925 case NEON_SHIFTIMM:
20926 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20927 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20928 NEON_ARG_STOP);
20929
20930 case NEON_CREATE:
20931 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20932 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20933
20934 case NEON_DUP:
20935 case NEON_SPLIT:
20936 case NEON_REINTERP:
20937 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20938 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20939
20940 case NEON_COMBINE:
20941 case NEON_VTBL:
20942 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20943 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20944
20945 case NEON_RESULTPAIR:
20946 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20947 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20948 NEON_ARG_STOP);
20949
20950 case NEON_LANEMUL:
20951 case NEON_LANEMULL:
20952 case NEON_LANEMULH:
20953 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20954 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20955 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20956
20957 case NEON_LANEMAC:
20958 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20959 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20960 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20961
20962 case NEON_SHIFTACC:
20963 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20964 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20965 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20966
20967 case NEON_SCALARMAC:
20968 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20969 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20970 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20971
20972 case NEON_SELECT:
20973 case NEON_VTBX:
20974 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20975 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20976 NEON_ARG_STOP);
20977
20978 case NEON_LOAD1:
20979 case NEON_LOADSTRUCT:
20980 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20981 NEON_ARG_MEMORY, NEON_ARG_STOP);
20982
20983 case NEON_LOAD1LANE:
20984 case NEON_LOADSTRUCTLANE:
20985 return arm_expand_neon_args (target, icode, 1, type_mode, exp,
20986 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20987 NEON_ARG_STOP);
20988
20989 case NEON_STORE1:
20990 case NEON_STORESTRUCT:
20991 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20992 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20993
20994 case NEON_STORE1LANE:
20995 case NEON_STORESTRUCTLANE:
20996 return arm_expand_neon_args (target, icode, 0, type_mode, exp,
20997 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20998 NEON_ARG_STOP);
20999 }
21000
21001 gcc_unreachable ();
21002 }
21003
21004 /* Emit code to reinterpret one Neon type as another, without altering bits. */
21005 void
21006 neon_reinterpret (rtx dest, rtx src)
21007 {
21008 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
21009 }
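
/* Sketch of the user-level effect (intrinsic name taken from arm_neon.h,
   and the routing through this helper is assumed for illustration):

     #include <arm_neon.h>

     uint8x8_t
     as_bytes (int32x2_t v)
     {
       return vreinterpret_u8_s32 (v);
     }

   A reinterpret like this is just a change of type; the generated code is
   a plain register-to-register move and no bits are altered.  */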
21010
21011 /* Emit code to place a Neon pair result in memory locations (with equal
21012 registers). */
21013 void
21014 neon_emit_pair_result_insn (enum machine_mode mode,
21015 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
21016 rtx op1, rtx op2)
21017 {
21018 rtx mem = gen_rtx_MEM (mode, destaddr);
21019 rtx tmp1 = gen_reg_rtx (mode);
21020 rtx tmp2 = gen_reg_rtx (mode);
21021
21022 emit_insn (intfn (tmp1, op1, op2, tmp2));
21023
21024 emit_move_insn (mem, tmp1);
21025 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
21026 emit_move_insn (mem, tmp2);
21027 }
21028
21029 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
21030 not to early-clobber SRC registers in the process.
21031
21032 We assume that the operands described by SRC and DEST represent a
21033 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
21034 number of components into which the copy has been decomposed. */
21035 void
21036 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
21037 {
21038 unsigned int i;
21039
21040 if (!reg_overlap_mentioned_p (operands[0], operands[1])
21041 || REGNO (operands[0]) < REGNO (operands[1]))
21042 {
21043 for (i = 0; i < count; i++)
21044 {
21045 operands[2 * i] = dest[i];
21046 operands[2 * i + 1] = src[i];
21047 }
21048 }
21049 else
21050 {
21051 for (i = 0; i < count; i++)
21052 {
21053 operands[2 * i] = dest[count - i - 1];
21054 operands[2 * i + 1] = src[count - i - 1];
21055 }
21056 }
21057 }
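
/* Worked example of the ordering handled above (register numbers purely
   illustrative): a two-part copy {q1,q2} <- {q0,q1} overlaps with the
   destination numbered higher than the source, so the moves are emitted
   in reverse order (q2 <- q1 first, then q1 <- q0); the forward order
   would overwrite q1 before it had been read.  A copy {q0,q1} <- {q1,q2}
   is safe in the forward order and is left alone.  */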
21058
21059 /* Split operands into moves from op[1] + op[2] into op[0]. */
21060
21061 void
21062 neon_split_vcombine (rtx operands[3])
21063 {
21064 unsigned int dest = REGNO (operands[0]);
21065 unsigned int src1 = REGNO (operands[1]);
21066 unsigned int src2 = REGNO (operands[2]);
21067 enum machine_mode halfmode = GET_MODE (operands[1]);
21068 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
21069 rtx destlo, desthi;
21070
21071 if (src1 == dest && src2 == dest + halfregs)
21072 {
21073 /* No-op move. Can't split to nothing; emit something. */
21074 emit_note (NOTE_INSN_DELETED);
21075 return;
21076 }
21077
21078 /* Preserve register attributes for variable tracking. */
21079 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
21080 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
21081 GET_MODE_SIZE (halfmode));
21082
21083 /* Special case of reversed high/low parts. Use VSWP. */
21084 if (src2 == dest && src1 == dest + halfregs)
21085 {
21086 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
21087 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
21088 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
21089 return;
21090 }
21091
21092 if (!reg_overlap_mentioned_p (operands[2], destlo))
21093 {
21094 /* Try to avoid unnecessary moves if part of the result
21095 is in the right place already. */
21096 if (src1 != dest)
21097 emit_move_insn (destlo, operands[1]);
21098 if (src2 != dest + halfregs)
21099 emit_move_insn (desthi, operands[2]);
21100 }
21101 else
21102 {
21103 if (src2 != dest + halfregs)
21104 emit_move_insn (desthi, operands[2]);
21105 if (src1 != dest)
21106 emit_move_insn (destlo, operands[1]);
21107 }
21108 }
21109
21110 /* Expand an expression EXP that calls a built-in function,
21111 with result going to TARGET if that's convenient
21112 (and in mode MODE if that's convenient).
21113 SUBTARGET may be used as the target for computing one of EXP's operands.
21114 IGNORE is nonzero if the value is to be ignored. */
21115
21116 static rtx
21117 arm_expand_builtin (tree exp,
21118 rtx target,
21119 rtx subtarget ATTRIBUTE_UNUSED,
21120 enum machine_mode mode ATTRIBUTE_UNUSED,
21121 int ignore ATTRIBUTE_UNUSED)
21122 {
21123 const struct builtin_description * d;
21124 enum insn_code icode;
21125 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
21126 tree arg0;
21127 tree arg1;
21128 tree arg2;
21129 rtx op0;
21130 rtx op1;
21131 rtx op2;
21132 rtx pat;
21133 int fcode = DECL_FUNCTION_CODE (fndecl);
21134 size_t i;
21135 enum machine_mode tmode;
21136 enum machine_mode mode0;
21137 enum machine_mode mode1;
21138 enum machine_mode mode2;
21139
21140 if (fcode >= ARM_BUILTIN_NEON_BASE)
21141 return arm_expand_neon_builtin (fcode, exp, target);
21142
21143 switch (fcode)
21144 {
21145 case ARM_BUILTIN_TEXTRMSB:
21146 case ARM_BUILTIN_TEXTRMUB:
21147 case ARM_BUILTIN_TEXTRMSH:
21148 case ARM_BUILTIN_TEXTRMUH:
21149 case ARM_BUILTIN_TEXTRMSW:
21150 case ARM_BUILTIN_TEXTRMUW:
21151 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
21152 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
21153 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
21154 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
21155 : CODE_FOR_iwmmxt_textrmw);
21156
21157 arg0 = CALL_EXPR_ARG (exp, 0);
21158 arg1 = CALL_EXPR_ARG (exp, 1);
21159 op0 = expand_normal (arg0);
21160 op1 = expand_normal (arg1);
21161 tmode = insn_data[icode].operand[0].mode;
21162 mode0 = insn_data[icode].operand[1].mode;
21163 mode1 = insn_data[icode].operand[2].mode;
21164
21165 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21166 op0 = copy_to_mode_reg (mode0, op0);
21167 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21168 {
21169 /* @@@ better error message */
21170 error ("selector must be an immediate");
21171 return gen_reg_rtx (tmode);
21172 }
21173 if (target == 0
21174 || GET_MODE (target) != tmode
21175 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21176 target = gen_reg_rtx (tmode);
21177 pat = GEN_FCN (icode) (target, op0, op1);
21178 if (! pat)
21179 return 0;
21180 emit_insn (pat);
21181 return target;
21182
21183 case ARM_BUILTIN_TINSRB:
21184 case ARM_BUILTIN_TINSRH:
21185 case ARM_BUILTIN_TINSRW:
21186 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
21187 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
21188 : CODE_FOR_iwmmxt_tinsrw);
21189 arg0 = CALL_EXPR_ARG (exp, 0);
21190 arg1 = CALL_EXPR_ARG (exp, 1);
21191 arg2 = CALL_EXPR_ARG (exp, 2);
21192 op0 = expand_normal (arg0);
21193 op1 = expand_normal (arg1);
21194 op2 = expand_normal (arg2);
21195 tmode = insn_data[icode].operand[0].mode;
21196 mode0 = insn_data[icode].operand[1].mode;
21197 mode1 = insn_data[icode].operand[2].mode;
21198 mode2 = insn_data[icode].operand[3].mode;
21199
21200 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21201 op0 = copy_to_mode_reg (mode0, op0);
21202 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21203 op1 = copy_to_mode_reg (mode1, op1);
21204 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21205 {
21206 /* @@@ better error message */
21207 error ("selector must be an immediate");
21208 return const0_rtx;
21209 }
21210 if (target == 0
21211 || GET_MODE (target) != tmode
21212 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21213 target = gen_reg_rtx (tmode);
21214 pat = GEN_FCN (icode) (target, op0, op1, op2);
21215 if (! pat)
21216 return 0;
21217 emit_insn (pat);
21218 return target;
21219
21220 case ARM_BUILTIN_SETWCX:
21221 arg0 = CALL_EXPR_ARG (exp, 0);
21222 arg1 = CALL_EXPR_ARG (exp, 1);
21223 op0 = force_reg (SImode, expand_normal (arg0));
21224 op1 = expand_normal (arg1);
21225 emit_insn (gen_iwmmxt_tmcr (op1, op0));
21226 return 0;
21227
21228 case ARM_BUILTIN_GETWCX:
21229 arg0 = CALL_EXPR_ARG (exp, 0);
21230 op0 = expand_normal (arg0);
21231 target = gen_reg_rtx (SImode);
21232 emit_insn (gen_iwmmxt_tmrc (target, op0));
21233 return target;
21234
21235 case ARM_BUILTIN_WSHUFH:
21236 icode = CODE_FOR_iwmmxt_wshufh;
21237 arg0 = CALL_EXPR_ARG (exp, 0);
21238 arg1 = CALL_EXPR_ARG (exp, 1);
21239 op0 = expand_normal (arg0);
21240 op1 = expand_normal (arg1);
21241 tmode = insn_data[icode].operand[0].mode;
21242 mode1 = insn_data[icode].operand[1].mode;
21243 mode2 = insn_data[icode].operand[2].mode;
21244
21245 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21246 op0 = copy_to_mode_reg (mode1, op0);
21247 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21248 {
21249 /* @@@ better error message */
21250 error ("mask must be an immediate");
21251 return const0_rtx;
21252 }
21253 if (target == 0
21254 || GET_MODE (target) != tmode
21255 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21256 target = gen_reg_rtx (tmode);
21257 pat = GEN_FCN (icode) (target, op0, op1);
21258 if (! pat)
21259 return 0;
21260 emit_insn (pat);
21261 return target;
21262
21263 case ARM_BUILTIN_WSADB:
21264 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
21265 case ARM_BUILTIN_WSADH:
21266 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
21267 case ARM_BUILTIN_WSADBZ:
21268 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21269 case ARM_BUILTIN_WSADHZ:
21270 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21271
21272 /* Several three-argument builtins. */
21273 case ARM_BUILTIN_WMACS:
21274 case ARM_BUILTIN_WMACU:
21275 case ARM_BUILTIN_WALIGN:
21276 case ARM_BUILTIN_TMIA:
21277 case ARM_BUILTIN_TMIAPH:
21278 case ARM_BUILTIN_TMIATT:
21279 case ARM_BUILTIN_TMIATB:
21280 case ARM_BUILTIN_TMIABT:
21281 case ARM_BUILTIN_TMIABB:
21282 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21283 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21284 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21285 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21286 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21287 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21288 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21289 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21290 : CODE_FOR_iwmmxt_walign);
21291 arg0 = CALL_EXPR_ARG (exp, 0);
21292 arg1 = CALL_EXPR_ARG (exp, 1);
21293 arg2 = CALL_EXPR_ARG (exp, 2);
21294 op0 = expand_normal (arg0);
21295 op1 = expand_normal (arg1);
21296 op2 = expand_normal (arg2);
21297 tmode = insn_data[icode].operand[0].mode;
21298 mode0 = insn_data[icode].operand[1].mode;
21299 mode1 = insn_data[icode].operand[2].mode;
21300 mode2 = insn_data[icode].operand[3].mode;
21301
21302 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21303 op0 = copy_to_mode_reg (mode0, op0);
21304 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21305 op1 = copy_to_mode_reg (mode1, op1);
21306 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21307 op2 = copy_to_mode_reg (mode2, op2);
21308 if (target == 0
21309 || GET_MODE (target) != tmode
21310 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21311 target = gen_reg_rtx (tmode);
21312 pat = GEN_FCN (icode) (target, op0, op1, op2);
21313 if (! pat)
21314 return 0;
21315 emit_insn (pat);
21316 return target;
21317
21318 case ARM_BUILTIN_WZERO:
21319 target = gen_reg_rtx (DImode);
21320 emit_insn (gen_iwmmxt_clrdi (target));
21321 return target;
21322
21323 case ARM_BUILTIN_THREAD_POINTER:
21324 return arm_load_tp (target);
21325
21326 default:
21327 break;
21328 }
21329
21330 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21331 if (d->code == (const enum arm_builtins) fcode)
21332 return arm_expand_binop_builtin (d->icode, exp, target);
21333
21334 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21335 if (d->code == (const enum arm_builtins) fcode)
21336 return arm_expand_unop_builtin (d->icode, exp, target, 0);
21337
21338 /* @@@ Should really do something sensible here. */
21339 return NULL_RTX;
21340 }
21341 \f
21342 /* Return the number (counting from 0) of
21343 the least significant set bit in MASK. */
21344
21345 inline static int
21346 number_of_first_bit_set (unsigned mask)
21347 {
21348 return ctz_hwi (mask);
21349 }
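
/* For example, number_of_first_bit_set (0x18) is 3: bit 3 (the r3
   position in a register mask) is the lowest bit set in 0x18.  */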
21350
21351 /* Like emit_multi_reg_push, but allowing for a different set of
21352 registers to be described as saved. MASK is the set of registers
21353 to be saved; REAL_REGS is the set of registers to be described as
21354 saved. If REAL_REGS is 0, only describe the stack adjustment. */
21355
21356 static rtx
21357 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21358 {
21359 unsigned long regno;
21360 rtx par[10], tmp, reg, insn;
21361 int i, j;
21362
21363 /* Build the parallel of the registers actually being stored. */
21364 for (i = 0; mask; ++i, mask &= mask - 1)
21365 {
21366 regno = ctz_hwi (mask);
21367 reg = gen_rtx_REG (SImode, regno);
21368
21369 if (i == 0)
21370 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21371 else
21372 tmp = gen_rtx_USE (VOIDmode, reg);
21373
21374 par[i] = tmp;
21375 }
21376
21377 tmp = plus_constant (stack_pointer_rtx, -4 * i);
21378 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21379 tmp = gen_frame_mem (BLKmode, tmp);
21380 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21381 par[0] = tmp;
21382
21383 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21384 insn = emit_insn (tmp);
21385
21386 /* Always build the stack adjustment note for unwind info. */
21387 tmp = plus_constant (stack_pointer_rtx, -4 * i);
21388 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21389 par[0] = tmp;
21390
21391 /* Build the parallel of the registers recorded as saved for unwind. */
21392 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21393 {
21394 regno = ctz_hwi (real_regs);
21395 reg = gen_rtx_REG (SImode, regno);
21396
21397 tmp = plus_constant (stack_pointer_rtx, j * 4);
21398 tmp = gen_frame_mem (SImode, tmp);
21399 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21400 RTX_FRAME_RELATED_P (tmp) = 1;
21401 par[j + 1] = tmp;
21402 }
21403
21404 if (j == 0)
21405 tmp = par[0];
21406 else
21407 {
21408 RTX_FRAME_RELATED_P (par[0]) = 1;
21409 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21410 }
21411
21412 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
21413
21414 return insn;
21415 }
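
/* Rough illustration (exact RTL elided): for MASK == REAL_REGS == {r4, r5, lr}
   the code above emits a single push of those three registers and attaches a
   REG_FRAME_RELATED_EXPR note describing sp = sp - 12 together with the three
   individual stores; the unwinder reads that note, which is what allows
   REAL_REGS to describe a different set than the one actually pushed.  */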
21416
21417 /* Emit code to pop registers from the stack.  F is the assembly file
21418    to write to.  MASK is the set of registers to pop.  */
21419 static void
21420 thumb_pop (FILE *f, unsigned long mask)
21421 {
21422 int regno;
21423 int lo_mask = mask & 0xFF;
21424 int pushed_words = 0;
21425
21426 gcc_assert (mask);
21427
21428 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
21429 {
21430       /* Special case.  Do not generate a POP PC statement here; do it
21431 	 in thumb_exit ().  */
21432 thumb_exit (f, -1);
21433 return;
21434 }
21435
21436 fprintf (f, "\tpop\t{");
21437
21438 /* Look at the low registers first. */
21439 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21440 {
21441 if (lo_mask & 1)
21442 {
21443 asm_fprintf (f, "%r", regno);
21444
21445 if ((lo_mask & ~1) != 0)
21446 fprintf (f, ", ");
21447
21448 pushed_words++;
21449 }
21450 }
21451
21452 if (mask & (1 << PC_REGNUM))
21453 {
21454 /* Catch popping the PC. */
21455 if (TARGET_INTERWORK || TARGET_BACKTRACE
21456 || crtl->calls_eh_return)
21457 {
21458 	    /* The PC is never popped directly; instead
21459 	       it is popped into r3 and then BX is used.  */
21460 fprintf (f, "}\n");
21461
21462 thumb_exit (f, -1);
21463
21464 return;
21465 }
21466 else
21467 {
21468 if (mask & 0xFF)
21469 fprintf (f, ", ");
21470
21471 asm_fprintf (f, "%r", PC_REGNUM);
21472 }
21473 }
21474
21475 fprintf (f, "}\n");
21476 }
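
/* For example (illustrative only): with MASK covering r4, r5 and the PC, and
   none of the interworking, backtrace or EH-return conditions in effect, the
   loop above prints

     pop {r4, r5, pc}

   otherwise the PC case is diverted through thumb_exit so that the return is
   done with BX.  */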
21477
21478 /* Generate code to return from a thumb function.
21479 If 'reg_containing_return_addr' is -1, then the return address is
21480 actually on the stack, at the stack pointer. */
21481 static void
21482 thumb_exit (FILE *f, int reg_containing_return_addr)
21483 {
21484 unsigned regs_available_for_popping;
21485 unsigned regs_to_pop;
21486 int pops_needed;
21487 unsigned available;
21488 unsigned required;
21489 int mode;
21490 int size;
21491 int restore_a4 = FALSE;
21492
21493 /* Compute the registers we need to pop. */
21494 regs_to_pop = 0;
21495 pops_needed = 0;
21496
21497 if (reg_containing_return_addr == -1)
21498 {
21499 regs_to_pop |= 1 << LR_REGNUM;
21500 ++pops_needed;
21501 }
21502
21503 if (TARGET_BACKTRACE)
21504 {
21505 /* Restore the (ARM) frame pointer and stack pointer. */
21506 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21507 pops_needed += 2;
21508 }
21509
21510 /* If there is nothing to pop then just emit the BX instruction and
21511 return. */
21512 if (pops_needed == 0)
21513 {
21514 if (crtl->calls_eh_return)
21515 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21516
21517 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21518 return;
21519 }
21520 /* Otherwise if we are not supporting interworking and we have not created
21521 a backtrace structure and the function was not entered in ARM mode then
21522 just pop the return address straight into the PC. */
21523 else if (!TARGET_INTERWORK
21524 && !TARGET_BACKTRACE
21525 && !is_called_in_ARM_mode (current_function_decl)
21526 && !crtl->calls_eh_return)
21527 {
21528 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21529 return;
21530 }
21531
21532 /* Find out how many of the (return) argument registers we can corrupt. */
21533 regs_available_for_popping = 0;
21534
21535 /* If returning via __builtin_eh_return, the bottom three registers
21536 all contain information needed for the return. */
21537 if (crtl->calls_eh_return)
21538 size = 12;
21539 else
21540 {
21541 	/* Deduce the registers used from the function's return value
21542 	   where possible.  This is more reliable than examining
21543 	   df_regs_ever_live_p () because that will be set if the register is
21544 	   ever used in the function, not just if the register is used
21545 	   to hold a return value.  */
21546
21547 if (crtl->return_rtx != 0)
21548 mode = GET_MODE (crtl->return_rtx);
21549 else
21550 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21551
21552 size = GET_MODE_SIZE (mode);
21553
21554 if (size == 0)
21555 {
21556 /* In a void function we can use any argument register.
21557 In a function that returns a structure on the stack
21558 we can use the second and third argument registers. */
21559 if (mode == VOIDmode)
21560 regs_available_for_popping =
21561 (1 << ARG_REGISTER (1))
21562 | (1 << ARG_REGISTER (2))
21563 | (1 << ARG_REGISTER (3));
21564 else
21565 regs_available_for_popping =
21566 (1 << ARG_REGISTER (2))
21567 | (1 << ARG_REGISTER (3));
21568 }
21569 else if (size <= 4)
21570 regs_available_for_popping =
21571 (1 << ARG_REGISTER (2))
21572 | (1 << ARG_REGISTER (3));
21573 else if (size <= 8)
21574 regs_available_for_popping =
21575 (1 << ARG_REGISTER (3));
21576 }
21577
21578 /* Match registers to be popped with registers into which we pop them. */
21579 for (available = regs_available_for_popping,
21580 required = regs_to_pop;
21581 required != 0 && available != 0;
21582 available &= ~(available & - available),
21583 required &= ~(required & - required))
21584 -- pops_needed;
21585
21586 /* If we have any popping registers left over, remove them. */
21587 if (available > 0)
21588 regs_available_for_popping &= ~available;
21589
21590 /* Otherwise if we need another popping register we can use
21591 the fourth argument register. */
21592 else if (pops_needed)
21593 {
21594 /* If we have not found any free argument registers and
21595 reg a4 contains the return address, we must move it. */
21596 if (regs_available_for_popping == 0
21597 && reg_containing_return_addr == LAST_ARG_REGNUM)
21598 {
21599 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21600 reg_containing_return_addr = LR_REGNUM;
21601 }
21602 else if (size > 12)
21603 {
21604 /* Register a4 is being used to hold part of the return value,
21605 but we have dire need of a free, low register. */
21606 restore_a4 = TRUE;
21607
21608 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
21609 }
21610
21611 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21612 {
21613 /* The fourth argument register is available. */
21614 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21615
21616 --pops_needed;
21617 }
21618 }
21619
21620 /* Pop as many registers as we can. */
21621 thumb_pop (f, regs_available_for_popping);
21622
21623 /* Process the registers we popped. */
21624 if (reg_containing_return_addr == -1)
21625 {
21626 /* The return address was popped into the lowest numbered register. */
21627 regs_to_pop &= ~(1 << LR_REGNUM);
21628
21629 reg_containing_return_addr =
21630 number_of_first_bit_set (regs_available_for_popping);
21631
21632 	 /* Remove this register from the mask of available registers, so that
21633 	    the return address will not be corrupted by further pops.  */
21634 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21635 }
21636
21637 /* If we popped other registers then handle them here. */
21638 if (regs_available_for_popping)
21639 {
21640 int frame_pointer;
21641
21642 /* Work out which register currently contains the frame pointer. */
21643 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21644
21645 /* Move it into the correct place. */
21646 asm_fprintf (f, "\tmov\t%r, %r\n",
21647 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21648
21649 /* (Temporarily) remove it from the mask of popped registers. */
21650 regs_available_for_popping &= ~(1 << frame_pointer);
21651 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21652
21653 if (regs_available_for_popping)
21654 {
21655 int stack_pointer;
21656
21657 	  /* We popped the stack pointer as well;
21658 	     find the register that contains it.  */
21659 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21660
21661 /* Move it into the stack register. */
21662 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21663
21664 /* At this point we have popped all necessary registers, so
21665 do not worry about restoring regs_available_for_popping
21666 to its correct value:
21667
21668 assert (pops_needed == 0)
21669 assert (regs_available_for_popping == (1 << frame_pointer))
21670 assert (regs_to_pop == (1 << STACK_POINTER)) */
21671 }
21672 else
21673 {
21674 /* Since we have just moved the popped value into the frame
21675 pointer, the popping register is available for reuse, and
21676 we know that we still have the stack pointer left to pop. */
21677 regs_available_for_popping |= (1 << frame_pointer);
21678 }
21679 }
21680
21681 /* If we still have registers left on the stack, but we no longer have
21682 any registers into which we can pop them, then we must move the return
21683 address into the link register and make available the register that
21684 contained it. */
21685 if (regs_available_for_popping == 0 && pops_needed > 0)
21686 {
21687 regs_available_for_popping |= 1 << reg_containing_return_addr;
21688
21689 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21690 reg_containing_return_addr);
21691
21692 reg_containing_return_addr = LR_REGNUM;
21693 }
21694
21695 /* If we have registers left on the stack then pop some more.
21696 We know that at most we will want to pop FP and SP. */
21697 if (pops_needed > 0)
21698 {
21699 int popped_into;
21700 int move_to;
21701
21702 thumb_pop (f, regs_available_for_popping);
21703
21704 /* We have popped either FP or SP.
21705 Move whichever one it is into the correct register. */
21706 popped_into = number_of_first_bit_set (regs_available_for_popping);
21707 move_to = number_of_first_bit_set (regs_to_pop);
21708
21709 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21710
21711 regs_to_pop &= ~(1 << move_to);
21712
21713 --pops_needed;
21714 }
21715
21716 /* If we still have not popped everything then we must have only
21717 had one register available to us and we are now popping the SP. */
21718 if (pops_needed > 0)
21719 {
21720 int popped_into;
21721
21722 thumb_pop (f, regs_available_for_popping);
21723
21724 popped_into = number_of_first_bit_set (regs_available_for_popping);
21725
21726 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21727 /*
21728 assert (regs_to_pop == (1 << STACK_POINTER))
21729 assert (pops_needed == 1)
21730 */
21731 }
21732
21733 /* If necessary restore the a4 register. */
21734 if (restore_a4)
21735 {
21736 if (reg_containing_return_addr != LR_REGNUM)
21737 {
21738 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21739 reg_containing_return_addr = LR_REGNUM;
21740 }
21741
21742 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21743 }
21744
21745 if (crtl->calls_eh_return)
21746 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21747
21748 /* Return to caller. */
21749 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21750 }
21751 \f
21752 /* Scan INSN just before assembler is output for it.
21753 For Thumb-1, we track the status of the condition codes; this
21754 information is used in the cbranchsi4_insn pattern. */
21755 void
21756 thumb1_final_prescan_insn (rtx insn)
21757 {
21758 if (flag_print_asm_name)
21759 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21760 INSN_ADDRESSES (INSN_UID (insn)));
21761 /* Don't overwrite the previous setter when we get to a cbranch. */
21762 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21763 {
21764 enum attr_conds conds;
21765
21766 if (cfun->machine->thumb1_cc_insn)
21767 {
21768 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21769 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21770 CC_STATUS_INIT;
21771 }
21772 conds = get_attr_conds (insn);
21773 if (conds == CONDS_SET)
21774 {
21775 rtx set = single_set (insn);
21776 cfun->machine->thumb1_cc_insn = insn;
21777 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21778 cfun->machine->thumb1_cc_op1 = const0_rtx;
21779 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21780 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21781 {
21782 rtx src1 = XEXP (SET_SRC (set), 1);
21783 if (src1 == const0_rtx)
21784 cfun->machine->thumb1_cc_mode = CCmode;
21785 }
21786 }
21787 else if (conds != CONDS_NOCOND)
21788 cfun->machine->thumb1_cc_insn = NULL_RTX;
21789 }
21790 }
21791
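/* Return 1 if VAL (viewed as a 32-bit value) is an 8-bit constant shifted
   left by between 0 and 24 bits, e.g. 0x0001FE00 == 0xFF << 9.  Such
   constants can be built in Thumb-1 with a move-immediate followed by a
   left shift, so they are cheap to materialize.  */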
21792 int
21793 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21794 {
21795 unsigned HOST_WIDE_INT mask = 0xff;
21796 int i;
21797
21798 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21799 if (val == 0) /* XXX */
21800 return 0;
21801
21802 for (i = 0; i < 25; i++)
21803 if ((val & (mask << i)) == val)
21804 return 1;
21805
21806 return 0;
21807 }
21808
21809 /* Returns nonzero if the current function contains,
21810 or might contain, a far jump. */
21811 static int
21812 thumb_far_jump_used_p (void)
21813 {
21814 rtx insn;
21815
21816 /* This test is only important for leaf functions. */
21817 /* assert (!leaf_function_p ()); */
21818
21819 /* If we have already decided that far jumps may be used,
21820 do not bother checking again, and always return true even if
21821 it turns out that they are not being used. Once we have made
21822 the decision that far jumps are present (and that hence the link
21823 register will be pushed onto the stack) we cannot go back on it. */
21824 if (cfun->machine->far_jump_used)
21825 return 1;
21826
21827 /* If this function is not being called from the prologue/epilogue
21828 generation code then it must be being called from the
21829 INITIAL_ELIMINATION_OFFSET macro. */
21830 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21831 {
21832 /* In this case we know that we are being asked about the elimination
21833 of the arg pointer register. If that register is not being used,
21834 then there are no arguments on the stack, and we do not have to
21835 worry that a far jump might force the prologue to push the link
21836 register, changing the stack offsets. In this case we can just
21837 return false, since the presence of far jumps in the function will
21838 not affect stack offsets.
21839
21840 If the arg pointer is live (or if it was live, but has now been
21841 eliminated and so set to dead) then we do have to test to see if
21842 the function might contain a far jump. This test can lead to some
21843 false negatives, since before reload is completed, the length of
21844 branch instructions is not known, so gcc defaults to returning their
21845 longest length, which in turn sets the far jump attribute to true.
21846
21847 A false negative will not result in bad code being generated, but it
21848 will result in a needless push and pop of the link register. We
21849 hope that this does not occur too often.
21850
21851 If we need doubleword stack alignment this could affect the other
21852 elimination offsets so we can't risk getting it wrong. */
21853 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21854 cfun->machine->arg_pointer_live = 1;
21855 else if (!cfun->machine->arg_pointer_live)
21856 return 0;
21857 }
21858
21859 /* Check to see if the function contains a branch
21860 insn with the far jump attribute set. */
21861 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21862 {
21863 if (GET_CODE (insn) == JUMP_INSN
21864 /* Ignore tablejump patterns. */
21865 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21866 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21867 && get_attr_far_jump (insn) == FAR_JUMP_YES
21868 )
21869 {
21870 /* Record the fact that we have decided that
21871 the function does use far jumps. */
21872 cfun->machine->far_jump_used = 1;
21873 return 1;
21874 }
21875 }
21876
21877 return 0;
21878 }
21879
21880 /* Return nonzero if FUNC must be entered in ARM mode. */
21881 int
21882 is_called_in_ARM_mode (tree func)
21883 {
21884 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21885
21886 /* Ignore the problem about functions whose address is taken. */
21887 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21888 return TRUE;
21889
21890 #ifdef ARM_PE
21891 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21892 #else
21893 return FALSE;
21894 #endif
21895 }
21896
21897 /* Given the stack offsets and register mask in OFFSETS, decide how
21898 many additional registers to push instead of subtracting a constant
21899 from SP. For epilogues the principle is the same except we use pop.
21900 FOR_PROLOGUE indicates which we're generating. */
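/* For example, when optimizing for size, if the frame needs 16 bytes,
   r0-r3 are dead and the prologue is already pushing something (such as
   LR), returning 4 lets those four argument registers be added to the
   existing push instead of emitting a separate subtraction from SP.  */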
21901 static int
21902 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21903 {
21904 HOST_WIDE_INT amount;
21905 unsigned long live_regs_mask = offsets->saved_regs_mask;
21906 /* Extract a mask of the ones we can give to the Thumb's push/pop
21907 instruction. */
21908 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21909 /* Then count how many other high registers will need to be pushed. */
21910 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21911 int n_free, reg_base;
21912
21913 if (!for_prologue && frame_pointer_needed)
21914 amount = offsets->locals_base - offsets->saved_regs;
21915 else
21916 amount = offsets->outgoing_args - offsets->saved_regs;
21917
21918 /* If the stack frame size is 512 exactly, we can save one load
21919 instruction, which should make this a win even when optimizing
21920 for speed. */
21921 if (!optimize_size && amount != 512)
21922 return 0;
21923
21924 /* Can't do this if there are high registers to push. */
21925 if (high_regs_pushed != 0)
21926 return 0;
21927
21928 /* Shouldn't do it in the prologue if no registers would normally
21929 be pushed at all. In the epilogue, also allow it if we'll have
21930 a pop insn for the PC. */
21931 if (l_mask == 0
21932 && (for_prologue
21933 || TARGET_BACKTRACE
21934 || (live_regs_mask & 1 << LR_REGNUM) == 0
21935 || TARGET_INTERWORK
21936 || crtl->args.pretend_args_size != 0))
21937 return 0;
21938
21939 /* Don't do this if thumb_expand_prologue wants to emit instructions
21940 between the push and the stack frame allocation. */
21941 if (for_prologue
21942 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21943 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21944 return 0;
21945
21946 reg_base = 0;
21947 n_free = 0;
21948 if (!for_prologue)
21949 {
21950 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21951 live_regs_mask >>= reg_base;
21952 }
21953
21954 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21955 && (for_prologue || call_used_regs[reg_base + n_free]))
21956 {
21957 live_regs_mask >>= 1;
21958 n_free++;
21959 }
21960
21961 if (n_free == 0)
21962 return 0;
21963 gcc_assert (amount / 4 * 4 == amount);
21964
21965 if (amount >= 512 && (amount - n_free * 4) < 512)
21966 return (amount - 508) / 4;
21967 if (amount <= n_free * 4)
21968 return amount / 4;
21969 return 0;
21970 }
21971
21972 /* The bits which aren't usefully expanded as rtl. */
21973 const char *
21974 thumb_unexpanded_epilogue (void)
21975 {
21976 arm_stack_offsets *offsets;
21977 int regno;
21978 unsigned long live_regs_mask = 0;
21979 int high_regs_pushed = 0;
21980 int extra_pop;
21981 int had_to_push_lr;
21982 int size;
21983
21984 if (cfun->machine->return_used_this_function != 0)
21985 return "";
21986
21987 if (IS_NAKED (arm_current_func_type ()))
21988 return "";
21989
21990 offsets = arm_get_frame_offsets ();
21991 live_regs_mask = offsets->saved_regs_mask;
21992 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21993
21994 /* We can deduce the registers used from the function's return value.
21995 This is more reliable than examining df_regs_ever_live_p () because that
21996 will be set if the register is ever used in the function, not just if
21997 the register is used to hold a return value. */
21998 size = arm_size_return_regs ();
21999
22000 extra_pop = thumb1_extra_regs_pushed (offsets, false);
22001 if (extra_pop > 0)
22002 {
22003 unsigned long extra_mask = (1 << extra_pop) - 1;
22004 live_regs_mask |= extra_mask << ((size + UNITS_PER_WORD - 1)
22005 / UNITS_PER_WORD);
22006 }
22007
22008 /* The prolog may have pushed some high registers to use as
22009 work registers. e.g. the testsuite file:
22010 gcc/testsuite/gcc.c-torture/execute/complex-2.c
22011 compiles to produce:
22012 push {r4, r5, r6, r7, lr}
22013 mov r7, r9
22014 mov r6, r8
22015 push {r6, r7}
22016 as part of the prolog. We have to undo that pushing here. */
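   /* In outline, and assuming two pushed high registers with r4 and r5 as
      the chosen low scratch registers, the undo emitted below is roughly:
	 pop {r4, r5}
	 mov r8, r4
	 mov r9, r5  */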
22017
22018 if (high_regs_pushed)
22019 {
22020 unsigned long mask = live_regs_mask & 0xff;
22021 int next_hi_reg;
22022
22023 /* The available low registers depend on the size of the value we are
22024 returning. */
22025 if (size <= 12)
22026 mask |= 1 << 3;
22027 if (size <= 8)
22028 mask |= 1 << 2;
22029
22030 if (mask == 0)
22031 /* Oh dear! We have no low registers into which we can pop
22032 high registers! */
22033 internal_error
22034 ("no low registers available for popping high registers");
22035
22036 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
22037 if (live_regs_mask & (1 << next_hi_reg))
22038 break;
22039
22040 while (high_regs_pushed)
22041 {
22042 /* Find lo register(s) into which the high register(s) can
22043 be popped. */
22044 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22045 {
22046 if (mask & (1 << regno))
22047 high_regs_pushed--;
22048 if (high_regs_pushed == 0)
22049 break;
22050 }
22051
22052 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
22053
22054 /* Pop the values into the low register(s). */
22055 thumb_pop (asm_out_file, mask);
22056
22057 /* Move the value(s) into the high registers. */
22058 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22059 {
22060 if (mask & (1 << regno))
22061 {
22062 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
22063 regno);
22064
22065 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
22066 if (live_regs_mask & (1 << next_hi_reg))
22067 break;
22068 }
22069 }
22070 }
22071 live_regs_mask &= ~0x0f00;
22072 }
22073
22074 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
22075 live_regs_mask &= 0xff;
22076
22077 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
22078 {
22079 /* Pop the return address into the PC. */
22080 if (had_to_push_lr)
22081 live_regs_mask |= 1 << PC_REGNUM;
22082
22083 /* Either no argument registers were pushed or a backtrace
22084 structure was created which includes an adjusted stack
22085 pointer, so just pop everything. */
22086 if (live_regs_mask)
22087 thumb_pop (asm_out_file, live_regs_mask);
22088
22089 /* We have either just popped the return address into the
22090 PC or it was kept in LR for the entire function.
22091 Note that thumb_pop has already called thumb_exit if the
22092 PC was in the list. */
22093 if (!had_to_push_lr)
22094 thumb_exit (asm_out_file, LR_REGNUM);
22095 }
22096 else
22097 {
22098 /* Pop everything but the return address. */
22099 if (live_regs_mask)
22100 thumb_pop (asm_out_file, live_regs_mask);
22101
22102 if (had_to_push_lr)
22103 {
22104 if (size > 12)
22105 {
22106 /* We have no free low regs, so save one. */
22107 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
22108 LAST_ARG_REGNUM);
22109 }
22110
22111 /* Get the return address into a temporary register. */
22112 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
22113
22114 if (size > 12)
22115 {
22116 /* Move the return address to lr. */
22117 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
22118 LAST_ARG_REGNUM);
22119 /* Restore the low register. */
22120 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
22121 IP_REGNUM);
22122 regno = LR_REGNUM;
22123 }
22124 else
22125 regno = LAST_ARG_REGNUM;
22126 }
22127 else
22128 regno = LR_REGNUM;
22129
22130 /* Remove the argument registers that were pushed onto the stack. */
22131 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
22132 SP_REGNUM, SP_REGNUM,
22133 crtl->args.pretend_args_size);
22134
22135 thumb_exit (asm_out_file, regno);
22136 }
22137
22138 return "";
22139 }
22140
22141 /* Functions to save and restore machine-specific function data. */
22142 static struct machine_function *
22143 arm_init_machine_status (void)
22144 {
22145 struct machine_function *machine;
22146 machine = ggc_alloc_cleared_machine_function ();
22147
22148 #if ARM_FT_UNKNOWN != 0
22149 machine->func_type = ARM_FT_UNKNOWN;
22150 #endif
22151 return machine;
22152 }
22153
22154 /* Return an RTX indicating where the return address to the
22155 calling function can be found. */
22156 rtx
22157 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22158 {
22159 if (count != 0)
22160 return NULL_RTX;
22161
22162 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22163 }
22164
22165 /* Do anything needed before RTL is emitted for each function. */
22166 void
22167 arm_init_expanders (void)
22168 {
22169 /* Arrange to initialize and mark the machine per-function status. */
22170 init_machine_status = arm_init_machine_status;
22171
22172 /* This is to stop the combine pass optimizing away the alignment
22173 adjustment of va_arg. */
22174 /* ??? It is claimed that this should not be necessary. */
22175 if (cfun)
22176 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22177 }
22178
22179
22180 /* Like arm_compute_initial_elimination_offset. Simpler because there
22181 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
22182 to point at the base of the local variables after static stack
22183 space for a function has been allocated. */
22184
22185 HOST_WIDE_INT
22186 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22187 {
22188 arm_stack_offsets *offsets;
22189
22190 offsets = arm_get_frame_offsets ();
22191
22192 switch (from)
22193 {
22194 case ARG_POINTER_REGNUM:
22195 switch (to)
22196 {
22197 case STACK_POINTER_REGNUM:
22198 return offsets->outgoing_args - offsets->saved_args;
22199
22200 case FRAME_POINTER_REGNUM:
22201 return offsets->soft_frame - offsets->saved_args;
22202
22203 case ARM_HARD_FRAME_POINTER_REGNUM:
22204 return offsets->saved_regs - offsets->saved_args;
22205
22206 case THUMB_HARD_FRAME_POINTER_REGNUM:
22207 return offsets->locals_base - offsets->saved_args;
22208
22209 default:
22210 gcc_unreachable ();
22211 }
22212 break;
22213
22214 case FRAME_POINTER_REGNUM:
22215 switch (to)
22216 {
22217 case STACK_POINTER_REGNUM:
22218 return offsets->outgoing_args - offsets->soft_frame;
22219
22220 case ARM_HARD_FRAME_POINTER_REGNUM:
22221 return offsets->saved_regs - offsets->soft_frame;
22222
22223 case THUMB_HARD_FRAME_POINTER_REGNUM:
22224 return offsets->locals_base - offsets->soft_frame;
22225
22226 default:
22227 gcc_unreachable ();
22228 }
22229 break;
22230
22231 default:
22232 gcc_unreachable ();
22233 }
22234 }
22235
22236 /* Generate the function's prologue. */
22237
22238 void
22239 thumb1_expand_prologue (void)
22240 {
22241 rtx insn;
22242
22243 HOST_WIDE_INT amount;
22244 arm_stack_offsets *offsets;
22245 unsigned long func_type;
22246 int regno;
22247 unsigned long live_regs_mask;
22248 unsigned long l_mask;
22249 unsigned high_regs_pushed = 0;
22250
22251 func_type = arm_current_func_type ();
22252
22253 /* Naked functions don't have prologues. */
22254 if (IS_NAKED (func_type))
22255 return;
22256
22257 if (IS_INTERRUPT (func_type))
22258 {
22259 error ("interrupt Service Routines cannot be coded in Thumb mode");
22260 return;
22261 }
22262
22263 if (is_called_in_ARM_mode (current_function_decl))
22264 emit_insn (gen_prologue_thumb1_interwork ());
22265
22266 offsets = arm_get_frame_offsets ();
22267 live_regs_mask = offsets->saved_regs_mask;
22268
22269 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22270 l_mask = live_regs_mask & 0x40ff;
22271 /* Then count how many other high registers will need to be pushed. */
22272 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22273
22274 if (crtl->args.pretend_args_size)
22275 {
22276 rtx x = GEN_INT (-crtl->args.pretend_args_size);
22277
22278 if (cfun->machine->uses_anonymous_args)
22279 {
22280 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22281 unsigned long mask;
22282
22283 mask = 1ul << (LAST_ARG_REGNUM + 1);
22284 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
22285
22286 insn = thumb1_emit_multi_reg_push (mask, 0);
22287 }
22288 else
22289 {
22290 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22291 stack_pointer_rtx, x));
22292 }
22293 RTX_FRAME_RELATED_P (insn) = 1;
22294 }
22295
22296 if (TARGET_BACKTRACE)
22297 {
22298 HOST_WIDE_INT offset = 0;
22299 unsigned work_register;
22300 rtx work_reg, x, arm_hfp_rtx;
22301
22302 /* We have been asked to create a stack backtrace structure.
22303 The code looks like this:
22304
22305 0 .align 2
22306 0 func:
22307 0 sub SP, #16 Reserve space for 4 registers.
22308 2 push {R7} Push low registers.
22309 4 add R7, SP, #20 Get the stack pointer before the push.
22310 6 str R7, [SP, #8] Store the stack pointer
22311 (before reserving the space).
22312 8 mov R7, PC Get hold of the start of this code + 12.
22313 10 str R7, [SP, #16] Store it.
22314 12 mov R7, FP Get hold of the current frame pointer.
22315 14 str R7, [SP, #4] Store it.
22316 16 mov R7, LR Get hold of the current return address.
22317 18 str R7, [SP, #12] Store it.
22318 20 add R7, SP, #16 Point at the start of the
22319 backtrace structure.
22320 22 mov FP, R7 Put this value into the frame pointer. */
22321
22322 work_register = thumb_find_work_register (live_regs_mask);
22323 work_reg = gen_rtx_REG (SImode, work_register);
22324 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22325
22326 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22327 stack_pointer_rtx, GEN_INT (-16)));
22328 RTX_FRAME_RELATED_P (insn) = 1;
22329
22330 if (l_mask)
22331 {
22332 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22333 RTX_FRAME_RELATED_P (insn) = 1;
22334
22335 offset = bit_count (l_mask) * UNITS_PER_WORD;
22336 }
22337
22338 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22339 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22340
22341 x = plus_constant (stack_pointer_rtx, offset + 4);
22342 x = gen_frame_mem (SImode, x);
22343 emit_move_insn (x, work_reg);
22344
22345 /* Make sure that the instruction fetching the PC is in the right place
22346 to calculate "start of backtrace creation code + 12". */
22347 /* ??? The stores using the common WORK_REG ought to be enough to
22348 prevent the scheduler from doing anything weird. Failing that
22349 we could always move all of the following into an UNSPEC_VOLATILE. */
22350 if (l_mask)
22351 {
22352 x = gen_rtx_REG (SImode, PC_REGNUM);
22353 emit_move_insn (work_reg, x);
22354
22355 x = plus_constant (stack_pointer_rtx, offset + 12);
22356 x = gen_frame_mem (SImode, x);
22357 emit_move_insn (x, work_reg);
22358
22359 emit_move_insn (work_reg, arm_hfp_rtx);
22360
22361 x = plus_constant (stack_pointer_rtx, offset);
22362 x = gen_frame_mem (SImode, x);
22363 emit_move_insn (x, work_reg);
22364 }
22365 else
22366 {
22367 emit_move_insn (work_reg, arm_hfp_rtx);
22368
22369 x = plus_constant (stack_pointer_rtx, offset);
22370 x = gen_frame_mem (SImode, x);
22371 emit_move_insn (x, work_reg);
22372
22373 x = gen_rtx_REG (SImode, PC_REGNUM);
22374 emit_move_insn (work_reg, x);
22375
22376 x = plus_constant (stack_pointer_rtx, offset + 12);
22377 x = gen_frame_mem (SImode, x);
22378 emit_move_insn (x, work_reg);
22379 }
22380
22381 x = gen_rtx_REG (SImode, LR_REGNUM);
22382 emit_move_insn (work_reg, x);
22383
22384 x = plus_constant (stack_pointer_rtx, offset + 8);
22385 x = gen_frame_mem (SImode, x);
22386 emit_move_insn (x, work_reg);
22387
22388 x = GEN_INT (offset + 12);
22389 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22390
22391 emit_move_insn (arm_hfp_rtx, work_reg);
22392 }
22393 /* Optimization: If we are not pushing any low registers but we are going
22394 to push some high registers then delay our first push. This will just
22395 be a push of LR and we can combine it with the push of the first high
22396 register. */
22397 else if ((l_mask & 0xff) != 0
22398 || (high_regs_pushed == 0 && l_mask))
22399 {
22400 unsigned long mask = l_mask;
22401 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22402 insn = thumb1_emit_multi_reg_push (mask, mask);
22403 RTX_FRAME_RELATED_P (insn) = 1;
22404 }
22405
22406 if (high_regs_pushed)
22407 {
22408 unsigned pushable_regs;
22409 unsigned next_hi_reg;
22410
22411 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22412 if (live_regs_mask & (1 << next_hi_reg))
22413 break;
22414
22415 pushable_regs = l_mask & 0xff;
22416
22417 if (pushable_regs == 0)
22418 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22419
22420 while (high_regs_pushed > 0)
22421 {
22422 unsigned long real_regs_mask = 0;
22423
22424 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22425 {
22426 if (pushable_regs & (1 << regno))
22427 {
22428 emit_move_insn (gen_rtx_REG (SImode, regno),
22429 gen_rtx_REG (SImode, next_hi_reg));
22430
22431 high_regs_pushed --;
22432 real_regs_mask |= (1 << next_hi_reg);
22433
22434 if (high_regs_pushed)
22435 {
22436 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22437 next_hi_reg --)
22438 if (live_regs_mask & (1 << next_hi_reg))
22439 break;
22440 }
22441 else
22442 {
22443 pushable_regs &= ~((1 << regno) - 1);
22444 break;
22445 }
22446 }
22447 }
22448
22449 /* If we had to find a work register and we have not yet
22450 saved the LR then add it to the list of regs to push. */
22451 if (l_mask == (1 << LR_REGNUM))
22452 {
22453 pushable_regs |= l_mask;
22454 real_regs_mask |= l_mask;
22455 l_mask = 0;
22456 }
22457
22458 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22459 RTX_FRAME_RELATED_P (insn) = 1;
22460 }
22461 }
22462
22463 /* Load the pic register before setting the frame pointer,
22464 so we can use r7 as a temporary work register. */
22465 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22466 arm_load_pic_register (live_regs_mask);
22467
22468 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22469 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22470 stack_pointer_rtx);
22471
22472 if (flag_stack_usage_info)
22473 current_function_static_stack_size
22474 = offsets->outgoing_args - offsets->saved_args;
22475
22476 amount = offsets->outgoing_args - offsets->saved_regs;
22477 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22478 if (amount)
22479 {
22480 if (amount < 512)
22481 {
22482 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22483 GEN_INT (- amount)));
22484 RTX_FRAME_RELATED_P (insn) = 1;
22485 }
22486 else
22487 {
22488 rtx reg, dwarf;
22489
22490 /* The stack decrement is too big for an immediate value in a single
22491 insn. In theory we could issue multiple subtracts, but after
22492 three of them it becomes more space efficient to place the full
22493 value in the constant pool and load into a register. (Also the
22494 ARM debugger really likes to see only one stack decrement per
22495 function). So instead we look for a scratch register into which
22496 we can load the decrement, and then we subtract this from the
22497 stack pointer. Unfortunately on the thumb the only available
22498 scratch registers are the argument registers, and we cannot use
22499 these as they may hold arguments to the function. Instead we
22500 attempt to locate a call preserved register which is used by this
22501 function. If we can find one, then we know that it will have
22502 been pushed at the start of the prologue and so we can corrupt
22503 it now. */
22504 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22505 if (live_regs_mask & (1 << regno))
22506 break;
22507
22508 gcc_assert(regno <= LAST_LO_REGNUM);
22509
22510 reg = gen_rtx_REG (SImode, regno);
22511
22512 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22513
22514 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22515 stack_pointer_rtx, reg));
22516
22517 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22518 plus_constant (stack_pointer_rtx,
22519 -amount));
22520 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22521 RTX_FRAME_RELATED_P (insn) = 1;
22522 }
22523 }
22524
22525 if (frame_pointer_needed)
22526 thumb_set_frame_pointer (offsets);
22527
22528 /* If we are profiling, make sure no instructions are scheduled before
22529 the call to mcount. Similarly if the user has requested no
22530 scheduling in the prolog. Similarly if we want non-call exceptions
22531 using the EABI unwinder, to prevent faulting instructions from being
22532 swapped with a stack adjustment. */
22533 if (crtl->profile || !TARGET_SCHED_PROLOG
22534 || (arm_except_unwind_info (&global_options) == UI_TARGET
22535 && cfun->can_throw_non_call_exceptions))
22536 emit_insn (gen_blockage ());
22537
22538 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22539 if (live_regs_mask & 0xff)
22540 cfun->machine->lr_save_eliminated = 0;
22541 }
22542
22543
22544 void
22545 thumb1_expand_epilogue (void)
22546 {
22547 HOST_WIDE_INT amount;
22548 arm_stack_offsets *offsets;
22549 int regno;
22550
22551 /* Naked functions don't have epilogues. */
22552 if (IS_NAKED (arm_current_func_type ()))
22553 return;
22554
22555 offsets = arm_get_frame_offsets ();
22556 amount = offsets->outgoing_args - offsets->saved_regs;
22557
22558 if (frame_pointer_needed)
22559 {
22560 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22561 amount = offsets->locals_base - offsets->saved_regs;
22562 }
22563 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22564
22565 gcc_assert (amount >= 0);
22566 if (amount)
22567 {
22568 emit_insn (gen_blockage ());
22569
22570 if (amount < 512)
22571 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22572 GEN_INT (amount)));
22573 else
22574 {
22575 /* r3 is always free in the epilogue. */
22576 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22577
22578 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22579 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22580 }
22581 }
22582
22583 /* Emit a USE (stack_pointer_rtx), so that
22584 the stack adjustment will not be deleted. */
22585 emit_insn (gen_prologue_use (stack_pointer_rtx));
22586
22587 if (crtl->profile || !TARGET_SCHED_PROLOG)
22588 emit_insn (gen_blockage ());
22589
22590 /* Emit a clobber for each register that will be restored in the epilogue,
22591 so that flow2 will get register lifetimes correct. */
22592 for (regno = 0; regno < 13; regno++)
22593 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22594 emit_clobber (gen_rtx_REG (SImode, regno));
22595
22596 if (! df_regs_ever_live_p (LR_REGNUM))
22597 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22598 }
22599
22600 /* Implementation of insn prologue_thumb1_interwork. This is the first
22601 "instruction" of a function called in ARM mode. Swap to thumb mode. */
22602
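/* In outline, the emitted sequence is:
	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_of<name>
	.thumb_func
   .real_start_of<name>:
   i.e. compute the Thumb-mode address of the stub label and
   branch-exchange to it.  */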
22603 const char *
22604 thumb1_output_interwork (void)
22605 {
22606 const char * name;
22607 FILE *f = asm_out_file;
22608
22609 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
22610 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
22611 == SYMBOL_REF);
22612 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
22613
22614 /* Generate code sequence to switch us into Thumb mode. */
22615 /* The .code 32 directive has already been emitted by
22616 ASM_DECLARE_FUNCTION_NAME. */
22617 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
22618 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
22619
22620 /* Generate a label, so that the debugger will notice the
22621 change in instruction sets. This label is also used by
22622 the assembler to bypass the ARM code when this function
22623 is called from a Thumb encoded function elsewhere in the
22624 same file. Hence the definition of STUB_NAME here must
22625 agree with the definition in gas/config/tc-arm.c. */
22626
22627 #define STUB_NAME ".real_start_of"
22628
22629 fprintf (f, "\t.code\t16\n");
22630 #ifdef ARM_PE
22631 if (arm_dllexport_name_p (name))
22632 name = arm_strip_name_encoding (name);
22633 #endif
22634 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
22635 fprintf (f, "\t.thumb_func\n");
22636 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
22637
22638 return "";
22639 }
22640
22641 /* Handle the case of a double word load into a low register from
22642 a computed memory address. The computed address may involve a
22643 register which is overwritten by the load. */
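/* For instance, in the <reg> + <reg> case below, with destination r2 and
   address r4 + r5, the emitted sequence is:
	add r3, r4, r5
	ldr r2, [r3, #0]
	ldr r3, [r3, #4]
   i.e. the address is computed into the high half of the destination,
   which is therefore loaded last.  */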
22644 const char *
22645 thumb_load_double_from_address (rtx *operands)
22646 {
22647 rtx addr;
22648 rtx base;
22649 rtx offset;
22650 rtx arg1;
22651 rtx arg2;
22652
22653 gcc_assert (GET_CODE (operands[0]) == REG);
22654 gcc_assert (GET_CODE (operands[1]) == MEM);
22655
22656 /* Get the memory address. */
22657 addr = XEXP (operands[1], 0);
22658
22659 /* Work out how the memory address is computed. */
22660 switch (GET_CODE (addr))
22661 {
22662 case REG:
22663 operands[2] = adjust_address (operands[1], SImode, 4);
22664
22665 if (REGNO (operands[0]) == REGNO (addr))
22666 {
22667 output_asm_insn ("ldr\t%H0, %2", operands);
22668 output_asm_insn ("ldr\t%0, %1", operands);
22669 }
22670 else
22671 {
22672 output_asm_insn ("ldr\t%0, %1", operands);
22673 output_asm_insn ("ldr\t%H0, %2", operands);
22674 }
22675 break;
22676
22677 case CONST:
22678 /* Compute <address> + 4 for the high order load. */
22679 operands[2] = adjust_address (operands[1], SImode, 4);
22680
22681 output_asm_insn ("ldr\t%0, %1", operands);
22682 output_asm_insn ("ldr\t%H0, %2", operands);
22683 break;
22684
22685 case PLUS:
22686 arg1 = XEXP (addr, 0);
22687 arg2 = XEXP (addr, 1);
22688
22689 if (CONSTANT_P (arg1))
22690 base = arg2, offset = arg1;
22691 else
22692 base = arg1, offset = arg2;
22693
22694 gcc_assert (GET_CODE (base) == REG);
22695
22696 /* Catch the case of <address> = <reg> + <reg> */
22697 if (GET_CODE (offset) == REG)
22698 {
22699 int reg_offset = REGNO (offset);
22700 int reg_base = REGNO (base);
22701 int reg_dest = REGNO (operands[0]);
22702
22703 /* Add the base and offset registers together into the
22704 higher destination register. */
22705 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
22706 reg_dest + 1, reg_base, reg_offset);
22707
22708 /* Load the lower destination register from the address in
22709 the higher destination register. */
22710 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
22711 reg_dest, reg_dest + 1);
22712
22713 /* Load the higher destination register from its own address
22714 plus 4. */
22715 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
22716 reg_dest + 1, reg_dest + 1);
22717 }
22718 else
22719 {
22720 /* Compute <address> + 4 for the high order load. */
22721 operands[2] = adjust_address (operands[1], SImode, 4);
22722
22723 /* If the computed address is held in the low order register
22724 then load the high order register first, otherwise always
22725 load the low order register first. */
22726 if (REGNO (operands[0]) == REGNO (base))
22727 {
22728 output_asm_insn ("ldr\t%H0, %2", operands);
22729 output_asm_insn ("ldr\t%0, %1", operands);
22730 }
22731 else
22732 {
22733 output_asm_insn ("ldr\t%0, %1", operands);
22734 output_asm_insn ("ldr\t%H0, %2", operands);
22735 }
22736 }
22737 break;
22738
22739 case LABEL_REF:
22740 /* With no registers to worry about we can just load the value
22741 directly. */
22742 operands[2] = adjust_address (operands[1], SImode, 4);
22743
22744 output_asm_insn ("ldr\t%H0, %2", operands);
22745 output_asm_insn ("ldr\t%0, %1", operands);
22746 break;
22747
22748 default:
22749 gcc_unreachable ();
22750 }
22751
22752 return "";
22753 }
22754
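/* Output a 2- or 3-word block move using ldmia/stmia.  Operands 4..6 are
   the scratch registers; they are first sorted into ascending order, as
   the multi-register transfer instructions require.  */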
22755 const char *
22756 thumb_output_move_mem_multiple (int n, rtx *operands)
22757 {
22758 rtx tmp;
22759
22760 switch (n)
22761 {
22762 case 2:
22763 if (REGNO (operands[4]) > REGNO (operands[5]))
22764 {
22765 tmp = operands[4];
22766 operands[4] = operands[5];
22767 operands[5] = tmp;
22768 }
22769 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
22770 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
22771 break;
22772
22773 case 3:
22774 if (REGNO (operands[4]) > REGNO (operands[5]))
22775 {
22776 tmp = operands[4];
22777 operands[4] = operands[5];
22778 operands[5] = tmp;
22779 }
22780 if (REGNO (operands[5]) > REGNO (operands[6]))
22781 {
22782 tmp = operands[5];
22783 operands[5] = operands[6];
22784 operands[6] = tmp;
22785 }
22786 if (REGNO (operands[4]) > REGNO (operands[5]))
22787 {
22788 tmp = operands[4];
22789 operands[4] = operands[5];
22790 operands[5] = tmp;
22791 }
22792
22793 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
22794 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
22795 break;
22796
22797 default:
22798 gcc_unreachable ();
22799 }
22800
22801 return "";
22802 }
22803
22804 /* Output a call-via instruction for thumb state. */
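/* The call is emitted as "bl <label>", where the label marks a shared
   one-instruction stub containing just "bx rN" (for the text-section
   case the stubs are emitted by arm_file_end below).  */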
22805 const char *
22806 thumb_call_via_reg (rtx reg)
22807 {
22808 int regno = REGNO (reg);
22809 rtx *labelp;
22810
22811 gcc_assert (regno < LR_REGNUM);
22812
22813 /* If we are in the normal text section we can use a single instance
22814 per compilation unit. If we are doing function sections, then we need
22815 an entry per section, since we can't rely on reachability. */
22816 if (in_section == text_section)
22817 {
22818 thumb_call_reg_needed = 1;
22819
22820 if (thumb_call_via_label[regno] == NULL)
22821 thumb_call_via_label[regno] = gen_label_rtx ();
22822 labelp = thumb_call_via_label + regno;
22823 }
22824 else
22825 {
22826 if (cfun->machine->call_via[regno] == NULL)
22827 cfun->machine->call_via[regno] = gen_label_rtx ();
22828 labelp = cfun->machine->call_via + regno;
22829 }
22830
22831 output_asm_insn ("bl\t%a0", labelp);
22832 return "";
22833 }
22834
22835 /* Routines for generating rtl. */
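/* Expand a constant-length memory copy (movmemqi).  The length is peeled
   into 12-byte and 8-byte block moves followed by word, halfword and byte
   tail copies; e.g. a 7-byte copy becomes one word, one halfword and one
   byte transfer.  */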
22836 void
22837 thumb_expand_movmemqi (rtx *operands)
22838 {
22839 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
22840 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
22841 HOST_WIDE_INT len = INTVAL (operands[2]);
22842 HOST_WIDE_INT offset = 0;
22843
22844 while (len >= 12)
22845 {
22846 emit_insn (gen_movmem12b (out, in, out, in));
22847 len -= 12;
22848 }
22849
22850 if (len >= 8)
22851 {
22852 emit_insn (gen_movmem8b (out, in, out, in));
22853 len -= 8;
22854 }
22855
22856 if (len >= 4)
22857 {
22858 rtx reg = gen_reg_rtx (SImode);
22859 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
22860 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
22861 len -= 4;
22862 offset += 4;
22863 }
22864
22865 if (len >= 2)
22866 {
22867 rtx reg = gen_reg_rtx (HImode);
22868 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
22869 plus_constant (in, offset))));
22870 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
22871 reg));
22872 len -= 2;
22873 offset += 2;
22874 }
22875
22876 if (len)
22877 {
22878 rtx reg = gen_reg_rtx (QImode);
22879 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
22880 plus_constant (in, offset))));
22881 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
22882 reg));
22883 }
22884 }
22885
22886 void
22887 thumb_reload_out_hi (rtx *operands)
22888 {
22889 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
22890 }
22891
22892 /* Handle reading a half-word from memory during reload. */
22893 void
22894 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
22895 {
22896 gcc_unreachable ();
22897 }
22898
22899 /* Return the length of a function name prefix
22900 that starts with the character C. */
22901 static int
22902 arm_get_strip_length (int c)
22903 {
22904 switch (c)
22905 {
22906 ARM_NAME_ENCODING_LENGTHS
22907 default: return 0;
22908 }
22909 }
22910
22911 /* Return a pointer to a function's name with any
22912 and all prefix encodings stripped from it. */
22913 const char *
22914 arm_strip_name_encoding (const char *name)
22915 {
22916 int skip;
22917
22918 while ((skip = arm_get_strip_length (* name)))
22919 name += skip;
22920
22921 return name;
22922 }
22923
22924 /* If there is a '*' anywhere in the name's prefix, then
22925 emit the stripped name verbatim, otherwise prepend an
22926 underscore if leading underscores are being used. */
22927 void
22928 arm_asm_output_labelref (FILE *stream, const char *name)
22929 {
22930 int skip;
22931 int verbatim = 0;
22932
22933 while ((skip = arm_get_strip_length (* name)))
22934 {
22935 verbatim |= (*name == '*');
22936 name += skip;
22937 }
22938
22939 if (verbatim)
22940 fputs (name, stream);
22941 else
22942 asm_fprintf (stream, "%U%s", name);
22943 }
22944
22945 static void
22946 arm_file_start (void)
22947 {
22948 int val;
22949
22950 if (TARGET_UNIFIED_ASM)
22951 asm_fprintf (asm_out_file, "\t.syntax unified\n");
22952
22953 if (TARGET_BPABI)
22954 {
22955 const char *fpu_name;
22956 if (arm_selected_arch)
22957 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
22958 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
22959 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
22960 else
22961 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
22962
22963 if (TARGET_SOFT_FLOAT)
22964 {
22965 if (TARGET_VFP)
22966 fpu_name = "softvfp";
22967 else
22968 fpu_name = "softfpa";
22969 }
22970 else
22971 {
22972 fpu_name = arm_fpu_desc->name;
22973 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
22974 {
22975 if (TARGET_HARD_FLOAT)
22976 EMIT_EABI_ATTRIBUTE (Tag_ABI_HardFP_use, 27, 3);
22977 if (TARGET_HARD_FLOAT_ABI)
22978 EMIT_EABI_ATTRIBUTE (Tag_ABI_VFP_args, 28, 1);
22979 }
22980 }
22981 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
22982
22983 /* Some of these attributes only apply when the corresponding features
22984 are used. However we don't have any easy way of figuring this out.
22985 Conservatively record the setting that would have been used. */
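      /* Each EMIT_EABI_ATTRIBUTE use below is expected to appear in the
	 assembly output as a ".eabi_attribute <tag>, <value>" directive;
	 see the macro's definition for the exact form.  */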
22986
22987 if (flag_rounding_math)
22988 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_rounding, 19, 1);
22989
22990 if (!flag_unsafe_math_optimizations)
22991 {
22992 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_denormal, 20, 1);
22993 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_exceptions, 21, 1);
22994 }
22995 if (flag_signaling_nans)
22996 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_user_exceptions, 22, 1);
22997
22998 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_number_model, 23,
22999 flag_finite_math_only ? 1 : 3);
23000
23001 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_needed, 24, 1);
23002 EMIT_EABI_ATTRIBUTE (Tag_ABI_align8_preserved, 25, 1);
23003 EMIT_EABI_ATTRIBUTE (Tag_ABI_enum_size, 26, flag_short_enums ? 1 : 2);
23004
23005 /* Tag_ABI_optimization_goals. */
23006 if (optimize_size)
23007 val = 4;
23008 else if (optimize >= 2)
23009 val = 2;
23010 else if (optimize)
23011 val = 1;
23012 else
23013 val = 6;
23014 EMIT_EABI_ATTRIBUTE (Tag_ABI_optimization_goals, 30, val);
23015
23016 EMIT_EABI_ATTRIBUTE (Tag_CPU_unaligned_access, 34, unaligned_access);
23017
23018 if (arm_fp16_format)
23019 EMIT_EABI_ATTRIBUTE (Tag_ABI_FP_16bit_format, 38, (int) arm_fp16_format);
23020
23021 if (arm_lang_output_object_attributes_hook)
23022 arm_lang_output_object_attributes_hook();
23023 }
23024
23025 default_file_start ();
23026 }
23027
23028 static void
23029 arm_file_end (void)
23030 {
23031 int regno;
23032
23033 if (NEED_INDICATE_EXEC_STACK)
23034 /* Add .note.GNU-stack. */
23035 file_end_indicate_exec_stack ();
23036
23037 if (! thumb_call_reg_needed)
23038 return;
23039
23040 switch_to_section (text_section);
23041 asm_fprintf (asm_out_file, "\t.code 16\n");
23042 ASM_OUTPUT_ALIGN (asm_out_file, 1);
23043
23044 for (regno = 0; regno < LR_REGNUM; regno++)
23045 {
23046 rtx label = thumb_call_via_label[regno];
23047
23048 if (label != 0)
23049 {
23050 targetm.asm_out.internal_label (asm_out_file, "L",
23051 CODE_LABEL_NUMBER (label));
23052 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
23053 }
23054 }
23055 }
23056
23057 #ifndef ARM_PE
23058 /* Symbols in the text segment can be accessed without indirecting via the
23059 constant pool; it may take an extra binary operation, but this is still
23060 faster than indirecting via memory. Don't do this when not optimizing,
23061 since we won't be calculating all of the offsets necessary to do this
23062 simplification. */
23063
23064 static void
23065 arm_encode_section_info (tree decl, rtx rtl, int first)
23066 {
23067 if (optimize > 0 && TREE_CONSTANT (decl))
23068 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
23069
23070 default_encode_section_info (decl, rtl, first);
23071 }
23072 #endif /* !ARM_PE */
23073
23074 static void
23075 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
23076 {
23077 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
23078 && !strcmp (prefix, "L"))
23079 {
23080 arm_ccfsm_state = 0;
23081 arm_target_insn = NULL;
23082 }
23083 default_internal_label (stream, prefix, labelno);
23084 }
23085
23086 /* Output code to add DELTA to the first argument, and then jump
23087 to FUNCTION. Used for C++ multiple inheritance. */
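/* In the simplest (ARM-mode, non-PIC) case, with DELTA == 4, the emitted
   code is just:
	add	r0, r0, #4
	b	<function>
   with r1 used instead of r0 when the return value is passed in memory.  */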
23088 static void
23089 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
23090 HOST_WIDE_INT delta,
23091 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
23092 tree function)
23093 {
23094 static int thunk_label = 0;
23095 char label[256];
23096 char labelpc[256];
23097 int mi_delta = delta;
23098 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
23099 int shift = 0;
23100 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
23101 ? 1 : 0);
23102 if (mi_delta < 0)
23103 mi_delta = - mi_delta;
23104
23105 if (TARGET_THUMB1)
23106 {
23107 int labelno = thunk_label++;
23108 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
23109 /* Thunks are entered in ARM mode when available. */
23110 if (TARGET_THUMB1_ONLY)
23111 {
23112 /* push r3 so we can use it as a temporary. */
23113 /* TODO: Omit this save if r3 is not used. */
23114 fputs ("\tpush {r3}\n", file);
23115 fputs ("\tldr\tr3, ", file);
23116 }
23117 else
23118 {
23119 fputs ("\tldr\tr12, ", file);
23120 }
23121 assemble_name (file, label);
23122 fputc ('\n', file);
23123 if (flag_pic)
23124 {
23125 /* If we are generating PIC, the ldr instruction below loads
23126 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
23127 the address of the add + 8, so we have:
23128
23129 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
23130 = target + 1.
23131
23132 Note that we have "+ 1" because some versions of GNU ld
23133 don't set the low bit of the result for R_ARM_REL32
23134 relocations against thumb function symbols.
23135 On ARMv6M this is +4, not +8. */
23136 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
23137 assemble_name (file, labelpc);
23138 fputs (":\n", file);
23139 if (TARGET_THUMB1_ONLY)
23140 {
23141 /* This is 2 insns after the start of the thunk, so we know it
23142 is 4-byte aligned. */
23143 fputs ("\tadd\tr3, pc, r3\n", file);
23144 fputs ("\tmov r12, r3\n", file);
23145 }
23146 else
23147 fputs ("\tadd\tr12, pc, r12\n", file);
23148 }
23149 else if (TARGET_THUMB1_ONLY)
23150 fputs ("\tmov r12, r3\n", file);
23151 }
23152 if (TARGET_THUMB1_ONLY)
23153 {
23154 if (mi_delta > 255)
23155 {
23156 fputs ("\tldr\tr3, ", file);
23157 assemble_name (file, label);
23158 fputs ("+4\n", file);
23159 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
23160 mi_op, this_regno, this_regno);
23161 }
23162 else if (mi_delta != 0)
23163 {
23164 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23165 mi_op, this_regno, this_regno,
23166 mi_delta);
23167 }
23168 }
23169 else
23170 {
23171 /* TODO: Use movw/movt for large constants when available. */
23172 while (mi_delta != 0)
23173 {
23174 if ((mi_delta & (3 << shift)) == 0)
23175 shift += 2;
23176 else
23177 {
23178 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23179 mi_op, this_regno, this_regno,
23180 mi_delta & (0xff << shift));
23181 mi_delta &= ~(0xff << shift);
23182 shift += 8;
23183 }
23184 }
23185 }
23186 if (TARGET_THUMB1)
23187 {
23188 if (TARGET_THUMB1_ONLY)
23189 fputs ("\tpop\t{r3}\n", file);
23190
23191 fprintf (file, "\tbx\tr12\n");
23192 ASM_OUTPUT_ALIGN (file, 2);
23193 assemble_name (file, label);
23194 fputs (":\n", file);
23195 if (flag_pic)
23196 {
23197 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
23198 rtx tem = XEXP (DECL_RTL (function), 0);
23199 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
23200 tem = gen_rtx_MINUS (GET_MODE (tem),
23201 tem,
23202 gen_rtx_SYMBOL_REF (Pmode,
23203 ggc_strdup (labelpc)));
23204 assemble_integer (tem, 4, BITS_PER_WORD, 1);
23205 }
23206 else
23207 /* Output ".word .LTHUNKn". */
23208 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
23209
23210 if (TARGET_THUMB1_ONLY && mi_delta > 255)
23211 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
23212 }
23213 else
23214 {
23215 fputs ("\tb\t", file);
23216 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
23217 if (NEED_PLT_RELOC)
23218 fputs ("(PLT)", file);
23219 fputc ('\n', file);
23220 }
23221 }
23222
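/* Output the integer CONST_VECTOR X to FILE as a single hexadecimal
   literal, highest-numbered element first; e.g. a V4HImode vector with
   elements {1, 2, 3, 4} prints as 0x0004000300020001.  Always returns 1.  */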
23223 int
23224 arm_emit_vector_const (FILE *file, rtx x)
23225 {
23226 int i;
23227 const char * pattern;
23228
23229 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23230
23231 switch (GET_MODE (x))
23232 {
23233 case V2SImode: pattern = "%08x"; break;
23234 case V4HImode: pattern = "%04x"; break;
23235 case V8QImode: pattern = "%02x"; break;
23236 default: gcc_unreachable ();
23237 }
23238
23239 fprintf (file, "0x");
23240 for (i = CONST_VECTOR_NUNITS (x); i--;)
23241 {
23242 rtx element;
23243
23244 element = CONST_VECTOR_ELT (x, i);
23245 fprintf (file, pattern, INTVAL (element));
23246 }
23247
23248 return 1;
23249 }
23250
23251 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
23252 HFmode constant pool entries are actually loaded with ldr. */
23253 void
23254 arm_emit_fp16_const (rtx c)
23255 {
23256 REAL_VALUE_TYPE r;
23257 long bits;
23258
23259 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
23260 bits = real_to_target (NULL, &r, HFmode);
23261 if (WORDS_BIG_ENDIAN)
23262 assemble_zeros (2);
23263 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
23264 if (!WORDS_BIG_ENDIAN)
23265 assemble_zeros (2);
23266 }
23267
23268 const char *
23269 arm_output_load_gr (rtx *operands)
23270 {
23271 rtx reg;
23272 rtx offset;
23273 rtx wcgr;
23274 rtx sum;
23275
23276 if (GET_CODE (operands [1]) != MEM
23277 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
23278 || GET_CODE (reg = XEXP (sum, 0)) != REG
23279 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
23280 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
23281 return "wldrw%?\t%0, %1";
23282
23283 /* Fix up an out-of-range load of a GR register. */
23284 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
23285 wcgr = operands[0];
23286 operands[0] = reg;
23287 output_asm_insn ("ldr%?\t%0, %1", operands);
23288
23289 operands[0] = wcgr;
23290 operands[1] = reg;
23291 output_asm_insn ("tmcr%?\t%0, %1", operands);
23292 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23293
23294 return "";
23295 }
23296
23297 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23298
23299 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23300 named arg and all anonymous args onto the stack.
23301 XXX I know the prologue shouldn't be pushing registers, but it is faster
23302 that way. */
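/* For example, for f (int a, ...) under AAPCS, the named argument lives in
   r0, so NREGS is 1 and *PRETEND_SIZE becomes 12; the prologue then pushes
   r1-r3 so that the anonymous arguments sit contiguously with any arguments
   already passed on the stack.  */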
23303
23304 static void
23305 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23306 enum machine_mode mode,
23307 tree type,
23308 int *pretend_size,
23309 int second_time ATTRIBUTE_UNUSED)
23310 {
23311 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23312 int nregs;
23313
23314 cfun->machine->uses_anonymous_args = 1;
23315 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23316 {
23317 nregs = pcum->aapcs_ncrn;
23318 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23319 nregs++;
23320 }
23321 else
23322 nregs = pcum->nregs;
23323
23324 if (nregs < NUM_ARG_REGS)
23325 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
23326 }
23327
23328 /* Return nonzero if the CONSUMER instruction (a store) does not need
23329 PRODUCER's value to calculate the address. */
23330
23331 int
23332 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23333 {
23334 rtx value = PATTERN (producer);
23335 rtx addr = PATTERN (consumer);
23336
23337 if (GET_CODE (value) == COND_EXEC)
23338 value = COND_EXEC_CODE (value);
23339 if (GET_CODE (value) == PARALLEL)
23340 value = XVECEXP (value, 0, 0);
23341 value = XEXP (value, 0);
23342 if (GET_CODE (addr) == COND_EXEC)
23343 addr = COND_EXEC_CODE (addr);
23344 if (GET_CODE (addr) == PARALLEL)
23345 addr = XVECEXP (addr, 0, 0);
23346 addr = XEXP (addr, 0);
23347
23348 return !reg_overlap_mentioned_p (value, addr);
23349 }
23350
23351 /* Return nonzero if the CONSUMER instruction (a store) does need
23352 PRODUCER's value to calculate the address. */
23353
23354 int
23355 arm_early_store_addr_dep (rtx producer, rtx consumer)
23356 {
23357 return !arm_no_early_store_addr_dep (producer, consumer);
23358 }
23359
23360 /* Return nonzero if the CONSUMER instruction (a load) does need
23361 PRODUCER's value to calculate the address. */
23362
23363 int
23364 arm_early_load_addr_dep (rtx producer, rtx consumer)
23365 {
23366 rtx value = PATTERN (producer);
23367 rtx addr = PATTERN (consumer);
23368
23369 if (GET_CODE (value) == COND_EXEC)
23370 value = COND_EXEC_CODE (value);
23371 if (GET_CODE (value) == PARALLEL)
23372 value = XVECEXP (value, 0, 0);
23373 value = XEXP (value, 0);
23374 if (GET_CODE (addr) == COND_EXEC)
23375 addr = COND_EXEC_CODE (addr);
23376 if (GET_CODE (addr) == PARALLEL)
23377 addr = XVECEXP (addr, 0, 0);
23378 addr = XEXP (addr, 1);
23379
23380 return reg_overlap_mentioned_p (value, addr);
23381 }
23382
23383 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23384 have an early register shift value or amount dependency on the
23385 result of PRODUCER. */
23386
23387 int
23388 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23389 {
23390 rtx value = PATTERN (producer);
23391 rtx op = PATTERN (consumer);
23392 rtx early_op;
23393
23394 if (GET_CODE (value) == COND_EXEC)
23395 value = COND_EXEC_CODE (value);
23396 if (GET_CODE (value) == PARALLEL)
23397 value = XVECEXP (value, 0, 0);
23398 value = XEXP (value, 0);
23399 if (GET_CODE (op) == COND_EXEC)
23400 op = COND_EXEC_CODE (op);
23401 if (GET_CODE (op) == PARALLEL)
23402 op = XVECEXP (op, 0, 0);
23403 op = XEXP (op, 1);
23404
23405 early_op = XEXP (op, 0);
23406 /* This is either an actual independent shift, or a shift applied to
23407 the first operand of another operation. We want the whole shift
23408 operation. */
23409 if (GET_CODE (early_op) == REG)
23410 early_op = op;
23411
23412 return !reg_overlap_mentioned_p (value, early_op);
23413 }
23414
23415 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23416 have an early register shift value dependency on the result of
23417 PRODUCER. */
23418
23419 int
23420 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23421 {
23422 rtx value = PATTERN (producer);
23423 rtx op = PATTERN (consumer);
23424 rtx early_op;
23425
23426 if (GET_CODE (value) == COND_EXEC)
23427 value = COND_EXEC_CODE (value);
23428 if (GET_CODE (value) == PARALLEL)
23429 value = XVECEXP (value, 0, 0);
23430 value = XEXP (value, 0);
23431 if (GET_CODE (op) == COND_EXEC)
23432 op = COND_EXEC_CODE (op);
23433 if (GET_CODE (op) == PARALLEL)
23434 op = XVECEXP (op, 0, 0);
23435 op = XEXP (op, 1);
23436
23437 early_op = XEXP (op, 0);
23438
23439 /* This is either an actual independent shift, or a shift applied to
23440 the first operand of another operation. We want the value being
23441 shifted, in either case. */
23442 if (GET_CODE (early_op) != REG)
23443 early_op = XEXP (early_op, 0);
23444
23445 return !reg_overlap_mentioned_p (value, early_op);
23446 }
23447
23448 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23449 have an early register mult dependency on the result of
23450 PRODUCER. */
23451
23452 int
23453 arm_no_early_mul_dep (rtx producer, rtx consumer)
23454 {
23455 rtx value = PATTERN (producer);
23456 rtx op = PATTERN (consumer);
23457
23458 if (GET_CODE (value) == COND_EXEC)
23459 value = COND_EXEC_CODE (value);
23460 if (GET_CODE (value) == PARALLEL)
23461 value = XVECEXP (value, 0, 0);
23462 value = XEXP (value, 0);
23463 if (GET_CODE (op) == COND_EXEC)
23464 op = COND_EXEC_CODE (op);
23465 if (GET_CODE (op) == PARALLEL)
23466 op = XVECEXP (op, 0, 0);
23467 op = XEXP (op, 1);
23468
23469 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23470 {
23471 if (GET_CODE (XEXP (op, 0)) == MULT)
23472 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23473 else
23474 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23475 }
23476
23477 return 0;
23478 }
23479
23480 /* We can't rely on the caller doing the proper promotion when
23481 using APCS or ATPCS. */
23482
23483 static bool
23484 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23485 {
23486 return !TARGET_AAPCS_BASED;
23487 }
23488
23489 static enum machine_mode
23490 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23491 enum machine_mode mode,
23492 int *punsignedp ATTRIBUTE_UNUSED,
23493 const_tree fntype ATTRIBUTE_UNUSED,
23494 int for_return ATTRIBUTE_UNUSED)
23495 {
23496 if (GET_MODE_CLASS (mode) == MODE_INT
23497 && GET_MODE_SIZE (mode) < 4)
23498 return SImode;
23499
23500 return mode;
23501 }
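/* Illustrative note (not in the original source): with this hook, any integer
   mode narrower than 4 bytes is widened to SImode when passed or returned, so
   for

       short f (char c) { return c + 1; }

   both the incoming 'c' and the returned 'short' travel in full 32-bit
   registers.  */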
23502
23503 /* AAPCS based ABIs use short enums by default. */
23504
23505 static bool
23506 arm_default_short_enums (void)
23507 {
23508 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23509 }
23510
23511
23512 /* AAPCS requires that anonymous bitfields affect structure alignment. */
23513
23514 static bool
23515 arm_align_anon_bitfield (void)
23516 {
23517 return TARGET_AAPCS_BASED;
23518 }
23519
23520
23521 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
23522
23523 static tree
23524 arm_cxx_guard_type (void)
23525 {
23526 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23527 }
23528
23529 /* Return non-zero if the consumer (a multiply-accumulate instruction)
23530 has an accumulator dependency on the result of the producer (a
23531 multiplication instruction) and no other dependency on that result. */
23532 int
23533 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23534 {
23535 rtx mul = PATTERN (producer);
23536 rtx mac = PATTERN (consumer);
23537 rtx mul_result;
23538 rtx mac_op0, mac_op1, mac_acc;
23539
23540 if (GET_CODE (mul) == COND_EXEC)
23541 mul = COND_EXEC_CODE (mul);
23542 if (GET_CODE (mac) == COND_EXEC)
23543 mac = COND_EXEC_CODE (mac);
23544
23545 /* Check that mul is of the form (set (...) (mult ...))
23546 and mla is of the form (set (...) (plus (mult ...) (...))). */
23547 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
23548 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
23549 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
23550 return 0;
23551
23552 mul_result = XEXP (mul, 0);
23553 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
23554 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
23555 mac_acc = XEXP (XEXP (mac, 1), 1);
23556
23557 return (reg_overlap_mentioned_p (mul_result, mac_acc)
23558 && !reg_overlap_mentioned_p (mul_result, mac_op0)
23559 && !reg_overlap_mentioned_p (mul_result, mac_op1));
23560 }
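/* Worked example (illustrative, not in the original source): with
   producer  "mul  r1, r2, r3"  and consumer  "mla  r0, r4, r5, r1"
   the accumulator (r1) is exactly the mul result and is not one of the
   multiplier operands, so this returns nonzero.  If the consumer were
   "mla  r0, r1, r5, r6" instead, the mul result feeds a multiplier operand
   rather than the accumulator and this returns 0.  */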
23561
23562
23563 /* The EABI says test the least significant bit of a guard variable. */
23564
23565 static bool
23566 arm_cxx_guard_mask_bit (void)
23567 {
23568 return TARGET_AAPCS_BASED;
23569 }
23570
23571
23572 /* The EABI specifies that all array cookies are 8 bytes long. */
23573
23574 static tree
23575 arm_get_cookie_size (tree type)
23576 {
23577 tree size;
23578
23579 if (!TARGET_AAPCS_BASED)
23580 return default_cxx_get_cookie_size (type);
23581
23582 size = build_int_cst (sizetype, 8);
23583 return size;
23584 }
23585
23586
23587 /* The EABI says that array cookies should also contain the element size. */
23588
23589 static bool
23590 arm_cookie_has_size (void)
23591 {
23592 return TARGET_AAPCS_BASED;
23593 }
23594
23595
23596 /* The EABI says constructors and destructors should return a pointer to
23597 the object constructed/destroyed. */
23598
23599 static bool
23600 arm_cxx_cdtor_returns_this (void)
23601 {
23602 return TARGET_AAPCS_BASED;
23603 }
23604
23605 /* The EABI says that an inline function may never be the key
23606 method. */
23607
23608 static bool
23609 arm_cxx_key_method_may_be_inline (void)
23610 {
23611 return !TARGET_AAPCS_BASED;
23612 }
23613
23614 static void
23615 arm_cxx_determine_class_data_visibility (tree decl)
23616 {
23617 if (!TARGET_AAPCS_BASED
23618 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
23619 return;
23620
23621 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
23622 is exported. However, on systems without dynamic vague linkage,
23623 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
23624 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
23625 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
23626 else
23627 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
23628 DECL_VISIBILITY_SPECIFIED (decl) = 1;
23629 }
23630
23631 static bool
23632 arm_cxx_class_data_always_comdat (void)
23633 {
23634 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
23635 vague linkage if the class has no key function. */
23636 return !TARGET_AAPCS_BASED;
23637 }
23638
23639
23640 /* The EABI says __aeabi_atexit should be used to register static
23641 destructors. */
23642
23643 static bool
23644 arm_cxx_use_aeabi_atexit (void)
23645 {
23646 return TARGET_AAPCS_BASED;
23647 }
23648
23649
23650 void
23651 arm_set_return_address (rtx source, rtx scratch)
23652 {
23653 arm_stack_offsets *offsets;
23654 HOST_WIDE_INT delta;
23655 rtx addr;
23656 unsigned long saved_regs;
23657
23658 offsets = arm_get_frame_offsets ();
23659 saved_regs = offsets->saved_regs_mask;
23660
23661 if ((saved_regs & (1 << LR_REGNUM)) == 0)
23662 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23663 else
23664 {
23665 if (frame_pointer_needed)
23666 addr = plus_constant(hard_frame_pointer_rtx, -4);
23667 else
23668 {
23669 /* LR will be the first saved register. */
23670 delta = offsets->outgoing_args - (offsets->frame + 4);
23671
23672
23673 if (delta >= 4096)
23674 {
23675 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
23676 GEN_INT (delta & ~4095)));
23677 addr = scratch;
23678 delta &= 4095;
23679 }
23680 else
23681 addr = stack_pointer_rtx;
23682
23683 addr = plus_constant (addr, delta);
23684 }
23685 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23686 }
23687 }
23688
23689
23690 void
23691 thumb_set_return_address (rtx source, rtx scratch)
23692 {
23693 arm_stack_offsets *offsets;
23694 HOST_WIDE_INT delta;
23695 HOST_WIDE_INT limit;
23696 int reg;
23697 rtx addr;
23698 unsigned long mask;
23699
23700 emit_use (source);
23701
23702 offsets = arm_get_frame_offsets ();
23703 mask = offsets->saved_regs_mask;
23704 if (mask & (1 << LR_REGNUM))
23705 {
23706 limit = 1024;
23707 /* Find the saved regs. */
23708 if (frame_pointer_needed)
23709 {
23710 delta = offsets->soft_frame - offsets->saved_args;
23711 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
23712 if (TARGET_THUMB1)
23713 limit = 128;
23714 }
23715 else
23716 {
23717 delta = offsets->outgoing_args - offsets->saved_args;
23718 reg = SP_REGNUM;
23719 }
23720 /* Allow for the stack frame. */
23721 if (TARGET_THUMB1 && TARGET_BACKTRACE)
23722 delta -= 16;
23723 /* The link register is always the first saved register. */
23724 delta -= 4;
23725
23726 /* Construct the address. */
23727 addr = gen_rtx_REG (SImode, reg);
23728 if (delta > limit)
23729 {
23730 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
23731 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
23732 addr = scratch;
23733 }
23734 else
23735 addr = plus_constant (addr, delta);
23736
23737 emit_move_insn (gen_frame_mem (Pmode, addr), source);
23738 }
23739 else
23740 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
23741 }
23742
23743 /* Implements target hook vector_mode_supported_p. */
23744 bool
23745 arm_vector_mode_supported_p (enum machine_mode mode)
23746 {
23747 /* Neon also supports V2SImode, etc. listed in the clause below. */
23748 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
23749 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
23750 return true;
23751
23752 if ((TARGET_NEON || TARGET_IWMMXT)
23753 && ((mode == V2SImode)
23754 || (mode == V4HImode)
23755 || (mode == V8QImode)))
23756 return true;
23757
23758 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
23759 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
23760 || mode == V2HAmode))
23761 return true;
23762
23763 return false;
23764 }
23765
23766 /* Implements target hook array_mode_supported_p. */
23767
23768 static bool
23769 arm_array_mode_supported_p (enum machine_mode mode,
23770 unsigned HOST_WIDE_INT nelems)
23771 {
23772 if (TARGET_NEON
23773 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
23774 && (nelems >= 2 && nelems <= 4))
23775 return true;
23776
23777 return false;
23778 }
23779
23780 /* Use the option -mvectorize-with-neon-double to override the use of quadword
23781 registers when autovectorizing for Neon, at least until multiple vector
23782 widths are supported properly by the middle-end. */
23783
23784 static enum machine_mode
23785 arm_preferred_simd_mode (enum machine_mode mode)
23786 {
23787 if (TARGET_NEON)
23788 switch (mode)
23789 {
23790 case SFmode:
23791 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
23792 case SImode:
23793 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
23794 case HImode:
23795 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
23796 case QImode:
23797 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
23798 case DImode:
23799 if (!TARGET_NEON_VECTORIZE_DOUBLE)
23800 return V2DImode;
23801 break;
23802
23803 default:;
23804 }
23805
23806 if (TARGET_REALLY_IWMMXT)
23807 switch (mode)
23808 {
23809 case SImode:
23810 return V2SImode;
23811 case HImode:
23812 return V4HImode;
23813 case QImode:
23814 return V8QImode;
23815
23816 default:;
23817 }
23818
23819 return word_mode;
23820 }
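/* Illustrative summary (not in the original source): with NEON enabled the
   default preference is for quadword vectors, e.g. SImode -> V4SImode and
   SFmode -> V4SFmode; compiling with -mvectorize-with-neon-double flips these
   to V2SImode and V2SFmode.  On iWMMXt-only targets the doubleword modes
   V2SImode/V4HImode/V8QImode are chosen instead.  */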
23821
23822 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
23823
23824 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
23825 using r0-r4 for function arguments, r7 for the stack frame and don't have
23826 enough left over to do doubleword arithmetic. For Thumb-2 all the
23827 potentially problematic instructions accept high registers so this is not
23828 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
23829 that require many low registers. */
23830 static bool
23831 arm_class_likely_spilled_p (reg_class_t rclass)
23832 {
23833 if ((TARGET_THUMB1 && rclass == LO_REGS)
23834 || rclass == CC_REG)
23835 return true;
23836
23837 return false;
23838 }
23839
23840 /* Implements target hook small_register_classes_for_mode_p. */
23841 bool
23842 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
23843 {
23844 return TARGET_THUMB1;
23845 }
23846
23847 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
23848 ARM insns and therefore guarantee that the shift count is modulo 256.
23849 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
23850 guarantee no particular behavior for out-of-range counts. */
23851
23852 static unsigned HOST_WIDE_INT
23853 arm_shift_truncation_mask (enum machine_mode mode)
23854 {
23855 return mode == SImode ? 255 : 0;
23856 }
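/* Example (illustrative, not in the original source): because SImode shifts
   report a truncation mask of 255, the explicit masking in

       unsigned int f (unsigned int x, unsigned int n)
       {
         return x << (n & 255);
       }

   can be dropped, since the hardware already truncates a register-specified
   shift amount to 8 bits.  DImode shifts report 0, so no such assumption is
   made for them.  */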
23857
23858
23859 /* Map internal gcc register numbers to DWARF2 register numbers. */
23860
23861 unsigned int
23862 arm_dbx_register_number (unsigned int regno)
23863 {
23864 if (regno < 16)
23865 return regno;
23866
23867 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
23868 compatibility. The EABI defines them as registers 96-103. */
23869 if (IS_FPA_REGNUM (regno))
23870 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
23871
23872 if (IS_VFP_REGNUM (regno))
23873 {
23874 /* See comment in arm_dwarf_register_span. */
23875 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23876 return 64 + regno - FIRST_VFP_REGNUM;
23877 else
23878 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
23879 }
23880
23881 if (IS_IWMMXT_GR_REGNUM (regno))
23882 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
23883
23884 if (IS_IWMMXT_REGNUM (regno))
23885 return 112 + regno - FIRST_IWMMXT_REGNUM;
23886
23887 gcc_unreachable ();
23888 }
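/* Illustrative mapping (not in the original source): core registers r0-r15
   keep their own numbers, VFP single registers s0-s31 map to DWARF numbers
   64-95, and the upper VFPv3 double registers d16-d31 map to 272-287
   (256 plus the D-register number).  */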
23889
23890 /* Dwarf models VFPv3 registers as 32 64-bit registers.
23891 GCC models them as 64 32-bit registers, so we need to describe this to
23892 the DWARF generation code. Other registers can use the default. */
23893 static rtx
23894 arm_dwarf_register_span (rtx rtl)
23895 {
23896 unsigned regno;
23897 int nregs;
23898 int i;
23899 rtx p;
23900
23901 regno = REGNO (rtl);
23902 if (!IS_VFP_REGNUM (regno))
23903 return NULL_RTX;
23904
23905 /* XXX FIXME: The EABI defines two VFP register ranges:
23906 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
23907 256-287: D0-D31
23908 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
23909 corresponding D register. Until GDB supports this, we shall use the
23910 legacy encodings. We also use these encodings for D0-D15 for
23911 compatibility with older debuggers. */
23912 if (VFP_REGNO_OK_FOR_SINGLE (regno))
23913 return NULL_RTX;
23914
23915 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
23916 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
23917 regno = (regno - FIRST_VFP_REGNUM) / 2;
23918 for (i = 0; i < nregs; i++)
23919 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
23920
23921 return p;
23922 }
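/* Worked example (illustrative, not in the original source): a 128-bit NEON
   value living in d16/d17 is described to DWARF as a PARALLEL of two DImode
   pieces with register numbers 272 and 273, while values held in d0-d15
   (or s0-s31) return NULL_RTX and fall back to the legacy single-precision
   numbering used above.  */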
23923
23924 #if ARM_UNWIND_INFO
23925 /* Emit unwind directives for a store-multiple instruction or stack pointer
23926 push during alignment.
23927 These should only ever be generated by the function prologue code, so
23928 expect them to have a particular form. */
23929
23930 static void
23931 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
23932 {
23933 int i;
23934 HOST_WIDE_INT offset;
23935 HOST_WIDE_INT nregs;
23936 int reg_size;
23937 unsigned reg;
23938 unsigned lastreg;
23939 rtx e;
23940
23941 e = XVECEXP (p, 0, 0);
23942 if (GET_CODE (e) != SET)
23943 abort ();
23944
23945 /* First insn will adjust the stack pointer. */
23946 if (GET_CODE (e) != SET
23947 || GET_CODE (XEXP (e, 0)) != REG
23948 || REGNO (XEXP (e, 0)) != SP_REGNUM
23949 || GET_CODE (XEXP (e, 1)) != PLUS)
23950 abort ();
23951
23952 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
23953 nregs = XVECLEN (p, 0) - 1;
23954
23955 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
23956 if (reg < 16)
23957 {
23958 /* The function prologue may also push pc, but not annotate it as it is
23959 never restored. We turn this into a stack pointer adjustment. */
23960 if (nregs * 4 == offset - 4)
23961 {
23962 fprintf (asm_out_file, "\t.pad #4\n");
23963 offset -= 4;
23964 }
23965 reg_size = 4;
23966 fprintf (asm_out_file, "\t.save {");
23967 }
23968 else if (IS_VFP_REGNUM (reg))
23969 {
23970 reg_size = 8;
23971 fprintf (asm_out_file, "\t.vsave {");
23972 }
23973 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
23974 {
23975 /* FPA registers are done differently. */
23976 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
23977 return;
23978 }
23979 else
23980 /* Unknown register type. */
23981 abort ();
23982
23983 /* If the stack increment doesn't match the size of the saved registers,
23984 something has gone horribly wrong. */
23985 if (offset != nregs * reg_size)
23986 abort ();
23987
23988 offset = 0;
23989 lastreg = 0;
23990 /* The remaining insns will describe the stores. */
23991 for (i = 1; i <= nregs; i++)
23992 {
23993 /* Expect (set (mem <addr>) (reg)).
23994 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
23995 e = XVECEXP (p, 0, i);
23996 if (GET_CODE (e) != SET
23997 || GET_CODE (XEXP (e, 0)) != MEM
23998 || GET_CODE (XEXP (e, 1)) != REG)
23999 abort ();
24000
24001 reg = REGNO (XEXP (e, 1));
24002 if (reg < lastreg)
24003 abort ();
24004
24005 if (i != 1)
24006 fprintf (asm_out_file, ", ");
24007 /* We can't use %r for vfp because we need to use the
24008 double precision register names. */
24009 if (IS_VFP_REGNUM (reg))
24010 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
24011 else
24012 asm_fprintf (asm_out_file, "%r", reg);
24013
24014 #ifdef ENABLE_CHECKING
24015 /* Check that the addresses are consecutive. */
24016 e = XEXP (XEXP (e, 0), 0);
24017 if (GET_CODE (e) == PLUS)
24018 {
24019 offset += reg_size;
24020 if (GET_CODE (XEXP (e, 0)) != REG
24021 || REGNO (XEXP (e, 0)) != SP_REGNUM
24022 || GET_CODE (XEXP (e, 1)) != CONST_INT
24023 || offset != INTVAL (XEXP (e, 1)))
24024 abort ();
24025 }
24026 else if (i != 1
24027 || GET_CODE (e) != REG
24028 || REGNO (e) != SP_REGNUM)
24029 abort ();
24030 #endif
24031 }
24032 fprintf (asm_out_file, "}\n");
24033 }
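/* Illustrative output (not in the original source): a prologue
   "push {r4, r5, lr}" store-multiple produces the directive

       .save {r4, r5, lr}

   a VFP store-multiple of d8/d9 produces

       .vsave {d8, d9}

   and a push that also stacks pc is split into ".pad #4" followed by the
   register list.  */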
24034
24035 /* Emit unwind directives for a SET. */
24036
24037 static void
24038 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
24039 {
24040 rtx e0;
24041 rtx e1;
24042 unsigned reg;
24043
24044 e0 = XEXP (p, 0);
24045 e1 = XEXP (p, 1);
24046 switch (GET_CODE (e0))
24047 {
24048 case MEM:
24049 /* Pushing a single register. */
24050 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
24051 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
24052 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
24053 abort ();
24054
24055 asm_fprintf (asm_out_file, "\t.save ");
24056 if (IS_VFP_REGNUM (REGNO (e1)))
24057 asm_fprintf(asm_out_file, "{d%d}\n",
24058 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
24059 else
24060 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
24061 break;
24062
24063 case REG:
24064 if (REGNO (e0) == SP_REGNUM)
24065 {
24066 /* A stack increment. */
24067 if (GET_CODE (e1) != PLUS
24068 || GET_CODE (XEXP (e1, 0)) != REG
24069 || REGNO (XEXP (e1, 0)) != SP_REGNUM
24070 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
24071 abort ();
24072
24073 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
24074 -INTVAL (XEXP (e1, 1)));
24075 }
24076 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
24077 {
24078 HOST_WIDE_INT offset;
24079
24080 if (GET_CODE (e1) == PLUS)
24081 {
24082 if (GET_CODE (XEXP (e1, 0)) != REG
24083 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
24084 abort ();
24085 reg = REGNO (XEXP (e1, 0));
24086 offset = INTVAL (XEXP (e1, 1));
24087 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
24088 HARD_FRAME_POINTER_REGNUM, reg,
24089 offset);
24090 }
24091 else if (GET_CODE (e1) == REG)
24092 {
24093 reg = REGNO (e1);
24094 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
24095 HARD_FRAME_POINTER_REGNUM, reg);
24096 }
24097 else
24098 abort ();
24099 }
24100 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
24101 {
24102 /* Move from sp to reg. */
24103 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
24104 }
24105 else if (GET_CODE (e1) == PLUS
24106 && GET_CODE (XEXP (e1, 0)) == REG
24107 && REGNO (XEXP (e1, 0)) == SP_REGNUM
24108 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
24109 {
24110 /* Set reg to offset from sp. */
24111 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
24112 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
24113 }
24114 else
24115 abort ();
24116 break;
24117
24118 default:
24119 abort ();
24120 }
24121 }
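/* Illustrative output (not in the original source): a prologue
   "sub sp, sp, #16" becomes ".pad #16", establishing the frame pointer as
   sp plus an offset becomes a ".setfp" directive carrying that offset, and a
   single-register push such as "str r4, [sp, #-4]!" becomes ".save {r4}".  */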
24122
24123
24124 /* Emit unwind directives for the given insn. */
24125
24126 static void
24127 arm_unwind_emit (FILE * asm_out_file, rtx insn)
24128 {
24129 rtx note, pat;
24130 bool handled_one = false;
24131
24132 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24133 return;
24134
24135 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24136 && (TREE_NOTHROW (current_function_decl)
24137 || crtl->all_throwers_are_sibcalls))
24138 return;
24139
24140 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
24141 return;
24142
24143 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
24144 {
24145 pat = XEXP (note, 0);
24146 switch (REG_NOTE_KIND (note))
24147 {
24148 case REG_FRAME_RELATED_EXPR:
24149 goto found;
24150
24151 case REG_CFA_REGISTER:
24152 if (pat == NULL)
24153 {
24154 pat = PATTERN (insn);
24155 if (GET_CODE (pat) == PARALLEL)
24156 pat = XVECEXP (pat, 0, 0);
24157 }
24158
24159 /* Only emitted for IS_STACKALIGN re-alignment. */
24160 {
24161 rtx dest, src;
24162 unsigned reg;
24163
24164 src = SET_SRC (pat);
24165 dest = SET_DEST (pat);
24166
24167 gcc_assert (src == stack_pointer_rtx);
24168 reg = REGNO (dest);
24169 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24170 reg + 0x90, reg);
24171 }
24172 handled_one = true;
24173 break;
24174
24175 case REG_CFA_DEF_CFA:
24176 case REG_CFA_EXPRESSION:
24177 case REG_CFA_ADJUST_CFA:
24178 case REG_CFA_OFFSET:
24179 /* ??? Only handling here what we actually emit. */
24180 gcc_unreachable ();
24181
24182 default:
24183 break;
24184 }
24185 }
24186 if (handled_one)
24187 return;
24188 pat = PATTERN (insn);
24189 found:
24190
24191 switch (GET_CODE (pat))
24192 {
24193 case SET:
24194 arm_unwind_emit_set (asm_out_file, pat);
24195 break;
24196
24197 case SEQUENCE:
24198 /* Store multiple. */
24199 arm_unwind_emit_sequence (asm_out_file, pat);
24200 break;
24201
24202 default:
24203 abort();
24204 }
24205 }
24206
24207
24208 /* Output a reference from a function exception table to the type_info
24209 object X. The EABI specifies that the symbol should be relocated by
24210 an R_ARM_TARGET2 relocation. */
24211
24212 static bool
24213 arm_output_ttype (rtx x)
24214 {
24215 fputs ("\t.word\t", asm_out_file);
24216 output_addr_const (asm_out_file, x);
24217 /* Use special relocations for symbol references. */
24218 if (GET_CODE (x) != CONST_INT)
24219 fputs ("(TARGET2)", asm_out_file);
24220 fputc ('\n', asm_out_file);
24221
24222 return TRUE;
24223 }
24224
24225 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
24226
24227 static void
24228 arm_asm_emit_except_personality (rtx personality)
24229 {
24230 fputs ("\t.personality\t", asm_out_file);
24231 output_addr_const (asm_out_file, personality);
24232 fputc ('\n', asm_out_file);
24233 }
24234
24235 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
24236
24237 static void
24238 arm_asm_init_sections (void)
24239 {
24240 exception_section = get_unnamed_section (0, output_section_asm_op,
24241 "\t.handlerdata");
24242 }
24243 #endif /* ARM_UNWIND_INFO */
24244
24245 /* Output unwind directives for the start/end of a function. */
24246
24247 void
24248 arm_output_fn_unwind (FILE * f, bool prologue)
24249 {
24250 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24251 return;
24252
24253 if (prologue)
24254 fputs ("\t.fnstart\n", f);
24255 else
24256 {
24257 /* If this function will never be unwound, then mark it as such.
24258 The same condition is used in arm_unwind_emit to suppress
24259 the frame annotations. */
24260 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24261 && (TREE_NOTHROW (current_function_decl)
24262 || crtl->all_throwers_are_sibcalls))
24263 fputs("\t.cantunwind\n", f);
24264
24265 fputs ("\t.fnend\n", f);
24266 }
24267 }
24268
24269 static bool
24270 arm_emit_tls_decoration (FILE *fp, rtx x)
24271 {
24272 enum tls_reloc reloc;
24273 rtx val;
24274
24275 val = XVECEXP (x, 0, 0);
24276 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
24277
24278 output_addr_const (fp, val);
24279
24280 switch (reloc)
24281 {
24282 case TLS_GD32:
24283 fputs ("(tlsgd)", fp);
24284 break;
24285 case TLS_LDM32:
24286 fputs ("(tlsldm)", fp);
24287 break;
24288 case TLS_LDO32:
24289 fputs ("(tlsldo)", fp);
24290 break;
24291 case TLS_IE32:
24292 fputs ("(gottpoff)", fp);
24293 break;
24294 case TLS_LE32:
24295 fputs ("(tpoff)", fp);
24296 break;
24297 case TLS_DESCSEQ:
24298 fputs ("(tlsdesc)", fp);
24299 break;
24300 default:
24301 gcc_unreachable ();
24302 }
24303
24304 switch (reloc)
24305 {
24306 case TLS_GD32:
24307 case TLS_LDM32:
24308 case TLS_IE32:
24309 case TLS_DESCSEQ:
24310 fputs (" + (. - ", fp);
24311 output_addr_const (fp, XVECEXP (x, 0, 2));
24312 /* For TLS_DESCSEQ the 3rd operand encodes thumbness; it is added rather than subtracted. */
24313 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
24314 output_addr_const (fp, XVECEXP (x, 0, 3));
24315 fputc (')', fp);
24316 break;
24317 default:
24318 break;
24319 }
24320
24321 return TRUE;
24322 }
24323
24324 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
24325
24326 static void
24327 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
24328 {
24329 gcc_assert (size == 4);
24330 fputs ("\t.word\t", file);
24331 output_addr_const (file, x);
24332 fputs ("(tlsldo)", file);
24333 }
24334
24335 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
24336
24337 static bool
24338 arm_output_addr_const_extra (FILE *fp, rtx x)
24339 {
24340 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
24341 return arm_emit_tls_decoration (fp, x);
24342 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24343 {
24344 char label[256];
24345 int labelno = INTVAL (XVECEXP (x, 0, 0));
24346
24347 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24348 assemble_name_raw (fp, label);
24349
24350 return TRUE;
24351 }
24352 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24353 {
24354 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24355 if (GOT_PCREL)
24356 fputs ("+.", fp);
24357 fputs ("-(", fp);
24358 output_addr_const (fp, XVECEXP (x, 0, 0));
24359 fputc (')', fp);
24360 return TRUE;
24361 }
24362 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24363 {
24364 output_addr_const (fp, XVECEXP (x, 0, 0));
24365 if (GOT_PCREL)
24366 fputs ("+.", fp);
24367 fputs ("-(", fp);
24368 output_addr_const (fp, XVECEXP (x, 0, 1));
24369 fputc (')', fp);
24370 return TRUE;
24371 }
24372 else if (GET_CODE (x) == CONST_VECTOR)
24373 return arm_emit_vector_const (fp, x);
24374
24375 return FALSE;
24376 }
24377
24378 /* Output assembly for a shift instruction.
24379 SET_FLAGS determines how the instruction modifies the condition codes.
24380 0 - Do not set condition codes.
24381 1 - Set condition codes.
24382 2 - Use smallest instruction. */
24383 const char *
24384 arm_output_shift(rtx * operands, int set_flags)
24385 {
24386 char pattern[100];
24387 static const char flag_chars[3] = {'?', '.', '!'};
24388 const char *shift;
24389 HOST_WIDE_INT val;
24390 char c;
24391
24392 c = flag_chars[set_flags];
24393 if (TARGET_UNIFIED_ASM)
24394 {
24395 shift = shift_op(operands[3], &val);
24396 if (shift)
24397 {
24398 if (val != -1)
24399 operands[2] = GEN_INT(val);
24400 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24401 }
24402 else
24403 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24404 }
24405 else
24406 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24407 output_asm_insn (pattern, operands);
24408 return "";
24409 }
24410
24411 /* Output a Thumb-1 casesi dispatch sequence. */
24412 const char *
24413 thumb1_output_casesi (rtx *operands)
24414 {
24415 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
24416
24417 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24418
24419 switch (GET_MODE(diff_vec))
24420 {
24421 case QImode:
24422 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24423 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
24424 case HImode:
24425 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24426 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
24427 case SImode:
24428 return "bl\t%___gnu_thumb1_case_si";
24429 default:
24430 gcc_unreachable ();
24431 }
24432 }
24433
24434 /* Output a Thumb-2 casesi instruction. */
24435 const char *
24436 thumb2_output_casesi (rtx *operands)
24437 {
24438 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
24439
24440 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24441
24442 output_asm_insn ("cmp\t%0, %1", operands);
24443 output_asm_insn ("bhi\t%l3", operands);
24444 switch (GET_MODE(diff_vec))
24445 {
24446 case QImode:
24447 return "tbb\t[%|pc, %0]";
24448 case HImode:
24449 return "tbh\t[%|pc, %0, lsl #1]";
24450 case SImode:
24451 if (flag_pic)
24452 {
24453 output_asm_insn ("adr\t%4, %l2", operands);
24454 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
24455 output_asm_insn ("add\t%4, %4, %5", operands);
24456 return "bx\t%4";
24457 }
24458 else
24459 {
24460 output_asm_insn ("adr\t%4, %l2", operands);
24461 return "ldr\t%|pc, [%4, %0, lsl #2]";
24462 }
24463 default:
24464 gcc_unreachable ();
24465 }
24466 }
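/* Illustrative output (not in the original source): for a HImode dispatch
   table the emitted sequence is roughly

       cmp   r0, r1
       bhi   .Ldefault
       tbh   [pc, r0, lsl #1]

   with tbb used for QImode tables and an adr/ldr (or adr/ldr/add/bx when
   compiling PIC) sequence for SImode tables.  */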
24467
24468 /* Most ARM cores are single issue, but some newer ones can dual issue.
24469 The scheduler descriptions rely on this being correct. */
24470 static int
24471 arm_issue_rate (void)
24472 {
24473 switch (arm_tune)
24474 {
24475 case cortexa15:
24476 return 3;
24477
24478 case cortexr4:
24479 case cortexr4f:
24480 case cortexr5:
24481 case genericv7a:
24482 case cortexa5:
24483 case cortexa8:
24484 case cortexa9:
24485 case fa726te:
24486 return 2;
24487
24488 default:
24489 return 1;
24490 }
24491 }
24492
24493 /* A table and a function to perform ARM-specific name mangling for
24494 NEON vector types in order to conform to the AAPCS (see "Procedure
24495 Call Standard for the ARM Architecture", Appendix A). To qualify
24496 for emission with the mangled names defined in that document, a
24497 vector type must not only be of the correct mode but also be
24498 composed of NEON vector element types (e.g. __builtin_neon_qi). */
24499 typedef struct
24500 {
24501 enum machine_mode mode;
24502 const char *element_type_name;
24503 const char *aapcs_name;
24504 } arm_mangle_map_entry;
24505
24506 static arm_mangle_map_entry arm_mangle_map[] = {
24507 /* 64-bit containerized types. */
24508 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
24509 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
24510 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
24511 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
24512 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
24513 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
24514 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
24515 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
24516 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
24517 /* 128-bit containerized types. */
24518 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
24519 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
24520 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
24521 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
24522 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
24523 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
24524 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
24525 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
24526 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
24527 { VOIDmode, NULL, NULL }
24528 };
24529
24530 const char *
24531 arm_mangle_type (const_tree type)
24532 {
24533 arm_mangle_map_entry *pos = arm_mangle_map;
24534
24535 /* The ARM ABI documents (10th October 2008) say that "__va_list"
24536 has to be mangled as if it is in the "std" namespace. */
24537 if (TARGET_AAPCS_BASED
24538 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
24539 {
24540 static bool warned;
24541 if (!warned && warn_psabi && !in_system_header)
24542 {
24543 warned = true;
24544 inform (input_location,
24545 "the mangling of %<va_list%> has changed in GCC 4.4");
24546 }
24547 return "St9__va_list";
24548 }
24549
24550 /* Half-precision float. */
24551 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
24552 return "Dh";
24553
24554 if (TREE_CODE (type) != VECTOR_TYPE)
24555 return NULL;
24556
24557 /* Check the mode of the vector type, and the name of the vector
24558 element type, against the table. */
24559 while (pos->mode != VOIDmode)
24560 {
24561 tree elt_type = TREE_TYPE (type);
24562
24563 if (pos->mode == TYPE_MODE (type)
24564 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
24565 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
24566 pos->element_type_name))
24567 return pos->aapcs_name;
24568
24569 pos++;
24570 }
24571
24572 /* Use the default mangling for unrecognized (possibly user-defined)
24573 vector types. */
24574 return NULL;
24575 }
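/* Worked example (illustrative, not in the original source): the NEON type
   int8x8_t has mode V8QImode and element type __builtin_neon_qi, so it is
   mangled as "15__simd64_int8_t"; a function "void f (int8x8_t)" would thus
   mangle to "_Z1f15__simd64_int8_t".  */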
24576
24577 /* Order of allocation of core registers for Thumb: this allocation is
24578 written over the corresponding initial entries of the array
24579 initialized with REG_ALLOC_ORDER. We allocate all low registers
24580 first. Saving and restoring a low register is usually cheaper than
24581 using a call-clobbered high register. */
24582
24583 static const int thumb_core_reg_alloc_order[] =
24584 {
24585 3, 2, 1, 0, 4, 5, 6, 7,
24586 14, 12, 8, 9, 10, 11, 13, 15
24587 };
24588
24589 /* Adjust register allocation order when compiling for Thumb. */
24590
24591 void
24592 arm_order_regs_for_local_alloc (void)
24593 {
24594 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
24595 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
24596 if (TARGET_THUMB)
24597 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
24598 sizeof (thumb_core_reg_alloc_order));
24599 }
24600
24601 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
24602
24603 bool
24604 arm_frame_pointer_required (void)
24605 {
24606 return (cfun->has_nonlocal_label
24607 || SUBTARGET_FRAME_POINTER_REQUIRED
24608 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
24609 }
24610
24611 /* Only Thumb-1 lacks conditional execution, so return true if
24612 the target is not Thumb-1. */
24613 static bool
24614 arm_have_conditional_execution (void)
24615 {
24616 return !TARGET_THUMB1;
24617 }
24618
24619 static unsigned int
24620 arm_autovectorize_vector_sizes (void)
24621 {
24622 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
24623 }
24624
24625 static bool
24626 arm_vector_alignment_reachable (const_tree type, bool is_packed)
24627 {
24628 /* Vectors which aren't in packed structures will not be less aligned than
24629 the natural alignment of their element type, so this is safe. */
24630 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24631 return !is_packed;
24632
24633 return default_builtin_vector_alignment_reachable (type, is_packed);
24634 }
24635
24636 static bool
24637 arm_builtin_support_vector_misalignment (enum machine_mode mode,
24638 const_tree type, int misalignment,
24639 bool is_packed)
24640 {
24641 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
24642 {
24643 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
24644
24645 if (is_packed)
24646 return align == 1;
24647
24648 /* If the misalignment is unknown, we should be able to handle the access
24649 so long as it is not to a member of a packed data structure. */
24650 if (misalignment == -1)
24651 return true;
24652
24653 /* Return true if the misalignment is a multiple of the natural alignment
24654 of the vector's element type. This is probably always going to be
24655 true in practice, since we've already established that this isn't a
24656 packed access. */
24657 return ((misalignment % align) == 0);
24658 }
24659
24660 return default_builtin_support_vector_misalignment (mode, type, misalignment,
24661 is_packed);
24662 }
24663
24664 static void
24665 arm_conditional_register_usage (void)
24666 {
24667 int regno;
24668
24669 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
24670 {
24671 for (regno = FIRST_FPA_REGNUM;
24672 regno <= LAST_FPA_REGNUM; ++regno)
24673 fixed_regs[regno] = call_used_regs[regno] = 1;
24674 }
24675
24676 if (TARGET_THUMB1 && optimize_size)
24677 {
24678 /* When optimizing for size on Thumb-1, it's better not
24679 to use the HI regs, because of the overhead of
24680 stacking them. */
24681 for (regno = FIRST_HI_REGNUM;
24682 regno <= LAST_HI_REGNUM; ++regno)
24683 fixed_regs[regno] = call_used_regs[regno] = 1;
24684 }
24685
24686 /* The link register can be clobbered by any branch insn,
24687 but we have no way to track that at present, so mark
24688 it as unavailable. */
24689 if (TARGET_THUMB1)
24690 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
24691
24692 if (TARGET_32BIT && TARGET_HARD_FLOAT)
24693 {
24694 if (TARGET_MAVERICK)
24695 {
24696 for (regno = FIRST_FPA_REGNUM;
24697 regno <= LAST_FPA_REGNUM; ++ regno)
24698 fixed_regs[regno] = call_used_regs[regno] = 1;
24699 for (regno = FIRST_CIRRUS_FP_REGNUM;
24700 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
24701 {
24702 fixed_regs[regno] = 0;
24703 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
24704 }
24705 }
24706 if (TARGET_VFP)
24707 {
24708 /* VFPv3 registers are disabled when earlier VFP
24709 versions are selected due to the definition of
24710 LAST_VFP_REGNUM. */
24711 for (regno = FIRST_VFP_REGNUM;
24712 regno <= LAST_VFP_REGNUM; ++ regno)
24713 {
24714 fixed_regs[regno] = 0;
24715 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
24716 || regno >= FIRST_VFP_REGNUM + 32;
24717 }
24718 }
24719 }
24720
24721 if (TARGET_REALLY_IWMMXT)
24722 {
24723 regno = FIRST_IWMMXT_GR_REGNUM;
24724 /* The 2002/10/09 revision of the XScale ABI has wCG0
24725 and wCG1 as call-preserved registers. The 2002/11/21
24726 revision changed this so that all wCG registers are
24727 scratch registers. */
24728 for (regno = FIRST_IWMMXT_GR_REGNUM;
24729 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
24730 fixed_regs[regno] = 0;
24731 /* The XScale ABI has wR0 - wR9 as scratch registers,
24732 the rest as call-preserved registers. */
24733 for (regno = FIRST_IWMMXT_REGNUM;
24734 regno <= LAST_IWMMXT_REGNUM; ++ regno)
24735 {
24736 fixed_regs[regno] = 0;
24737 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
24738 }
24739 }
24740
24741 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
24742 {
24743 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24744 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
24745 }
24746 else if (TARGET_APCS_STACK)
24747 {
24748 fixed_regs[10] = 1;
24749 call_used_regs[10] = 1;
24750 }
24751 /* -mcaller-super-interworking reserves r11 for calls to
24752 _interwork_r11_call_via_rN(). Making the register global
24753 is an easy way of ensuring that it remains valid for all
24754 calls. */
24755 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
24756 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
24757 {
24758 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24759 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24760 if (TARGET_CALLER_INTERWORKING)
24761 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
24762 }
24763 SUBTARGET_CONDITIONAL_REGISTER_USAGE
24764 }
24765
24766 static reg_class_t
24767 arm_preferred_rename_class (reg_class_t rclass)
24768 {
24769 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
24770 using GENERAL_REGS, so during the register rename pass we prefer LO_REGS
24771 in order to reduce code size. */
24772 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
24773 return LO_REGS;
24774 else
24775 return NO_REGS;
24776 }
24777
24778 /* Compute the attribute "length" of insn "*push_multi".
24779 So this function MUST be kept in sync with that insn pattern. */
24780 int
24781 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
24782 {
24783 int i, regno, hi_reg;
24784 int num_saves = XVECLEN (parallel_op, 0);
24785
24786 /* ARM mode. */
24787 if (TARGET_ARM)
24788 return 4;
24789 /* Thumb1 mode. */
24790 if (TARGET_THUMB1)
24791 return 2;
24792
24793 /* Thumb2 mode. */
24794 regno = REGNO (first_op);
24795 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
24796 for (i = 1; i < num_saves && !hi_reg; i++)
24797 {
24798 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
24799 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
24800 }
24801
24802 if (!hi_reg)
24803 return 2;
24804 return 4;
24805 }
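/* Worked example (illustrative, not in the original source): in Thumb-2,
   "push {r4, r5, lr}" uses only low registers (plus lr) and fits the 16-bit
   encoding, giving a length of 2; "push {r4, r8}" involves a high register
   other than lr and needs the 32-bit encoding, giving 4.  */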
24806
24807 /* Compute the number of instructions emitted by output_move_double. */
24808 int
24809 arm_count_output_move_double_insns (rtx *operands)
24810 {
24811 int count;
24812 rtx ops[2];
24813 /* output_move_double may modify the operands array, so call it
24814 here on a copy of the array. */
24815 ops[0] = operands[0];
24816 ops[1] = operands[1];
24817 output_move_double (ops, false, &count);
24818 return count;
24819 }
24820
24821 int
24822 vfp3_const_double_for_fract_bits (rtx operand)
24823 {
24824 REAL_VALUE_TYPE r0;
24825
24826 if (GET_CODE (operand) != CONST_DOUBLE)
24827 return 0;
24828
24829 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
24830 if (exact_real_inverse (DFmode, &r0))
24831 {
24832 if (exact_real_truncate (DFmode, &r0))
24833 {
24834 HOST_WIDE_INT value = real_to_integer (&r0);
24835 value = value & 0xffffffff;
24836 if ((value != 0) && ( (value & (value - 1)) == 0))
24837 return int_log2 (value);
24838 }
24839 }
24840 return 0;
24841 }
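/* Worked examples (illustrative, not in the original source): the constant
   0.5 is the exact reciprocal of 2, so this returns 1; 1.0/65536.0 returns
   16; 0.75 is not the reciprocal of a power of two and returns 0.  */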
24842 \f
24843 /* Emit a memory barrier around an atomic sequence according to MODEL. */
24844
24845 static void
24846 arm_pre_atomic_barrier (enum memmodel model)
24847 {
24848 switch (model)
24849 {
24850 case MEMMODEL_RELAXED:
24851 case MEMMODEL_CONSUME:
24852 case MEMMODEL_ACQUIRE:
24853 break;
24854 case MEMMODEL_RELEASE:
24855 case MEMMODEL_ACQ_REL:
24856 case MEMMODEL_SEQ_CST:
24857 emit_insn (gen_memory_barrier ());
24858 break;
24859 default:
24860 gcc_unreachable ();
24861 }
24862 }
24863
24864 static void
24865 arm_post_atomic_barrier (enum memmodel model)
24866 {
24867 switch (model)
24868 {
24869 case MEMMODEL_RELAXED:
24870 case MEMMODEL_CONSUME:
24871 case MEMMODEL_RELEASE:
24872 break;
24873 case MEMMODEL_ACQUIRE:
24874 case MEMMODEL_ACQ_REL:
24875 case MEMMODEL_SEQ_CST:
24876 emit_insn (gen_memory_barrier ());
24877 break;
24878 default:
24879 gcc_unreachable ();
24880 }
24881 }
24882
24883 /* Emit the load-exclusive and store-exclusive instructions. */
24884
24885 static void
24886 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
24887 {
24888 rtx (*gen) (rtx, rtx);
24889
24890 switch (mode)
24891 {
24892 case QImode: gen = gen_arm_load_exclusiveqi; break;
24893 case HImode: gen = gen_arm_load_exclusivehi; break;
24894 case SImode: gen = gen_arm_load_exclusivesi; break;
24895 case DImode: gen = gen_arm_load_exclusivedi; break;
24896 default:
24897 gcc_unreachable ();
24898 }
24899
24900 emit_insn (gen (rval, mem));
24901 }
24902
24903 static void
24904 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
24905 {
24906 rtx (*gen) (rtx, rtx, rtx);
24907
24908 switch (mode)
24909 {
24910 case QImode: gen = gen_arm_store_exclusiveqi; break;
24911 case HImode: gen = gen_arm_store_exclusivehi; break;
24912 case SImode: gen = gen_arm_store_exclusivesi; break;
24913 case DImode: gen = gen_arm_store_exclusivedi; break;
24914 default:
24915 gcc_unreachable ();
24916 }
24917
24918 emit_insn (gen (bval, rval, mem));
24919 }
24920
24921 /* Mark the previous jump instruction as unlikely. */
24922
24923 static void
24924 emit_unlikely_jump (rtx insn)
24925 {
24926 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
24927
24928 insn = emit_jump_insn (insn);
24929 add_reg_note (insn, REG_BR_PROB, very_unlikely);
24930 }
24931
24932 /* Expand a compare and swap pattern. */
24933
24934 void
24935 arm_expand_compare_and_swap (rtx operands[])
24936 {
24937 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
24938 enum machine_mode mode;
24939 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
24940
24941 bval = operands[0];
24942 rval = operands[1];
24943 mem = operands[2];
24944 oldval = operands[3];
24945 newval = operands[4];
24946 is_weak = operands[5];
24947 mod_s = operands[6];
24948 mod_f = operands[7];
24949 mode = GET_MODE (mem);
24950
24951 switch (mode)
24952 {
24953 case QImode:
24954 case HImode:
24955 /* For narrow modes, we're going to perform the comparison in SImode,
24956 so do the zero-extension now. */
24957 rval = gen_reg_rtx (SImode);
24958 oldval = convert_modes (SImode, mode, oldval, true);
24959 /* FALLTHRU */
24960
24961 case SImode:
24962 /* Force the value into a register if needed. We waited until after
24963 the zero-extension above to do this properly. */
24964 if (!arm_add_operand (oldval, mode))
24965 oldval = force_reg (mode, oldval);
24966 break;
24967
24968 case DImode:
24969 if (!cmpdi_operand (oldval, mode))
24970 oldval = force_reg (mode, oldval);
24971 break;
24972
24973 default:
24974 gcc_unreachable ();
24975 }
24976
24977 switch (mode)
24978 {
24979 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
24980 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
24981 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
24982 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
24983 default:
24984 gcc_unreachable ();
24985 }
24986
24987 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
24988
24989 if (mode == QImode || mode == HImode)
24990 emit_move_insn (operands[1], gen_lowpart (mode, rval));
24991
24992 /* In all cases, we arrange for success to be signaled by Z set.
24993 This arrangement allows for the boolean result to be used directly
24994 in a subsequent branch, post optimization. */
24995 x = gen_rtx_REG (CCmode, CC_REGNUM);
24996 x = gen_rtx_EQ (SImode, x, const0_rtx);
24997 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
24998 }
24999
25000 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
25001 another memory store between the load-exclusive and store-exclusive can
25002 reset the monitor from Exclusive to Open state. This means we must wait
25003 until after reload to split the pattern, lest we get a register spill in
25004 the middle of the atomic sequence. */
25005
25006 void
25007 arm_split_compare_and_swap (rtx operands[])
25008 {
25009 rtx rval, mem, oldval, newval, scratch;
25010 enum machine_mode mode;
25011 enum memmodel mod_s, mod_f;
25012 bool is_weak;
25013 rtx label1, label2, x, cond;
25014
25015 rval = operands[0];
25016 mem = operands[1];
25017 oldval = operands[2];
25018 newval = operands[3];
25019 is_weak = (operands[4] != const0_rtx);
25020 mod_s = (enum memmodel) INTVAL (operands[5]);
25021 mod_f = (enum memmodel) INTVAL (operands[6]);
25022 scratch = operands[7];
25023 mode = GET_MODE (mem);
25024
25025 arm_pre_atomic_barrier (mod_s);
25026
25027 label1 = NULL_RTX;
25028 if (!is_weak)
25029 {
25030 label1 = gen_label_rtx ();
25031 emit_label (label1);
25032 }
25033 label2 = gen_label_rtx ();
25034
25035 arm_emit_load_exclusive (mode, rval, mem);
25036
25037 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
25038 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25039 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25040 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
25041 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25042
25043 arm_emit_store_exclusive (mode, scratch, mem, newval);
25044
25045 /* Weak or strong, we want EQ to be true for success, so that we
25046 match the flags that we got from the compare above. */
25047 cond = gen_rtx_REG (CCmode, CC_REGNUM);
25048 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
25049 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
25050
25051 if (!is_weak)
25052 {
25053 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25054 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25055 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
25056 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25057 }
25058
25059 if (mod_f != MEMMODEL_RELAXED)
25060 emit_label (label2);
25061
25062 arm_post_atomic_barrier (mod_s);
25063
25064 if (mod_f == MEMMODEL_RELAXED)
25065 emit_label (label2);
25066 }
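/* Illustrative expansion (not in the original source): for a strong SImode
   compare-and-swap with seq_cst ordering the split produces roughly

       dmb                          @ pre barrier
   1:  ldrex   rval, [mem]
       cmp     rval, oldval
       bne     2f                   @ mismatch: fail
       strex   scratch, newval, [mem]
       cmp     scratch, #0
       bne     1b                   @ reservation lost: retry
   2:  dmb                          @ post barrier

   with the retry branch omitted for a weak compare-and-swap.  */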
25067
25068 void
25069 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
25070 rtx value, rtx model_rtx, rtx cond)
25071 {
25072 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
25073 enum machine_mode mode = GET_MODE (mem);
25074 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
25075 rtx label, x;
25076
25077 arm_pre_atomic_barrier (model);
25078
25079 label = gen_label_rtx ();
25080 emit_label (label);
25081
25082 if (new_out)
25083 new_out = gen_lowpart (wmode, new_out);
25084 if (old_out)
25085 old_out = gen_lowpart (wmode, old_out);
25086 else
25087 old_out = new_out;
25088 value = simplify_gen_subreg (wmode, value, mode, 0);
25089
25090 arm_emit_load_exclusive (mode, old_out, mem);
25091
25092 switch (code)
25093 {
25094 case SET:
25095 new_out = value;
25096 break;
25097
25098 case NOT:
25099 x = gen_rtx_AND (wmode, old_out, value);
25100 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25101 x = gen_rtx_NOT (wmode, new_out);
25102 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25103 break;
25104
25105 case MINUS:
25106 if (CONST_INT_P (value))
25107 {
25108 value = GEN_INT (-INTVAL (value));
25109 code = PLUS;
25110 }
25111 /* FALLTHRU */
25112
25113 case PLUS:
25114 if (mode == DImode)
25115 {
25116 /* DImode plus/minus need to clobber flags. */
25117 /* The adddi3 and subdi3 patterns are incorrectly written so that
25118 they require matching operands, even when we could easily support
25119 three operands. Thankfully, this can be fixed up post-splitting,
25120 as the individual add+adc patterns do accept three operands and
25121 post-reload cprop can make these moves go away. */
25122 emit_move_insn (new_out, old_out);
25123 if (code == PLUS)
25124 x = gen_adddi3 (new_out, new_out, value);
25125 else
25126 x = gen_subdi3 (new_out, new_out, value);
25127 emit_insn (x);
25128 break;
25129 }
25130 /* FALLTHRU */
25131
25132 default:
25133 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
25134 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25135 break;
25136 }
25137
25138 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
25139
25140 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25141 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
25142
25143 arm_post_atomic_barrier (model);
25144 }
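/* Illustrative expansion (not in the original source): an SImode atomic
   fetch-and-add with seq_cst ordering is split into roughly

       dmb                          @ pre barrier
   1:  ldrex   old, [mem]
       add     new, old, value
       strex   tmp, new, [mem]
       cmp     tmp, #0
       bne     1b                   @ reservation lost: retry
       dmb                          @ post barrier

   where NOT is handled as AND followed by NOT (a NAND), and DImode PLUS and
   MINUS go through the adddi3/subdi3 patterns so the flags clobber is
   honoured.  */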
25145 \f
25146 #define MAX_VECT_LEN 16
25147
25148 struct expand_vec_perm_d
25149 {
25150 rtx target, op0, op1;
25151 unsigned char perm[MAX_VECT_LEN];
25152 enum machine_mode vmode;
25153 unsigned char nelt;
25154 bool one_vector_p;
25155 bool testing_p;
25156 };
25157
25158 /* Generate a variable permutation. */
25159
25160 static void
25161 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
25162 {
25163 enum machine_mode vmode = GET_MODE (target);
25164 bool one_vector_p = rtx_equal_p (op0, op1);
25165
25166 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
25167 gcc_checking_assert (GET_MODE (op0) == vmode);
25168 gcc_checking_assert (GET_MODE (op1) == vmode);
25169 gcc_checking_assert (GET_MODE (sel) == vmode);
25170 gcc_checking_assert (TARGET_NEON);
25171
25172 if (one_vector_p)
25173 {
25174 if (vmode == V8QImode)
25175 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
25176 else
25177 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
25178 }
25179 else
25180 {
25181 rtx pair;
25182
25183 if (vmode == V8QImode)
25184 {
25185 pair = gen_reg_rtx (V16QImode);
25186 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
25187 pair = gen_lowpart (TImode, pair);
25188 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
25189 }
25190 else
25191 {
25192 pair = gen_reg_rtx (OImode);
25193 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
25194 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
25195 }
25196 }
25197 }
25198
25199 void
25200 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
25201 {
25202 enum machine_mode vmode = GET_MODE (target);
25203 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
25204 bool one_vector_p = rtx_equal_p (op0, op1);
25205 rtx rmask[MAX_VECT_LEN], mask;
25206
25207 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25208 numbering of elements for big-endian, we must reverse the order. */
25209 gcc_checking_assert (!BYTES_BIG_ENDIAN);
25210
25211 /* The VTBL instruction does not use a modulo index, so we must take care
25212 of that ourselves. */
25213 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
25214 for (i = 0; i < nelt; ++i)
25215 rmask[i] = mask;
25216 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
25217 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
25218
25219 arm_expand_vec_perm_1 (target, op0, op1, sel);
25220 }
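/* Worked example (illustrative, not in the original source): for a
   single-vector V8QImode permutation the selector is first ANDed with a
   vector of 7s (nelt - 1), so out-of-range indices wrap as VEC_PERM_EXPR
   requires, and the masked selector is then fed to a single vtbl.8
   instruction via arm_expand_vec_perm_1.  */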
25221
25222 /* Generate or test for an insn that supports a constant permutation. */
25223
25224 /* Recognize patterns for the VUZP insns. */
25225
25226 static bool
25227 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
25228 {
25229 unsigned int i, odd, mask, nelt = d->nelt;
25230 rtx out0, out1, in0, in1, x;
25231 rtx (*gen)(rtx, rtx, rtx, rtx);
25232
25233 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25234 return false;
25235
25236 /* Note that these are little-endian tests. Adjust for big-endian later. */
25237 if (d->perm[0] == 0)
25238 odd = 0;
25239 else if (d->perm[0] == 1)
25240 odd = 1;
25241 else
25242 return false;
25243 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25244
25245 for (i = 0; i < nelt; i++)
25246 {
25247 unsigned elt = (i * 2 + odd) & mask;
25248 if (d->perm[i] != elt)
25249 return false;
25250 }
25251
25252 /* Success! */
25253 if (d->testing_p)
25254 return true;
25255
25256 switch (d->vmode)
25257 {
25258 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
25259 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
25260 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
25261 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
25262 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
25263 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
25264 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
25265 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
25266 default:
25267 gcc_unreachable ();
25268 }
25269
25270 in0 = d->op0;
25271 in1 = d->op1;
25272 if (BYTES_BIG_ENDIAN)
25273 {
25274 x = in0, in0 = in1, in1 = x;
25275 odd = !odd;
25276 }
25277
25278 out0 = d->target;
25279 out1 = gen_reg_rtx (d->vmode);
25280 if (odd)
25281 x = out0, out0 = out1, out1 = x;
25282
25283 emit_insn (gen (out0, in0, in1, out1));
25284 return true;
25285 }
25286
25287 /* Recognize patterns for the VZIP insns. */
25288
25289 static bool
25290 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
25291 {
25292 unsigned int i, high, mask, nelt = d->nelt;
25293 rtx out0, out1, in0, in1, x;
25294 rtx (*gen)(rtx, rtx, rtx, rtx);
25295
25296 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25297 return false;
25298
25299 /* Note that these are little-endian tests. Adjust for big-endian later. */
25300 high = nelt / 2;
25301 if (d->perm[0] == high)
25302 ;
25303 else if (d->perm[0] == 0)
25304 high = 0;
25305 else
25306 return false;
25307 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25308
25309 for (i = 0; i < nelt / 2; i++)
25310 {
25311 unsigned elt = (i + high) & mask;
25312 if (d->perm[i * 2] != elt)
25313 return false;
25314 elt = (elt + nelt) & mask;
25315 if (d->perm[i * 2 + 1] != elt)
25316 return false;
25317 }
25318
25319 /* Success! */
25320 if (d->testing_p)
25321 return true;
25322
25323 switch (d->vmode)
25324 {
25325 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
25326 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
25327 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
25328 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
25329 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
25330 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
25331 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
25332 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
25333 default:
25334 gcc_unreachable ();
25335 }
25336
25337 in0 = d->op0;
25338 in1 = d->op1;
25339 if (BYTES_BIG_ENDIAN)
25340 {
25341 x = in0, in0 = in1, in1 = x;
25342 high = !high;
25343 }
25344
25345 out0 = d->target;
25346 out1 = gen_reg_rtx (d->vmode);
25347 if (high)
25348 x = out0, out0 = out1, out1 = x;
25349
25350 emit_insn (gen (out0, in0, in1, out1));
25351 return true;
25352 }
25353
25354 /* Recognize patterns for the VREV insns. */
25355
25356 static bool
25357 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
25358 {
25359 unsigned int i, j, diff, nelt = d->nelt;
25360 rtx (*gen)(rtx, rtx, rtx);
25361
25362 if (!d->one_vector_p)
25363 return false;
25364
25365 diff = d->perm[0];
25366 switch (diff)
25367 {
25368 case 7:
25369 switch (d->vmode)
25370 {
25371 case V16QImode: gen = gen_neon_vrev64v16qi; break;
25372 case V8QImode: gen = gen_neon_vrev64v8qi; break;
25373 default:
25374 return false;
25375 }
25376 break;
25377 case 3:
25378 switch (d->vmode)
25379 {
25380 case V16QImode: gen = gen_neon_vrev32v16qi; break;
25381 case V8QImode: gen = gen_neon_vrev32v8qi; break;
25382 case V8HImode: gen = gen_neon_vrev64v8hi; break;
25383 case V4HImode: gen = gen_neon_vrev64v4hi; break;
25384 default:
25385 return false;
25386 }
25387 break;
25388 case 1:
25389 switch (d->vmode)
25390 {
25391 case V16QImode: gen = gen_neon_vrev16v16qi; break;
25392 case V8QImode: gen = gen_neon_vrev16v8qi; break;
25393 case V8HImode: gen = gen_neon_vrev32v8hi; break;
25394 case V4HImode: gen = gen_neon_vrev32v4hi; break;
25395 case V4SImode: gen = gen_neon_vrev64v4si; break;
25396 case V2SImode: gen = gen_neon_vrev64v2si; break;
25397 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
25398 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
25399 default:
25400 return false;
25401 }
25402 break;
25403 default:
25404 return false;
25405 }
25406
25407 for (i = 0; i < nelt; i += diff)
25408 for (j = 0; j <= diff; j += 1)
25409 if (d->perm[i + j] != i + diff - j)
25410 return false;
25411
25412 /* Success! */
25413 if (d->testing_p)
25414 return true;
25415
25416 /* ??? The third operand is an artifact of the builtin infrastructure
25417 and is ignored by the actual instruction. */
25418 emit_insn (gen (d->target, d->op0, const0_rtx));
25419 return true;
25420 }
25421
25422 /* Recognize patterns for the VTRN insns. */
25423
25424 static bool
25425 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
25426 {
25427 unsigned int i, odd, mask, nelt = d->nelt;
25428 rtx out0, out1, in0, in1, x;
25429 rtx (*gen)(rtx, rtx, rtx, rtx);
25430
25431 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25432 return false;
25433
25434 /* Note that these are little-endian tests. Adjust for big-endian later. */
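/* For V4SImode, for instance, the "even" transpose selects { 0, 4, 2, 6 }
   and the "odd" transpose selects { 1, 5, 3, 7 }.  */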
25435 if (d->perm[0] == 0)
25436 odd = 0;
25437 else if (d->perm[0] == 1)
25438 odd = 1;
25439 else
25440 return false;
25441 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25442
25443 for (i = 0; i < nelt; i += 2)
25444 {
25445 if (d->perm[i] != i + odd)
25446 return false;
25447 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
25448 return false;
25449 }
25450
25451 /* Success! */
25452 if (d->testing_p)
25453 return true;
25454
25455 switch (d->vmode)
25456 {
25457 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
25458 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
25459 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
25460 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
25461 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
25462 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
25463 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
25464 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
25465 default:
25466 gcc_unreachable ();
25467 }
25468
25469 in0 = d->op0;
25470 in1 = d->op1;
25471 if (BYTES_BIG_ENDIAN)
25472 {
25473 x = in0, in0 = in1, in1 = x;
25474 odd = !odd;
25475 }
25476
25477 out0 = d->target;
25478 out1 = gen_reg_rtx (d->vmode);
25479 if (odd)
25480 x = out0, out0 = out1, out1 = x;
25481
25482 emit_insn (gen (out0, in0, in1, out1));
25483 return true;
25484 }
25485
25486 /* The NEON VTBL instruction is a fully variable permutation that's even
25487 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
25488 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
25489 can do slightly better for a constant permutation, where no mask
25490 needs to be applied at all. */
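/* With VTBL an out-of-range index byte reads as zero rather than wrapping,
   so a variable selector must be masked first; a constant selector is
   already known to be in range.  */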
25491
25492 static bool
25493 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
25494 {
25495 rtx rperm[MAX_VECT_LEN], sel;
25496 enum machine_mode vmode = d->vmode;
25497 unsigned int i, nelt = d->nelt;
25498
25499 /* TODO: ARM's VTBL indexing is little-endian. Handling GCC's element
25500 numbering for big-endian would require reversing the selector, which is not done yet. */
25501 if (BYTES_BIG_ENDIAN)
25502 return false;
25503
25504 if (d->testing_p)
25505 return true;
25506
25507 /* Generic code will try constant permutation twice. Once with the
25508 original mode and again with the elements lowered to QImode.
25509 So wait and don't do the selector expansion ourselves. */
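/* A V4SImode permutation, for instance, comes back as a V16QImode one
   with each word index i expanded to byte indices 4*i .. 4*i + 3.  */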
25510 if (vmode != V8QImode && vmode != V16QImode)
25511 return false;
25512
25513 for (i = 0; i < nelt; ++i)
25514 rperm[i] = GEN_INT (d->perm[i]);
25515 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
25516 sel = force_reg (vmode, sel);
25517
25518 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
25519 return true;
25520 }
25521
25522 static bool
25523 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
25524 {
25525 /* The pattern matching functions above are written to look for a small
25526 number to begin the sequence (0, 1, N/2). If we begin with an index
25527 from the second operand, we can swap the operands. */
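/* For V4SImode, for instance, { 4, 5, 0, 1 } becomes { 0, 1, 4, 5 }
   once the operands are swapped.  */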
25528 if (d->perm[0] >= d->nelt)
25529 {
25530 unsigned i, nelt = d->nelt;
25531 rtx x;
25532
25533 for (i = 0; i < nelt; ++i)
25534 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
25535
25536 x = d->op0;
25537 d->op0 = d->op1;
25538 d->op1 = x;
25539 }
25540
25541 if (TARGET_NEON)
25542 {
25543 if (arm_evpc_neon_vuzp (d))
25544 return true;
25545 if (arm_evpc_neon_vzip (d))
25546 return true;
25547 if (arm_evpc_neon_vrev (d))
25548 return true;
25549 if (arm_evpc_neon_vtrn (d))
25550 return true;
25551 return arm_evpc_neon_vtbl (d);
25552 }
25553 return false;
25554 }
25555
25556 /* Expand a vec_perm_const pattern. */
25557
25558 bool
25559 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
25560 {
25561 struct expand_vec_perm_d d;
25562 int i, nelt, which;
25563
25564 d.target = target;
25565 d.op0 = op0;
25566 d.op1 = op1;
25567
25568 d.vmode = GET_MODE (target);
25569 gcc_assert (VECTOR_MODE_P (d.vmode));
25570 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
25571 d.testing_p = false;
25572
25573 for (i = which = 0; i < nelt; ++i)
25574 {
25575 rtx e = XVECEXP (sel, 0, i);
25576 int ei = INTVAL (e) & (2 * nelt - 1);
25577 which |= (ei < nelt ? 1 : 2);
25578 d.perm[i] = ei;
25579 }
25580
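/* which is now a bitmask: bit 0 is set if any element was taken from op0,
   bit 1 if any element was taken from op1.  */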
25581 switch (which)
25582 {
25583 default:
25584 gcc_unreachable ();
25585
25586 case 3:
25587 d.one_vector_p = false;
25588 if (!rtx_equal_p (op0, op1))
25589 break;
25590
25591 /* The elements of PERM reference both operands, but the two operands
25592 are identical. Make the permutation easier to match by folding it
25593 into a single input vector. */
25595 /* FALLTHRU */
25596 case 2:
25597 for (i = 0; i < nelt; ++i)
25598 d.perm[i] &= nelt - 1;
25599 d.op0 = op1;
25600 d.one_vector_p = true;
25601 break;
25602
25603 case 1:
25604 d.op1 = op0;
25605 d.one_vector_p = true;
25606 break;
25607 }
25608
25609 return arm_expand_vec_perm_const_1 (&d);
25610 }
25611
25612 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
25613
25614 static bool
25615 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
25616 const unsigned char *sel)
25617 {
25618 struct expand_vec_perm_d d;
25619 unsigned int i, nelt, which;
25620 bool ret;
25621
25622 d.vmode = vmode;
25623 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
25624 d.testing_p = true;
25625 memcpy (d.perm, sel, nelt);
25626
25627 /* Categorize the set of elements in the selector. */
25628 for (i = which = 0; i < nelt; ++i)
25629 {
25630 unsigned char e = d.perm[i];
25631 gcc_assert (e < 2 * nelt);
25632 which |= (e < nelt ? 1 : 2);
25633 }
25634
25635 /* If every element comes from the second vector (which == 2), fold the indices onto the first. */
25636 if (which == 2)
25637 for (i = 0; i < nelt; ++i)
25638 d.perm[i] -= nelt;
25639
25640 /* Check whether the mask can be applied to the vector type. */
25641 d.one_vector_p = (which != 3);
25642
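/* Dummy operands: d.testing_p is set, so the recognizers validate the
   permutation and return before emitting anything that would use these
   raw REGs.  */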
25643 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
25644 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
25645 if (!d.one_vector_p)
25646 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
25647
25648 start_sequence ();
25649 ret = arm_expand_vec_perm_const_1 (&d);
25650 end_sequence ();
25651
25652 return ret;
25653 }
25654
25655 \f
25656 #include "gt-arm.h"