1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "tm_p.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "debug.h"
52 #include "langhooks.h"
53 #include "df.h"
54 #include "intl.h"
55 #include "libfuncs.h"
56 #include "params.h"
57 #include "opts.h"
58 #include "dumpfile.h"
59
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
63
64 void (*arm_lang_output_object_attributes_hook)(void);
65
66 struct four_ints
67 {
68 int i[4];
69 };
70
71 /* Forward function declarations. */
72 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
73 static int arm_compute_static_chain_stack_bytes (void);
74 static arm_stack_offsets *arm_get_frame_offsets (void);
75 static void arm_add_gc_roots (void);
76 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
77 HOST_WIDE_INT, rtx, rtx, int, int);
78 static unsigned bit_count (unsigned long);
79 static int arm_address_register_rtx_p (rtx, int);
80 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
81 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
82 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
83 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
84 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
85 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
86 inline static int thumb1_index_register_rtx_p (rtx, int);
87 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
88 static int thumb_far_jump_used_p (void);
89 static bool thumb_force_lr_save (void);
90 static unsigned arm_size_return_regs (void);
91 static bool arm_assemble_integer (rtx, unsigned int, int);
92 static void arm_print_operand (FILE *, rtx, int);
93 static void arm_print_operand_address (FILE *, rtx);
94 static bool arm_print_operand_punct_valid_p (unsigned char code);
95 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
96 static arm_cc get_arm_condition_code (rtx);
97 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
98 static rtx is_jump_table (rtx);
99 static const char *output_multi_immediate (rtx *, const char *, const char *,
100 int, HOST_WIDE_INT);
101 static const char *shift_op (rtx, HOST_WIDE_INT *);
102 static struct machine_function *arm_init_machine_status (void);
103 static void thumb_exit (FILE *, int);
104 static rtx is_jump_table (rtx);
105 static HOST_WIDE_INT get_jump_table_size (rtx);
106 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
107 static Mnode *add_minipool_forward_ref (Mfix *);
108 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_backward_ref (Mfix *);
110 static void assign_minipool_offsets (Mfix *);
111 static void arm_print_value (FILE *, rtx);
112 static void dump_minipool (rtx);
113 static int arm_barrier_cost (rtx);
114 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
115 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
116 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
117 rtx);
118 static void arm_reorg (void);
119 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
120 static unsigned long arm_compute_save_reg0_reg12_mask (void);
121 static unsigned long arm_compute_save_reg_mask (void);
122 static unsigned long arm_isr_value (tree);
123 static unsigned long arm_compute_func_type (void);
124 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
125 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
126 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
127 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
128 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
129 #endif
130 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
131 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
132 static int arm_comp_type_attributes (const_tree, const_tree);
133 static void arm_set_default_type_attributes (tree);
134 static int arm_adjust_cost (rtx, rtx, rtx, int);
135 static int optimal_immediate_sequence (enum rtx_code code,
136 unsigned HOST_WIDE_INT val,
137 struct four_ints *return_sequence);
138 static int optimal_immediate_sequence_1 (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence,
141 int i);
142 static int arm_get_strip_length (int);
143 static bool arm_function_ok_for_sibcall (tree, tree);
144 static enum machine_mode arm_promote_function_mode (const_tree,
145 enum machine_mode, int *,
146 const_tree, int);
147 static bool arm_return_in_memory (const_tree, const_tree);
148 static rtx arm_function_value (const_tree, const_tree, bool);
149 static rtx arm_libcall_value_1 (enum machine_mode);
150 static rtx arm_libcall_value (enum machine_mode, const_rtx);
151 static bool arm_function_value_regno_p (const unsigned int);
152 static void arm_internal_label (FILE *, const char *, unsigned long);
153 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
154 tree);
155 static bool arm_have_conditional_execution (void);
156 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
157 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
158 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
159 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
160 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
161 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
163 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
165 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
166 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
167 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
168 static void arm_init_builtins (void);
169 static void arm_init_iwmmxt_builtins (void);
170 static rtx safe_vector_operand (rtx, enum machine_mode);
171 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
172 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
173 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
174 static tree arm_builtin_decl (unsigned, bool);
175 static void emit_constant_insn (rtx cond, rtx pattern);
176 static rtx emit_set_insn (rtx, rtx);
177 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
178 tree, bool);
179 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
180 const_tree, bool);
181 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
182 const_tree, bool);
183 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
184 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
185 const_tree);
186 static rtx aapcs_libcall_value (enum machine_mode);
187 static int aapcs_select_return_coproc (const_tree, const_tree);
188
189 #ifdef OBJECT_FORMAT_ELF
190 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
191 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
192 #endif
193 #ifndef ARM_PE
194 static void arm_encode_section_info (tree, rtx, int);
195 #endif
196
197 static void arm_file_end (void);
198 static void arm_file_start (void);
199
200 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
201 tree, int *, int);
202 static bool arm_pass_by_reference (cumulative_args_t,
203 enum machine_mode, const_tree, bool);
204 static bool arm_promote_prototypes (const_tree);
205 static bool arm_default_short_enums (void);
206 static bool arm_align_anon_bitfield (void);
207 static bool arm_return_in_msb (const_tree);
208 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
209 static bool arm_return_in_memory (const_tree, const_tree);
210 #if ARM_UNWIND_INFO
211 static void arm_unwind_emit (FILE *, rtx);
212 static bool arm_output_ttype (rtx);
213 static void arm_asm_emit_except_personality (rtx);
214 static void arm_asm_init_sections (void);
215 #endif
216 static rtx arm_dwarf_register_span (rtx);
217
218 static tree arm_cxx_guard_type (void);
219 static bool arm_cxx_guard_mask_bit (void);
220 static tree arm_get_cookie_size (tree);
221 static bool arm_cookie_has_size (void);
222 static bool arm_cxx_cdtor_returns_this (void);
223 static bool arm_cxx_key_method_may_be_inline (void);
224 static void arm_cxx_determine_class_data_visibility (tree);
225 static bool arm_cxx_class_data_always_comdat (void);
226 static bool arm_cxx_use_aeabi_atexit (void);
227 static void arm_init_libfuncs (void);
228 static tree arm_build_builtin_va_list (void);
229 static void arm_expand_builtin_va_start (tree, rtx);
230 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
231 static void arm_option_override (void);
232 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
233 static bool arm_cannot_copy_insn_p (rtx);
234 static bool arm_tls_symbol_p (rtx x);
235 static int arm_issue_rate (void);
236 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
237 static bool arm_output_addr_const_extra (FILE *, rtx);
238 static bool arm_allocate_stack_slots_for_args (void);
239 static bool arm_warn_func_return (tree);
240 static const char *arm_invalid_parameter_type (const_tree t);
241 static const char *arm_invalid_return_type (const_tree t);
242 static tree arm_promoted_type (const_tree t);
243 static tree arm_convert_to_type (tree type, tree expr);
244 static bool arm_scalar_mode_supported_p (enum machine_mode);
245 static bool arm_frame_pointer_required (void);
246 static bool arm_can_eliminate (const int, const int);
247 static void arm_asm_trampoline_template (FILE *);
248 static void arm_trampoline_init (rtx, tree, rtx);
249 static rtx arm_trampoline_adjust_address (rtx);
250 static rtx arm_pic_static_addr (rtx orig, rtx reg);
251 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
252 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
253 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
254 static bool arm_array_mode_supported_p (enum machine_mode,
255 unsigned HOST_WIDE_INT);
256 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
257 static bool arm_class_likely_spilled_p (reg_class_t);
258 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
259 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
260 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
261 const_tree type,
262 int misalignment,
263 bool is_packed);
264 static void arm_conditional_register_usage (void);
265 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
266 static unsigned int arm_autovectorize_vector_sizes (void);
267 static int arm_default_branch_cost (bool, bool);
268 static int arm_cortex_a5_branch_cost (bool, bool);
269
270 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
271 const unsigned char *sel);
272
273 \f
274 /* Table of machine attributes. */
275 static const struct attribute_spec arm_attribute_table[] =
276 {
277 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
278 affects_type_identity } */
279 /* Function calls made to this symbol must be done indirectly, because
280 it may lie outside of the 26 bit addressing range of a normal function
281 call. */
282 { "long_call", 0, 0, false, true, true, NULL, false },
283 /* Whereas these functions are always known to reside within the 26 bit
284 addressing range. */
285 { "short_call", 0, 0, false, true, true, NULL, false },
286 /* Specify the procedure call conventions for a function. */
287 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
288 false },
289 /* Interrupt Service Routines have special prologue and epilogue requirements. */
290 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
291 false },
292 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
293 false },
294 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
295 false },
296 #ifdef ARM_PE
297 /* ARM/PE has three new attributes:
298 interfacearm - ?
299 dllexport - for exporting a function/variable that will live in a dll
300 dllimport - for importing a function/variable from a dll
301
302 Microsoft allows multiple declspecs in one __declspec, separating
303 them with spaces. We do NOT support this. Instead, use __declspec
304 multiple times.
305 */
306 { "dllimport", 0, 0, true, false, false, NULL, false },
307 { "dllexport", 0, 0, true, false, false, NULL, false },
308 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
309 false },
310 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
311 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
312 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
313 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
314 false },
315 #endif
316 { NULL, 0, 0, false, false, false, NULL, false }
317 };
318 \f
319 /* Initialize the GCC target structure. */
320 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
321 #undef TARGET_MERGE_DECL_ATTRIBUTES
322 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
323 #endif
324
325 #undef TARGET_LEGITIMIZE_ADDRESS
326 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
327
328 #undef TARGET_ATTRIBUTE_TABLE
329 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
330
331 #undef TARGET_ASM_FILE_START
332 #define TARGET_ASM_FILE_START arm_file_start
333 #undef TARGET_ASM_FILE_END
334 #define TARGET_ASM_FILE_END arm_file_end
335
336 #undef TARGET_ASM_ALIGNED_SI_OP
337 #define TARGET_ASM_ALIGNED_SI_OP NULL
338 #undef TARGET_ASM_INTEGER
339 #define TARGET_ASM_INTEGER arm_assemble_integer
340
341 #undef TARGET_PRINT_OPERAND
342 #define TARGET_PRINT_OPERAND arm_print_operand
343 #undef TARGET_PRINT_OPERAND_ADDRESS
344 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
345 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
346 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
347
348 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
349 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
350
351 #undef TARGET_ASM_FUNCTION_PROLOGUE
352 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
353
354 #undef TARGET_ASM_FUNCTION_EPILOGUE
355 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
356
357 #undef TARGET_OPTION_OVERRIDE
358 #define TARGET_OPTION_OVERRIDE arm_option_override
359
360 #undef TARGET_COMP_TYPE_ATTRIBUTES
361 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
362
363 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
364 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
365
366 #undef TARGET_SCHED_ADJUST_COST
367 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
368
369 #undef TARGET_REGISTER_MOVE_COST
370 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
371
372 #undef TARGET_MEMORY_MOVE_COST
373 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
374
375 #undef TARGET_ENCODE_SECTION_INFO
376 #ifdef ARM_PE
377 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
378 #else
379 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
380 #endif
381
382 #undef TARGET_STRIP_NAME_ENCODING
383 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
384
385 #undef TARGET_ASM_INTERNAL_LABEL
386 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
387
388 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
389 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
390
391 #undef TARGET_FUNCTION_VALUE
392 #define TARGET_FUNCTION_VALUE arm_function_value
393
394 #undef TARGET_LIBCALL_VALUE
395 #define TARGET_LIBCALL_VALUE arm_libcall_value
396
397 #undef TARGET_FUNCTION_VALUE_REGNO_P
398 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
399
400 #undef TARGET_ASM_OUTPUT_MI_THUNK
401 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
402 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
403 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
404
405 #undef TARGET_RTX_COSTS
406 #define TARGET_RTX_COSTS arm_rtx_costs
407 #undef TARGET_ADDRESS_COST
408 #define TARGET_ADDRESS_COST arm_address_cost
409
410 #undef TARGET_SHIFT_TRUNCATION_MASK
411 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
412 #undef TARGET_VECTOR_MODE_SUPPORTED_P
413 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
414 #undef TARGET_ARRAY_MODE_SUPPORTED_P
415 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
416 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
417 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
418 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
419 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
420 arm_autovectorize_vector_sizes
421
422 #undef TARGET_MACHINE_DEPENDENT_REORG
423 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
424
425 #undef TARGET_INIT_BUILTINS
426 #define TARGET_INIT_BUILTINS arm_init_builtins
427 #undef TARGET_EXPAND_BUILTIN
428 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
429 #undef TARGET_BUILTIN_DECL
430 #define TARGET_BUILTIN_DECL arm_builtin_decl
431
432 #undef TARGET_INIT_LIBFUNCS
433 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
434
435 #undef TARGET_PROMOTE_FUNCTION_MODE
436 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
437 #undef TARGET_PROMOTE_PROTOTYPES
438 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
439 #undef TARGET_PASS_BY_REFERENCE
440 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
441 #undef TARGET_ARG_PARTIAL_BYTES
442 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
443 #undef TARGET_FUNCTION_ARG
444 #define TARGET_FUNCTION_ARG arm_function_arg
445 #undef TARGET_FUNCTION_ARG_ADVANCE
446 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
447 #undef TARGET_FUNCTION_ARG_BOUNDARY
448 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
449
450 #undef TARGET_SETUP_INCOMING_VARARGS
451 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
452
453 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
454 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
455
456 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
457 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
458 #undef TARGET_TRAMPOLINE_INIT
459 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
460 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
461 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
462
463 #undef TARGET_WARN_FUNC_RETURN
464 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
465
466 #undef TARGET_DEFAULT_SHORT_ENUMS
467 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
468
469 #undef TARGET_ALIGN_ANON_BITFIELD
470 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
471
472 #undef TARGET_NARROW_VOLATILE_BITFIELD
473 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
474
475 #undef TARGET_CXX_GUARD_TYPE
476 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
477
478 #undef TARGET_CXX_GUARD_MASK_BIT
479 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
480
481 #undef TARGET_CXX_GET_COOKIE_SIZE
482 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
483
484 #undef TARGET_CXX_COOKIE_HAS_SIZE
485 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
486
487 #undef TARGET_CXX_CDTOR_RETURNS_THIS
488 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
489
490 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
491 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
492
493 #undef TARGET_CXX_USE_AEABI_ATEXIT
494 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
495
496 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
497 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
498 arm_cxx_determine_class_data_visibility
499
500 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
501 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
502
503 #undef TARGET_RETURN_IN_MSB
504 #define TARGET_RETURN_IN_MSB arm_return_in_msb
505
506 #undef TARGET_RETURN_IN_MEMORY
507 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
508
509 #undef TARGET_MUST_PASS_IN_STACK
510 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
511
512 #if ARM_UNWIND_INFO
513 #undef TARGET_ASM_UNWIND_EMIT
514 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
515
516 /* EABI unwinding tables use a different format for the typeinfo tables. */
517 #undef TARGET_ASM_TTYPE
518 #define TARGET_ASM_TTYPE arm_output_ttype
519
520 #undef TARGET_ARM_EABI_UNWINDER
521 #define TARGET_ARM_EABI_UNWINDER true
522
523 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
524 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
525
526 #undef TARGET_ASM_INIT_SECTIONS
527 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
528 #endif /* ARM_UNWIND_INFO */
529
530 #undef TARGET_DWARF_REGISTER_SPAN
531 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
532
533 #undef TARGET_CANNOT_COPY_INSN_P
534 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
535
536 #ifdef HAVE_AS_TLS
537 #undef TARGET_HAVE_TLS
538 #define TARGET_HAVE_TLS true
539 #endif
540
541 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
542 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
543
544 #undef TARGET_LEGITIMATE_CONSTANT_P
545 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
546
547 #undef TARGET_CANNOT_FORCE_CONST_MEM
548 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
549
550 #undef TARGET_MAX_ANCHOR_OFFSET
551 #define TARGET_MAX_ANCHOR_OFFSET 4095
552
553 /* The minimum is set such that the total size of the block
554 for a particular anchor is 4088 + 1 + 4095 bytes, which is
555 divisible by eight, ensuring natural spacing of anchors. */
556 #undef TARGET_MIN_ANCHOR_OFFSET
557 #define TARGET_MIN_ANCHOR_OFFSET -4088
558
559 #undef TARGET_SCHED_ISSUE_RATE
560 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
561
562 #undef TARGET_MANGLE_TYPE
563 #define TARGET_MANGLE_TYPE arm_mangle_type
564
565 #undef TARGET_BUILD_BUILTIN_VA_LIST
566 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
567 #undef TARGET_EXPAND_BUILTIN_VA_START
568 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
569 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
570 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
571
572 #ifdef HAVE_AS_TLS
573 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
574 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
575 #endif
576
577 #undef TARGET_LEGITIMATE_ADDRESS_P
578 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
579
580 #undef TARGET_PREFERRED_RELOAD_CLASS
581 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
582
583 #undef TARGET_INVALID_PARAMETER_TYPE
584 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
585
586 #undef TARGET_INVALID_RETURN_TYPE
587 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
588
589 #undef TARGET_PROMOTED_TYPE
590 #define TARGET_PROMOTED_TYPE arm_promoted_type
591
592 #undef TARGET_CONVERT_TO_TYPE
593 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
594
595 #undef TARGET_SCALAR_MODE_SUPPORTED_P
596 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
597
598 #undef TARGET_FRAME_POINTER_REQUIRED
599 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
600
601 #undef TARGET_CAN_ELIMINATE
602 #define TARGET_CAN_ELIMINATE arm_can_eliminate
603
604 #undef TARGET_CONDITIONAL_REGISTER_USAGE
605 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
606
607 #undef TARGET_CLASS_LIKELY_SPILLED_P
608 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
609
610 #undef TARGET_VECTOR_ALIGNMENT
611 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
612
613 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
614 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
615 arm_vector_alignment_reachable
616
617 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
618 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
619 arm_builtin_support_vector_misalignment
620
621 #undef TARGET_PREFERRED_RENAME_CLASS
622 #define TARGET_PREFERRED_RENAME_CLASS \
623 arm_preferred_rename_class
624
625 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
626 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
627 arm_vectorize_vec_perm_const_ok
628
629 struct gcc_target targetm = TARGET_INITIALIZER;
630 \f
631 /* Obstack for minipool constant handling. */
632 static struct obstack minipool_obstack;
633 static char * minipool_startobj;
634
635 /* The maximum number of insns skipped which
636 will be conditionalised if possible. */
637 static int max_insns_skipped = 5;
638
639 extern FILE * asm_out_file;
640
641 /* True if we are currently building a constant table. */
642 int making_const_table;
643
644 /* The processor for which instructions should be scheduled. */
645 enum processor_type arm_tune = arm_none;
646
647 /* The current tuning set. */
648 const struct tune_params *current_tune;
649
650 /* Which floating point hardware to schedule for. */
651 int arm_fpu_attr;
652
653 /* Which floating point hardware to use. */
654 const struct arm_fpu_desc *arm_fpu_desc;
655
656 /* Used for Thumb call_via trampolines. */
657 rtx thumb_call_via_label[14];
658 static int thumb_call_reg_needed;
659
660 /* Bit values used to identify processor capabilities. */
661 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
662 #define FL_ARCH3M (1 << 1) /* Extended multiply */
663 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
664 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
665 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
666 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
667 #define FL_THUMB (1 << 6) /* Thumb aware */
668 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
669 #define FL_STRONG (1 << 8) /* StrongARM */
670 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
671 #define FL_XSCALE (1 << 10) /* XScale */
672 /* spare (1 << 11) */
673 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
674 media instructions. */
675 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
676 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
677 Note: ARM6 & 7 derivatives only. */
678 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
679 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
680 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
681 profile. */
682 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
683 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
684 #define FL_NEON (1 << 20) /* Neon instructions. */
685 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
686 architecture. */
687 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
688 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
689 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
690
691 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
692 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
693
694 /* Flags that only affect tuning, not available instructions. */
695 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
696 | FL_CO_PROC)
697
698 #define FL_FOR_ARCH2 FL_NOTM
699 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
700 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
701 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
702 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
703 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
704 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
705 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
706 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
707 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
708 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
709 #define FL_FOR_ARCH6J FL_FOR_ARCH6
710 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
711 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
712 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
713 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
714 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
715 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
716 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
717 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
718 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
719 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
720 #define FL_FOR_ARCH8A (FL_FOR_ARCH7 | FL_ARCH6K | FL_ARCH8 | FL_THUMB_DIV \
721 | FL_ARM_DIV | FL_NOTM)
722
723 /* The bits in this mask specify which
724 instructions we are allowed to generate. */
725 static unsigned long insn_flags = 0;
726
727 /* The bits in this mask specify which instruction scheduling options should
728 be used. */
729 static unsigned long tune_flags = 0;
730
731 /* The highest ARM architecture version supported by the
732 target. */
733 enum base_architecture arm_base_arch = BASE_ARCH_0;
734
735 /* The following are used in the arm.md file as equivalents to bits
736 in the above two flag variables. */
737
738 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
739 int arm_arch3m = 0;
740
741 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
742 int arm_arch4 = 0;
743
744 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
745 int arm_arch4t = 0;
746
747 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
748 int arm_arch5 = 0;
749
750 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
751 int arm_arch5e = 0;
752
753 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
754 int arm_arch6 = 0;
755
756 /* Nonzero if this chip supports the ARM 6K extensions. */
757 int arm_arch6k = 0;
758
759 /* Nonzero if instructions present in ARMv6-M can be used. */
760 int arm_arch6m = 0;
761
762 /* Nonzero if this chip supports the ARM 7 extensions. */
763 int arm_arch7 = 0;
764
765 /* Nonzero if instructions not present in the 'M' profile can be used. */
766 int arm_arch_notm = 0;
767
768 /* Nonzero if instructions present in ARMv7E-M can be used. */
769 int arm_arch7em = 0;
770
771 /* Nonzero if instructions present in ARMv8 can be used. */
772 int arm_arch8 = 0;
773
774 /* Nonzero if this chip can benefit from load scheduling. */
775 int arm_ld_sched = 0;
776
777 /* Nonzero if this chip is a StrongARM. */
778 int arm_tune_strongarm = 0;
779
780 /* Nonzero if this chip supports Intel Wireless MMX technology. */
781 int arm_arch_iwmmxt = 0;
782
783 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
784 int arm_arch_iwmmxt2 = 0;
785
786 /* Nonzero if this chip is an XScale. */
787 int arm_arch_xscale = 0;
788
789 /* Nonzero if tuning for XScale. */
790 int arm_tune_xscale = 0;
791
792 /* Nonzero if we want to tune for stores that access the write-buffer.
793 This typically means an ARM6 or ARM7 with MMU or MPU. */
794 int arm_tune_wbuf = 0;
795
796 /* Nonzero if tuning for Cortex-A9. */
797 int arm_tune_cortex_a9 = 0;
798
799 /* Nonzero if generating Thumb instructions. */
800 int thumb_code = 0;
801
802 /* Nonzero if generating Thumb-1 instructions. */
803 int thumb1_code = 0;
804
805 /* Nonzero if we should define __THUMB_INTERWORK__ in the
806 preprocessor.
807 XXX This is a bit of a hack, it's intended to help work around
808 problems in GLD which doesn't understand that armv5t code is
809 interworking clean. */
810 int arm_cpp_interwork = 0;
811
812 /* Nonzero if chip supports Thumb 2. */
813 int arm_arch_thumb2;
814
815 /* Nonzero if chip supports integer division instruction. */
816 int arm_arch_arm_hwdiv;
817 int arm_arch_thumb_hwdiv;
818
819 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
820 we must report the mode of the memory reference from
821 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
822 enum machine_mode output_memory_reference_mode;
823
824 /* The register number to be used for the PIC offset register. */
825 unsigned arm_pic_register = INVALID_REGNUM;
826
827 /* Set to 1 after arm_reorg has started. Reset at the start of
828 the next function. */
829 static int after_arm_reorg = 0;
830
831 enum arm_pcs arm_pcs_default;
832
833 /* For an explanation of these variables, see final_prescan_insn below. */
834 int arm_ccfsm_state;
835 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
836 enum arm_cond_code arm_current_cc;
837
838 rtx arm_target_insn;
839 int arm_target_label;
840 /* The number of conditionally executed insns, including the current insn. */
841 int arm_condexec_count = 0;
842 /* A bitmask specifying the patterns for the IT block.
843 Zero means do not output an IT block before this insn. */
844 int arm_condexec_mask = 0;
845 /* The number of bits used in arm_condexec_mask. */
846 int arm_condexec_masklen = 0;
847
848 /* The condition codes of the ARM, and the inverse function. */
849 static const char * const arm_condition_codes[] =
850 {
851 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
852 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
853 };
854
855 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
856 int arm_regs_in_sequence[] =
857 {
858 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
859 };
860
861 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
862 #define streq(string1, string2) (strcmp (string1, string2) == 0)
863
864 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
865 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
866 | (1 << PIC_OFFSET_TABLE_REGNUM)))
867 \f
868 /* Initialization code. */
869
870 struct processors
871 {
872 const char *const name;
873 enum processor_type core;
874 const char *arch;
875 enum base_architecture base_arch;
876 const unsigned long flags;
877 const struct tune_params *const tune;
878 };
879
880
881 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
882 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
883 prefetch_slots, \
884 l1_size, \
885 l1_line_size
886
887 const struct tune_params arm_slowmul_tune =
888 {
889 arm_slowmul_rtx_costs,
890 NULL,
891 3, /* Constant limit. */
892 5, /* Max cond insns. */
893 ARM_PREFETCH_NOT_BENEFICIAL,
894 true, /* Prefer constant pool. */
895 arm_default_branch_cost,
896 false /* Prefer LDRD/STRD. */
897 };
898
899 const struct tune_params arm_fastmul_tune =
900 {
901 arm_fastmul_rtx_costs,
902 NULL,
903 1, /* Constant limit. */
904 5, /* Max cond insns. */
905 ARM_PREFETCH_NOT_BENEFICIAL,
906 true, /* Prefer constant pool. */
907 arm_default_branch_cost,
908 false /* Prefer LDRD/STRD. */
909 };
910
911 /* StrongARM has early execution of branches, so a sequence that is worth
912 skipping is shorter. Set max_insns_skipped to a lower value. */
913
914 const struct tune_params arm_strongarm_tune =
915 {
916 arm_fastmul_rtx_costs,
917 NULL,
918 1, /* Constant limit. */
919 3, /* Max cond insns. */
920 ARM_PREFETCH_NOT_BENEFICIAL,
921 true, /* Prefer constant pool. */
922 arm_default_branch_cost,
923 false /* Prefer LDRD/STRD. */
924 };
925
926 const struct tune_params arm_xscale_tune =
927 {
928 arm_xscale_rtx_costs,
929 xscale_sched_adjust_cost,
930 2, /* Constant limit. */
931 3, /* Max cond insns. */
932 ARM_PREFETCH_NOT_BENEFICIAL,
933 true, /* Prefer constant pool. */
934 arm_default_branch_cost,
935 false /* Prefer LDRD/STRD. */
936 };
937
938 const struct tune_params arm_9e_tune =
939 {
940 arm_9e_rtx_costs,
941 NULL,
942 1, /* Constant limit. */
943 5, /* Max cond insns. */
944 ARM_PREFETCH_NOT_BENEFICIAL,
945 true, /* Prefer constant pool. */
946 arm_default_branch_cost,
947 false /* Prefer LDRD/STRD. */
948 };
949
950 const struct tune_params arm_v6t2_tune =
951 {
952 arm_9e_rtx_costs,
953 NULL,
954 1, /* Constant limit. */
955 5, /* Max cond insns. */
956 ARM_PREFETCH_NOT_BENEFICIAL,
957 false, /* Prefer constant pool. */
958 arm_default_branch_cost,
959 false /* Prefer LDRD/STRD. */
960 };
961
962 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
963 const struct tune_params arm_cortex_tune =
964 {
965 arm_9e_rtx_costs,
966 NULL,
967 1, /* Constant limit. */
968 5, /* Max cond insns. */
969 ARM_PREFETCH_NOT_BENEFICIAL,
970 false, /* Prefer constant pool. */
971 arm_default_branch_cost,
972 false /* Prefer LDRD/STRD. */
973 };
974
975 const struct tune_params arm_cortex_a15_tune =
976 {
977 arm_9e_rtx_costs,
978 NULL,
979 1, /* Constant limit. */
980 5, /* Max cond insns. */
981 ARM_PREFETCH_NOT_BENEFICIAL,
982 false, /* Prefer constant pool. */
983 arm_default_branch_cost,
984 true /* Prefer LDRD/STRD. */
985 };
986
987 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
988 less appealing. Set max_insns_skipped to a low value. */
989
990 const struct tune_params arm_cortex_a5_tune =
991 {
992 arm_9e_rtx_costs,
993 NULL,
994 1, /* Constant limit. */
995 1, /* Max cond insns. */
996 ARM_PREFETCH_NOT_BENEFICIAL,
997 false, /* Prefer constant pool. */
998 arm_cortex_a5_branch_cost,
999 false /* Prefer LDRD/STRD. */
1000 };
1001
1002 const struct tune_params arm_cortex_a9_tune =
1003 {
1004 arm_9e_rtx_costs,
1005 cortex_a9_sched_adjust_cost,
1006 1, /* Constant limit. */
1007 5, /* Max cond insns. */
1008 ARM_PREFETCH_BENEFICIAL(4,32,32),
1009 false, /* Prefer constant pool. */
1010 arm_default_branch_cost,
1011 false /* Prefer LDRD/STRD. */
1012 };
1013
1014 const struct tune_params arm_fa726te_tune =
1015 {
1016 arm_9e_rtx_costs,
1017 fa726te_sched_adjust_cost,
1018 1, /* Constant limit. */
1019 5, /* Max cond insns. */
1020 ARM_PREFETCH_NOT_BENEFICIAL,
1021 true, /* Prefer constant pool. */
1022 arm_default_branch_cost,
1023 false /* Prefer LDRD/STRD. */
1024 };
1025
1026
1027 /* Not all of these give usefully different compilation alternatives,
1028 but there is no simple way of generalizing them. */
1029 static const struct processors all_cores[] =
1030 {
1031 /* ARM Cores */
1032 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1033 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1034 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1035 #include "arm-cores.def"
1036 #undef ARM_CORE
1037 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1038 };
1039
1040 static const struct processors all_architectures[] =
1041 {
1042 /* ARM Architectures */
1043 /* We don't specify tuning costs here as it will be figured out
1044 from the core. */
1045
1046 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1047 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1048 #include "arm-arches.def"
1049 #undef ARM_ARCH
1050 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1051 };
1052
1053
1054 /* These are populated as commandline arguments are processed, or NULL
1055 if not specified. */
1056 static const struct processors *arm_selected_arch;
1057 static const struct processors *arm_selected_cpu;
1058 static const struct processors *arm_selected_tune;
1059
1060 /* The name of the preprocessor macro to define for this architecture. */
1061
1062 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1063
1064 /* Available values for -mfpu=. */
1065
1066 static const struct arm_fpu_desc all_fpus[] =
1067 {
1068 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1069 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1070 #include "arm-fpus.def"
1071 #undef ARM_FPU
1072 };
1073
1074
1075 /* Supported TLS relocations. */
1076
1077 enum tls_reloc {
1078 TLS_GD32,
1079 TLS_LDM32,
1080 TLS_LDO32,
1081 TLS_IE32,
1082 TLS_LE32,
1083 TLS_DESCSEQ /* GNU scheme */
1084 };
1085
1086 /* The maximum number of insns to be used when loading a constant. */
1087 inline static int
1088 arm_constant_limit (bool size_p)
1089 {
1090 return size_p ? 1 : current_tune->constant_limit;
1091 }
1092
1093 /* Emit an insn that's a simple single-set. Both the operands must be known
1094 to be valid. */
1095 inline static rtx
1096 emit_set_insn (rtx x, rtx y)
1097 {
1098 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1099 }
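
/* For example, emit_set_insn (reg, GEN_INT (0)) emits the single insn
   (set (reg) (const_int 0)) into the instruction stream and returns it.  */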
1100
1101 /* Return the number of bits set in VALUE. */
1102 static unsigned
1103 bit_count (unsigned long value)
1104 {
1105 unsigned long count = 0;
1106
1107 while (value)
1108 {
1109 count++;
1110 value &= value - 1; /* Clear the least-significant set bit. */
1111 }
1112
1113 return count;
1114 }
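
/* For example, bit_count (0x2c) == 3: the loop above clears one set bit
   per iteration (0x2c -> 0x28 -> 0x20 -> 0).  */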
1115
1116 typedef struct
1117 {
1118 enum machine_mode mode;
1119 const char *name;
1120 } arm_fixed_mode_set;
1121
1122 /* A small helper for setting fixed-point library libfuncs. */
1123
1124 static void
1125 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1126 const char *funcname, const char *modename,
1127 int num_suffix)
1128 {
1129 char buffer[50];
1130
1131 if (num_suffix == 0)
1132 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1133 else
1134 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1135
1136 set_optab_libfunc (optable, mode, buffer);
1137 }
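
/* For example, arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
   registers the libcall name "__gnu_addqq3" for QQmode addition; with a
   num_suffix of 0 the name would simply be "__gnu_addqq".  */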
1138
1139 static void
1140 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1141 enum machine_mode from, const char *funcname,
1142 const char *toname, const char *fromname)
1143 {
1144 char buffer[50];
1145 const char *maybe_suffix_2 = "";
1146
1147 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1148 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1149 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1150 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1151 maybe_suffix_2 = "2";
1152
1153 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1154 maybe_suffix_2);
1155
1156 set_conv_libfunc (optable, to, from, buffer);
1157 }
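
/* For example, a QQmode-to-HQmode conversion registered through fract_optab
   gets the name "__gnu_fractqqhq2"; the trailing "2" is appended because both
   modes are signed fractional modes, following the fixed-bit.h suffix rule
   checked above.  */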
1158
1159 /* Set up library functions unique to ARM. */
1160
1161 static void
1162 arm_init_libfuncs (void)
1163 {
1164 /* For Linux, we have access to kernel support for atomic operations. */
1165 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1166 init_sync_libfuncs (2 * UNITS_PER_WORD);
1167
1168 /* There are no special library functions unless we are using the
1169 ARM BPABI. */
1170 if (!TARGET_BPABI)
1171 return;
1172
1173 /* The functions below are described in Section 4 of the "Run-Time
1174 ABI for the ARM architecture", Version 1.0. */
1175
1176 /* Double-precision floating-point arithmetic. Table 2. */
1177 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1178 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1179 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1180 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1181 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1182
1183 /* Double-precision comparisons. Table 3. */
1184 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1185 set_optab_libfunc (ne_optab, DFmode, NULL);
1186 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1187 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1188 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1189 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1190 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1191
1192 /* Single-precision floating-point arithmetic. Table 4. */
1193 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1194 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1195 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1196 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1197 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1198
1199 /* Single-precision comparisons. Table 5. */
1200 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1201 set_optab_libfunc (ne_optab, SFmode, NULL);
1202 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1203 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1204 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1205 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1206 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1207
1208 /* Floating-point to integer conversions. Table 6. */
1209 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1210 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1211 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1212 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1213 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1214 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1215 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1216 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1217
1218 /* Conversions between floating types. Table 7. */
1219 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1220 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1221
1222 /* Integer to floating-point conversions. Table 8. */
1223 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1224 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1225 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1226 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1227 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1228 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1229 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1230 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1231
1232 /* Long long. Table 9. */
1233 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1234 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1235 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1236 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1237 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1238 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1239 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1240 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1241
1242 /* Integer (32/32->32) division. \S 4.3.1. */
1243 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1244 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1245
1246 /* The divmod functions are designed so that they can be used for
1247 plain division, even though they return both the quotient and the
1248 remainder. The quotient is returned in the usual location (i.e.,
1249 r0 for SImode, {r0, r1} for DImode), just as would be expected
1250 for an ordinary division routine. Because the AAPCS calling
1251 conventions specify that all of { r0, r1, r2, r3 } are
1252 call-clobbered registers, there is no need to tell the compiler
1253 explicitly that those registers are clobbered by these
1254 routines. */
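/* Per the Run-Time ABI referenced above, the remainder comes back in the
   register(s) immediately after the quotient: r1 for __aeabi_idivmod and
   __aeabi_uidivmod, and { r2, r3 } for __aeabi_ldivmod and __aeabi_uldivmod.  */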
1255 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1256 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1257
1258 /* For SImode division the ABI provides div-without-mod routines,
1259 which are faster. */
1260 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1261 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1262
1263 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1264 divmod libcalls instead. */
1265 set_optab_libfunc (smod_optab, DImode, NULL);
1266 set_optab_libfunc (umod_optab, DImode, NULL);
1267 set_optab_libfunc (smod_optab, SImode, NULL);
1268 set_optab_libfunc (umod_optab, SImode, NULL);
1269
1270 /* Half-precision float operations. The compiler handles all operations
1271 with NULL libfuncs by converting to SFmode. */
1272 switch (arm_fp16_format)
1273 {
1274 case ARM_FP16_FORMAT_IEEE:
1275 case ARM_FP16_FORMAT_ALTERNATIVE:
1276
1277 /* Conversions. */
1278 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1279 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1280 ? "__gnu_f2h_ieee"
1281 : "__gnu_f2h_alternative"));
1282 set_conv_libfunc (sext_optab, SFmode, HFmode,
1283 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1284 ? "__gnu_h2f_ieee"
1285 : "__gnu_h2f_alternative"));
1286
1287 /* Arithmetic. */
1288 set_optab_libfunc (add_optab, HFmode, NULL);
1289 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1290 set_optab_libfunc (smul_optab, HFmode, NULL);
1291 set_optab_libfunc (neg_optab, HFmode, NULL);
1292 set_optab_libfunc (sub_optab, HFmode, NULL);
1293
1294 /* Comparisons. */
1295 set_optab_libfunc (eq_optab, HFmode, NULL);
1296 set_optab_libfunc (ne_optab, HFmode, NULL);
1297 set_optab_libfunc (lt_optab, HFmode, NULL);
1298 set_optab_libfunc (le_optab, HFmode, NULL);
1299 set_optab_libfunc (ge_optab, HFmode, NULL);
1300 set_optab_libfunc (gt_optab, HFmode, NULL);
1301 set_optab_libfunc (unord_optab, HFmode, NULL);
1302 break;
1303
1304 default:
1305 break;
1306 }
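
/* With the HFmode entries above left as NULL, an __fp16 operation such as
   a + b is expanded by widening both operands to SFmode with the
   __gnu_h2f_* helper registered above and doing the arithmetic in SFmode;
   a result stored back into an __fp16 is then narrowed with __gnu_f2h_*.  */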
1307
1308 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1309 {
1310 const arm_fixed_mode_set fixed_arith_modes[] =
1311 {
1312 { QQmode, "qq" },
1313 { UQQmode, "uqq" },
1314 { HQmode, "hq" },
1315 { UHQmode, "uhq" },
1316 { SQmode, "sq" },
1317 { USQmode, "usq" },
1318 { DQmode, "dq" },
1319 { UDQmode, "udq" },
1320 { TQmode, "tq" },
1321 { UTQmode, "utq" },
1322 { HAmode, "ha" },
1323 { UHAmode, "uha" },
1324 { SAmode, "sa" },
1325 { USAmode, "usa" },
1326 { DAmode, "da" },
1327 { UDAmode, "uda" },
1328 { TAmode, "ta" },
1329 { UTAmode, "uta" }
1330 };
1331 const arm_fixed_mode_set fixed_conv_modes[] =
1332 {
1333 { QQmode, "qq" },
1334 { UQQmode, "uqq" },
1335 { HQmode, "hq" },
1336 { UHQmode, "uhq" },
1337 { SQmode, "sq" },
1338 { USQmode, "usq" },
1339 { DQmode, "dq" },
1340 { UDQmode, "udq" },
1341 { TQmode, "tq" },
1342 { UTQmode, "utq" },
1343 { HAmode, "ha" },
1344 { UHAmode, "uha" },
1345 { SAmode, "sa" },
1346 { USAmode, "usa" },
1347 { DAmode, "da" },
1348 { UDAmode, "uda" },
1349 { TAmode, "ta" },
1350 { UTAmode, "uta" },
1351 { QImode, "qi" },
1352 { HImode, "hi" },
1353 { SImode, "si" },
1354 { DImode, "di" },
1355 { TImode, "ti" },
1356 { SFmode, "sf" },
1357 { DFmode, "df" }
1358 };
1359 unsigned int i, j;
1360
1361 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1362 {
1363 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1364 "add", fixed_arith_modes[i].name, 3);
1365 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1366 "ssadd", fixed_arith_modes[i].name, 3);
1367 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1368 "usadd", fixed_arith_modes[i].name, 3);
1369 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1370 "sub", fixed_arith_modes[i].name, 3);
1371 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1372 "sssub", fixed_arith_modes[i].name, 3);
1373 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1374 "ussub", fixed_arith_modes[i].name, 3);
1375 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1376 "mul", fixed_arith_modes[i].name, 3);
1377 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1378 "ssmul", fixed_arith_modes[i].name, 3);
1379 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1380 "usmul", fixed_arith_modes[i].name, 3);
1381 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1382 "div", fixed_arith_modes[i].name, 3);
1383 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1384 "udiv", fixed_arith_modes[i].name, 3);
1385 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1386 "ssdiv", fixed_arith_modes[i].name, 3);
1387 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1388 "usdiv", fixed_arith_modes[i].name, 3);
1389 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1390 "neg", fixed_arith_modes[i].name, 2);
1391 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1392 "ssneg", fixed_arith_modes[i].name, 2);
1393 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1394 "usneg", fixed_arith_modes[i].name, 2);
1395 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1396 "ashl", fixed_arith_modes[i].name, 3);
1397 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1398 "ashr", fixed_arith_modes[i].name, 3);
1399 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1400 "lshr", fixed_arith_modes[i].name, 3);
1401 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1402 "ssashl", fixed_arith_modes[i].name, 3);
1403 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1404 "usashl", fixed_arith_modes[i].name, 3);
1405 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1406 "cmp", fixed_arith_modes[i].name, 2);
1407 }
1408
1409 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1410 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1411 {
1412 if (i == j
1413 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1414 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1415 continue;
1416
1417 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1418 fixed_conv_modes[j].mode, "fract",
1419 fixed_conv_modes[i].name,
1420 fixed_conv_modes[j].name);
1421 arm_set_fixed_conv_libfunc (satfract_optab,
1422 fixed_conv_modes[i].mode,
1423 fixed_conv_modes[j].mode, "satfract",
1424 fixed_conv_modes[i].name,
1425 fixed_conv_modes[j].name);
1426 arm_set_fixed_conv_libfunc (fractuns_optab,
1427 fixed_conv_modes[i].mode,
1428 fixed_conv_modes[j].mode, "fractuns",
1429 fixed_conv_modes[i].name,
1430 fixed_conv_modes[j].name);
1431 arm_set_fixed_conv_libfunc (satfractuns_optab,
1432 fixed_conv_modes[i].mode,
1433 fixed_conv_modes[j].mode, "satfractuns",
1434 fixed_conv_modes[i].name,
1435 fixed_conv_modes[j].name);
1436 }
1437 }
1438
1439 if (TARGET_AAPCS_BASED)
1440 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1441 }
1442
1443 /* On AAPCS systems, this is the "struct __va_list". */
1444 static GTY(()) tree va_list_type;
1445
1446 /* Return the type to use as __builtin_va_list. */
1447 static tree
1448 arm_build_builtin_va_list (void)
1449 {
1450 tree va_list_name;
1451 tree ap_field;
1452
1453 if (!TARGET_AAPCS_BASED)
1454 return std_build_builtin_va_list ();
1455
1456 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1457 defined as:
1458
1459 struct __va_list
1460 {
1461 void *__ap;
1462 };
1463
1464 The C Library ABI further reinforces this definition in \S
1465 4.1.
1466
1467 We must follow this definition exactly. The structure tag
1468 name is visible in C++ mangled names, and thus forms a part
1469 of the ABI. The field name may be used by people who
1470 #include <stdarg.h>. */
1471 /* Create the type. */
1472 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1473 /* Give it the required name. */
1474 va_list_name = build_decl (BUILTINS_LOCATION,
1475 TYPE_DECL,
1476 get_identifier ("__va_list"),
1477 va_list_type);
1478 DECL_ARTIFICIAL (va_list_name) = 1;
1479 TYPE_NAME (va_list_type) = va_list_name;
1480 TYPE_STUB_DECL (va_list_type) = va_list_name;
1481 /* Create the __ap field. */
1482 ap_field = build_decl (BUILTINS_LOCATION,
1483 FIELD_DECL,
1484 get_identifier ("__ap"),
1485 ptr_type_node);
1486 DECL_ARTIFICIAL (ap_field) = 1;
1487 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1488 TYPE_FIELDS (va_list_type) = ap_field;
1489 /* Compute its layout. */
1490 layout_type (va_list_type);
1491
1492 return va_list_type;
1493 }
1494
1495 /* Return an expression of type "void *" pointing to the next
1496 available argument in a variable-argument list. VALIST is the
1497 user-level va_list object, of type __builtin_va_list. */
1498 static tree
1499 arm_extract_valist_ptr (tree valist)
1500 {
1501 if (TREE_TYPE (valist) == error_mark_node)
1502 return error_mark_node;
1503
1504 /* On an AAPCS target, the pointer is stored within "struct
1505 va_list". */
1506 if (TARGET_AAPCS_BASED)
1507 {
1508 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1509 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1510 valist, ap_field, NULL_TREE);
1511 }
1512
1513 return valist;
1514 }
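/* For example: on an AAPCS target the tree returned above is effectively
   the lvalue "valist.__ap", so the generic va_start/va_arg expanders used
   below operate on the single pointer field of "struct __va_list"; on
   other targets the va_list object is itself that pointer and is returned
   unchanged.  */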
1515
1516 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1517 static void
1518 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1519 {
1520 valist = arm_extract_valist_ptr (valist);
1521 std_expand_builtin_va_start (valist, nextarg);
1522 }
1523
1524 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1525 static tree
1526 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1527 gimple_seq *post_p)
1528 {
1529 valist = arm_extract_valist_ptr (valist);
1530 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1531 }
1532
1533 /* Fix up any incompatible options that the user has specified. */
1534 static void
1535 arm_option_override (void)
1536 {
1537 if (global_options_set.x_arm_arch_option)
1538 arm_selected_arch = &all_architectures[arm_arch_option];
1539
1540 if (global_options_set.x_arm_cpu_option)
1541 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1542
1543 if (global_options_set.x_arm_tune_option)
1544 arm_selected_tune = &all_cores[(int) arm_tune_option];
1545
1546 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1547 SUBTARGET_OVERRIDE_OPTIONS;
1548 #endif
1549
1550 if (arm_selected_arch)
1551 {
1552 if (arm_selected_cpu)
1553 {
1554 /* Check for conflict between mcpu and march. */
1555 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1556 {
1557 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1558 arm_selected_cpu->name, arm_selected_arch->name);
1559 /* -march wins for code generation.
1560 -mcpu wins for default tuning. */
1561 if (!arm_selected_tune)
1562 arm_selected_tune = arm_selected_cpu;
1563
1564 arm_selected_cpu = arm_selected_arch;
1565 }
1566 else
1567 /* -mcpu wins. */
1568 arm_selected_arch = NULL;
1569 }
1570 else
1571 /* Pick a CPU based on the architecture. */
1572 arm_selected_cpu = arm_selected_arch;
1573 }
1574
1575 /* If the user did not specify a processor, choose one for them. */
1576 if (!arm_selected_cpu)
1577 {
1578 const struct processors * sel;
1579 unsigned int sought;
1580
1581 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1582 if (!arm_selected_cpu->name)
1583 {
1584 #ifdef SUBTARGET_CPU_DEFAULT
1585 /* Use the subtarget default CPU if none was specified by
1586 configure. */
1587 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1588 #endif
1589 /* Default to ARM6. */
1590 if (!arm_selected_cpu->name)
1591 arm_selected_cpu = &all_cores[arm6];
1592 }
1593
1594 sel = arm_selected_cpu;
1595 insn_flags = sel->flags;
1596
1597 /* Now check to see if the user has specified some command line
1598 switches that require certain abilities from the CPU. */
1599 sought = 0;
1600
1601 if (TARGET_INTERWORK || TARGET_THUMB)
1602 {
1603 sought |= (FL_THUMB | FL_MODE32);
1604
1605 /* There are no ARM processors that support both APCS-26 and
1606 interworking. Therefore we force FL_MODE26 to be removed
1607 from insn_flags here (if it was set), so that the search
1608 below will always be able to find a compatible processor. */
1609 insn_flags &= ~FL_MODE26;
1610 }
1611
1612 if (sought != 0 && ((sought & insn_flags) != sought))
1613 {
1614 /* Try to locate a CPU type that supports all of the abilities
1615 of the default CPU, plus the extra abilities requested by
1616 the user. */
1617 for (sel = all_cores; sel->name != NULL; sel++)
1618 if ((sel->flags & sought) == (sought | insn_flags))
1619 break;
1620
1621 if (sel->name == NULL)
1622 {
1623 unsigned current_bit_count = 0;
1624 const struct processors * best_fit = NULL;
1625
1626 /* Ideally we would like to issue an error message here
1627 saying that it was not possible to find a CPU compatible
1628 with the default CPU, but which also supports the command
1629 line options specified by the programmer, and so they
1630 ought to use the -mcpu=<name> command line option to
1631 override the default CPU type.
1632
1633 If we cannot find a cpu that has both the
1634 characteristics of the default cpu and the given
1635 command line options we scan the array again looking
1636 for a best match. */
1637 for (sel = all_cores; sel->name != NULL; sel++)
1638 if ((sel->flags & sought) == sought)
1639 {
1640 unsigned count;
1641
1642 count = bit_count (sel->flags & insn_flags);
1643
1644 if (count >= current_bit_count)
1645 {
1646 best_fit = sel;
1647 current_bit_count = count;
1648 }
1649 }
1650
1651 gcc_assert (best_fit);
1652 sel = best_fit;
1653 }
1654
1655 arm_selected_cpu = sel;
1656 }
1657 }
1658
1659 gcc_assert (arm_selected_cpu);
1660 /* The selected cpu may be an architecture, so look up tuning by core ID. */
1661 if (!arm_selected_tune)
1662 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1663
1664 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1665 insn_flags = arm_selected_cpu->flags;
1666 arm_base_arch = arm_selected_cpu->base_arch;
1667
1668 arm_tune = arm_selected_tune->core;
1669 tune_flags = arm_selected_tune->flags;
1670 current_tune = arm_selected_tune->tune;
1671
1672 /* Make sure that the processor choice does not conflict with any of the
1673 other command line choices. */
1674 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1675 error ("target CPU does not support ARM mode");
1676
1677 /* BPABI targets use linker tricks to allow interworking on cores
1678 without thumb support. */
1679 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1680 {
1681 warning (0, "target CPU does not support interworking");
1682 target_flags &= ~MASK_INTERWORK;
1683 }
1684
1685 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1686 {
1687 warning (0, "target CPU does not support THUMB instructions");
1688 target_flags &= ~MASK_THUMB;
1689 }
1690
1691 if (TARGET_APCS_FRAME && TARGET_THUMB)
1692 {
1693 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1694 target_flags &= ~MASK_APCS_FRAME;
1695 }
1696
1697 /* Callee super interworking implies thumb interworking. Adding
1698 this to the flags here simplifies the logic elsewhere. */
1699 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1700 target_flags |= MASK_INTERWORK;
1701
1702 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1703 from here where no function is being compiled currently. */
1704 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1705 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1706
1707 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1708 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1709
1710 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1711 {
1712 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1713 target_flags |= MASK_APCS_FRAME;
1714 }
1715
1716 if (TARGET_POKE_FUNCTION_NAME)
1717 target_flags |= MASK_APCS_FRAME;
1718
1719 if (TARGET_APCS_REENT && flag_pic)
1720 error ("-fpic and -mapcs-reent are incompatible");
1721
1722 if (TARGET_APCS_REENT)
1723 warning (0, "APCS reentrant code not supported. Ignored");
1724
1725 /* If this target is normally configured to use APCS frames, warn if they
1726 are turned off and debugging is turned on. */
1727 if (TARGET_ARM
1728 && write_symbols != NO_DEBUG
1729 && !TARGET_APCS_FRAME
1730 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1731 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1732
1733 if (TARGET_APCS_FLOAT)
1734 warning (0, "passing floating point arguments in fp regs not yet supported");
1735
1736 if (TARGET_LITTLE_WORDS)
1737 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1738 "will be removed in a future release");
1739
1740 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1741 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1742 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1743 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1744 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1745 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1746 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1747 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1748 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1749 arm_arch6m = arm_arch6 && !arm_arch_notm;
1750 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1751 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1752 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
1753 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1754 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1755
1756 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1757 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1758 thumb_code = TARGET_ARM == 0;
1759 thumb1_code = TARGET_THUMB1 != 0;
1760 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1761 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1762 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1763 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1764 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1765 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1766 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1767
1768 /* If we are not using the default (ARM mode) section anchor offset
1769 ranges, then set the correct ranges now. */
1770 if (TARGET_THUMB1)
1771 {
1772 /* Thumb-1 LDR instructions cannot have negative offsets.
1773 Permissible positive offset ranges are 5-bit (for byte loads),
1774 6-bit (for halfword loads), or 7-bit (for word loads).
1775 Empirical results suggest a 7-bit anchor range gives the best
1776 overall code size. */
1777 targetm.min_anchor_offset = 0;
1778 targetm.max_anchor_offset = 127;
1779 }
1780 else if (TARGET_THUMB2)
1781 {
1782 /* The minimum is set such that the total size of the block
1783 for a particular anchor is 248 + 1 + 4095 bytes, which is
1784 divisible by eight, ensuring natural spacing of anchors. */
1785 targetm.min_anchor_offset = -248;
1786 targetm.max_anchor_offset = 4095;
1787 }
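/* For reference: with these values the total block size per anchor is
   248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543, which is the
   divisibility-by-eight property mentioned above.  */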
1788
1789 /* V5 code we generate is completely interworking capable, so we turn off
1790 TARGET_INTERWORK here to avoid many tests later on. */
1791
1792 /* XXX However, we must pass the right pre-processor defines to CPP
1793 or GLD can get confused. This is a hack. */
1794 if (TARGET_INTERWORK)
1795 arm_cpp_interwork = 1;
1796
1797 if (arm_arch5)
1798 target_flags &= ~MASK_INTERWORK;
1799
1800 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1801 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1802
1803 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1804 error ("iwmmxt abi requires an iwmmxt capable cpu");
1805
1806 if (!global_options_set.x_arm_fpu_index)
1807 {
1808 const char *target_fpu_name;
1809 bool ok;
1810
1811 #ifdef FPUTYPE_DEFAULT
1812 target_fpu_name = FPUTYPE_DEFAULT;
1813 #else
1814 target_fpu_name = "vfp";
1815 #endif
1816
1817 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1818 CL_TARGET);
1819 gcc_assert (ok);
1820 }
1821
1822 arm_fpu_desc = &all_fpus[arm_fpu_index];
1823
1824 switch (arm_fpu_desc->model)
1825 {
1826 case ARM_FP_MODEL_VFP:
1827 arm_fpu_attr = FPU_VFP;
1828 break;
1829
1830 default:
1831 gcc_unreachable();
1832 }
1833
1834 if (TARGET_AAPCS_BASED)
1835 {
1836 if (TARGET_CALLER_INTERWORKING)
1837 error ("AAPCS does not support -mcaller-super-interworking");
1838 else
1839 if (TARGET_CALLEE_INTERWORKING)
1840 error ("AAPCS does not support -mcallee-super-interworking");
1841 }
1842
1843 /* iWMMXt and NEON are incompatible. */
1844 if (TARGET_IWMMXT && TARGET_NEON)
1845 error ("iWMMXt and NEON are incompatible");
1846
1847 /* iWMMXt unsupported under Thumb mode. */
1848 if (TARGET_THUMB && TARGET_IWMMXT)
1849 error ("iWMMXt unsupported under Thumb mode");
1850
1851 /* __fp16 support currently assumes the core has ldrh. */
1852 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1853 sorry ("__fp16 and no ldrh");
1854
1855 /* If soft-float is specified then don't use FPU. */
1856 if (TARGET_SOFT_FLOAT)
1857 arm_fpu_attr = FPU_NONE;
1858
1859 if (TARGET_AAPCS_BASED)
1860 {
1861 if (arm_abi == ARM_ABI_IWMMXT)
1862 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1863 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1864 && TARGET_HARD_FLOAT
1865 && TARGET_VFP)
1866 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1867 else
1868 arm_pcs_default = ARM_PCS_AAPCS;
1869 }
1870 else
1871 {
1872 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1873 sorry ("-mfloat-abi=hard and VFP");
1874
1875 if (arm_abi == ARM_ABI_APCS)
1876 arm_pcs_default = ARM_PCS_APCS;
1877 else
1878 arm_pcs_default = ARM_PCS_ATPCS;
1879 }
1880
1881 /* For arm2/3 there is no need to do any scheduling if we are doing
1882 software floating-point. */
1883 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
1884 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1885
1886 /* Use the cp15 method if it is available. */
1887 if (target_thread_pointer == TP_AUTO)
1888 {
1889 if (arm_arch6k && !TARGET_THUMB1)
1890 target_thread_pointer = TP_CP15;
1891 else
1892 target_thread_pointer = TP_SOFT;
1893 }
1894
1895 if (TARGET_HARD_TP && TARGET_THUMB1)
1896 error ("can not use -mtp=cp15 with 16-bit Thumb");
1897
1898 /* Override the default structure alignment for AAPCS ABI. */
1899 if (!global_options_set.x_arm_structure_size_boundary)
1900 {
1901 if (TARGET_AAPCS_BASED)
1902 arm_structure_size_boundary = 8;
1903 }
1904 else
1905 {
1906 if (arm_structure_size_boundary != 8
1907 && arm_structure_size_boundary != 32
1908 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1909 {
1910 if (ARM_DOUBLEWORD_ALIGN)
1911 warning (0,
1912 "structure size boundary can only be set to 8, 32 or 64");
1913 else
1914 warning (0, "structure size boundary can only be set to 8 or 32");
1915 arm_structure_size_boundary
1916 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1917 }
1918 }
1919
1920 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1921 {
1922 error ("RTP PIC is incompatible with Thumb");
1923 flag_pic = 0;
1924 }
1925
1926 /* If stack checking is disabled, we can use r10 as the PIC register,
1927 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1928 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1929 {
1930 if (TARGET_VXWORKS_RTP)
1931 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1932 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1933 }
1934
1935 if (flag_pic && TARGET_VXWORKS_RTP)
1936 arm_pic_register = 9;
1937
1938 if (arm_pic_register_string != NULL)
1939 {
1940 int pic_register = decode_reg_name (arm_pic_register_string);
1941
1942 if (!flag_pic)
1943 warning (0, "-mpic-register= is useless without -fpic");
1944
1945 /* Prevent the user from choosing an obviously stupid PIC register. */
1946 else if (pic_register < 0 || call_used_regs[pic_register]
1947 || pic_register == HARD_FRAME_POINTER_REGNUM
1948 || pic_register == STACK_POINTER_REGNUM
1949 || pic_register >= PC_REGNUM
1950 || (TARGET_VXWORKS_RTP
1951 && (unsigned int) pic_register != arm_pic_register))
1952 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1953 else
1954 arm_pic_register = pic_register;
1955 }
1956
1957 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1958 if (fix_cm3_ldrd == 2)
1959 {
1960 if (arm_selected_cpu->core == cortexm3)
1961 fix_cm3_ldrd = 1;
1962 else
1963 fix_cm3_ldrd = 0;
1964 }
1965
1966 /* Enable -munaligned-access by default for
1967 - all ARMv6 architecture-based processors
1968 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1969 - ARMv8 architecture-based processors.
1970
1971 Disable -munaligned-access by default for
1972 - all pre-ARMv6 architecture-based processors
1973 - ARMv6-M architecture-based processors. */
1974
1975 if (unaligned_access == 2)
1976 {
1977 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1978 unaligned_access = 1;
1979 else
1980 unaligned_access = 0;
1981 }
1982 else if (unaligned_access == 1
1983 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1984 {
1985 warning (0, "target CPU does not support unaligned accesses");
1986 unaligned_access = 0;
1987 }
1988
1989 if (TARGET_THUMB1 && flag_schedule_insns)
1990 {
1991 /* Don't warn since it's on by default in -O2. */
1992 flag_schedule_insns = 0;
1993 }
1994
1995 if (optimize_size)
1996 {
1997 /* If optimizing for size, bump the number of instructions that we
1998 are prepared to conditionally execute (even on a StrongARM). */
1999 max_insns_skipped = 6;
2000 }
2001 else
2002 max_insns_skipped = current_tune->max_insns_skipped;
2003
2004 /* Hot/Cold partitioning is not currently supported, since we can't
2005 handle literal pool placement in that case. */
2006 if (flag_reorder_blocks_and_partition)
2007 {
2008 inform (input_location,
2009 "-freorder-blocks-and-partition not supported on this architecture");
2010 flag_reorder_blocks_and_partition = 0;
2011 flag_reorder_blocks = 1;
2012 }
2013
2014 if (flag_pic)
2015 /* Hoisting PIC address calculations more aggressively provides a small,
2016 but measurable, size reduction for PIC code. Therefore, we decrease
2017 the bar for unrestricted expression hoisting to the cost of PIC address
2018 calculation, which is 2 instructions. */
2019 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2020 global_options.x_param_values,
2021 global_options_set.x_param_values);
2022
2023 /* ARM EABI defaults to strict volatile bitfields. */
2024 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2025 && abi_version_at_least(2))
2026 flag_strict_volatile_bitfields = 1;
2027
2028 /* Enable software prefetching at -O3 for CPUs that have prefetch and where we
2029 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2030 if (flag_prefetch_loop_arrays < 0
2031 && HAVE_prefetch
2032 && optimize >= 3
2033 && current_tune->num_prefetch_slots > 0)
2034 flag_prefetch_loop_arrays = 1;
2035
2036 /* Set up parameters to be used in the prefetching algorithm. Do not override
2037 the defaults unless we are tuning for a core for which we have researched values. */
2038 if (current_tune->num_prefetch_slots > 0)
2039 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2040 current_tune->num_prefetch_slots,
2041 global_options.x_param_values,
2042 global_options_set.x_param_values);
2043 if (current_tune->l1_cache_line_size >= 0)
2044 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2045 current_tune->l1_cache_line_size,
2046 global_options.x_param_values,
2047 global_options_set.x_param_values);
2048 if (current_tune->l1_cache_size >= 0)
2049 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2050 current_tune->l1_cache_size,
2051 global_options.x_param_values,
2052 global_options_set.x_param_values);
2053
2054 /* Use the alternative scheduling-pressure algorithm by default. */
2055 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2056 global_options.x_param_values,
2057 global_options_set.x_param_values);
2058
2059 /* Register global variables with the garbage collector. */
2060 arm_add_gc_roots ();
2061 }
2062
2063 static void
2064 arm_add_gc_roots (void)
2065 {
2066 gcc_obstack_init(&minipool_obstack);
2067 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2068 }
2069 \f
2070 /* A table of known ARM exception types.
2071 For use with the interrupt function attribute. */
2072
2073 typedef struct
2074 {
2075 const char *const arg;
2076 const unsigned long return_value;
2077 }
2078 isr_attribute_arg;
2079
2080 static const isr_attribute_arg isr_attribute_args [] =
2081 {
2082 { "IRQ", ARM_FT_ISR },
2083 { "irq", ARM_FT_ISR },
2084 { "FIQ", ARM_FT_FIQ },
2085 { "fiq", ARM_FT_FIQ },
2086 { "ABORT", ARM_FT_ISR },
2087 { "abort", ARM_FT_ISR },
2090 { "UNDEF", ARM_FT_EXCEPTION },
2091 { "undef", ARM_FT_EXCEPTION },
2092 { "SWI", ARM_FT_EXCEPTION },
2093 { "swi", ARM_FT_EXCEPTION },
2094 { NULL, ARM_FT_NORMAL }
2095 };
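/* For example, with the table above a handler can be declared as

     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));

   which arm_isr_value maps to ARM_FT_FIQ; an attribute with no argument
   defaults to ARM_FT_ISR (an IRQ handler).  */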
2096
2097 /* Returns the (interrupt) function type of the current
2098 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2099
2100 static unsigned long
2101 arm_isr_value (tree argument)
2102 {
2103 const isr_attribute_arg * ptr;
2104 const char * arg;
2105
2106 if (!arm_arch_notm)
2107 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2108
2109 /* No argument - default to IRQ. */
2110 if (argument == NULL_TREE)
2111 return ARM_FT_ISR;
2112
2113 /* Get the value of the argument. */
2114 if (TREE_VALUE (argument) == NULL_TREE
2115 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2116 return ARM_FT_UNKNOWN;
2117
2118 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2119
2120 /* Check it against the list of known arguments. */
2121 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2122 if (streq (arg, ptr->arg))
2123 return ptr->return_value;
2124
2125 /* An unrecognized interrupt type. */
2126 return ARM_FT_UNKNOWN;
2127 }
2128
2129 /* Computes the type of the current function. */
2130
2131 static unsigned long
2132 arm_compute_func_type (void)
2133 {
2134 unsigned long type = ARM_FT_UNKNOWN;
2135 tree a;
2136 tree attr;
2137
2138 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2139
2140 /* Decide if the current function is volatile. Such functions
2141 never return, and many memory cycles can be saved by not storing
2142 register values that will never be needed again. This optimization
2143 was added to speed up context switching in a kernel application. */
2144 if (optimize > 0
2145 && (TREE_NOTHROW (current_function_decl)
2146 || !(flag_unwind_tables
2147 || (flag_exceptions
2148 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2149 && TREE_THIS_VOLATILE (current_function_decl))
2150 type |= ARM_FT_VOLATILE;
2151
2152 if (cfun->static_chain_decl != NULL)
2153 type |= ARM_FT_NESTED;
2154
2155 attr = DECL_ATTRIBUTES (current_function_decl);
2156
2157 a = lookup_attribute ("naked", attr);
2158 if (a != NULL_TREE)
2159 type |= ARM_FT_NAKED;
2160
2161 a = lookup_attribute ("isr", attr);
2162 if (a == NULL_TREE)
2163 a = lookup_attribute ("interrupt", attr);
2164
2165 if (a == NULL_TREE)
2166 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2167 else
2168 type |= arm_isr_value (TREE_VALUE (a));
2169
2170 return type;
2171 }
2172
2173 /* Returns the type of the current function. */
2174
2175 unsigned long
2176 arm_current_func_type (void)
2177 {
2178 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2179 cfun->machine->func_type = arm_compute_func_type ();
2180
2181 return cfun->machine->func_type;
2182 }
2183
2184 bool
2185 arm_allocate_stack_slots_for_args (void)
2186 {
2187 /* Naked functions should not allocate stack slots for arguments. */
2188 return !IS_NAKED (arm_current_func_type ());
2189 }
2190
2191 static bool
2192 arm_warn_func_return (tree decl)
2193 {
2194 /* Naked functions are implemented entirely in assembly, including the
2195 return sequence, so suppress warnings about this. */
2196 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2197 }
2198
2199 \f
2200 /* Output assembler code for a block containing the constant parts
2201 of a trampoline, leaving space for the variable parts.
2202
2203 On the ARM, (if r8 is the static chain regnum, and remembering that
2204 referencing pc adds an offset of 8) the trampoline looks like:
2205 ldr r8, [pc, #0]
2206 ldr pc, [pc]
2207 .word static chain value
2208 .word function's address
2209 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2210
2211 static void
2212 arm_asm_trampoline_template (FILE *f)
2213 {
2214 if (TARGET_ARM)
2215 {
2216 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2217 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2218 }
2219 else if (TARGET_THUMB2)
2220 {
2221 /* The Thumb-2 trampoline is similar to the ARM implementation.
2222 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
2223 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2224 STATIC_CHAIN_REGNUM, PC_REGNUM);
2225 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2226 }
2227 else
2228 {
2229 ASM_OUTPUT_ALIGN (f, 2);
2230 fprintf (f, "\t.code\t16\n");
2231 fprintf (f, ".Ltrampoline_start:\n");
2232 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2233 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2234 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2235 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2236 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2237 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2238 }
2239 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2240 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2241 }
2242
2243 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2244
2245 static void
2246 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2247 {
2248 rtx fnaddr, mem, a_tramp;
2249
2250 emit_block_move (m_tramp, assemble_trampoline_template (),
2251 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2252
2253 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2254 emit_move_insn (mem, chain_value);
2255
2256 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2257 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2258 emit_move_insn (mem, fnaddr);
2259
2260 a_tramp = XEXP (m_tramp, 0);
2261 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2262 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2263 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2264 }
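/* To illustrate the layout initialized above: the static chain value is
   stored at offset 8 (ARM/Thumb-2) or 12 (Thumb-1) and the target
   function address in the following word, matching the two .word slots
   emitted at the end of arm_asm_trampoline_template.  */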
2265
2266 /* Thumb trampolines should be entered in thumb mode, so set
2267 the bottom bit of the address. */
2268
2269 static rtx
2270 arm_trampoline_adjust_address (rtx addr)
2271 {
2272 if (TARGET_THUMB)
2273 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2274 NULL, 0, OPTAB_LIB_WIDEN);
2275 return addr;
2276 }
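/* For instance, if the trampoline sits at address 0x8000, a Thumb target
   returns 0x8001 here so that an indirect call (BX/BLX) enters the stub
   in Thumb state rather than ARM state.  */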
2277 \f
2278 /* Return 1 if it is possible to return using a single instruction.
2279 If SIBLING is non-null, this is a test for a return before a sibling
2280 call. SIBLING is the call insn, so we can examine its register usage. */
2281
2282 int
2283 use_return_insn (int iscond, rtx sibling)
2284 {
2285 int regno;
2286 unsigned int func_type;
2287 unsigned long saved_int_regs;
2288 unsigned HOST_WIDE_INT stack_adjust;
2289 arm_stack_offsets *offsets;
2290
2291 /* Never use a return instruction before reload has run. */
2292 if (!reload_completed)
2293 return 0;
2294
2295 func_type = arm_current_func_type ();
2296
2297 /* Naked, volatile and stack alignment functions need special
2298 consideration. */
2299 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2300 return 0;
2301
2302 /* So do interrupt functions that use the frame pointer and Thumb
2303 interrupt functions. */
2304 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2305 return 0;
2306
2307 offsets = arm_get_frame_offsets ();
2308 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2309
2310 /* As do variadic functions. */
2311 if (crtl->args.pretend_args_size
2312 || cfun->machine->uses_anonymous_args
2313 /* Or if the function calls __builtin_eh_return () */
2314 || crtl->calls_eh_return
2315 /* Or if the function calls alloca */
2316 || cfun->calls_alloca
2317 /* Or if there is a stack adjustment. However, if the stack pointer
2318 is saved on the stack, we can use a pre-incrementing stack load. */
2319 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2320 && stack_adjust == 4)))
2321 return 0;
2322
2323 saved_int_regs = offsets->saved_regs_mask;
2324
2325 /* Unfortunately, the insn
2326
2327 ldmib sp, {..., sp, ...}
2328
2329 triggers a bug on most SA-110 based devices, such that the stack
2330 pointer won't be correctly restored if the instruction takes a
2331 page fault. We work around this problem by popping r3 along with
2332 the other registers, since that is never slower than executing
2333 another instruction.
2334
2335 We test for !arm_arch5 here, because code for any architecture
2336 less than this could potentially be run on one of the buggy
2337 chips. */
2338 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2339 {
2340 /* Validate that r3 is a call-clobbered register (always true in
2341 the default abi) ... */
2342 if (!call_used_regs[3])
2343 return 0;
2344
2345 /* ... that it isn't being used for a return value ... */
2346 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2347 return 0;
2348
2349 /* ... or for a tail-call argument ... */
2350 if (sibling)
2351 {
2352 gcc_assert (CALL_P (sibling));
2353
2354 if (find_regno_fusage (sibling, USE, 3))
2355 return 0;
2356 }
2357
2358 /* ... and that there are no call-saved registers in r0-r2
2359 (always true in the default ABI). */
2360 if (saved_int_regs & 0x7)
2361 return 0;
2362 }
2363
2364 /* Can't be done if interworking with Thumb, and any registers have been
2365 stacked. */
2366 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2367 return 0;
2368
2369 /* On StrongARM, conditional returns are expensive if they aren't
2370 taken and multiple registers have been stacked. */
2371 if (iscond && arm_tune_strongarm)
2372 {
2373 /* Conditional return when just the LR is stored is a simple
2374 conditional-load instruction, that's not expensive. */
2375 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2376 return 0;
2377
2378 if (flag_pic
2379 && arm_pic_register != INVALID_REGNUM
2380 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2381 return 0;
2382 }
2383
2384 /* If there are saved registers but the LR isn't saved, then we need
2385 two instructions for the return. */
2386 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2387 return 0;
2388
2389 /* Can't be done if any of the VFP regs are pushed,
2390 since this also requires an insn. */
2391 if (TARGET_HARD_FLOAT && TARGET_VFP)
2392 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2393 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2394 return 0;
2395
2396 if (TARGET_REALLY_IWMMXT)
2397 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2398 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2399 return 0;
2400
2401 return 1;
2402 }
2403
2404 /* Return TRUE if int I is a valid immediate ARM constant. */
2405
2406 int
2407 const_ok_for_arm (HOST_WIDE_INT i)
2408 {
2409 int lowbit;
2410
2411 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2412 be all zero, or all one. */
2413 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2414 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2415 != ((~(unsigned HOST_WIDE_INT) 0)
2416 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2417 return FALSE;
2418
2419 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2420
2421 /* Fast return for 0 and small values. We must do this for zero, since
2422 the code below can't handle that one case. */
2423 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2424 return TRUE;
2425
2426 /* Get the number of trailing zeros. */
2427 lowbit = ffs((int) i) - 1;
2428
2429 /* Only even shifts are allowed in ARM mode so round down to the
2430 nearest even number. */
2431 if (TARGET_ARM)
2432 lowbit &= ~1;
2433
2434 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2435 return TRUE;
2436
2437 if (TARGET_ARM)
2438 {
2439 /* Allow rotated constants in ARM mode. */
2440 if (lowbit <= 4
2441 && ((i & ~0xc000003f) == 0
2442 || (i & ~0xf000000f) == 0
2443 || (i & ~0xfc000003) == 0))
2444 return TRUE;
2445 }
2446 else
2447 {
2448 HOST_WIDE_INT v;
2449
2450 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2451 v = i & 0xff;
2452 v |= v << 16;
2453 if (i == v || i == (v | (v << 8)))
2454 return TRUE;
2455
2456 /* Allow repeated pattern 0xXY00XY00. */
2457 v = i & 0xff00;
2458 v |= v << 16;
2459 if (i == v)
2460 return TRUE;
2461 }
2462
2463 return FALSE;
2464 }
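/* Illustrative examples of the checks above (not exhaustive):

     0x000003fc  (0xff << 2)          valid for both ARM and Thumb-2.
     0x000001fe  (0xff << 1)          Thumb-2 only; ARM rotations are even.
     0xf000000f  (0xff rotated by 4)  ARM only; the 8-bit pattern may wrap
                                      around the top of the word.
     0x00120012                       Thumb-2 replicated 0x00XY00XY pattern.  */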
2465
2466 /* Return true if I is a valid constant for the operation CODE. */
2467 int
2468 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2469 {
2470 if (const_ok_for_arm (i))
2471 return 1;
2472
2473 switch (code)
2474 {
2475 case SET:
2476 /* See if we can use movw. */
2477 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2478 return 1;
2479 else
2480 /* Otherwise, try mvn. */
2481 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2482
2483 case PLUS:
2484 /* See if we can use addw or subw. */
2485 if (TARGET_THUMB2
2486 && ((i & 0xfffff000) == 0
2487 || ((-i) & 0xfffff000) == 0))
2488 return 1;
2489 /* else fall through. */
2490
2491 case COMPARE:
2492 case EQ:
2493 case NE:
2494 case GT:
2495 case LE:
2496 case LT:
2497 case GE:
2498 case GEU:
2499 case LTU:
2500 case GTU:
2501 case LEU:
2502 case UNORDERED:
2503 case ORDERED:
2504 case UNEQ:
2505 case UNGE:
2506 case UNLT:
2507 case UNGT:
2508 case UNLE:
2509 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2510
2511 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2512 case XOR:
2513 return 0;
2514
2515 case IOR:
2516 if (TARGET_THUMB2)
2517 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2518 return 0;
2519
2520 case AND:
2521 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2522
2523 default:
2524 gcc_unreachable ();
2525 }
2526 }
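/* For example, for CODE == PLUS the constant -255 is accepted even though
   it is not itself a valid immediate: its negation 255 is, so the addition
   can be emitted as a SUB (or as SUBW on Thumb-2).  */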
2527
2528 /* Return true if I is a valid di mode constant for the operation CODE. */
2529 int
2530 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2531 {
2532 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2533 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2534 rtx hi = GEN_INT (hi_val);
2535 rtx lo = GEN_INT (lo_val);
2536
2537 if (TARGET_THUMB1)
2538 return 0;
2539
2540 switch (code)
2541 {
2542 case PLUS:
2543 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2544
2545 default:
2546 return 0;
2547 }
2548 }
2549
2550 /* Emit a sequence of insns to handle a large constant.
2551 CODE is the code of the operation required, it can be any of SET, PLUS,
2552 IOR, AND, XOR, MINUS;
2553 MODE is the mode in which the operation is being performed;
2554 VAL is the integer to operate on;
2555 SOURCE is the other operand (a register, or a null-pointer for SET);
2556 SUBTARGETS means it is safe to create scratch registers if that will
2557 either produce a simpler sequence, or if we will want to CSE the values.
2558 Return value is the number of insns emitted. */
2559
2560 /* ??? Tweak this for thumb2. */
2561 int
2562 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2563 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2564 {
2565 rtx cond;
2566
2567 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2568 cond = COND_EXEC_TEST (PATTERN (insn));
2569 else
2570 cond = NULL_RTX;
2571
2572 if (subtargets || code == SET
2573 || (REG_P (target) && REG_P (source)
2574 && REGNO (target) != REGNO (source)))
2575 {
2576 /* After arm_reorg has been called, we can't fix up expensive
2577 constants by pushing them into memory so we must synthesize
2578 them in-line, regardless of the cost. This is only likely to
2579 be more costly on chips that have load delay slots and we are
2580 compiling without running the scheduler (so no splitting
2581 occurred before the final instruction emission).
2582
2583 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2584 */
2585 if (!after_arm_reorg
2586 && !cond
2587 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2588 1, 0)
2589 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2590 + (code != SET))))
2591 {
2592 if (code == SET)
2593 {
2594 /* Currently SET is the only monadic value for CODE; all
2595 the rest are dyadic. */
2596 if (TARGET_USE_MOVT)
2597 arm_emit_movpair (target, GEN_INT (val));
2598 else
2599 emit_set_insn (target, GEN_INT (val));
2600
2601 return 1;
2602 }
2603 else
2604 {
2605 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2606
2607 if (TARGET_USE_MOVT)
2608 arm_emit_movpair (temp, GEN_INT (val));
2609 else
2610 emit_set_insn (temp, GEN_INT (val));
2611
2612 /* For MINUS, the source is subtracted from the value, since
2613 we never have subtraction of a constant. */
2614 if (code == MINUS)
2615 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2616 else
2617 emit_set_insn (target,
2618 gen_rtx_fmt_ee (code, mode, source, temp));
2619 return 2;
2620 }
2621 }
2622 }
2623
2624 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2625 1);
2626 }
2627
2628 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2629 ARM/Thumb-2 immediates and add up to VAL.
2630 The function return value gives the number of insns required. */
2631 static int
2632 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2633 struct four_ints *return_sequence)
2634 {
2635 int best_consecutive_zeros = 0;
2636 int i;
2637 int best_start = 0;
2638 int insns1, insns2;
2639 struct four_ints tmp_sequence;
2640
2641 /* If we aren't targeting ARM, the best place to start is always at
2642 the bottom; otherwise look more closely. */
2643 if (TARGET_ARM)
2644 {
2645 for (i = 0; i < 32; i += 2)
2646 {
2647 int consecutive_zeros = 0;
2648
2649 if (!(val & (3 << i)))
2650 {
2651 while ((i < 32) && !(val & (3 << i)))
2652 {
2653 consecutive_zeros += 2;
2654 i += 2;
2655 }
2656 if (consecutive_zeros > best_consecutive_zeros)
2657 {
2658 best_consecutive_zeros = consecutive_zeros;
2659 best_start = i - consecutive_zeros;
2660 }
2661 i -= 2;
2662 }
2663 }
2664 }
2665
2666 /* So long as it won't require any more insns to do so, it's
2667 desirable to emit a small constant (in bits 0...9) in the last
2668 insn. This way there is more chance that it can be combined with
2669 a later addressing insn to form a pre-indexed load or store
2670 operation. Consider:
2671
2672 *((volatile int *)0xe0000100) = 1;
2673 *((volatile int *)0xe0000110) = 2;
2674
2675 We want this to wind up as:
2676
2677 mov rA, #0xe0000000
2678 mov rB, #1
2679 str rB, [rA, #0x100]
2680 mov rB, #2
2681 str rB, [rA, #0x110]
2682
2683 rather than having to synthesize both large constants from scratch.
2684
2685 Therefore, we calculate how many insns would be required to emit
2686 the constant starting from `best_start', and also starting from
2687 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2688 yield a shorter sequence, we may as well use zero. */
2689 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2690 if (best_start != 0
2691 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2692 {
2693 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2694 if (insns2 <= insns1)
2695 {
2696 *return_sequence = tmp_sequence;
2697 insns1 = insns2;
2698 }
2699 }
2700
2701 return insns1;
2702 }
2703
2704 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2705 static int
2706 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2707 struct four_ints *return_sequence, int i)
2708 {
2709 int remainder = val & 0xffffffff;
2710 int insns = 0;
2711
2712 /* Try and find a way of doing the job in either two or three
2713 instructions.
2714
2715 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2716 location. We start at position I. This may be the MSB, or
2717 optimal_immediate_sequence may have positioned it at the largest block
2718 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2719 wrapping around to the top of the word when we drop off the bottom.
2720 In the worst case this code should produce no more than four insns.
2721
2722 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2723 constants, shifted to any arbitrary location. We should always start
2724 at the MSB. */
2725 do
2726 {
2727 int end;
2728 unsigned int b1, b2, b3, b4;
2729 unsigned HOST_WIDE_INT result;
2730 int loc;
2731
2732 gcc_assert (insns < 4);
2733
2734 if (i <= 0)
2735 i += 32;
2736
2737 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2738 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2739 {
2740 loc = i;
2741 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2742 /* We can use addw/subw for the last 12 bits. */
2743 result = remainder;
2744 else
2745 {
2746 /* Use an 8-bit shifted/rotated immediate. */
2747 end = i - 8;
2748 if (end < 0)
2749 end += 32;
2750 result = remainder & ((0x0ff << end)
2751 | ((i < end) ? (0xff >> (32 - end))
2752 : 0));
2753 i -= 8;
2754 }
2755 }
2756 else
2757 {
2758 /* ARM allows rotates by a multiple of two. Thumb-2 allows
2759 arbitrary shifts. */
2760 i -= TARGET_ARM ? 2 : 1;
2761 continue;
2762 }
2763
2764 /* Next, see if we can do a better job with a thumb2 replicated
2765 constant.
2766
2767 We do it this way around to catch the cases like 0x01F001E0 where
2768 two 8-bit immediates would work, but a replicated constant would
2769 make it worse.
2770
2771 TODO: 16-bit constants that don't clear all the bits, but still win.
2772 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2773 if (TARGET_THUMB2)
2774 {
2775 b1 = (remainder & 0xff000000) >> 24;
2776 b2 = (remainder & 0x00ff0000) >> 16;
2777 b3 = (remainder & 0x0000ff00) >> 8;
2778 b4 = remainder & 0xff;
2779
2780 if (loc > 24)
2781 {
2782 /* The 8-bit immediate already found clears b1 (and maybe b2),
2783 but must leave b3 and b4 alone. */
2784
2785 /* First try to find a 32-bit replicated constant that clears
2786 almost everything. We can assume that we can't do it in one,
2787 or else we wouldn't be here. */
2788 unsigned int tmp = b1 & b2 & b3 & b4;
2789 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2790 + (tmp << 24);
2791 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2792 + (tmp == b3) + (tmp == b4);
2793 if (tmp
2794 && (matching_bytes >= 3
2795 || (matching_bytes == 2
2796 && const_ok_for_op (remainder & ~tmp2, code))))
2797 {
2798 /* At least 3 of the bytes match, and the fourth has at
2799 least as many bits set, or two of the bytes match
2800 and it will only require one more insn to finish. */
2801 result = tmp2;
2802 i = tmp != b1 ? 32
2803 : tmp != b2 ? 24
2804 : tmp != b3 ? 16
2805 : 8;
2806 }
2807
2808 /* Second, try to find a 16-bit replicated constant that can
2809 leave three of the bytes clear. If b2 or b4 is already
2810 zero, then we can. If the 8-bit from above would not
2811 clear b2 anyway, then we still win. */
2812 else if (b1 == b3 && (!b2 || !b4
2813 || (remainder & 0x00ff0000 & ~result)))
2814 {
2815 result = remainder & 0xff00ff00;
2816 i = 24;
2817 }
2818 }
2819 else if (loc > 16)
2820 {
2821 /* The 8-bit immediate already found clears b2 (and maybe b3)
2822 and we don't get here unless b1 is already clear, but it will
2823 leave b4 unchanged. */
2824
2825 /* If we can clear b2 and b4 at once, then we win, since the
2826 8-bits couldn't possibly reach that far. */
2827 if (b2 == b4)
2828 {
2829 result = remainder & 0x00ff00ff;
2830 i = 16;
2831 }
2832 }
2833 }
2834
2835 return_sequence->i[insns++] = result;
2836 remainder &= ~result;
2837
2838 if (code == SET || code == MINUS)
2839 code = PLUS;
2840 }
2841 while (remainder);
2842
2843 return insns;
2844 }
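/* A worked example of the routine above (illustrative): the value
   0x01f001e0 cannot be loaded in one instruction, and leading with a
   replicated constant would make things worse (as noted above), so it is
   split into the two 8-bit immediates 0x01f00000 and 0x000001e0, giving a
   two-instruction sequence (e.g. a MOV followed by an ORR or ADDW).  */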
2845
2846 /* Emit an instruction with the indicated PATTERN. If COND is
2847 non-NULL, conditionalize the execution of the instruction on COND
2848 being true. */
2849
2850 static void
2851 emit_constant_insn (rtx cond, rtx pattern)
2852 {
2853 if (cond)
2854 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2855 emit_insn (pattern);
2856 }
2857
2858 /* As above, but extra parameter GENERATE which, if clear, suppresses
2859 RTL generation. */
2860
2861 static int
2862 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2863 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2864 int generate)
2865 {
2866 int can_invert = 0;
2867 int can_negate = 0;
2868 int final_invert = 0;
2869 int i;
2870 int set_sign_bit_copies = 0;
2871 int clear_sign_bit_copies = 0;
2872 int clear_zero_bit_copies = 0;
2873 int set_zero_bit_copies = 0;
2874 int insns = 0, neg_insns, inv_insns;
2875 unsigned HOST_WIDE_INT temp1, temp2;
2876 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2877 struct four_ints *immediates;
2878 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2879
2880 /* Find out which operations are safe for a given CODE. Also do a quick
2881 check for degenerate cases; these can occur when DImode operations
2882 are split. */
2883 switch (code)
2884 {
2885 case SET:
2886 can_invert = 1;
2887 break;
2888
2889 case PLUS:
2890 can_negate = 1;
2891 break;
2892
2893 case IOR:
2894 if (remainder == 0xffffffff)
2895 {
2896 if (generate)
2897 emit_constant_insn (cond,
2898 gen_rtx_SET (VOIDmode, target,
2899 GEN_INT (ARM_SIGN_EXTEND (val))));
2900 return 1;
2901 }
2902
2903 if (remainder == 0)
2904 {
2905 if (reload_completed && rtx_equal_p (target, source))
2906 return 0;
2907
2908 if (generate)
2909 emit_constant_insn (cond,
2910 gen_rtx_SET (VOIDmode, target, source));
2911 return 1;
2912 }
2913 break;
2914
2915 case AND:
2916 if (remainder == 0)
2917 {
2918 if (generate)
2919 emit_constant_insn (cond,
2920 gen_rtx_SET (VOIDmode, target, const0_rtx));
2921 return 1;
2922 }
2923 if (remainder == 0xffffffff)
2924 {
2925 if (reload_completed && rtx_equal_p (target, source))
2926 return 0;
2927 if (generate)
2928 emit_constant_insn (cond,
2929 gen_rtx_SET (VOIDmode, target, source));
2930 return 1;
2931 }
2932 can_invert = 1;
2933 break;
2934
2935 case XOR:
2936 if (remainder == 0)
2937 {
2938 if (reload_completed && rtx_equal_p (target, source))
2939 return 0;
2940 if (generate)
2941 emit_constant_insn (cond,
2942 gen_rtx_SET (VOIDmode, target, source));
2943 return 1;
2944 }
2945
2946 if (remainder == 0xffffffff)
2947 {
2948 if (generate)
2949 emit_constant_insn (cond,
2950 gen_rtx_SET (VOIDmode, target,
2951 gen_rtx_NOT (mode, source)));
2952 return 1;
2953 }
2954 final_invert = 1;
2955 break;
2956
2957 case MINUS:
2958 /* We treat MINUS as (val - source), since (source - val) is always
2959 passed as (source + (-val)). */
2960 if (remainder == 0)
2961 {
2962 if (generate)
2963 emit_constant_insn (cond,
2964 gen_rtx_SET (VOIDmode, target,
2965 gen_rtx_NEG (mode, source)));
2966 return 1;
2967 }
2968 if (const_ok_for_arm (val))
2969 {
2970 if (generate)
2971 emit_constant_insn (cond,
2972 gen_rtx_SET (VOIDmode, target,
2973 gen_rtx_MINUS (mode, GEN_INT (val),
2974 source)));
2975 return 1;
2976 }
2977
2978 break;
2979
2980 default:
2981 gcc_unreachable ();
2982 }
2983
2984 /* If we can do it in one insn get out quickly. */
2985 if (const_ok_for_op (val, code))
2986 {
2987 if (generate)
2988 emit_constant_insn (cond,
2989 gen_rtx_SET (VOIDmode, target,
2990 (source
2991 ? gen_rtx_fmt_ee (code, mode, source,
2992 GEN_INT (val))
2993 : GEN_INT (val))));
2994 return 1;
2995 }
2996
2997 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
2998 insn. */
2999 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3000 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3001 {
3002 if (generate)
3003 {
3004 if (mode == SImode && i == 16)
3005 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3006 smaller insn. */
3007 emit_constant_insn (cond,
3008 gen_zero_extendhisi2
3009 (target, gen_lowpart (HImode, source)));
3010 else
3011 /* Extz only supports SImode, but we can coerce the operands
3012 into that mode. */
3013 emit_constant_insn (cond,
3014 gen_extzv_t2 (gen_lowpart (SImode, target),
3015 gen_lowpart (SImode, source),
3016 GEN_INT (i), const0_rtx));
3017 }
3018
3019 return 1;
3020 }
3021
3022 /* Calculate a few attributes that may be useful for specific
3023 optimizations. */
3024 /* Count number of leading zeros. */
3025 for (i = 31; i >= 0; i--)
3026 {
3027 if ((remainder & (1 << i)) == 0)
3028 clear_sign_bit_copies++;
3029 else
3030 break;
3031 }
3032
3033 /* Count number of leading 1's. */
3034 for (i = 31; i >= 0; i--)
3035 {
3036 if ((remainder & (1 << i)) != 0)
3037 set_sign_bit_copies++;
3038 else
3039 break;
3040 }
3041
3042 /* Count number of trailing zeros. */
3043 for (i = 0; i <= 31; i++)
3044 {
3045 if ((remainder & (1 << i)) == 0)
3046 clear_zero_bit_copies++;
3047 else
3048 break;
3049 }
3050
3051 /* Count number of trailing 1's. */
3052 for (i = 0; i <= 31; i++)
3053 {
3054 if ((remainder & (1 << i)) != 0)
3055 set_zero_bit_copies++;
3056 else
3057 break;
3058 }
3059
3060 switch (code)
3061 {
3062 case SET:
3063 /* See if we can do this by sign_extending a constant that is known
3064 to be negative. This is a good way of doing it, since the shift
3065 may well merge into a subsequent insn. */
3066 if (set_sign_bit_copies > 1)
3067 {
3068 if (const_ok_for_arm
3069 (temp1 = ARM_SIGN_EXTEND (remainder
3070 << (set_sign_bit_copies - 1))))
3071 {
3072 if (generate)
3073 {
3074 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3075 emit_constant_insn (cond,
3076 gen_rtx_SET (VOIDmode, new_src,
3077 GEN_INT (temp1)));
3078 emit_constant_insn (cond,
3079 gen_ashrsi3 (target, new_src,
3080 GEN_INT (set_sign_bit_copies - 1)));
3081 }
3082 return 2;
3083 }
3084 /* For an inverted constant, we will need to set the low bits;
3085 these will be shifted out of harm's way. */
3086 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3087 if (const_ok_for_arm (~temp1))
3088 {
3089 if (generate)
3090 {
3091 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3092 emit_constant_insn (cond,
3093 gen_rtx_SET (VOIDmode, new_src,
3094 GEN_INT (temp1)));
3095 emit_constant_insn (cond,
3096 gen_ashrsi3 (target, new_src,
3097 GEN_INT (set_sign_bit_copies - 1)));
3098 }
3099 return 2;
3100 }
3101 }
3102
3103 /* See if we can calculate the value as the difference between two
3104 valid immediates. */
3105 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3106 {
3107 int topshift = clear_sign_bit_copies & ~1;
3108
3109 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3110 & (0xff000000 >> topshift));
3111
3112 /* If temp1 is zero, then that means the 9 most significant
3113 bits of remainder were 1 and we've caused it to overflow.
3114 When topshift is 0 we don't need to do anything since we
3115 can borrow from 'bit 32'. */
3116 if (temp1 == 0 && topshift != 0)
3117 temp1 = 0x80000000 >> (topshift - 1);
3118
3119 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3120
3121 if (const_ok_for_arm (temp2))
3122 {
3123 if (generate)
3124 {
3125 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3126 emit_constant_insn (cond,
3127 gen_rtx_SET (VOIDmode, new_src,
3128 GEN_INT (temp1)));
3129 emit_constant_insn (cond,
3130 gen_addsi3 (target, new_src,
3131 GEN_INT (-temp2)));
3132 }
3133
3134 return 2;
3135 }
3136 }
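/* Worked example for the difference-of-immediates case above
   (illustrative): for remainder == 0x000fffff, topshift is 12, temp1
   overflows to zero and is then set to 0x00100000, and temp2 is 1, so the
   constant is synthesized as 0x00100000 - 1 in two instructions
   (a MOV followed by a SUB).  */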
3137
3138 /* See if we can generate this by setting the bottom (or the top)
3139 16 bits, and then shifting these into the other half of the
3140 word. We only look for the simplest cases; to do more would cost
3141 too much. Be careful, however, not to generate this when the
3142 alternative would take fewer insns. */
3143 if (val & 0xffff0000)
3144 {
3145 temp1 = remainder & 0xffff0000;
3146 temp2 = remainder & 0x0000ffff;
3147
3148 /* Overlaps outside this range are best done using other methods. */
3149 for (i = 9; i < 24; i++)
3150 {
3151 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3152 && !const_ok_for_arm (temp2))
3153 {
3154 rtx new_src = (subtargets
3155 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3156 : target);
3157 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3158 source, subtargets, generate);
3159 source = new_src;
3160 if (generate)
3161 emit_constant_insn
3162 (cond,
3163 gen_rtx_SET
3164 (VOIDmode, target,
3165 gen_rtx_IOR (mode,
3166 gen_rtx_ASHIFT (mode, source,
3167 GEN_INT (i)),
3168 source)));
3169 return insns + 1;
3170 }
3171 }
3172
3173 /* Don't duplicate cases already considered. */
3174 for (i = 17; i < 24; i++)
3175 {
3176 if (((temp1 | (temp1 >> i)) == remainder)
3177 && !const_ok_for_arm (temp1))
3178 {
3179 rtx new_src = (subtargets
3180 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3181 : target);
3182 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3183 source, subtargets, generate);
3184 source = new_src;
3185 if (generate)
3186 emit_constant_insn
3187 (cond,
3188 gen_rtx_SET (VOIDmode, target,
3189 gen_rtx_IOR
3190 (mode,
3191 gen_rtx_LSHIFTRT (mode, source,
3192 GEN_INT (i)),
3193 source)));
3194 return insns + 1;
3195 }
3196 }
3197 }
3198 break;
3199
3200 case IOR:
3201 case XOR:
3202 /* If we have IOR or XOR, and the constant can be loaded in a
3203 single instruction, and we can find a temporary to put it in,
3204 then this can be done in two instructions instead of 3-4. */
3205 if (subtargets
3206 /* TARGET can't be NULL if SUBTARGETS is 0 */
3207 || (reload_completed && !reg_mentioned_p (target, source)))
3208 {
3209 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3210 {
3211 if (generate)
3212 {
3213 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3214
3215 emit_constant_insn (cond,
3216 gen_rtx_SET (VOIDmode, sub,
3217 GEN_INT (val)));
3218 emit_constant_insn (cond,
3219 gen_rtx_SET (VOIDmode, target,
3220 gen_rtx_fmt_ee (code, mode,
3221 source, sub)));
3222 }
3223 return 2;
3224 }
3225 }
3226
3227 if (code == XOR)
3228 break;
3229
3230 /* Convert
3231 x = y | constant (which is composed of set_sign_bit_copies leading 1s
3232 and the remaining 0s, e.g. 0xfff00000) to
3233 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
3234 
3235 This can be done in 2 instructions by using shifts with mov or mvn.
3236 e.g. for
3237 x = x | 0xfff00000;
3238 we generate:
3239 mvn r0, r0, asl #12
3240 mvn r0, r0, lsr #12 */
3241 if (set_sign_bit_copies > 8
3242 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3243 {
3244 if (generate)
3245 {
3246 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3247 rtx shift = GEN_INT (set_sign_bit_copies);
3248
3249 emit_constant_insn
3250 (cond,
3251 gen_rtx_SET (VOIDmode, sub,
3252 gen_rtx_NOT (mode,
3253 gen_rtx_ASHIFT (mode,
3254 source,
3255 shift))));
3256 emit_constant_insn
3257 (cond,
3258 gen_rtx_SET (VOIDmode, target,
3259 gen_rtx_NOT (mode,
3260 gen_rtx_LSHIFTRT (mode, sub,
3261 shift))));
3262 }
3263 return 2;
3264 }
3265
3266 /* Convert
3267 x = y | constant (which has set_zero_bit_copies trailing ones)
3268 to
3269 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3270 
3271 E.g. for r0 = r0 | 0xfff we generate
3272 mvn r0, r0, lsr #12
3273 mvn r0, r0, asl #12
3274 
3275 */
3276 if (set_zero_bit_copies > 8
3277 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3278 {
3279 if (generate)
3280 {
3281 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3282 rtx shift = GEN_INT (set_zero_bit_copies);
3283
3284 emit_constant_insn
3285 (cond,
3286 gen_rtx_SET (VOIDmode, sub,
3287 gen_rtx_NOT (mode,
3288 gen_rtx_LSHIFTRT (mode,
3289 source,
3290 shift))));
3291 emit_constant_insn
3292 (cond,
3293 gen_rtx_SET (VOIDmode, target,
3294 gen_rtx_NOT (mode,
3295 gen_rtx_ASHIFT (mode, sub,
3296 shift))));
3297 }
3298 return 2;
3299 }
3300
3301 /* This will never be reached for Thumb-2 because orn is a valid
3302 instruction. This is for Thumb-1 and the 32-bit ARM cases.
3303
3304 x = y | constant (such that ~constant is a valid constant)
3305 Transform this to
3306 x = ~(~y & ~constant).
3307 */
3308 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3309 {
3310 if (generate)
3311 {
3312 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3313 emit_constant_insn (cond,
3314 gen_rtx_SET (VOIDmode, sub,
3315 gen_rtx_NOT (mode, source)));
3316 source = sub;
3317 if (subtargets)
3318 sub = gen_reg_rtx (mode);
3319 emit_constant_insn (cond,
3320 gen_rtx_SET (VOIDmode, sub,
3321 gen_rtx_AND (mode, source,
3322 GEN_INT (temp1))));
3323 emit_constant_insn (cond,
3324 gen_rtx_SET (VOIDmode, target,
3325 gen_rtx_NOT (mode, sub)));
3326 }
3327 return 3;
3328 }
3329 break;
3330
3331 case AND:
3332 /* See if two shifts will do two or more insns' worth of work. */
3333 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3334 {
3335 HOST_WIDE_INT shift_mask = ((0xffffffff
3336 << (32 - clear_sign_bit_copies))
3337 & 0xffffffff);
3338
3339 if ((remainder | shift_mask) != 0xffffffff)
3340 {
3341 if (generate)
3342 {
3343 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3344 insns = arm_gen_constant (AND, mode, cond,
3345 remainder | shift_mask,
3346 new_src, source, subtargets, 1);
3347 source = new_src;
3348 }
3349 else
3350 {
3351 rtx targ = subtargets ? NULL_RTX : target;
3352 insns = arm_gen_constant (AND, mode, cond,
3353 remainder | shift_mask,
3354 targ, source, subtargets, 0);
3355 }
3356 }
3357
3358 if (generate)
3359 {
3360 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3361 rtx shift = GEN_INT (clear_sign_bit_copies);
3362
3363 emit_insn (gen_ashlsi3 (new_src, source, shift));
3364 emit_insn (gen_lshrsi3 (target, new_src, shift));
3365 }
3366
3367 return insns + 2;
3368 }
3369
3370 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3371 {
3372 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3373
3374 if ((remainder | shift_mask) != 0xffffffff)
3375 {
3376 if (generate)
3377 {
3378 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3379
3380 insns = arm_gen_constant (AND, mode, cond,
3381 remainder | shift_mask,
3382 new_src, source, subtargets, 1);
3383 source = new_src;
3384 }
3385 else
3386 {
3387 rtx targ = subtargets ? NULL_RTX : target;
3388
3389 insns = arm_gen_constant (AND, mode, cond,
3390 remainder | shift_mask,
3391 targ, source, subtargets, 0);
3392 }
3393 }
3394
3395 if (generate)
3396 {
3397 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3398 rtx shift = GEN_INT (clear_zero_bit_copies);
3399
3400 emit_insn (gen_lshrsi3 (new_src, source, shift));
3401 emit_insn (gen_ashlsi3 (target, new_src, shift));
3402 }
3403
3404 return insns + 2;
3405 }
3406
3407 break;
3408
3409 default:
3410 break;
3411 }
3412
3413 /* Calculate what the instruction sequences would be if we generated it
3414 normally, negated, or inverted. */
3415 if (code == AND)
3416 /* AND cannot be split into multiple insns, so invert and use BIC. */
3417 insns = 99;
3418 else
3419 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3420
3421 if (can_negate)
3422 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3423 &neg_immediates);
3424 else
3425 neg_insns = 99;
3426
3427 if (can_invert || final_invert)
3428 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3429 &inv_immediates);
3430 else
3431 inv_insns = 99;
3432
3433 immediates = &pos_immediates;
3434
3435 /* Is the negated immediate sequence more efficient? */
3436 if (neg_insns < insns && neg_insns <= inv_insns)
3437 {
3438 insns = neg_insns;
3439 immediates = &neg_immediates;
3440 }
3441 else
3442 can_negate = 0;
3443
3444 /* Is the inverted immediate sequence more efficient?
3445 We must allow for an extra NOT instruction for XOR operations, although
3446 there is some chance that the final 'mvn' will get optimized later. */
3447 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3448 {
3449 insns = inv_insns;
3450 immediates = &inv_immediates;
3451 }
3452 else
3453 {
3454 can_invert = 0;
3455 final_invert = 0;
3456 }
3457
3458 /* Now output the chosen sequence as instructions. */
3459 if (generate)
3460 {
3461 for (i = 0; i < insns; i++)
3462 {
3463 rtx new_src, temp1_rtx;
3464
3465 temp1 = immediates->i[i];
3466
3467 if (code == SET || code == MINUS)
3468 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3469 else if ((final_invert || i < (insns - 1)) && subtargets)
3470 new_src = gen_reg_rtx (mode);
3471 else
3472 new_src = target;
3473
3474 if (can_invert)
3475 temp1 = ~temp1;
3476 else if (can_negate)
3477 temp1 = -temp1;
3478
3479 temp1 = trunc_int_for_mode (temp1, mode);
3480 temp1_rtx = GEN_INT (temp1);
3481
3482 if (code == SET)
3483 ;
3484 else if (code == MINUS)
3485 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3486 else
3487 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3488
3489 emit_constant_insn (cond,
3490 gen_rtx_SET (VOIDmode, new_src,
3491 temp1_rtx));
3492 source = new_src;
3493
3494 if (code == SET)
3495 {
3496 can_negate = can_invert;
3497 can_invert = 0;
3498 code = PLUS;
3499 }
3500 else if (code == MINUS)
3501 code = PLUS;
3502 }
3503 }
3504
3505 if (final_invert)
3506 {
3507 if (generate)
3508 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3509 gen_rtx_NOT (mode, source)));
3510 insns++;
3511 }
3512
3513 return insns;
3514 }
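
/* Worked example (an illustrative sketch only; the sequence actually
   chosen also depends on tuning and on the surrounding code):

     int f (int x) { return x & 0xffffff00; }

   0xffffff00 is not a valid ARM immediate, but its inverse 0xff is, so
   the inverted sequence wins above and the AND can be emitted as a
   single "bic r0, r0, #255" instead of first materialising the
   constant in a scratch register.  */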
3515
3516 /* Canonicalize a comparison so that we are more likely to recognize it.
3517 This can be done for a few constant compares, where we can make the
3518 immediate value easier to load. */
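
/* For instance (illustrative): "x > 4095" uses GT with 0xfff, which is
   not a valid ARM immediate; rewriting it as "x >= 4096" (GE with
   0x1000, which is encodable) lets the comparison use a single CMP.  */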
3519
3520 enum rtx_code
3521 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3522 {
3523 enum machine_mode mode;
3524 unsigned HOST_WIDE_INT i, maxval;
3525
3526 mode = GET_MODE (*op0);
3527 if (mode == VOIDmode)
3528 mode = GET_MODE (*op1);
3529
3530 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3531
3532 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3533 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3534 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3535 for GTU/LEU in Thumb mode. */
3536 if (mode == DImode)
3537 {
3538 rtx tem;
3539
3540 if (code == GT || code == LE
3541 || (!TARGET_ARM && (code == GTU || code == LEU)))
3542 {
3543 /* Missing comparison. First try to use an available
3544 comparison. */
3545 if (CONST_INT_P (*op1))
3546 {
3547 i = INTVAL (*op1);
3548 switch (code)
3549 {
3550 case GT:
3551 case LE:
3552 if (i != maxval
3553 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3554 {
3555 *op1 = GEN_INT (i + 1);
3556 return code == GT ? GE : LT;
3557 }
3558 break;
3559 case GTU:
3560 case LEU:
3561 if (i != ~((unsigned HOST_WIDE_INT) 0)
3562 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3563 {
3564 *op1 = GEN_INT (i + 1);
3565 return code == GTU ? GEU : LTU;
3566 }
3567 break;
3568 default:
3569 gcc_unreachable ();
3570 }
3571 }
3572
3573 /* If that did not work, reverse the condition. */
3574 tem = *op0;
3575 *op0 = *op1;
3576 *op1 = tem;
3577 return swap_condition (code);
3578 }
3579
3580 return code;
3581 }
3582
3583 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3584 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3585 to facilitate possible combining with a cmp into 'ands'. */
3586 if (mode == SImode
3587 && GET_CODE (*op0) == ZERO_EXTEND
3588 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3589 && GET_MODE (XEXP (*op0, 0)) == QImode
3590 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3591 && subreg_lowpart_p (XEXP (*op0, 0))
3592 && *op1 == const0_rtx)
3593 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3594 GEN_INT (255));
3595
3596 /* Comparisons smaller than DImode. Only adjust comparisons against
3597 an out-of-range constant. */
3598 if (!CONST_INT_P (*op1)
3599 || const_ok_for_arm (INTVAL (*op1))
3600 || const_ok_for_arm (- INTVAL (*op1)))
3601 return code;
3602
3603 i = INTVAL (*op1);
3604
3605 switch (code)
3606 {
3607 case EQ:
3608 case NE:
3609 return code;
3610
3611 case GT:
3612 case LE:
3613 if (i != maxval
3614 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3615 {
3616 *op1 = GEN_INT (i + 1);
3617 return code == GT ? GE : LT;
3618 }
3619 break;
3620
3621 case GE:
3622 case LT:
3623 if (i != ~maxval
3624 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3625 {
3626 *op1 = GEN_INT (i - 1);
3627 return code == GE ? GT : LE;
3628 }
3629 break;
3630
3631 case GTU:
3632 case LEU:
3633 if (i != ~((unsigned HOST_WIDE_INT) 0)
3634 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3635 {
3636 *op1 = GEN_INT (i + 1);
3637 return code == GTU ? GEU : LTU;
3638 }
3639 break;
3640
3641 case GEU:
3642 case LTU:
3643 if (i != 0
3644 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3645 {
3646 *op1 = GEN_INT (i - 1);
3647 return code == GEU ? GTU : LEU;
3648 }
3649 break;
3650
3651 default:
3652 gcc_unreachable ();
3653 }
3654
3655 return code;
3656 }
3657
3658
3659 /* Define how to find the value returned by a function. */
3660
3661 static rtx
3662 arm_function_value(const_tree type, const_tree func,
3663 bool outgoing ATTRIBUTE_UNUSED)
3664 {
3665 enum machine_mode mode;
3666 int unsignedp ATTRIBUTE_UNUSED;
3667 rtx r ATTRIBUTE_UNUSED;
3668
3669 mode = TYPE_MODE (type);
3670
3671 if (TARGET_AAPCS_BASED)
3672 return aapcs_allocate_return_reg (mode, type, func);
3673
3674 /* Promote integer types. */
3675 if (INTEGRAL_TYPE_P (type))
3676 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3677
3678   /* Promote small structs returned in a register to full-word size
3679 for big-endian AAPCS. */
3680 if (arm_return_in_msb (type))
3681 {
3682 HOST_WIDE_INT size = int_size_in_bytes (type);
3683 if (size % UNITS_PER_WORD != 0)
3684 {
3685 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3686 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3687 }
3688 }
3689
3690 return arm_libcall_value_1 (mode);
3691 }
3692
3693 static int
3694 libcall_eq (const void *p1, const void *p2)
3695 {
3696 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3697 }
3698
3699 static hashval_t
3700 libcall_hash (const void *p1)
3701 {
3702 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3703 }
3704
3705 static void
3706 add_libcall (htab_t htab, rtx libcall)
3707 {
3708 *htab_find_slot (htab, libcall, INSERT) = libcall;
3709 }
3710
3711 static bool
3712 arm_libcall_uses_aapcs_base (const_rtx libcall)
3713 {
3714 static bool init_done = false;
3715 static htab_t libcall_htab;
3716
3717 if (!init_done)
3718 {
3719 init_done = true;
3720
3721 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3722 NULL);
3723 add_libcall (libcall_htab,
3724 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3725 add_libcall (libcall_htab,
3726 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3727 add_libcall (libcall_htab,
3728 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3729 add_libcall (libcall_htab,
3730 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3731
3732 add_libcall (libcall_htab,
3733 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3734 add_libcall (libcall_htab,
3735 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3736 add_libcall (libcall_htab,
3737 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3738 add_libcall (libcall_htab,
3739 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3740
3741 add_libcall (libcall_htab,
3742 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3743 add_libcall (libcall_htab,
3744 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3745 add_libcall (libcall_htab,
3746 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3747 add_libcall (libcall_htab,
3748 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3749 add_libcall (libcall_htab,
3750 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3751 add_libcall (libcall_htab,
3752 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3753 add_libcall (libcall_htab,
3754 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3755 add_libcall (libcall_htab,
3756 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3757
3758 /* Values from double-precision helper functions are returned in core
3759 registers if the selected core only supports single-precision
3760 arithmetic, even if we are using the hard-float ABI. The same is
3761 true for single-precision helpers, but we will never be using the
3762 hard-float ABI on a CPU which doesn't support single-precision
3763 operations in hardware. */
3764 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3765 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3766 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3767 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3768 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3769 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3770 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3771 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3772 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3773 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3774 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3775 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3776 SFmode));
3777 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3778 DFmode));
3779 }
3780
3781 return libcall && htab_find (libcall_htab, libcall) != NULL;
3782 }
3783
3784 static rtx
3785 arm_libcall_value_1 (enum machine_mode mode)
3786 {
3787 if (TARGET_AAPCS_BASED)
3788 return aapcs_libcall_value (mode);
3789 else if (TARGET_IWMMXT_ABI
3790 && arm_vector_mode_supported_p (mode))
3791 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3792 else
3793 return gen_rtx_REG (mode, ARG_REGISTER (1));
3794 }
3795
3796 /* Define how to find the value returned by a library function
3797 assuming the value has mode MODE. */
3798
3799 static rtx
3800 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3801 {
3802 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3803 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3804 {
3805 /* The following libcalls return their result in integer registers,
3806 even though they return a floating point value. */
3807 if (arm_libcall_uses_aapcs_base (libcall))
3808 return gen_rtx_REG (mode, ARG_REGISTER(1));
3809
3810 }
3811
3812 return arm_libcall_value_1 (mode);
3813 }
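
/* Example (illustrative): on a hard-float target whose FPU only
   supports single precision (e.g. fpv4-sp-d16), a DFmode addition is
   expanded as a call to the __aeabi_dadd helper; because that libcall
   appears in the table above, its result is read from r0/r1 rather
   than from a VFP register.  */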
3814
3815 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3816
3817 static bool
3818 arm_function_value_regno_p (const unsigned int regno)
3819 {
3820 if (regno == ARG_REGISTER (1)
3821 || (TARGET_32BIT
3822 && TARGET_AAPCS_BASED
3823 && TARGET_VFP
3824 && TARGET_HARD_FLOAT
3825 && regno == FIRST_VFP_REGNUM)
3826 || (TARGET_IWMMXT_ABI
3827 && regno == FIRST_IWMMXT_REGNUM))
3828 return true;
3829
3830 return false;
3831 }
3832
3833 /* Determine the amount of memory needed to store the possible return
3834 registers of an untyped call. */
3835 int
3836 arm_apply_result_size (void)
3837 {
3838 int size = 16;
3839
3840 if (TARGET_32BIT)
3841 {
3842 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
3843 size += 32;
3844 if (TARGET_IWMMXT_ABI)
3845 size += 8;
3846 }
3847
3848 return size;
3849 }
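
/* For example (illustrative arithmetic): with the hard-float ABI and
   VFP enabled the block is 16 + 32 = 48 bytes, and the iWMMXt ABI
   would add a further 8.  */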
3850
3851 /* Decide whether TYPE should be returned in memory (true)
3852 or in a register (false). FNTYPE is the type of the function making
3853 the call. */
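
/* Some illustrative AAPCS cases (assuming a standard AAPCS target):
   a 4-byte struct comes back in r0; an 8-byte struct of two floats
   may come back in VFP registers when the VFP variant applies; a
   12-byte struct with no co-processor candidate is returned in
   memory via the hidden address argument.  */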
3854 static bool
3855 arm_return_in_memory (const_tree type, const_tree fntype)
3856 {
3857 HOST_WIDE_INT size;
3858
3859 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3860
3861 if (TARGET_AAPCS_BASED)
3862 {
3863       /* Simple, non-aggregate types (i.e. not including vectors and
3864 complex) are always returned in a register (or registers).
3865 We don't care about which register here, so we can short-cut
3866 some of the detail. */
3867 if (!AGGREGATE_TYPE_P (type)
3868 && TREE_CODE (type) != VECTOR_TYPE
3869 && TREE_CODE (type) != COMPLEX_TYPE)
3870 return false;
3871
3872 /* Any return value that is no larger than one word can be
3873 returned in r0. */
3874 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3875 return false;
3876
3877 /* Check any available co-processors to see if they accept the
3878 type as a register candidate (VFP, for example, can return
3879 some aggregates in consecutive registers). These aren't
3880 available if the call is variadic. */
3881 if (aapcs_select_return_coproc (type, fntype) >= 0)
3882 return false;
3883
3884 /* Vector values should be returned using ARM registers, not
3885 memory (unless they're over 16 bytes, which will break since
3886 we only have four call-clobbered registers to play with). */
3887 if (TREE_CODE (type) == VECTOR_TYPE)
3888 return (size < 0 || size > (4 * UNITS_PER_WORD));
3889
3890 /* The rest go in memory. */
3891 return true;
3892 }
3893
3894 if (TREE_CODE (type) == VECTOR_TYPE)
3895 return (size < 0 || size > (4 * UNITS_PER_WORD));
3896
3897   if (!AGGREGATE_TYPE_P (type)
3898       && TREE_CODE (type) != VECTOR_TYPE)
3899 /* All simple types are returned in registers. */
3900 return false;
3901
3902 if (arm_abi != ARM_ABI_APCS)
3903 {
3904 /* ATPCS and later return aggregate types in memory only if they are
3905 larger than a word (or are variable size). */
3906 return (size < 0 || size > UNITS_PER_WORD);
3907 }
3908
3909 /* For the arm-wince targets we choose to be compatible with Microsoft's
3910 ARM and Thumb compilers, which always return aggregates in memory. */
3911 #ifndef ARM_WINCE
3912 /* All structures/unions bigger than one word are returned in memory.
3913 Also catch the case where int_size_in_bytes returns -1. In this case
3914 the aggregate is either huge or of variable size, and in either case
3915 we will want to return it via memory and not in a register. */
3916 if (size < 0 || size > UNITS_PER_WORD)
3917 return true;
3918
3919 if (TREE_CODE (type) == RECORD_TYPE)
3920 {
3921 tree field;
3922
3923 /* For a struct the APCS says that we only return in a register
3924 if the type is 'integer like' and every addressable element
3925 has an offset of zero. For practical purposes this means
3926 that the structure can have at most one non bit-field element
3927 and that this element must be the first one in the structure. */
3928
3929 /* Find the first field, ignoring non FIELD_DECL things which will
3930 have been created by C++. */
3931 for (field = TYPE_FIELDS (type);
3932 field && TREE_CODE (field) != FIELD_DECL;
3933 field = DECL_CHAIN (field))
3934 continue;
3935
3936 if (field == NULL)
3937 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3938
3939 /* Check that the first field is valid for returning in a register. */
3940
3941 /* ... Floats are not allowed */
3942 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3943 return true;
3944
3945 /* ... Aggregates that are not themselves valid for returning in
3946 a register are not allowed. */
3947 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3948 return true;
3949
3950 /* Now check the remaining fields, if any. Only bitfields are allowed,
3951 since they are not addressable. */
3952 for (field = DECL_CHAIN (field);
3953 field;
3954 field = DECL_CHAIN (field))
3955 {
3956 if (TREE_CODE (field) != FIELD_DECL)
3957 continue;
3958
3959 if (!DECL_BIT_FIELD_TYPE (field))
3960 return true;
3961 }
3962
3963 return false;
3964 }
3965
3966 if (TREE_CODE (type) == UNION_TYPE)
3967 {
3968 tree field;
3969
3970 /* Unions can be returned in registers if every element is
3971 integral, or can be returned in an integer register. */
3972 for (field = TYPE_FIELDS (type);
3973 field;
3974 field = DECL_CHAIN (field))
3975 {
3976 if (TREE_CODE (field) != FIELD_DECL)
3977 continue;
3978
3979 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3980 return true;
3981
3982 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3983 return true;
3984 }
3985
3986 return false;
3987 }
3988 #endif /* not ARM_WINCE */
3989
3990 /* Return all other types in memory. */
3991 return true;
3992 }
3993
3994 const struct pcs_attribute_arg
3995 {
3996 const char *arg;
3997 enum arm_pcs value;
3998 } pcs_attribute_args[] =
3999 {
4000 {"aapcs", ARM_PCS_AAPCS},
4001 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4002 #if 0
4003 /* We could recognize these, but changes would be needed elsewhere
4004 * to implement them. */
4005 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4006 {"atpcs", ARM_PCS_ATPCS},
4007 {"apcs", ARM_PCS_APCS},
4008 #endif
4009 {NULL, ARM_PCS_UNKNOWN}
4010 };
4011
4012 static enum arm_pcs
4013 arm_pcs_from_attribute (tree attr)
4014 {
4015 const struct pcs_attribute_arg *ptr;
4016 const char *arg;
4017
4018 /* Get the value of the argument. */
4019 if (TREE_VALUE (attr) == NULL_TREE
4020 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4021 return ARM_PCS_UNKNOWN;
4022
4023 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4024
4025 /* Check it against the list of known arguments. */
4026 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4027 if (streq (arg, ptr->arg))
4028 return ptr->value;
4029
4030   /* An unrecognized PCS variant.  */
4031 return ARM_PCS_UNKNOWN;
4032 }
4033
4034 /* Get the PCS variant to use for this call. TYPE is the function's type
4035    specification, DECL is the specific declaration.  DECL may be null if
4036 the call could be indirect or if this is a library call. */
4037 static enum arm_pcs
4038 arm_get_pcs_model (const_tree type, const_tree decl)
4039 {
4040 bool user_convention = false;
4041 enum arm_pcs user_pcs = arm_pcs_default;
4042 tree attr;
4043
4044 gcc_assert (type);
4045
4046 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4047 if (attr)
4048 {
4049 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4050 user_convention = true;
4051 }
4052
4053 if (TARGET_AAPCS_BASED)
4054 {
4055 /* Detect varargs functions. These always use the base rules
4056 (no argument is ever a candidate for a co-processor
4057 register). */
4058 bool base_rules = stdarg_p (type);
4059
4060 if (user_convention)
4061 {
4062 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4063 sorry ("non-AAPCS derived PCS variant");
4064 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4065 error ("variadic functions must use the base AAPCS variant");
4066 }
4067
4068 if (base_rules)
4069 return ARM_PCS_AAPCS;
4070 else if (user_convention)
4071 return user_pcs;
4072 else if (decl && flag_unit_at_a_time)
4073 {
4074 /* Local functions never leak outside this compilation unit,
4075 so we are free to use whatever conventions are
4076 appropriate. */
4077 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4078 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4079 if (i && i->local)
4080 return ARM_PCS_AAPCS_LOCAL;
4081 }
4082 }
4083 else if (user_convention && user_pcs != arm_pcs_default)
4084 sorry ("PCS variant");
4085
4086 /* For everything else we use the target's default. */
4087 return arm_pcs_default;
4088 }
4089
4090
4091 static void
4092 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4093 const_tree fntype ATTRIBUTE_UNUSED,
4094 rtx libcall ATTRIBUTE_UNUSED,
4095 const_tree fndecl ATTRIBUTE_UNUSED)
4096 {
4097 /* Record the unallocated VFP registers. */
4098 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4099 pcum->aapcs_vfp_reg_alloc = 0;
4100 }
4101
4102 /* Walk down the type tree of TYPE counting consecutive base elements.
4103 If *MODEP is VOIDmode, then set it to the first valid floating point
4104 type. If a non-floating point type is found, or if a floating point
4105 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4106 otherwise return the count in the sub-tree. */
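
/* Illustrative cases (assuming the VFP-capable PCS variants):
     struct { double x, y; }        -> count 2, *MODEP = DFmode
     _Complex float                 -> count 2, *MODEP = SFmode
     struct { float f; double d; }  -> -1 (mismatched element modes)  */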
4107 static int
4108 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4109 {
4110 enum machine_mode mode;
4111 HOST_WIDE_INT size;
4112
4113 switch (TREE_CODE (type))
4114 {
4115 case REAL_TYPE:
4116 mode = TYPE_MODE (type);
4117 if (mode != DFmode && mode != SFmode)
4118 return -1;
4119
4120 if (*modep == VOIDmode)
4121 *modep = mode;
4122
4123 if (*modep == mode)
4124 return 1;
4125
4126 break;
4127
4128 case COMPLEX_TYPE:
4129 mode = TYPE_MODE (TREE_TYPE (type));
4130 if (mode != DFmode && mode != SFmode)
4131 return -1;
4132
4133 if (*modep == VOIDmode)
4134 *modep = mode;
4135
4136 if (*modep == mode)
4137 return 2;
4138
4139 break;
4140
4141 case VECTOR_TYPE:
4142 /* Use V2SImode and V4SImode as representatives of all 64-bit
4143 and 128-bit vector types, whether or not those modes are
4144 supported with the present options. */
4145 size = int_size_in_bytes (type);
4146 switch (size)
4147 {
4148 case 8:
4149 mode = V2SImode;
4150 break;
4151 case 16:
4152 mode = V4SImode;
4153 break;
4154 default:
4155 return -1;
4156 }
4157
4158 if (*modep == VOIDmode)
4159 *modep = mode;
4160
4161 /* Vector modes are considered to be opaque: two vectors are
4162 equivalent for the purposes of being homogeneous aggregates
4163 if they are the same size. */
4164 if (*modep == mode)
4165 return 1;
4166
4167 break;
4168
4169 case ARRAY_TYPE:
4170 {
4171 int count;
4172 tree index = TYPE_DOMAIN (type);
4173
4174 /* Can't handle incomplete types. */
4175 if (!COMPLETE_TYPE_P (type))
4176 return -1;
4177
4178 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4179 if (count == -1
4180 || !index
4181 || !TYPE_MAX_VALUE (index)
4182 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4183 || !TYPE_MIN_VALUE (index)
4184 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4185 || count < 0)
4186 return -1;
4187
4188 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4189 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4190
4191 /* There must be no padding. */
4192 if (!host_integerp (TYPE_SIZE (type), 1)
4193 || (tree_low_cst (TYPE_SIZE (type), 1)
4194 != count * GET_MODE_BITSIZE (*modep)))
4195 return -1;
4196
4197 return count;
4198 }
4199
4200 case RECORD_TYPE:
4201 {
4202 int count = 0;
4203 int sub_count;
4204 tree field;
4205
4206 /* Can't handle incomplete types. */
4207 if (!COMPLETE_TYPE_P (type))
4208 return -1;
4209
4210 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4211 {
4212 if (TREE_CODE (field) != FIELD_DECL)
4213 continue;
4214
4215 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4216 if (sub_count < 0)
4217 return -1;
4218 count += sub_count;
4219 }
4220
4221 /* There must be no padding. */
4222 if (!host_integerp (TYPE_SIZE (type), 1)
4223 || (tree_low_cst (TYPE_SIZE (type), 1)
4224 != count * GET_MODE_BITSIZE (*modep)))
4225 return -1;
4226
4227 return count;
4228 }
4229
4230 case UNION_TYPE:
4231 case QUAL_UNION_TYPE:
4232 {
4233 /* These aren't very interesting except in a degenerate case. */
4234 int count = 0;
4235 int sub_count;
4236 tree field;
4237
4238 /* Can't handle incomplete types. */
4239 if (!COMPLETE_TYPE_P (type))
4240 return -1;
4241
4242 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4243 {
4244 if (TREE_CODE (field) != FIELD_DECL)
4245 continue;
4246
4247 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4248 if (sub_count < 0)
4249 return -1;
4250 count = count > sub_count ? count : sub_count;
4251 }
4252
4253 /* There must be no padding. */
4254 if (!host_integerp (TYPE_SIZE (type), 1)
4255 || (tree_low_cst (TYPE_SIZE (type), 1)
4256 != count * GET_MODE_BITSIZE (*modep)))
4257 return -1;
4258
4259 return count;
4260 }
4261
4262 default:
4263 break;
4264 }
4265
4266 return -1;
4267 }
4268
4269 /* Return true if PCS_VARIANT should use VFP registers. */
4270 static bool
4271 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4272 {
4273 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4274 {
4275 static bool seen_thumb1_vfp = false;
4276
4277 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4278 {
4279 sorry ("Thumb-1 hard-float VFP ABI");
4280 /* sorry() is not immediately fatal, so only display this once. */
4281 seen_thumb1_vfp = true;
4282 }
4283
4284 return true;
4285 }
4286
4287 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4288 return false;
4289
4290   return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
4291 	  && (TARGET_VFP_DOUBLE || !is_double));
4292 }
4293
4294 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4295 suitable for passing or returning in VFP registers for the PCS
4296 variant selected. If it is, then *BASE_MODE is updated to contain
4297 a machine mode describing each element of the argument's type and
4298 *COUNT to hold the number of such elements. */
4299 static bool
4300 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4301 enum machine_mode mode, const_tree type,
4302 enum machine_mode *base_mode, int *count)
4303 {
4304 enum machine_mode new_mode = VOIDmode;
4305
4306 /* If we have the type information, prefer that to working things
4307 out from the mode. */
4308 if (type)
4309 {
4310 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4311
4312 if (ag_count > 0 && ag_count <= 4)
4313 *count = ag_count;
4314 else
4315 return false;
4316 }
4317 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4318 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4319 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4320 {
4321 *count = 1;
4322 new_mode = mode;
4323 }
4324 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4325 {
4326 *count = 2;
4327 new_mode = (mode == DCmode ? DFmode : SFmode);
4328 }
4329 else
4330 return false;
4331
4332
4333 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4334 return false;
4335
4336 *base_mode = new_mode;
4337 return true;
4338 }
4339
4340 static bool
4341 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4342 enum machine_mode mode, const_tree type)
4343 {
4344 int count ATTRIBUTE_UNUSED;
4345 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4346
4347 if (!use_vfp_abi (pcs_variant, false))
4348 return false;
4349 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4350 &ag_mode, &count);
4351 }
4352
4353 static bool
4354 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4355 const_tree type)
4356 {
4357 if (!use_vfp_abi (pcum->pcs_variant, false))
4358 return false;
4359
4360 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4361 &pcum->aapcs_vfp_rmode,
4362 &pcum->aapcs_vfp_rcount);
4363 }
4364
4365 static bool
4366 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4367 const_tree type ATTRIBUTE_UNUSED)
4368 {
4369 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4370 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4371 int regno;
4372
4373 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4374 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4375 {
4376 pcum->aapcs_vfp_reg_alloc = mask << regno;
4377 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4378 {
4379 int i;
4380 int rcount = pcum->aapcs_vfp_rcount;
4381 int rshift = shift;
4382 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4383 rtx par;
4384 if (!TARGET_NEON)
4385 {
4386 /* Avoid using unsupported vector modes. */
4387 if (rmode == V2SImode)
4388 rmode = DImode;
4389 else if (rmode == V4SImode)
4390 {
4391 rmode = DImode;
4392 rcount *= 2;
4393 rshift /= 2;
4394 }
4395 }
4396 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4397 for (i = 0; i < rcount; i++)
4398 {
4399 rtx tmp = gen_rtx_REG (rmode,
4400 FIRST_VFP_REGNUM + regno + i * rshift);
4401 tmp = gen_rtx_EXPR_LIST
4402 (VOIDmode, tmp,
4403 GEN_INT (i * GET_MODE_SIZE (rmode)));
4404 XVECEXP (par, 0, i) = tmp;
4405 }
4406
4407 pcum->aapcs_reg = par;
4408 }
4409 else
4410 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4411 return true;
4412 }
4413 return false;
4414 }
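
/* Allocation sketch (illustrative): if a float argument has already
   taken s0, a following double needs an even-numbered pair, so the
   search above (stepping REGNO by SHIFT) skips to s2/s3 (d1) and
   leaves s1 free to be back-filled by a later float argument, as the
   AAPCS VFP rules require.  */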
4415
4416 static rtx
4417 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
4418 			       enum machine_mode mode,
4419 			       const_tree type)
4420 {
4421 if (!use_vfp_abi (pcs_variant, false))
4422 return NULL;
4423
4424 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4425 {
4426 int count;
4427 enum machine_mode ag_mode;
4428 int i;
4429 rtx par;
4430 int shift;
4431
4432 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4433 &ag_mode, &count);
4434
4435 if (!TARGET_NEON)
4436 {
4437 if (ag_mode == V2SImode)
4438 ag_mode = DImode;
4439 else if (ag_mode == V4SImode)
4440 {
4441 ag_mode = DImode;
4442 count *= 2;
4443 }
4444 }
4445       shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
4446 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4447 for (i = 0; i < count; i++)
4448 {
4449 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4450 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4451 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4452 XVECEXP (par, 0, i) = tmp;
4453 }
4454
4455 return par;
4456 }
4457
4458 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4459 }
4460
4461 static void
4462 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
4463 enum machine_mode mode ATTRIBUTE_UNUSED,
4464 const_tree type ATTRIBUTE_UNUSED)
4465 {
4466 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4467 pcum->aapcs_vfp_reg_alloc = 0;
4468 return;
4469 }
4470
4471 #define AAPCS_CP(X) \
4472 { \
4473 aapcs_ ## X ## _cum_init, \
4474 aapcs_ ## X ## _is_call_candidate, \
4475 aapcs_ ## X ## _allocate, \
4476 aapcs_ ## X ## _is_return_candidate, \
4477 aapcs_ ## X ## _allocate_return_reg, \
4478 aapcs_ ## X ## _advance \
4479 }
4480
4481 /* Table of co-processors that can be used to pass arguments in
4482    registers.  Ideally no argument should be a candidate for more than
4483 one co-processor table entry, but the table is processed in order
4484 and stops after the first match. If that entry then fails to put
4485 the argument into a co-processor register, the argument will go on
4486 the stack. */
4487 static struct
4488 {
4489 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4490 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4491
4492 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4493 BLKmode) is a candidate for this co-processor's registers; this
4494 function should ignore any position-dependent state in
4495 CUMULATIVE_ARGS and only use call-type dependent information. */
4496 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4497
4498 /* Return true if the argument does get a co-processor register; it
4499 should set aapcs_reg to an RTX of the register allocated as is
4500 required for a return from FUNCTION_ARG. */
4501 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4502
4503 /* Return true if a result of mode MODE (or type TYPE if MODE is
4504      BLKmode) can be returned in this co-processor's registers.  */
4505 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4506
4507 /* Allocate and return an RTX element to hold the return type of a
4508      call.  This routine must not fail and will only be called if
4509 is_return_candidate returned true with the same parameters. */
4510 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4511
4512 /* Finish processing this argument and prepare to start processing
4513 the next one. */
4514 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4515 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4516 {
4517 AAPCS_CP(vfp)
4518 };
4519
4520 #undef AAPCS_CP
4521
4522 static int
4523 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4524 const_tree type)
4525 {
4526 int i;
4527
4528 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4529 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4530 return i;
4531
4532 return -1;
4533 }
4534
4535 static int
4536 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4537 {
4538 /* We aren't passed a decl, so we can't check that a call is local.
4539 However, it isn't clear that that would be a win anyway, since it
4540 might limit some tail-calling opportunities. */
4541 enum arm_pcs pcs_variant;
4542
4543 if (fntype)
4544 {
4545 const_tree fndecl = NULL_TREE;
4546
4547 if (TREE_CODE (fntype) == FUNCTION_DECL)
4548 {
4549 fndecl = fntype;
4550 fntype = TREE_TYPE (fntype);
4551 }
4552
4553 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4554 }
4555 else
4556 pcs_variant = arm_pcs_default;
4557
4558 if (pcs_variant != ARM_PCS_AAPCS)
4559 {
4560 int i;
4561
4562 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4563 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4564 TYPE_MODE (type),
4565 type))
4566 return i;
4567 }
4568 return -1;
4569 }
4570
4571 static rtx
4572 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4573 const_tree fntype)
4574 {
4575 /* We aren't passed a decl, so we can't check that a call is local.
4576 However, it isn't clear that that would be a win anyway, since it
4577 might limit some tail-calling opportunities. */
4578 enum arm_pcs pcs_variant;
4579 int unsignedp ATTRIBUTE_UNUSED;
4580
4581 if (fntype)
4582 {
4583 const_tree fndecl = NULL_TREE;
4584
4585 if (TREE_CODE (fntype) == FUNCTION_DECL)
4586 {
4587 fndecl = fntype;
4588 fntype = TREE_TYPE (fntype);
4589 }
4590
4591 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4592 }
4593 else
4594 pcs_variant = arm_pcs_default;
4595
4596 /* Promote integer types. */
4597 if (type && INTEGRAL_TYPE_P (type))
4598 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4599
4600 if (pcs_variant != ARM_PCS_AAPCS)
4601 {
4602 int i;
4603
4604 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4605 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4606 type))
4607 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4608 mode, type);
4609 }
4610
4611   /* Promote small structs returned in a register to full-word size
4612 for big-endian AAPCS. */
4613 if (type && arm_return_in_msb (type))
4614 {
4615 HOST_WIDE_INT size = int_size_in_bytes (type);
4616 if (size % UNITS_PER_WORD != 0)
4617 {
4618 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4619 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4620 }
4621 }
4622
4623 return gen_rtx_REG (mode, R0_REGNUM);
4624 }
4625
4626 static rtx
4627 aapcs_libcall_value (enum machine_mode mode)
4628 {
4629 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4630 && GET_MODE_SIZE (mode) <= 4)
4631 mode = SImode;
4632
4633 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4634 }
4635
4636 /* Lay out a function argument using the AAPCS rules. The rule
4637 numbers referred to here are those in the AAPCS. */
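
/* Worked example (illustrative) for the base AAPCS variant:

     void f (int a, double d, int b);

   A is allocated to r0 (rule C4).  D needs doubleword alignment, so
   rule C3 rounds the NCRN from 1 up to 2 and the value occupies
   r2/r3.  B then no longer fits in a core register and goes on the
   stack (rules C6-C8).  */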
4638 static void
4639 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4640 const_tree type, bool named)
4641 {
4642 int nregs, nregs2;
4643 int ncrn;
4644
4645 /* We only need to do this once per argument. */
4646 if (pcum->aapcs_arg_processed)
4647 return;
4648
4649 pcum->aapcs_arg_processed = true;
4650
4651 /* Special case: if named is false then we are handling an incoming
4652 anonymous argument which is on the stack. */
4653 if (!named)
4654 return;
4655
4656 /* Is this a potential co-processor register candidate? */
4657 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4658 {
4659 int slot = aapcs_select_call_coproc (pcum, mode, type);
4660 pcum->aapcs_cprc_slot = slot;
4661
4662 /* We don't have to apply any of the rules from part B of the
4663 preparation phase, these are handled elsewhere in the
4664 compiler. */
4665
4666 if (slot >= 0)
4667 {
4668 /* A Co-processor register candidate goes either in its own
4669 class of registers or on the stack. */
4670 if (!pcum->aapcs_cprc_failed[slot])
4671 {
4672 /* C1.cp - Try to allocate the argument to co-processor
4673 registers. */
4674 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4675 return;
4676
4677 /* C2.cp - Put the argument on the stack and note that we
4678 can't assign any more candidates in this slot. We also
4679 need to note that we have allocated stack space, so that
4680 we won't later try to split a non-cprc candidate between
4681 core registers and the stack. */
4682 pcum->aapcs_cprc_failed[slot] = true;
4683 pcum->can_split = false;
4684 }
4685
4686 /* We didn't get a register, so this argument goes on the
4687 stack. */
4688 gcc_assert (pcum->can_split == false);
4689 return;
4690 }
4691 }
4692
4693 /* C3 - For double-word aligned arguments, round the NCRN up to the
4694 next even number. */
4695 ncrn = pcum->aapcs_ncrn;
4696 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4697 ncrn++;
4698
4699 nregs = ARM_NUM_REGS2(mode, type);
4700
4701 /* Sigh, this test should really assert that nregs > 0, but a GCC
4702 extension allows empty structs and then gives them empty size; it
4703 then allows such a structure to be passed by value. For some of
4704 the code below we have to pretend that such an argument has
4705 non-zero size so that we 'locate' it correctly either in
4706 registers or on the stack. */
4707 gcc_assert (nregs >= 0);
4708
4709 nregs2 = nregs ? nregs : 1;
4710
4711 /* C4 - Argument fits entirely in core registers. */
4712 if (ncrn + nregs2 <= NUM_ARG_REGS)
4713 {
4714 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4715 pcum->aapcs_next_ncrn = ncrn + nregs;
4716 return;
4717 }
4718
4719 /* C5 - Some core registers left and there are no arguments already
4720 on the stack: split this argument between the remaining core
4721 registers and the stack. */
4722 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4723 {
4724 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4725 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4726 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4727 return;
4728 }
4729
4730 /* C6 - NCRN is set to 4. */
4731 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4732
4733   /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
4734 return;
4735 }
4736
4737 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4738 for a call to a function whose data type is FNTYPE.
4739 For a library call, FNTYPE is NULL. */
4740 void
4741 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4742 rtx libname,
4743 tree fndecl ATTRIBUTE_UNUSED)
4744 {
4745   /* Determine the calling convention in effect for this call.  */
4746 if (fntype)
4747 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4748 else
4749 pcum->pcs_variant = arm_pcs_default;
4750
4751 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4752 {
4753 if (arm_libcall_uses_aapcs_base (libname))
4754 pcum->pcs_variant = ARM_PCS_AAPCS;
4755
4756 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4757 pcum->aapcs_reg = NULL_RTX;
4758 pcum->aapcs_partial = 0;
4759 pcum->aapcs_arg_processed = false;
4760 pcum->aapcs_cprc_slot = -1;
4761 pcum->can_split = true;
4762
4763 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4764 {
4765 int i;
4766
4767 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4768 {
4769 pcum->aapcs_cprc_failed[i] = false;
4770 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4771 }
4772 }
4773 return;
4774 }
4775
4776 /* Legacy ABIs */
4777
4778 /* On the ARM, the offset starts at 0. */
4779 pcum->nregs = 0;
4780 pcum->iwmmxt_nregs = 0;
4781 pcum->can_split = true;
4782
4783 /* Varargs vectors are treated the same as long long.
4784 named_count avoids having to change the way arm handles 'named' */
4785 pcum->named_count = 0;
4786 pcum->nargs = 0;
4787
4788 if (TARGET_REALLY_IWMMXT && fntype)
4789 {
4790 tree fn_arg;
4791
4792 for (fn_arg = TYPE_ARG_TYPES (fntype);
4793 fn_arg;
4794 fn_arg = TREE_CHAIN (fn_arg))
4795 pcum->named_count += 1;
4796
4797 if (! pcum->named_count)
4798 pcum->named_count = INT_MAX;
4799 }
4800 }
4801
4802
4803 /* Return true if mode/type need doubleword alignment. */
4804 static bool
4805 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4806 {
4807 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4808 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4809 }
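
/* E.g. (illustrative): SImode arguments (32-bit alignment) do not need
   it, while DImode and DFmode (64-bit alignment) do, as does any type
   declared with __attribute__ ((aligned (8))).  */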
4810
4811
4812 /* Determine where to put an argument to a function.
4813 Value is zero to push the argument on the stack,
4814 or a hard register in which to store the argument.
4815
4816 MODE is the argument's machine mode.
4817 TYPE is the data type of the argument (as a tree).
4818 This is null for libcalls where that information may
4819 not be available.
4820 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4821 the preceding args and about the function being called.
4822 NAMED is nonzero if this argument is a named parameter
4823 (otherwise it is an extra parameter matching an ellipsis).
4824
4825 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4826 other arguments are passed on the stack. If (NAMED == 0) (which happens
4827 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4828    defined), say it is passed on the stack (function_prologue will
4829    indeed place it on the stack if necessary).  */
4830
4831 static rtx
4832 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4833 const_tree type, bool named)
4834 {
4835 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4836 int nregs;
4837
4838 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4839 a call insn (op3 of a call_value insn). */
4840 if (mode == VOIDmode)
4841 return const0_rtx;
4842
4843 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4844 {
4845 aapcs_layout_arg (pcum, mode, type, named);
4846 return pcum->aapcs_reg;
4847 }
4848
4849 /* Varargs vectors are treated the same as long long.
4850 named_count avoids having to change the way arm handles 'named' */
4851 if (TARGET_IWMMXT_ABI
4852 && arm_vector_mode_supported_p (mode)
4853 && pcum->named_count > pcum->nargs + 1)
4854 {
4855 if (pcum->iwmmxt_nregs <= 9)
4856 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4857 else
4858 {
4859 pcum->can_split = false;
4860 return NULL_RTX;
4861 }
4862 }
4863
4864 /* Put doubleword aligned quantities in even register pairs. */
4865 if (pcum->nregs & 1
4866 && ARM_DOUBLEWORD_ALIGN
4867 && arm_needs_doubleword_align (mode, type))
4868 pcum->nregs++;
4869
4870 /* Only allow splitting an arg between regs and memory if all preceding
4871 args were allocated to regs. For args passed by reference we only count
4872 the reference pointer. */
4873 if (pcum->can_split)
4874 nregs = 1;
4875 else
4876 nregs = ARM_NUM_REGS2 (mode, type);
4877
4878 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4879 return NULL_RTX;
4880
4881 return gen_rtx_REG (mode, pcum->nregs);
4882 }
4883
4884 static unsigned int
4885 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4886 {
4887 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4888 ? DOUBLEWORD_ALIGNMENT
4889 : PARM_BOUNDARY);
4890 }
4891
4892 static int
4893 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4894 tree type, bool named)
4895 {
4896 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4897 int nregs = pcum->nregs;
4898
4899 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4900 {
4901 aapcs_layout_arg (pcum, mode, type, named);
4902 return pcum->aapcs_partial;
4903 }
4904
4905 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4906 return 0;
4907
4908 if (NUM_ARG_REGS > nregs
4909 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4910 && pcum->can_split)
4911 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4912
4913 return 0;
4914 }
4915
4916 /* Update the data in PCUM to advance over an argument
4917 of mode MODE and data type TYPE.
4918 (TYPE is null for libcalls where that information may not be available.) */
4919
4920 static void
4921 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4922 const_tree type, bool named)
4923 {
4924 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4925
4926 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4927 {
4928 aapcs_layout_arg (pcum, mode, type, named);
4929
4930 if (pcum->aapcs_cprc_slot >= 0)
4931 {
4932 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4933 type);
4934 pcum->aapcs_cprc_slot = -1;
4935 }
4936
4937 /* Generic stuff. */
4938 pcum->aapcs_arg_processed = false;
4939 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4940 pcum->aapcs_reg = NULL_RTX;
4941 pcum->aapcs_partial = 0;
4942 }
4943 else
4944 {
4945 pcum->nargs += 1;
4946 if (arm_vector_mode_supported_p (mode)
4947 && pcum->named_count > pcum->nargs
4948 && TARGET_IWMMXT_ABI)
4949 pcum->iwmmxt_nregs += 1;
4950 else
4951 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4952 }
4953 }
4954
4955 /* Variable sized types are passed by reference. This is a GCC
4956 extension to the ARM ABI. */
4957
4958 static bool
4959 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4960 enum machine_mode mode ATTRIBUTE_UNUSED,
4961 const_tree type, bool named ATTRIBUTE_UNUSED)
4962 {
4963 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4964 }
4965 \f
4966 /* Encode the current state of the #pragma [no_]long_calls. */
4967 typedef enum
4968 {
4969 OFF, /* No #pragma [no_]long_calls is in effect. */
4970 LONG, /* #pragma long_calls is in effect. */
4971 SHORT /* #pragma no_long_calls is in effect. */
4972 } arm_pragma_enum;
4973
4974 static arm_pragma_enum arm_pragma_long_calls = OFF;
4975
4976 void
4977 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4978 {
4979 arm_pragma_long_calls = LONG;
4980 }
4981
4982 void
4983 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4984 {
4985 arm_pragma_long_calls = SHORT;
4986 }
4987
4988 void
4989 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4990 {
4991 arm_pragma_long_calls = OFF;
4992 }
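
/* Typical source-level use of these pragmas (illustrative only):

     #pragma long_calls
     void far_away (void);     (declared with the long_call attribute)
     #pragma long_calls_off
     void nearby (void);       (back to the command-line default)  */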
4993 \f
4994 /* Handle an attribute requiring a FUNCTION_DECL;
4995 arguments as in struct attribute_spec.handler. */
4996 static tree
4997 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4998 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4999 {
5000 if (TREE_CODE (*node) != FUNCTION_DECL)
5001 {
5002 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5003 name);
5004 *no_add_attrs = true;
5005 }
5006
5007 return NULL_TREE;
5008 }
5009
5010 /* Handle an "interrupt" or "isr" attribute;
5011 arguments as in struct attribute_spec.handler. */
5012 static tree
5013 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5014 bool *no_add_attrs)
5015 {
5016 if (DECL_P (*node))
5017 {
5018 if (TREE_CODE (*node) != FUNCTION_DECL)
5019 {
5020 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5021 name);
5022 *no_add_attrs = true;
5023 }
5024 /* FIXME: the argument if any is checked for type attributes;
5025 should it be checked for decl ones? */
5026 }
5027 else
5028 {
5029 if (TREE_CODE (*node) == FUNCTION_TYPE
5030 || TREE_CODE (*node) == METHOD_TYPE)
5031 {
5032 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5033 {
5034 warning (OPT_Wattributes, "%qE attribute ignored",
5035 name);
5036 *no_add_attrs = true;
5037 }
5038 }
5039 else if (TREE_CODE (*node) == POINTER_TYPE
5040 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5041 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5042 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5043 {
5044 *node = build_variant_type_copy (*node);
5045 TREE_TYPE (*node) = build_type_attribute_variant
5046 (TREE_TYPE (*node),
5047 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5048 *no_add_attrs = true;
5049 }
5050 else
5051 {
5052 /* Possibly pass this attribute on from the type to a decl. */
5053 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5054 | (int) ATTR_FLAG_FUNCTION_NEXT
5055 | (int) ATTR_FLAG_ARRAY_NEXT))
5056 {
5057 *no_add_attrs = true;
5058 return tree_cons (name, args, NULL_TREE);
5059 }
5060 else
5061 {
5062 warning (OPT_Wattributes, "%qE attribute ignored",
5063 name);
5064 }
5065 }
5066 }
5067
5068 return NULL_TREE;
5069 }
5070
5071 /* Handle a "pcs" attribute; arguments as in struct
5072 attribute_spec.handler. */
5073 static tree
5074 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5075 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5076 {
5077 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5078 {
5079 warning (OPT_Wattributes, "%qE attribute ignored", name);
5080 *no_add_attrs = true;
5081 }
5082 return NULL_TREE;
5083 }
5084
5085 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5086 /* Handle the "notshared" attribute. This attribute is another way of
5087 requesting hidden visibility. ARM's compiler supports
5088 "__declspec(notshared)"; we support the same thing via an
5089 attribute. */
5090
5091 static tree
5092 arm_handle_notshared_attribute (tree *node,
5093 tree name ATTRIBUTE_UNUSED,
5094 tree args ATTRIBUTE_UNUSED,
5095 int flags ATTRIBUTE_UNUSED,
5096 bool *no_add_attrs)
5097 {
5098 tree decl = TYPE_NAME (*node);
5099
5100 if (decl)
5101 {
5102 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5103 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5104 *no_add_attrs = false;
5105 }
5106 return NULL_TREE;
5107 }
5108 #endif
5109
5110 /* Return 0 if the attributes for two types are incompatible, 1 if they
5111 are compatible, and 2 if they are nearly compatible (which causes a
5112 warning to be generated). */
5113 static int
5114 arm_comp_type_attributes (const_tree type1, const_tree type2)
5115 {
5116 int l1, l2, s1, s2;
5117
5118 /* Check for mismatch of non-default calling convention. */
5119 if (TREE_CODE (type1) != FUNCTION_TYPE)
5120 return 1;
5121
5122 /* Check for mismatched call attributes. */
5123 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5124 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5125 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5126 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5127
5128 /* Only bother to check if an attribute is defined. */
5129 if (l1 | l2 | s1 | s2)
5130 {
5131 /* If one type has an attribute, the other must have the same attribute. */
5132 if ((l1 != l2) || (s1 != s2))
5133 return 0;
5134
5135 /* Disallow mixed attributes. */
5136 if ((l1 & s2) || (l2 & s1))
5137 return 0;
5138 }
5139
5140 /* Check for mismatched ISR attribute. */
5141 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5142 if (! l1)
5143 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5144 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5145 if (! l2)
5146 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5147 if (l1 != l2)
5148 return 0;
5149
5150 return 1;
5151 }
5152
5153 /* Assign default attributes to a newly defined type.  This is used to
5154 set short_call/long_call attributes for function types of
5155 functions defined inside corresponding #pragma scopes. */
5156 static void
5157 arm_set_default_type_attributes (tree type)
5158 {
5159   /* Add __attribute__ ((long_call)) to all functions when inside
5160      #pragma long_calls, or __attribute__ ((short_call)) when inside
5161      #pragma no_long_calls.  */
5162 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5163 {
5164 tree type_attr_list, attr_name;
5165 type_attr_list = TYPE_ATTRIBUTES (type);
5166
5167 if (arm_pragma_long_calls == LONG)
5168 attr_name = get_identifier ("long_call");
5169 else if (arm_pragma_long_calls == SHORT)
5170 attr_name = get_identifier ("short_call");
5171 else
5172 return;
5173
5174 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5175 TYPE_ATTRIBUTES (type) = type_attr_list;
5176 }
5177 }
5178 \f
5179 /* Return true if DECL is known to be linked into section SECTION. */
5180
5181 static bool
5182 arm_function_in_section_p (tree decl, section *section)
5183 {
5184 /* We can only be certain about functions defined in the same
5185 compilation unit. */
5186 if (!TREE_STATIC (decl))
5187 return false;
5188
5189 /* Make sure that SYMBOL always binds to the definition in this
5190 compilation unit. */
5191 if (!targetm.binds_local_p (decl))
5192 return false;
5193
5194 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5195 if (!DECL_SECTION_NAME (decl))
5196 {
5197 /* Make sure that we will not create a unique section for DECL. */
5198 if (flag_function_sections || DECL_ONE_ONLY (decl))
5199 return false;
5200 }
5201
5202 return function_section (decl) == section;
5203 }
5204
5205 /* Return nonzero if a 32-bit "long_call" should be generated for
5206 a call from the current function to DECL. We generate a long_call
5207 if the function:
5208
5209    a. has an __attribute__ ((long_call))
5210 or b. is within the scope of a #pragma long_calls
5211 or c. the -mlong-calls command line switch has been specified
5212
5213 However we do not generate a long call if the function:
5214
5215 d. has an __attribute__ ((short_call))
5216 or e. is inside the scope of a #pragma no_long_calls
5217 or f. is defined in the same section as the current function. */
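
/* For instance (illustrative):

     void target_fn (void) __attribute__ ((long_call));

   forces an indirect call sequence that loads the full 32-bit callee
   address into a register, rather than a direct BL whose range in ARM
   state is limited to +/-32MB.  */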
5218
5219 bool
5220 arm_is_long_call_p (tree decl)
5221 {
5222 tree attrs;
5223
5224 if (!decl)
5225 return TARGET_LONG_CALLS;
5226
5227 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5228 if (lookup_attribute ("short_call", attrs))
5229 return false;
5230
5231 /* For "f", be conservative, and only cater for cases in which the
5232 whole of the current function is placed in the same section. */
5233 if (!flag_reorder_blocks_and_partition
5234 && TREE_CODE (decl) == FUNCTION_DECL
5235 && arm_function_in_section_p (decl, current_function_section ()))
5236 return false;
5237
5238 if (lookup_attribute ("long_call", attrs))
5239 return true;
5240
5241 return TARGET_LONG_CALLS;
5242 }
5243
5244 /* Return nonzero if it is ok to make a tail-call to DECL. */
5245 static bool
5246 arm_function_ok_for_sibcall (tree decl, tree exp)
5247 {
5248 unsigned long func_type;
5249
5250 if (cfun->machine->sibcall_blocked)
5251 return false;
5252
5253 /* Never tailcall something for which we have no decl, or if we
5254 are generating code for Thumb-1. */
5255 if (decl == NULL || TARGET_THUMB1)
5256 return false;
5257
5258 /* The PIC register is live on entry to VxWorks PLT entries, so we
5259 must make the call before restoring the PIC register. */
5260 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5261 return false;
5262
5263 /* Cannot tail-call to long calls, since these are out of range of
5264 a branch instruction. */
5265 if (arm_is_long_call_p (decl))
5266 return false;
5267
5268 /* If we are interworking and the function is not declared static
5269 then we can't tail-call it unless we know that it exists in this
5270 compilation unit (since it might be a Thumb routine). */
5271 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5272 return false;
5273
5274 func_type = arm_current_func_type ();
5275 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5276 if (IS_INTERRUPT (func_type))
5277 return false;
5278
5279 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5280 {
5281 /* Check that the return value locations are the same. For
5282 example that we aren't returning a value from the sibling in
5283 a VFP register but then need to transfer it to a core
5284 register. */
5285 rtx a, b;
5286
5287 a = arm_function_value (TREE_TYPE (exp), decl, false);
5288 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5289 cfun->decl, false);
5290 if (!rtx_equal_p (a, b))
5291 return false;
5292 }
5293
5294 /* Never tailcall if function may be called with a misaligned SP. */
5295 if (IS_STACKALIGN (func_type))
5296 return false;
5297
5298 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5299 references should become a NOP. Don't convert such calls into
5300 sibling calls. */
5301 if (TARGET_AAPCS_BASED
5302 && arm_abi == ARM_ABI_AAPCS
5303 && DECL_WEAK (decl))
5304 return false;
5305
5306 /* Everything else is ok. */
5307 return true;
5308 }
5309
5310 \f
5311 /* Addressing mode support functions. */
5312
5313 /* Return nonzero if X is a legitimate immediate operand when compiling
5314 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5315 int
5316 legitimate_pic_operand_p (rtx x)
5317 {
5318 if (GET_CODE (x) == SYMBOL_REF
5319 || (GET_CODE (x) == CONST
5320 && GET_CODE (XEXP (x, 0)) == PLUS
5321 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5322 return 0;
5323
5324 return 1;
5325 }
5326
5327 /* Record that the current function needs a PIC register. Initialize
5328 cfun->machine->pic_reg if we have not already done so. */
5329
5330 static void
5331 require_pic_register (void)
5332 {
5333 /* A lot of the logic here is made obscure by the fact that this
5334 routine gets called as part of the rtx cost estimation process.
5335 We don't want those calls to affect any assumptions about the real
5336 function; and further, we can't call entry_of_function() until we
5337 start the real expansion process. */
5338 if (!crtl->uses_pic_offset_table)
5339 {
5340 gcc_assert (can_create_pseudo_p ());
5341 if (arm_pic_register != INVALID_REGNUM)
5342 {
5343 if (!cfun->machine->pic_reg)
5344 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5345
5346 /* Play games to avoid marking the function as needing pic
5347 if we are being called as part of the cost-estimation
5348 process. */
5349 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5350 crtl->uses_pic_offset_table = 1;
5351 }
5352 else
5353 {
5354 rtx seq, insn;
5355
5356 if (!cfun->machine->pic_reg)
5357 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5358
5359 /* Play games to avoid marking the function as needing pic
5360 if we are being called as part of the cost-estimation
5361 process. */
5362 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5363 {
5364 crtl->uses_pic_offset_table = 1;
5365 start_sequence ();
5366
5367 arm_load_pic_register (0UL);
5368
5369 seq = get_insns ();
5370 end_sequence ();
5371
5372 for (insn = seq; insn; insn = NEXT_INSN (insn))
5373 if (INSN_P (insn))
5374 INSN_LOCATION (insn) = prologue_location;
5375
5376 /* We can be called during expansion of PHI nodes, where
5377 we can't yet emit instructions directly in the final
5378 insn stream. Queue the insns on the entry edge, they will
5379 be committed after everything else is expanded. */
5380 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5381 }
5382 }
5383 }
5384 }
5385
5386 rtx
5387 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5388 {
5389 if (GET_CODE (orig) == SYMBOL_REF
5390 || GET_CODE (orig) == LABEL_REF)
5391 {
5392 rtx insn;
5393
5394 if (reg == 0)
5395 {
5396 gcc_assert (can_create_pseudo_p ());
5397 reg = gen_reg_rtx (Pmode);
5398 }
5399
5400 /* VxWorks does not impose a fixed gap between segments; the run-time
5401 gap can be different from the object-file gap. We therefore can't
5402 use GOTOFF unless we are absolutely sure that the symbol is in the
5403 same segment as the GOT. Unfortunately, the flexibility of linker
5404 scripts means that we can't be sure of that in general, so assume
5405 that GOTOFF is never valid on VxWorks. */
5406 if ((GET_CODE (orig) == LABEL_REF
5407 || (GET_CODE (orig) == SYMBOL_REF &&
5408 SYMBOL_REF_LOCAL_P (orig)))
5409 && NEED_GOT_RELOC
5410 && !TARGET_VXWORKS_RTP)
5411 insn = arm_pic_static_addr (orig, reg);
5412 else
5413 {
5414 rtx pat;
5415 rtx mem;
5416
5417 /* If this function doesn't have a pic register, create one now. */
5418 require_pic_register ();
5419
5420 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5421
5422 /* Make the MEM as close to a constant as possible. */
5423 mem = SET_SRC (pat);
5424 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5425 MEM_READONLY_P (mem) = 1;
5426 MEM_NOTRAP_P (mem) = 1;
5427
5428 insn = emit_insn (pat);
5429 }
5430
5431 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5432 by loop. */
5433 set_unique_reg_note (insn, REG_EQUAL, orig);
5434
5435 return reg;
5436 }
5437 else if (GET_CODE (orig) == CONST)
5438 {
5439 rtx base, offset;
5440
5441 if (GET_CODE (XEXP (orig, 0)) == PLUS
5442 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5443 return orig;
5444
5445 /* Handle the case where we have: const (UNSPEC_TLS). */
5446 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5447 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5448 return orig;
5449
5450 /* Handle the case where we have:
5451 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5452 CONST_INT. */
5453 if (GET_CODE (XEXP (orig, 0)) == PLUS
5454 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5455 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5456 {
5457 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
5458 return orig;
5459 }
5460
5461 if (reg == 0)
5462 {
5463 gcc_assert (can_create_pseudo_p ());
5464 reg = gen_reg_rtx (Pmode);
5465 }
5466
5467 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5468
5469 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5470 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5471 base == reg ? 0 : reg);
5472
5473 if (CONST_INT_P (offset))
5474 {
5475 /* The base register doesn't really matter, we only want to
5476 test the index for the appropriate mode. */
5477 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5478 {
5479 gcc_assert (can_create_pseudo_p ());
5480 offset = force_reg (Pmode, offset);
5481 }
5482
5483 if (CONST_INT_P (offset))
5484 return plus_constant (Pmode, base, INTVAL (offset));
5485 }
5486
5487 if (GET_MODE_SIZE (mode) > 4
5488 && (GET_MODE_CLASS (mode) == MODE_INT
5489 || TARGET_SOFT_FLOAT))
5490 {
5491 emit_insn (gen_addsi3 (reg, base, offset));
5492 return reg;
5493 }
5494
5495 return gen_rtx_PLUS (Pmode, base, offset);
5496 }
5497
5498 return orig;
5499 }
5500
5501
5502 /* Find a spare register to use during the prolog of a function. */
5503
5504 static int
5505 thumb_find_work_register (unsigned long pushed_regs_mask)
5506 {
5507 int reg;
5508
5509 /* Check the argument registers first as these are call-used. The
5510 register allocation order means that sometimes r3 might be used
5511 but earlier argument registers might not, so check them all. */
5512 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5513 if (!df_regs_ever_live_p (reg))
5514 return reg;
5515
5516 /* Before going on to check the call-saved registers we can try a couple
5517 more ways of deducing that r3 is available. The first is when we are
5518 pushing anonymous arguments onto the stack and we have less than 4
5519 registers worth of fixed arguments(*). In this case r3 will be part of
5520 the variable argument list and so we can be sure that it will be
5521 pushed right at the start of the function. Hence it will be available
5522 for the rest of the prologue.
5523 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5524 if (cfun->machine->uses_anonymous_args
5525 && crtl->args.pretend_args_size > 0)
5526 return LAST_ARG_REGNUM;
5527
5528 /* The other case is when we have fixed arguments but less than 4 registers
5529 worth. In this case r3 might be used in the body of the function, but
5530 it is not being used to convey an argument into the function. In theory
5531 we could just check crtl->args.size to see how many bytes are
5532 being passed in argument registers, but it seems that it is unreliable.
5533 Sometimes it will have the value 0 when in fact arguments are being
5534 passed. (See testcase execute/20021111-1.c for an example). So we also
5535 check the args_info.nregs field as well. The problem with this field is
5536 that it makes no allowances for arguments that are passed to the
5537 function but which are not used. Hence we could miss an opportunity
5538 when a function has an unused argument in r3. But it is better to be
5539 safe than to be sorry. */
5540 if (! cfun->machine->uses_anonymous_args
5541 && crtl->args.size >= 0
5542 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5543 && crtl->args.info.nregs < 4)
5544 return LAST_ARG_REGNUM;
5545
5546 /* Otherwise look for a call-saved register that is going to be pushed. */
5547 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5548 if (pushed_regs_mask & (1 << reg))
5549 return reg;
5550
5551 if (TARGET_THUMB2)
5552 {
5553 /* Thumb-2 can use high regs. */
5554 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5555 if (pushed_regs_mask & (1 << reg))
5556 return reg;
5557 }
5558 /* Something went wrong - thumb_compute_save_reg_mask()
5559 should have arranged for a suitable register to be pushed. */
5560 gcc_unreachable ();
5561 }
5562
5563 static GTY(()) int pic_labelno;
5564
5565 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5566 low register. */
5567
5568 void
5569 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5570 {
5571 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5572
5573 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5574 return;
5575
5576 gcc_assert (flag_pic);
5577
5578 pic_reg = cfun->machine->pic_reg;
5579 if (TARGET_VXWORKS_RTP)
5580 {
5581 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5582 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5583 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5584
5585 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5586
5587 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5588 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5589 }
5590 else
5591 {
5592 /* We use an UNSPEC rather than a LABEL_REF because this label
5593 never appears in the code stream. */
5594
5595 labelno = GEN_INT (pic_labelno++);
5596 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5597 l1 = gen_rtx_CONST (VOIDmode, l1);
5598
5599 /* On the ARM the PC register contains 'dot + 8' at the time of the
5600 addition; on the Thumb it is 'dot + 4'. */
5601 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5602 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5603 UNSPEC_GOTSYM_OFF);
5604 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5605
5606 if (TARGET_32BIT)
5607 {
5608 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5609 }
5610 else /* TARGET_THUMB1 */
5611 {
5612 if (arm_pic_register != INVALID_REGNUM
5613 && REGNO (pic_reg) > LAST_LO_REGNUM)
5614 {
5615 /* We will have pushed the pic register, so we should always be
5616 able to find a work register. */
5617 pic_tmp = gen_rtx_REG (SImode,
5618 thumb_find_work_register (saved_regs));
5619 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5620 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5621 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5622 }
5623 else
5624 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5625 }
5626 }
5627
5628 /* Need to emit this whether or not we obey regdecls,
5629 since setjmp/longjmp can cause life info to screw up. */
5630 emit_use (pic_reg);
5631 }
5632
5633 /* Generate code to load the address of a static var when flag_pic is set. */
5634 static rtx
5635 arm_pic_static_addr (rtx orig, rtx reg)
5636 {
5637 rtx l1, labelno, offset_rtx, insn;
5638
5639 gcc_assert (flag_pic);
5640
5641 /* We use an UNSPEC rather than a LABEL_REF because this label
5642 never appears in the code stream. */
5643 labelno = GEN_INT (pic_labelno++);
5644 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5645 l1 = gen_rtx_CONST (VOIDmode, l1);
5646
5647 /* On the ARM the PC register contains 'dot + 8' at the time of the
5648 addition; on the Thumb it is 'dot + 4'. */
5649 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5650 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5651 UNSPEC_SYMBOL_OFFSET);
5652 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5653
5654 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5655 return insn;
5656 }
5657
5658 /* Return nonzero if X is valid as an ARM state addressing register. */
5659 static int
5660 arm_address_register_rtx_p (rtx x, int strict_p)
5661 {
5662 int regno;
5663
5664 if (!REG_P (x))
5665 return 0;
5666
5667 regno = REGNO (x);
5668
5669 if (strict_p)
5670 return ARM_REGNO_OK_FOR_BASE_P (regno);
5671
5672 return (regno <= LAST_ARM_REGNUM
5673 || regno >= FIRST_PSEUDO_REGISTER
5674 || regno == FRAME_POINTER_REGNUM
5675 || regno == ARG_POINTER_REGNUM);
5676 }
5677
5678 /* Return TRUE if this rtx is the difference of a symbol and a label,
5679 and will reduce to a PC-relative relocation in the object file.
5680 Expressions like this can be left alone when generating PIC, rather
5681 than forced through the GOT. */
5682 static int
5683 pcrel_constant_p (rtx x)
5684 {
5685 if (GET_CODE (x) == MINUS)
5686 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5687
5688 return FALSE;
5689 }
5690
5691 /* Return true if X will surely end up in an index register after next
5692 splitting pass. */
5693 static bool
5694 will_be_in_index_register (const_rtx x)
5695 {
5696 /* arm.md: calculate_pic_address will split this into a register. */
5697 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5698 }
5699
5700 /* Return nonzero if X is a valid ARM state address operand. */
5701 int
5702 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5703 int strict_p)
5704 {
5705 bool use_ldrd;
5706 enum rtx_code code = GET_CODE (x);
5707
5708 if (arm_address_register_rtx_p (x, strict_p))
5709 return 1;
5710
5711 use_ldrd = (TARGET_LDRD
5712 && (mode == DImode
5713 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5714
5715 if (code == POST_INC || code == PRE_DEC
5716 || ((code == PRE_INC || code == POST_DEC)
5717 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5718 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5719
5720 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5721 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5722 && GET_CODE (XEXP (x, 1)) == PLUS
5723 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5724 {
5725 rtx addend = XEXP (XEXP (x, 1), 1);
5726
5727 /* Don't allow ldrd post increment by register because it's hard
5728 to fix up invalid register choices. */
5729 if (use_ldrd
5730 && GET_CODE (x) == POST_MODIFY
5731 && REG_P (addend))
5732 return 0;
5733
5734 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5735 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5736 }
5737
5738 /* After reload constants split into minipools will have addresses
5739 from a LABEL_REF. */
5740 else if (reload_completed
5741 && (code == LABEL_REF
5742 || (code == CONST
5743 && GET_CODE (XEXP (x, 0)) == PLUS
5744 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5745 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5746 return 1;
5747
5748 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5749 return 0;
5750
5751 else if (code == PLUS)
5752 {
5753 rtx xop0 = XEXP (x, 0);
5754 rtx xop1 = XEXP (x, 1);
5755
5756 return ((arm_address_register_rtx_p (xop0, strict_p)
5757 && ((CONST_INT_P (xop1)
5758 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5759 || (!strict_p && will_be_in_index_register (xop1))))
5760 || (arm_address_register_rtx_p (xop1, strict_p)
5761 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5762 }
5763
5764 #if 0
5765 /* Reload currently can't handle MINUS, so disable this for now */
5766 else if (GET_CODE (x) == MINUS)
5767 {
5768 rtx xop0 = XEXP (x, 0);
5769 rtx xop1 = XEXP (x, 1);
5770
5771 return (arm_address_register_rtx_p (xop0, strict_p)
5772 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5773 }
5774 #endif
5775
5776 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5777 && code == SYMBOL_REF
5778 && CONSTANT_POOL_ADDRESS_P (x)
5779 && ! (flag_pic
5780 && symbol_mentioned_p (get_pool_constant (x))
5781 && ! pcrel_constant_p (get_pool_constant (x))))
5782 return 1;
5783
5784 return 0;
5785 }
5786
5787 /* Return nonzero if X is a valid Thumb-2 address operand. */
5788 static int
5789 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5790 {
5791 bool use_ldrd;
5792 enum rtx_code code = GET_CODE (x);
5793
5794 if (arm_address_register_rtx_p (x, strict_p))
5795 return 1;
5796
5797 use_ldrd = (TARGET_LDRD
5798 && (mode == DImode
5799 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5800
5801 if (code == POST_INC || code == PRE_DEC
5802 || ((code == PRE_INC || code == POST_DEC)
5803 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5804 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5805
5806 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5807 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5808 && GET_CODE (XEXP (x, 1)) == PLUS
5809 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5810 {
5811 /* Thumb-2 only has autoincrement by constant. */
5812 rtx addend = XEXP (XEXP (x, 1), 1);
5813 HOST_WIDE_INT offset;
5814
5815 if (!CONST_INT_P (addend))
5816 return 0;
5817
5818 offset = INTVAL(addend);
5819 if (GET_MODE_SIZE (mode) <= 4)
5820 return (offset > -256 && offset < 256);
5821
5822 return (use_ldrd && offset > -1024 && offset < 1024
5823 && (offset & 3) == 0);
5824 }
5825
5826 /* After reload constants split into minipools will have addresses
5827 from a LABEL_REF. */
5828 else if (reload_completed
5829 && (code == LABEL_REF
5830 || (code == CONST
5831 && GET_CODE (XEXP (x, 0)) == PLUS
5832 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5833 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
5834 return 1;
5835
5836 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5837 return 0;
5838
5839 else if (code == PLUS)
5840 {
5841 rtx xop0 = XEXP (x, 0);
5842 rtx xop1 = XEXP (x, 1);
5843
5844 return ((arm_address_register_rtx_p (xop0, strict_p)
5845 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5846 || (!strict_p && will_be_in_index_register (xop1))))
5847 || (arm_address_register_rtx_p (xop1, strict_p)
5848 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5849 }
5850
5851 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5852 && code == SYMBOL_REF
5853 && CONSTANT_POOL_ADDRESS_P (x)
5854 && ! (flag_pic
5855 && symbol_mentioned_p (get_pool_constant (x))
5856 && ! pcrel_constant_p (get_pool_constant (x))))
5857 return 1;
5858
5859 return 0;
5860 }
5861
5862 /* Return nonzero if INDEX is valid for an address index operand in
5863 ARM state. */
5864 static int
5865 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5866 int strict_p)
5867 {
5868 HOST_WIDE_INT range;
5869 enum rtx_code code = GET_CODE (index);
5870
5871 /* Standard coprocessor addressing modes. */
5872 if (TARGET_HARD_FLOAT
5873 && TARGET_VFP
5874 && (mode == SFmode || mode == DFmode))
5875 return (code == CONST_INT && INTVAL (index) < 1024
5876 && INTVAL (index) > -1024
5877 && (INTVAL (index) & 3) == 0);
5878
5879 /* For quad modes, we restrict the constant offset to be slightly less
5880 than what the instruction format permits. We do this because for
5881 quad mode moves, we will actually decompose them into two separate
5882 double-mode reads or writes. INDEX must therefore be a valid
5883 (double-mode) offset and so should INDEX+8. */
5884 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5885 return (code == CONST_INT
5886 && INTVAL (index) < 1016
5887 && INTVAL (index) > -1024
5888 && (INTVAL (index) & 3) == 0);
5889
5890 /* We have no such constraint on double mode offsets, so we permit the
5891 full range of the instruction format. */
5892 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5893 return (code == CONST_INT
5894 && INTVAL (index) < 1024
5895 && INTVAL (index) > -1024
5896 && (INTVAL (index) & 3) == 0);
5897
5898 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5899 return (code == CONST_INT
5900 && INTVAL (index) < 1024
5901 && INTVAL (index) > -1024
5902 && (INTVAL (index) & 3) == 0);
5903
5904 if (arm_address_register_rtx_p (index, strict_p)
5905 && (GET_MODE_SIZE (mode) <= 4))
5906 return 1;
5907
5908 if (mode == DImode || mode == DFmode)
5909 {
5910 if (code == CONST_INT)
5911 {
5912 HOST_WIDE_INT val = INTVAL (index);
5913
5914 if (TARGET_LDRD)
5915 return val > -256 && val < 256;
5916 else
5917 return val > -4096 && val < 4092;
5918 }
5919
5920 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5921 }
5922
5923 if (GET_MODE_SIZE (mode) <= 4
5924 && ! (arm_arch4
5925 && (mode == HImode
5926 || mode == HFmode
5927 || (mode == QImode && outer == SIGN_EXTEND))))
5928 {
5929 if (code == MULT)
5930 {
5931 rtx xiop0 = XEXP (index, 0);
5932 rtx xiop1 = XEXP (index, 1);
5933
5934 return ((arm_address_register_rtx_p (xiop0, strict_p)
5935 && power_of_two_operand (xiop1, SImode))
5936 || (arm_address_register_rtx_p (xiop1, strict_p)
5937 && power_of_two_operand (xiop0, SImode)));
5938 }
5939 else if (code == LSHIFTRT || code == ASHIFTRT
5940 || code == ASHIFT || code == ROTATERT)
5941 {
5942 rtx op = XEXP (index, 1);
5943
5944 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5945 && CONST_INT_P (op)
5946 && INTVAL (op) > 0
5947 && INTVAL (op) <= 31);
5948 }
5949 }
5950
5951 /* For ARM v4 we may be doing a sign-extend operation during the
5952 load. */
5953 if (arm_arch4)
5954 {
5955 if (mode == HImode
5956 || mode == HFmode
5957 || (outer == SIGN_EXTEND && mode == QImode))
5958 range = 256;
5959 else
5960 range = 4096;
5961 }
5962 else
5963 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5964
5965 return (code == CONST_INT
5966 && INTVAL (index) < range
5967 && INTVAL (index) > -range);
5968 }
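
/* Illustrative sketch (not part of the original source; the function name
   is hypothetical): sample offsets and whether the checks above accept
   them in a non-strict context.  Kept out of the build.  */
#if 0
static void
arm_index_range_examples (void)
{
  /* SImode ldr/str: sign-magnitude 12-bit offset, i.e. -4095..4095.  */
  gcc_assert (arm_legitimate_index_p (SImode, GEN_INT (4095), SET, 0));
  gcc_assert (!arm_legitimate_index_p (SImode, GEN_INT (4096), SET, 0));
  /* DImode: ldrd allows only -255..255; the ldm fallback stops at 4091.  */
  gcc_assert (!arm_legitimate_index_p (DImode, GEN_INT (4092), SET, 0));
}
#endif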
5969
5970 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5971 index operand, i.e. 1, 2, 4 or 8. */
5972 static bool
5973 thumb2_index_mul_operand (rtx op)
5974 {
5975 HOST_WIDE_INT val;
5976
5977 if (!CONST_INT_P (op))
5978 return false;
5979
5980 val = INTVAL(op);
5981 return (val == 1 || val == 2 || val == 4 || val == 8);
5982 }
5983
5984 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5985 static int
5986 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5987 {
5988 enum rtx_code code = GET_CODE (index);
5989
5990 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5991 /* Standard coprocessor addressing modes. */
5992 if (TARGET_HARD_FLOAT
5993 && TARGET_VFP
5994 && (mode == SFmode || mode == DFmode))
5995 return (code == CONST_INT && INTVAL (index) < 1024
5996 /* Thumb-2 allows only > -256 index range for its core register
5997 load/stores. Since we allow SF/DF in core registers, we have
5998 to use the intersection between -256~4096 (core) and -1024~1024
5999 (coprocessor). */
6000 && INTVAL (index) > -256
6001 && (INTVAL (index) & 3) == 0);
6002
6003 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6004 {
6005 /* For DImode assume values will usually live in core regs
6006 and only allow LDRD addressing modes. */
6007 if (!TARGET_LDRD || mode != DImode)
6008 return (code == CONST_INT
6009 && INTVAL (index) < 1024
6010 && INTVAL (index) > -1024
6011 && (INTVAL (index) & 3) == 0);
6012 }
6013
6014 /* For quad modes, we restrict the constant offset to be slightly less
6015 than what the instruction format permits. We do this because for
6016 quad mode moves, we will actually decompose them into two separate
6017 double-mode reads or writes. INDEX must therefore be a valid
6018 (double-mode) offset and so should INDEX+8. */
6019 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6020 return (code == CONST_INT
6021 && INTVAL (index) < 1016
6022 && INTVAL (index) > -1024
6023 && (INTVAL (index) & 3) == 0);
6024
6025 /* We have no such constraint on double mode offsets, so we permit the
6026 full range of the instruction format. */
6027 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6028 return (code == CONST_INT
6029 && INTVAL (index) < 1024
6030 && INTVAL (index) > -1024
6031 && (INTVAL (index) & 3) == 0);
6032
6033 if (arm_address_register_rtx_p (index, strict_p)
6034 && (GET_MODE_SIZE (mode) <= 4))
6035 return 1;
6036
6037 if (mode == DImode || mode == DFmode)
6038 {
6039 if (code == CONST_INT)
6040 {
6041 HOST_WIDE_INT val = INTVAL (index);
6042 /* ??? Can we assume ldrd for thumb2? */
6043 /* Thumb-2 ldrd only has reg+const addressing modes. */
6044 /* ldrd supports offsets of +-1020.
6045 However the ldr fallback does not. */
6046 return val > -256 && val < 256 && (val & 3) == 0;
6047 }
6048 else
6049 return 0;
6050 }
6051
6052 if (code == MULT)
6053 {
6054 rtx xiop0 = XEXP (index, 0);
6055 rtx xiop1 = XEXP (index, 1);
6056
6057 return ((arm_address_register_rtx_p (xiop0, strict_p)
6058 && thumb2_index_mul_operand (xiop1))
6059 || (arm_address_register_rtx_p (xiop1, strict_p)
6060 && thumb2_index_mul_operand (xiop0)));
6061 }
6062 else if (code == ASHIFT)
6063 {
6064 rtx op = XEXP (index, 1);
6065
6066 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6067 && CONST_INT_P (op)
6068 && INTVAL (op) > 0
6069 && INTVAL (op) <= 3);
6070 }
6071
6072 return (code == CONST_INT
6073 && INTVAL (index) < 4096
6074 && INTVAL (index) > -256);
6075 }
6076
6077 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6078 static int
6079 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6080 {
6081 int regno;
6082
6083 if (!REG_P (x))
6084 return 0;
6085
6086 regno = REGNO (x);
6087
6088 if (strict_p)
6089 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6090
6091 return (regno <= LAST_LO_REGNUM
6092 || regno > LAST_VIRTUAL_REGISTER
6093 || regno == FRAME_POINTER_REGNUM
6094 || (GET_MODE_SIZE (mode) >= 4
6095 && (regno == STACK_POINTER_REGNUM
6096 || regno >= FIRST_PSEUDO_REGISTER
6097 || x == hard_frame_pointer_rtx
6098 || x == arg_pointer_rtx)));
6099 }
6100
6101 /* Return nonzero if x is a legitimate index register. This is the case
6102 for any base register that can access a QImode object. */
6103 inline static int
6104 thumb1_index_register_rtx_p (rtx x, int strict_p)
6105 {
6106 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6107 }
6108
6109 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6110
6111 The AP may be eliminated to either the SP or the FP, so we use the
6112 least common denominator, e.g. SImode, and offsets from 0 to 64.
6113
6114 ??? Verify whether the above is the right approach.
6115
6116 ??? Also, the FP may be eliminated to the SP, so perhaps that
6117 needs special handling also.
6118
6119 ??? Look at how the mips16 port solves this problem. It probably uses
6120 better ways to solve some of these problems.
6121
6122 Although it is not incorrect, we don't accept QImode and HImode
6123 addresses based on the frame pointer or arg pointer until the
6124 reload pass starts. This is so that eliminating such addresses
6125 into stack based ones won't produce impossible code. */
6126 int
6127 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6128 {
6129 /* ??? Not clear if this is right. Experiment. */
6130 if (GET_MODE_SIZE (mode) < 4
6131 && !(reload_in_progress || reload_completed)
6132 && (reg_mentioned_p (frame_pointer_rtx, x)
6133 || reg_mentioned_p (arg_pointer_rtx, x)
6134 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6135 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6136 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6137 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6138 return 0;
6139
6140 /* Accept any base register. SP only in SImode or larger. */
6141 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6142 return 1;
6143
6144 /* This is PC relative data before arm_reorg runs. */
6145 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6146 && GET_CODE (x) == SYMBOL_REF
6147 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6148 return 1;
6149
6150 /* This is PC relative data after arm_reorg runs. */
6151 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6152 && reload_completed
6153 && (GET_CODE (x) == LABEL_REF
6154 || (GET_CODE (x) == CONST
6155 && GET_CODE (XEXP (x, 0)) == PLUS
6156 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6157 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6158 return 1;
6159
6160 /* Post-inc indexing only supported for SImode and larger. */
6161 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6162 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6163 return 1;
6164
6165 else if (GET_CODE (x) == PLUS)
6166 {
6167 /* REG+REG address can be any two index registers. */
6168 /* We disallow FRAME+REG addressing since we know that FRAME
6169 will be replaced with STACK, and SP relative addressing only
6170 permits SP+OFFSET. */
6171 if (GET_MODE_SIZE (mode) <= 4
6172 && XEXP (x, 0) != frame_pointer_rtx
6173 && XEXP (x, 1) != frame_pointer_rtx
6174 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6175 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6176 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6177 return 1;
6178
6179 /* REG+const has 5-7 bit offset for non-SP registers. */
6180 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6181 || XEXP (x, 0) == arg_pointer_rtx)
6182 && CONST_INT_P (XEXP (x, 1))
6183 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6184 return 1;
6185
6186 /* REG+const has 10-bit offset for SP, but only SImode and
6187 larger is supported. */
6188 /* ??? Should probably check for DI/DFmode overflow here
6189 just like GO_IF_LEGITIMATE_OFFSET does. */
6190 else if (REG_P (XEXP (x, 0))
6191 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6192 && GET_MODE_SIZE (mode) >= 4
6193 && CONST_INT_P (XEXP (x, 1))
6194 && INTVAL (XEXP (x, 1)) >= 0
6195 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6196 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6197 return 1;
6198
6199 else if (REG_P (XEXP (x, 0))
6200 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6201 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6202 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6203 && REGNO (XEXP (x, 0))
6204 <= LAST_VIRTUAL_POINTER_REGISTER))
6205 && GET_MODE_SIZE (mode) >= 4
6206 && CONST_INT_P (XEXP (x, 1))
6207 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6208 return 1;
6209 }
6210
6211 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6212 && GET_MODE_SIZE (mode) == 4
6213 && GET_CODE (x) == SYMBOL_REF
6214 && CONSTANT_POOL_ADDRESS_P (x)
6215 && ! (flag_pic
6216 && symbol_mentioned_p (get_pool_constant (x))
6217 && ! pcrel_constant_p (get_pool_constant (x))))
6218 return 1;
6219
6220 return 0;
6221 }
6222
6223 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6224 instruction of mode MODE. */
6225 int
6226 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6227 {
6228 switch (GET_MODE_SIZE (mode))
6229 {
6230 case 1:
6231 return val >= 0 && val < 32;
6232
6233 case 2:
6234 return val >= 0 && val < 64 && (val & 1) == 0;
6235
6236 default:
6237 return (val >= 0
6238 && (val + GET_MODE_SIZE (mode)) <= 128
6239 && (val & 3) == 0);
6240 }
6241 }
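
/* Illustrative sketch (not part of the original source; the function name
   is hypothetical): the ranges accepted above, by mode size.  Kept out
   of the build.  */
#if 0
static void
thumb_offset_examples (void)
{
  /* Byte access: unscaled 5-bit offset, 0..31.  */
  gcc_assert (thumb_legitimate_offset_p (QImode, 31));
  gcc_assert (!thumb_legitimate_offset_p (QImode, 32));
  /* Halfword access: 0..62, even offsets only.  */
  gcc_assert (thumb_legitimate_offset_p (HImode, 62));
  gcc_assert (!thumb_legitimate_offset_p (HImode, 31));
  /* Word and larger: offset + size <= 128, multiple of 4.  */
  gcc_assert (thumb_legitimate_offset_p (SImode, 124));
  gcc_assert (!thumb_legitimate_offset_p (SImode, 126));
}
#endif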
6242
6243 bool
6244 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6245 {
6246 if (TARGET_ARM)
6247 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6248 else if (TARGET_THUMB2)
6249 return thumb2_legitimate_address_p (mode, x, strict_p);
6250 else /* if (TARGET_THUMB1) */
6251 return thumb1_legitimate_address_p (mode, x, strict_p);
6252 }
6253
6254 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6255
6256 Given an rtx X being reloaded into a reg required to be
6257 in class CLASS, return the class of reg to actually use.
6258 In general this is just CLASS, but for the Thumb core registers and
6259 immediate constants we prefer a LO_REGS class or a subset. */
6260
6261 static reg_class_t
6262 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6263 {
6264 if (TARGET_32BIT)
6265 return rclass;
6266 else
6267 {
6268 if (rclass == GENERAL_REGS
6269 || rclass == HI_REGS
6270 || rclass == NO_REGS
6271 || rclass == STACK_REG)
6272 return LO_REGS;
6273 else
6274 return rclass;
6275 }
6276 }
6277
6278 /* Build the SYMBOL_REF for __tls_get_addr. */
6279
6280 static GTY(()) rtx tls_get_addr_libfunc;
6281
6282 static rtx
6283 get_tls_get_addr (void)
6284 {
6285 if (!tls_get_addr_libfunc)
6286 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6287 return tls_get_addr_libfunc;
6288 }
6289
6290 rtx
6291 arm_load_tp (rtx target)
6292 {
6293 if (!target)
6294 target = gen_reg_rtx (SImode);
6295
6296 if (TARGET_HARD_TP)
6297 {
6298 /* Can return in any reg. */
6299 emit_insn (gen_load_tp_hard (target));
6300 }
6301 else
6302 {
6303 /* Always returned in r0. Immediately copy the result into a pseudo,
6304 otherwise other uses of r0 (e.g. setting up function arguments) may
6305 clobber the value. */
6306
6307 rtx tmp;
6308
6309 emit_insn (gen_load_tp_soft ());
6310
6311 tmp = gen_rtx_REG (SImode, 0);
6312 emit_move_insn (target, tmp);
6313 }
6314 return target;
6315 }
6316
6317 static rtx
6318 load_tls_operand (rtx x, rtx reg)
6319 {
6320 rtx tmp;
6321
6322 if (reg == NULL_RTX)
6323 reg = gen_reg_rtx (SImode);
6324
6325 tmp = gen_rtx_CONST (SImode, x);
6326
6327 emit_move_insn (reg, tmp);
6328
6329 return reg;
6330 }
6331
6332 static rtx
6333 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6334 {
6335 rtx insns, label, labelno, sum;
6336
6337 gcc_assert (reloc != TLS_DESCSEQ);
6338 start_sequence ();
6339
6340 labelno = GEN_INT (pic_labelno++);
6341 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6342 label = gen_rtx_CONST (VOIDmode, label);
6343
6344 sum = gen_rtx_UNSPEC (Pmode,
6345 gen_rtvec (4, x, GEN_INT (reloc), label,
6346 GEN_INT (TARGET_ARM ? 8 : 4)),
6347 UNSPEC_TLS);
6348 reg = load_tls_operand (sum, reg);
6349
6350 if (TARGET_ARM)
6351 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6352 else
6353 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6354
6355 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6356 LCT_PURE, /* LCT_CONST? */
6357 Pmode, 1, reg, Pmode);
6358
6359 insns = get_insns ();
6360 end_sequence ();
6361
6362 return insns;
6363 }
6364
6365 static rtx
6366 arm_tls_descseq_addr (rtx x, rtx reg)
6367 {
6368 rtx labelno = GEN_INT (pic_labelno++);
6369 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6370 rtx sum = gen_rtx_UNSPEC (Pmode,
6371 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6372 gen_rtx_CONST (VOIDmode, label),
6373 GEN_INT (!TARGET_ARM)),
6374 UNSPEC_TLS);
6375 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6376
6377 emit_insn (gen_tlscall (x, labelno));
6378 if (!reg)
6379 reg = gen_reg_rtx (SImode);
6380 else
6381 gcc_assert (REGNO (reg) != 0);
6382
6383 emit_move_insn (reg, reg0);
6384
6385 return reg;
6386 }
6387
6388 rtx
6389 legitimize_tls_address (rtx x, rtx reg)
6390 {
6391 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6392 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6393
6394 switch (model)
6395 {
6396 case TLS_MODEL_GLOBAL_DYNAMIC:
6397 if (TARGET_GNU2_TLS)
6398 {
6399 reg = arm_tls_descseq_addr (x, reg);
6400
6401 tp = arm_load_tp (NULL_RTX);
6402
6403 dest = gen_rtx_PLUS (Pmode, tp, reg);
6404 }
6405 else
6406 {
6407 /* Original scheme */
6408 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6409 dest = gen_reg_rtx (Pmode);
6410 emit_libcall_block (insns, dest, ret, x);
6411 }
6412 return dest;
6413
6414 case TLS_MODEL_LOCAL_DYNAMIC:
6415 if (TARGET_GNU2_TLS)
6416 {
6417 reg = arm_tls_descseq_addr (x, reg);
6418
6419 tp = arm_load_tp (NULL_RTX);
6420
6421 dest = gen_rtx_PLUS (Pmode, tp, reg);
6422 }
6423 else
6424 {
6425 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6426
6427 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6428 share the LDM result with other LD model accesses. */
6429 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6430 UNSPEC_TLS);
6431 dest = gen_reg_rtx (Pmode);
6432 emit_libcall_block (insns, dest, ret, eqv);
6433
6434 /* Load the addend. */
6435 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6436 GEN_INT (TLS_LDO32)),
6437 UNSPEC_TLS);
6438 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6439 dest = gen_rtx_PLUS (Pmode, dest, addend);
6440 }
6441 return dest;
6442
6443 case TLS_MODEL_INITIAL_EXEC:
6444 labelno = GEN_INT (pic_labelno++);
6445 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6446 label = gen_rtx_CONST (VOIDmode, label);
6447 sum = gen_rtx_UNSPEC (Pmode,
6448 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6449 GEN_INT (TARGET_ARM ? 8 : 4)),
6450 UNSPEC_TLS);
6451 reg = load_tls_operand (sum, reg);
6452
6453 if (TARGET_ARM)
6454 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6455 else if (TARGET_THUMB2)
6456 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6457 else
6458 {
6459 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6460 emit_move_insn (reg, gen_const_mem (SImode, reg));
6461 }
6462
6463 tp = arm_load_tp (NULL_RTX);
6464
6465 return gen_rtx_PLUS (Pmode, tp, reg);
6466
6467 case TLS_MODEL_LOCAL_EXEC:
6468 tp = arm_load_tp (NULL_RTX);
6469
6470 reg = gen_rtx_UNSPEC (Pmode,
6471 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6472 UNSPEC_TLS);
6473 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6474
6475 return gen_rtx_PLUS (Pmode, tp, reg);
6476
6477 default:
6478 abort ();
6479 }
6480 }
6481
6482 /* Try machine-dependent ways of modifying an illegitimate address
6483 to be legitimate. If we find one, return the new, valid address. */
6484 rtx
6485 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6486 {
6487 if (!TARGET_ARM)
6488 {
6489 /* TODO: legitimize_address for Thumb2. */
6490 if (TARGET_THUMB2)
6491 return x;
6492 return thumb_legitimize_address (x, orig_x, mode);
6493 }
6494
6495 if (arm_tls_symbol_p (x))
6496 return legitimize_tls_address (x, NULL_RTX);
6497
6498 if (GET_CODE (x) == PLUS)
6499 {
6500 rtx xop0 = XEXP (x, 0);
6501 rtx xop1 = XEXP (x, 1);
6502
6503 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6504 xop0 = force_reg (SImode, xop0);
6505
6506 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6507 xop1 = force_reg (SImode, xop1);
6508
6509 if (ARM_BASE_REGISTER_RTX_P (xop0)
6510 && CONST_INT_P (xop1))
6511 {
6512 HOST_WIDE_INT n, low_n;
6513 rtx base_reg, val;
6514 n = INTVAL (xop1);
6515
6516 /* VFP addressing modes actually allow greater offsets, but for
6517 now we just stick with the lowest common denominator. */
6518 if (mode == DImode
6519 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6520 {
6521 low_n = n & 0x0f;
6522 n &= ~0x0f;
6523 if (low_n > 4)
6524 {
6525 n += 16;
6526 low_n -= 16;
6527 }
6528 }
6529 else
6530 {
6531 low_n = ((mode) == TImode ? 0
6532 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6533 n -= low_n;
6534 }
6535
6536 base_reg = gen_reg_rtx (SImode);
6537 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6538 emit_move_insn (base_reg, val);
6539 x = plus_constant (Pmode, base_reg, low_n);
6540 }
6541 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6542 x = gen_rtx_PLUS (SImode, xop0, xop1);
6543 }
6544
6545 /* XXX We don't allow MINUS any more -- see comment in
6546 arm_legitimate_address_outer_p (). */
6547 else if (GET_CODE (x) == MINUS)
6548 {
6549 rtx xop0 = XEXP (x, 0);
6550 rtx xop1 = XEXP (x, 1);
6551
6552 if (CONSTANT_P (xop0))
6553 xop0 = force_reg (SImode, xop0);
6554
6555 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6556 xop1 = force_reg (SImode, xop1);
6557
6558 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6559 x = gen_rtx_MINUS (SImode, xop0, xop1);
6560 }
6561
6562 /* Make sure to take full advantage of the pre-indexed addressing mode
6563 with absolute addresses which often allows for the base register to
6564 be factorized for multiple adjacent memory references, and it might
6565 even allow for the minipool to be avoided entirely. */
6566 else if (CONST_INT_P (x) && optimize > 0)
6567 {
6568 unsigned int bits;
6569 HOST_WIDE_INT mask, base, index;
6570 rtx base_reg;
6571
6572 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6573 use an 8-bit index. So let's use a 12-bit index for SImode only and
6574 hope that arm_gen_constant will enable ldrb to use more bits. */
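/* As an illustration (not part of the original comment): for
   x == 0x1F004 in SImode, bits == 12, so base == 0x1F000 and
   index == 4; bit_count (0x1F000) == 5 is not above (32 - 12) / 2,
   so no negative-index adjustment is made and the address becomes
   base_reg + 4.  */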
6575 bits = (mode == SImode) ? 12 : 8;
6576 mask = (1 << bits) - 1;
6577 base = INTVAL (x) & ~mask;
6578 index = INTVAL (x) & mask;
6579 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6580 {
6581 /* It'll most probably be more efficient to generate the base
6582 with more bits set and use a negative index instead. */
6583 base |= mask;
6584 index -= mask;
6585 }
6586 base_reg = force_reg (SImode, GEN_INT (base));
6587 x = plus_constant (Pmode, base_reg, index);
6588 }
6589
6590 if (flag_pic)
6591 {
6592 /* We need to find and carefully transform any SYMBOL and LABEL
6593 references; so go back to the original address expression. */
6594 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6595
6596 if (new_x != orig_x)
6597 x = new_x;
6598 }
6599
6600 return x;
6601 }
6602
6603
6604 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6605 to be legitimate. If we find one, return the new, valid address. */
6606 rtx
6607 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6608 {
6609 if (arm_tls_symbol_p (x))
6610 return legitimize_tls_address (x, NULL_RTX);
6611
6612 if (GET_CODE (x) == PLUS
6613 && CONST_INT_P (XEXP (x, 1))
6614 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6615 || INTVAL (XEXP (x, 1)) < 0))
6616 {
6617 rtx xop0 = XEXP (x, 0);
6618 rtx xop1 = XEXP (x, 1);
6619 HOST_WIDE_INT offset = INTVAL (xop1);
6620
6621 /* Try and fold the offset into a biasing of the base register and
6622 then offsetting that. Don't do this when optimizing for space
6623 since it can cause too many CSEs. */
6624 if (optimize_size && offset >= 0
6625 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6626 {
6627 HOST_WIDE_INT delta;
6628
6629 if (offset >= 256)
6630 delta = offset - (256 - GET_MODE_SIZE (mode));
6631 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6632 delta = 31 * GET_MODE_SIZE (mode);
6633 else
6634 delta = offset & (~31 * GET_MODE_SIZE (mode));
6635
6636 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6637 NULL_RTX);
6638 x = plus_constant (Pmode, xop0, delta);
6639 }
6640 else if (offset < 0 && offset > -256)
6641 /* Small negative offsets are best done with a subtract before the
6642 dereference, forcing these into a register normally takes two
6643 instructions. */
6644 x = force_operand (x, NULL_RTX);
6645 else
6646 {
6647 /* For the remaining cases, force the constant into a register. */
6648 xop1 = force_reg (SImode, xop1);
6649 x = gen_rtx_PLUS (SImode, xop0, xop1);
6650 }
6651 }
6652 else if (GET_CODE (x) == PLUS
6653 && s_register_operand (XEXP (x, 1), SImode)
6654 && !s_register_operand (XEXP (x, 0), SImode))
6655 {
6656 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6657
6658 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6659 }
6660
6661 if (flag_pic)
6662 {
6663 /* We need to find and carefully transform any SYMBOL and LABEL
6664 references; so go back to the original address expression. */
6665 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6666
6667 if (new_x != orig_x)
6668 x = new_x;
6669 }
6670
6671 return x;
6672 }
6673
6674 bool
6675 arm_legitimize_reload_address (rtx *p,
6676 enum machine_mode mode,
6677 int opnum, int type,
6678 int ind_levels ATTRIBUTE_UNUSED)
6679 {
6680 /* We must recognize output that we have already generated ourselves. */
6681 if (GET_CODE (*p) == PLUS
6682 && GET_CODE (XEXP (*p, 0)) == PLUS
6683 && REG_P (XEXP (XEXP (*p, 0), 0))
6684 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
6685 && CONST_INT_P (XEXP (*p, 1)))
6686 {
6687 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6688 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6689 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6690 return true;
6691 }
6692
6693 if (GET_CODE (*p) == PLUS
6694 && REG_P (XEXP (*p, 0))
6695 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6696 /* If the base register is equivalent to a constant, let the generic
6697 code handle it. Otherwise we will run into problems if a future
6698 reload pass decides to rematerialize the constant. */
6699 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6700 && CONST_INT_P (XEXP (*p, 1)))
6701 {
6702 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6703 HOST_WIDE_INT low, high;
6704
6705 /* Detect coprocessor load/stores. */
6706 bool coproc_p = ((TARGET_HARD_FLOAT
6707 && TARGET_VFP
6708 && (mode == SFmode || mode == DFmode))
6709 || (TARGET_REALLY_IWMMXT
6710 && VALID_IWMMXT_REG_MODE (mode))
6711 || (TARGET_NEON
6712 && (VALID_NEON_DREG_MODE (mode)
6713 || VALID_NEON_QREG_MODE (mode))));
6714
6715 /* For some conditions, bail out when lower two bits are unaligned. */
6716 if ((val & 0x3) != 0
6717 /* Coprocessor load/store indexes are 8 bits + '00' appended. */
6718 && (coproc_p
6719 /* For DI, and DF under soft-float: */
6720 || ((mode == DImode || mode == DFmode)
6721 /* Without ldrd, we use stm/ldm, which does not
6722 fare well with unaligned bits. */
6723 && (! TARGET_LDRD
6724 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6725 || TARGET_THUMB2))))
6726 return false;
6727
6728 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6729 where the (reg+high) part gets turned into a reload add insn,
6730 we try to decompose the index into high/low values that can often
6731 also lead to better reload CSE.
6732 For example:
6733 ldr r0, [r2, #4100] // Offset too large
6734 ldr r1, [r2, #4104] // Offset too large
6735
6736 is best reloaded as:
6737 add t1, r2, #4096
6738 ldr r0, [t1, #4]
6739 add t2, r2, #4096
6740 ldr r1, [t2, #8]
6741
6742 which post-reload CSE can simplify in most cases to eliminate the
6743 second add instruction:
6744 add t1, r2, #4096
6745 ldr r0, [t1, #4]
6746 ldr r1, [t1, #8]
6747
6748 The idea here is that we want to split out the bits of the constant
6749 as a mask, rather than by subtracting the maximum offset that the
6750 respective type of load/store used can handle.
6751
6752 Even when the overall offset is positive, it can pay to use a negative
6753 low part; sometimes this leads to an immediate that can be constructed
6754 with fewer instructions.
6755 For example:
6756 ldr r0, [r2, #0x3FFFFC]
6757
6758 This is best reloaded as:
6759 add t1, r2, #0x400000
6760 ldr r0, [t1, #-4]
6761
6762 The trick for spotting this for a load insn with N bits of offset
6763 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6764 negative offset that is going to make bit N and all the bits below
6765 it become zero in the remainder part.
6766
6767 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6768 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6769 used in most cases of ARM load/store instructions. */
6770
6771 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6772 (((VAL) & ((1 << (N)) - 1)) \
6773 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6774 : 0)
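
/* Worked example (illustrative only): for VAL = 0x3FFFFC and N = 12 (an
   SImode ldr in ARM state), VAL & 0xfff is nonzero, so the result is
   (0x1ffc ^ 0x1000) - 0x1000 = 0xffc - 0x1000 = -4, leaving a high part
   of 0x3FFFFC - (-4) = 0x400000: exactly the add/ldr pair shown above.  */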
6775
6776 if (coproc_p)
6777 {
6778 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6779
6780 /* NEON quad-word load/stores are made of two double-word accesses,
6781 so the valid index range is reduced by 8. Treat as 9-bit range if
6782 we go over it. */
6783 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6784 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6785 }
6786 else if (GET_MODE_SIZE (mode) == 8)
6787 {
6788 if (TARGET_LDRD)
6789 low = (TARGET_THUMB2
6790 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6791 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6792 else
6793 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6794 to access doublewords. The supported load/store offsets are
6795 -8, -4, and 4, which we try to produce here. */
6796 low = ((val & 0xf) ^ 0x8) - 0x8;
6797 }
6798 else if (GET_MODE_SIZE (mode) < 8)
6799 {
6800 /* NEON element load/stores do not have an offset. */
6801 if (TARGET_NEON_FP16 && mode == HFmode)
6802 return false;
6803
6804 if (TARGET_THUMB2)
6805 {
6806 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6807 Try the wider 12-bit range first, and re-try if the result
6808 is out of range. */
6809 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6810 if (low < -255)
6811 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6812 }
6813 else
6814 {
6815 if (mode == HImode || mode == HFmode)
6816 {
6817 if (arm_arch4)
6818 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6819 else
6820 {
6821 /* The storehi/movhi_bytes fallbacks can use only
6822 [-4094,+4094] of the full ldrb/strb index range. */
6823 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6824 if (low == 4095 || low == -4095)
6825 return false;
6826 }
6827 }
6828 else
6829 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6830 }
6831 }
6832 else
6833 return false;
6834
6835 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6836 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6837 - (unsigned HOST_WIDE_INT) 0x80000000);
6838 /* Check for overflow or zero */
6839 if (low == 0 || high == 0 || (high + low != val))
6840 return false;
6841
6842 /* Reload the high part into a base reg; leave the low part
6843 in the mem. */
6844 *p = gen_rtx_PLUS (GET_MODE (*p),
6845 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6846 GEN_INT (high)),
6847 GEN_INT (low));
6848 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6849 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6850 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6851 return true;
6852 }
6853
6854 return false;
6855 }
6856
6857 rtx
6858 thumb_legitimize_reload_address (rtx *x_p,
6859 enum machine_mode mode,
6860 int opnum, int type,
6861 int ind_levels ATTRIBUTE_UNUSED)
6862 {
6863 rtx x = *x_p;
6864
6865 if (GET_CODE (x) == PLUS
6866 && GET_MODE_SIZE (mode) < 4
6867 && REG_P (XEXP (x, 0))
6868 && XEXP (x, 0) == stack_pointer_rtx
6869 && CONST_INT_P (XEXP (x, 1))
6870 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6871 {
6872 rtx orig_x = x;
6873
6874 x = copy_rtx (x);
6875 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6876 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6877 return x;
6878 }
6879
6880 /* If both registers are hi-regs, then it's better to reload the
6881 entire expression rather than each register individually. That
6882 only requires one reload register rather than two. */
6883 if (GET_CODE (x) == PLUS
6884 && REG_P (XEXP (x, 0))
6885 && REG_P (XEXP (x, 1))
6886 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6887 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6888 {
6889 rtx orig_x = x;
6890
6891 x = copy_rtx (x);
6892 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6893 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6894 return x;
6895 }
6896
6897 return NULL;
6898 }
6899
6900 /* Test for various thread-local symbols. */
6901
6902 /* Return TRUE if X is a thread-local symbol. */
6903
6904 static bool
6905 arm_tls_symbol_p (rtx x)
6906 {
6907 if (! TARGET_HAVE_TLS)
6908 return false;
6909
6910 if (GET_CODE (x) != SYMBOL_REF)
6911 return false;
6912
6913 return SYMBOL_REF_TLS_MODEL (x) != 0;
6914 }
6915
6916 /* Helper for arm_tls_referenced_p. */
6917
6918 static int
6919 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6920 {
6921 if (GET_CODE (*x) == SYMBOL_REF)
6922 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6923
6924 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6925 TLS offsets, not real symbol references. */
6926 if (GET_CODE (*x) == UNSPEC
6927 && XINT (*x, 1) == UNSPEC_TLS)
6928 return -1;
6929
6930 return 0;
6931 }
6932
6933 /* Return TRUE if X contains any TLS symbol references. */
6934
6935 bool
6936 arm_tls_referenced_p (rtx x)
6937 {
6938 if (! TARGET_HAVE_TLS)
6939 return false;
6940
6941 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6942 }
6943
6944 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6945
6946 On the ARM, allow any integer (invalid ones are removed later by insn
6947 patterns), nice doubles and symbol_refs which refer to the function's
6948 constant pool XXX.
6949
6950 When generating PIC, allow anything. */
6951
6952 static bool
6953 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6954 {
6955 /* At present, we have no support for Neon structure constants, so forbid
6956 them here. It might be possible to handle simple cases like 0 and -1
6957 in future. */
6958 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6959 return false;
6960
6961 return flag_pic || !label_mentioned_p (x);
6962 }
6963
6964 static bool
6965 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6966 {
6967 return (CONST_INT_P (x)
6968 || CONST_DOUBLE_P (x)
6969 || CONSTANT_ADDRESS_P (x)
6970 || flag_pic);
6971 }
6972
6973 static bool
6974 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6975 {
6976 return (!arm_cannot_force_const_mem (mode, x)
6977 && (TARGET_32BIT
6978 ? arm_legitimate_constant_p_1 (mode, x)
6979 : thumb_legitimate_constant_p (mode, x)));
6980 }
6981
6982 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6983
6984 static bool
6985 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6986 {
6987 rtx base, offset;
6988
6989 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6990 {
6991 split_const (x, &base, &offset);
6992 if (GET_CODE (base) == SYMBOL_REF
6993 && !offset_within_block_p (base, INTVAL (offset)))
6994 return true;
6995 }
6996 return arm_tls_referenced_p (x);
6997 }
6998 \f
6999 #define REG_OR_SUBREG_REG(X) \
7000 (REG_P (X) \
7001 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7002
7003 #define REG_OR_SUBREG_RTX(X) \
7004 (REG_P (X) ? (X) : SUBREG_REG (X))
7005
7006 static inline int
7007 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7008 {
7009 enum machine_mode mode = GET_MODE (x);
7010 int total;
7011
7012 switch (code)
7013 {
7014 case ASHIFT:
7015 case ASHIFTRT:
7016 case LSHIFTRT:
7017 case ROTATERT:
7018 case PLUS:
7019 case MINUS:
7020 case COMPARE:
7021 case NEG:
7022 case NOT:
7023 return COSTS_N_INSNS (1);
7024
7025 case MULT:
7026 if (CONST_INT_P (XEXP (x, 1)))
7027 {
7028 int cycles = 0;
7029 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7030
7031 while (i)
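/* Estimate one cycle per two bits of the constant's length; e.g. a
   7-bit constant counts as 4 cycles on top of COSTS_N_INSNS (2).  */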
7032 {
7033 i >>= 2;
7034 cycles++;
7035 }
7036 return COSTS_N_INSNS (2) + cycles;
7037 }
7038 return COSTS_N_INSNS (1) + 16;
7039
7040 case SET:
7041 return (COSTS_N_INSNS (1)
7042 + 4 * ((MEM_P (SET_SRC (x)))
7043 + MEM_P (SET_DEST (x))));
7044
7045 case CONST_INT:
7046 if (outer == SET)
7047 {
7048 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7049 return 0;
7050 if (thumb_shiftable_const (INTVAL (x)))
7051 return COSTS_N_INSNS (2);
7052 return COSTS_N_INSNS (3);
7053 }
7054 else if ((outer == PLUS || outer == COMPARE)
7055 && INTVAL (x) < 256 && INTVAL (x) > -256)
7056 return 0;
7057 else if ((outer == IOR || outer == XOR || outer == AND)
7058 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7059 return COSTS_N_INSNS (1);
7060 else if (outer == AND)
7061 {
7062 int i;
7063 /* This duplicates the tests in the andsi3 expander. */
7064 for (i = 9; i <= 31; i++)
7065 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7066 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7067 return COSTS_N_INSNS (2);
7068 }
7069 else if (outer == ASHIFT || outer == ASHIFTRT
7070 || outer == LSHIFTRT)
7071 return 0;
7072 return COSTS_N_INSNS (2);
7073
7074 case CONST:
7075 case CONST_DOUBLE:
7076 case LABEL_REF:
7077 case SYMBOL_REF:
7078 return COSTS_N_INSNS (3);
7079
7080 case UDIV:
7081 case UMOD:
7082 case DIV:
7083 case MOD:
7084 return 100;
7085
7086 case TRUNCATE:
7087 return 99;
7088
7089 case AND:
7090 case XOR:
7091 case IOR:
7092 /* XXX guess. */
7093 return 8;
7094
7095 case MEM:
7096 /* XXX another guess. */
7097 /* Memory costs quite a lot for the first word, but subsequent words
7098 load at the equivalent of a single insn each. */
7099 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7100 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7101 ? 4 : 0));
7102
7103 case IF_THEN_ELSE:
7104 /* XXX a guess. */
7105 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7106 return 14;
7107 return 2;
7108
7109 case SIGN_EXTEND:
7110 case ZERO_EXTEND:
7111 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7112 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7113
7114 if (mode == SImode)
7115 return total;
7116
7117 if (arm_arch6)
7118 return total + COSTS_N_INSNS (1);
7119
7120 /* Assume a two-shift sequence. Increase the cost slightly so
7121 we prefer actual shifts over an extend operation. */
7122 return total + 1 + COSTS_N_INSNS (2);
7123
7124 default:
7125 return 99;
7126 }
7127 }
7128
7129 static inline bool
7130 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7131 {
7132 enum machine_mode mode = GET_MODE (x);
7133 enum rtx_code subcode;
7134 rtx operand;
7135 enum rtx_code code = GET_CODE (x);
7136 *total = 0;
7137
7138 switch (code)
7139 {
7140 case MEM:
7141 /* Memory costs quite a lot for the first word, but subsequent words
7142 load at the equivalent of a single insn each. */
7143 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7144 return true;
7145
7146 case DIV:
7147 case MOD:
7148 case UDIV:
7149 case UMOD:
7150 if (TARGET_HARD_FLOAT && mode == SFmode)
7151 *total = COSTS_N_INSNS (2);
7152 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7153 *total = COSTS_N_INSNS (4);
7154 else
7155 *total = COSTS_N_INSNS (20);
7156 return false;
7157
7158 case ROTATE:
7159 if (REG_P (XEXP (x, 1)))
7160 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7161 else if (!CONST_INT_P (XEXP (x, 1)))
7162 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7163
7164 /* Fall through */
7165 case ROTATERT:
7166 if (mode != SImode)
7167 {
7168 *total += COSTS_N_INSNS (4);
7169 return true;
7170 }
7171
7172 /* Fall through */
7173 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7174 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7175 if (mode == DImode)
7176 {
7177 *total += COSTS_N_INSNS (3);
7178 return true;
7179 }
7180
7181 *total += COSTS_N_INSNS (1);
7182 /* Increase the cost of complex shifts because they aren't any faster,
7183 and reduce dual issue opportunities. */
7184 if (arm_tune_cortex_a9
7185 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
7186 ++*total;
7187
7188 return true;
7189
7190 case MINUS:
7191 if (mode == DImode)
7192 {
7193 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7194 if (CONST_INT_P (XEXP (x, 0))
7195 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7196 {
7197 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7198 return true;
7199 }
7200
7201 if (CONST_INT_P (XEXP (x, 1))
7202 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7203 {
7204 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7205 return true;
7206 }
7207
7208 return false;
7209 }
7210
7211 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7212 {
7213 if (TARGET_HARD_FLOAT
7214 && (mode == SFmode
7215 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7216 {
7217 *total = COSTS_N_INSNS (1);
7218 if (CONST_DOUBLE_P (XEXP (x, 0))
7219 && arm_const_double_rtx (XEXP (x, 0)))
7220 {
7221 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7222 return true;
7223 }
7224
7225 if (CONST_DOUBLE_P (XEXP (x, 1))
7226 && arm_const_double_rtx (XEXP (x, 1)))
7227 {
7228 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7229 return true;
7230 }
7231
7232 return false;
7233 }
7234 *total = COSTS_N_INSNS (20);
7235 return false;
7236 }
7237
7238 *total = COSTS_N_INSNS (1);
7239 if (CONST_INT_P (XEXP (x, 0))
7240 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7241 {
7242 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7243 return true;
7244 }
7245
7246 subcode = GET_CODE (XEXP (x, 1));
7247 if (subcode == ASHIFT || subcode == ASHIFTRT
7248 || subcode == LSHIFTRT
7249 || subcode == ROTATE || subcode == ROTATERT)
7250 {
7251 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7252 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7253 return true;
7254 }
7255
7256 /* A shift as a part of RSB costs no more than RSB itself. */
7257 if (GET_CODE (XEXP (x, 0)) == MULT
7258 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7259 {
7260 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7261 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7262 return true;
7263 }
7264
7265 if (subcode == MULT
7266 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7267 {
7268 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7269 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7270 return true;
7271 }
7272
7273 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7274 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7275 {
7276 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7277 if (REG_P (XEXP (XEXP (x, 1), 0))
7278 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7279 *total += COSTS_N_INSNS (1);
7280
7281 return true;
7282 }
7283
7284 /* Fall through */
7285
7286 case PLUS:
7287 if (code == PLUS && arm_arch6 && mode == SImode
7288 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7289 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7290 {
7291 *total = COSTS_N_INSNS (1);
7292 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7293 0, speed);
7294 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7295 return true;
7296 }
7297
7298 /* MLA: All arguments must be registers. We filter out
7299 multiplication by a power of two, so that we fall down into
7300 the code below. */
7301 if (GET_CODE (XEXP (x, 0)) == MULT
7302 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7303 {
7304 /* The cost comes from the cost of the multiply. */
7305 return false;
7306 }
7307
7308 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7309 {
7310 if (TARGET_HARD_FLOAT
7311 && (mode == SFmode
7312 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7313 {
7314 *total = COSTS_N_INSNS (1);
7315 if (CONST_DOUBLE_P (XEXP (x, 1))
7316 && arm_const_double_rtx (XEXP (x, 1)))
7317 {
7318 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7319 return true;
7320 }
7321
7322 return false;
7323 }
7324
7325 *total = COSTS_N_INSNS (20);
7326 return false;
7327 }
7328
7329 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7330 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7331 {
7332 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7333 if (REG_P (XEXP (XEXP (x, 0), 0))
7334 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7335 *total += COSTS_N_INSNS (1);
7336 return true;
7337 }
7338
7339 /* Fall through */
7340
7341 case AND: case XOR: case IOR:
7342
7343     /* Normally the frame registers will be split into reg+const during
7344 reload, so it is a bad idea to combine them with other instructions,
7345 since then they might not be moved outside of loops. As a compromise
7346 we allow integration with ops that have a constant as their second
7347 operand. */
7348 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7349 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7350 && !CONST_INT_P (XEXP (x, 1)))
7351 *total = COSTS_N_INSNS (1);
7352
7353 if (mode == DImode)
7354 {
7355 *total += COSTS_N_INSNS (2);
7356 if (CONST_INT_P (XEXP (x, 1))
7357 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7358 {
7359 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7360 return true;
7361 }
7362
7363 return false;
7364 }
7365
7366 *total += COSTS_N_INSNS (1);
7367 if (CONST_INT_P (XEXP (x, 1))
7368 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7369 {
7370 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7371 return true;
7372 }
7373 subcode = GET_CODE (XEXP (x, 0));
7374 if (subcode == ASHIFT || subcode == ASHIFTRT
7375 || subcode == LSHIFTRT
7376 || subcode == ROTATE || subcode == ROTATERT)
7377 {
7378 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7379 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7380 return true;
7381 }
7382
7383 if (subcode == MULT
7384 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7385 {
7386 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7387 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7388 return true;
7389 }
7390
7391 if (subcode == UMIN || subcode == UMAX
7392 || subcode == SMIN || subcode == SMAX)
7393 {
7394 *total = COSTS_N_INSNS (3);
7395 return true;
7396 }
7397
7398 return false;
7399
7400 case MULT:
7401 /* This should have been handled by the CPU specific routines. */
7402 gcc_unreachable ();
7403
7404 case TRUNCATE:
7405 if (arm_arch3m && mode == SImode
7406 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7407 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7408 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7409 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7410 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7411 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7412 {
7413 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7414 return true;
7415 }
7416 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7417 return false;
7418
7419 case NEG:
7420 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7421 {
7422 if (TARGET_HARD_FLOAT
7423 && (mode == SFmode
7424 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7425 {
7426 *total = COSTS_N_INSNS (1);
7427 return false;
7428 }
7429 *total = COSTS_N_INSNS (2);
7430 return false;
7431 }
7432
7433 /* Fall through */
7434 case NOT:
7435       *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7436 if (mode == SImode && code == NOT)
7437 {
7438 subcode = GET_CODE (XEXP (x, 0));
7439 if (subcode == ASHIFT || subcode == ASHIFTRT
7440 || subcode == LSHIFTRT
7441 || subcode == ROTATE || subcode == ROTATERT
7442 || (subcode == MULT
7443 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7444 {
7445 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7446 /* Register shifts cost an extra cycle. */
7447 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
7448 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7449 subcode, 1, speed);
7450 return true;
7451 }
7452 }
7453
7454 return false;
7455
7456 case IF_THEN_ELSE:
7457 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7458 {
7459 *total = COSTS_N_INSNS (4);
7460 return true;
7461 }
7462
7463 operand = XEXP (x, 0);
7464
7465 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7466 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7467 && REG_P (XEXP (operand, 0))
7468 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7469 *total += COSTS_N_INSNS (1);
7470 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7471 + rtx_cost (XEXP (x, 2), code, 2, speed));
7472 return true;
7473
7474 case NE:
7475 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7476 {
7477 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7478 return true;
7479 }
7480 goto scc_insn;
7481
7482 case GE:
7483 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7484 && mode == SImode && XEXP (x, 1) == const0_rtx)
7485 {
7486 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7487 return true;
7488 }
7489 goto scc_insn;
7490
7491 case LT:
7492 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
7493 && mode == SImode && XEXP (x, 1) == const0_rtx)
7494 {
7495 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7496 return true;
7497 }
7498 goto scc_insn;
7499
7500 case EQ:
7501 case GT:
7502 case LE:
7503 case GEU:
7504 case LTU:
7505 case GTU:
7506 case LEU:
7507 case UNORDERED:
7508 case ORDERED:
7509 case UNEQ:
7510 case UNGE:
7511 case UNLT:
7512 case UNGT:
7513 case UNLE:
7514 scc_insn:
7515       /* SCC insns.  If the comparison has already been performed, they
7516 	 cost 2 instructions.  Otherwise they need an additional comparison
7517 	 before them.  */
7518 *total = COSTS_N_INSNS (2);
7519 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7520 {
7521 return true;
7522 }
7523
7524 /* Fall through */
7525 case COMPARE:
7526 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
7527 {
7528 *total = 0;
7529 return true;
7530 }
7531
7532 *total += COSTS_N_INSNS (1);
7533 if (CONST_INT_P (XEXP (x, 1))
7534 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7535 {
7536 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7537 return true;
7538 }
7539
7540 subcode = GET_CODE (XEXP (x, 0));
7541 if (subcode == ASHIFT || subcode == ASHIFTRT
7542 || subcode == LSHIFTRT
7543 || subcode == ROTATE || subcode == ROTATERT)
7544 {
7545 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7546 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7547 return true;
7548 }
7549
7550 if (subcode == MULT
7551 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7552 {
7553 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7554 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7555 return true;
7556 }
7557
7558 return false;
7559
7560 case UMIN:
7561 case UMAX:
7562 case SMIN:
7563 case SMAX:
7564 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7565 if (!CONST_INT_P (XEXP (x, 1))
7566 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7567 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7568 return true;
7569
7570 case ABS:
7571 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7572 {
7573 if (TARGET_HARD_FLOAT
7574 && (mode == SFmode
7575 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7576 {
7577 *total = COSTS_N_INSNS (1);
7578 return false;
7579 }
7580 *total = COSTS_N_INSNS (20);
7581 return false;
7582 }
7583 *total = COSTS_N_INSNS (1);
7584 if (mode == DImode)
7585 *total += COSTS_N_INSNS (3);
7586 return false;
7587
7588 case SIGN_EXTEND:
7589 case ZERO_EXTEND:
7590 *total = 0;
7591 if (GET_MODE_CLASS (mode) == MODE_INT)
7592 {
7593 rtx op = XEXP (x, 0);
7594 enum machine_mode opmode = GET_MODE (op);
7595
7596 if (mode == DImode)
7597 *total += COSTS_N_INSNS (1);
7598
7599 if (opmode != SImode)
7600 {
7601 if (MEM_P (op))
7602 {
7603 /* If !arm_arch4, we use one of the extendhisi2_mem
7604 or movhi_bytes patterns for HImode. For a QImode
7605 sign extension, we first zero-extend from memory
7606 and then perform a shift sequence. */
7607 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7608 *total += COSTS_N_INSNS (2);
7609 }
7610 else if (arm_arch6)
7611 *total += COSTS_N_INSNS (1);
7612
7613 /* We don't have the necessary insn, so we need to perform some
7614 other operation. */
7615 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7616 /* An and with constant 255. */
7617 *total += COSTS_N_INSNS (1);
7618 else
7619 /* A shift sequence. Increase costs slightly to avoid
7620 combining two shifts into an extend operation. */
7621 *total += COSTS_N_INSNS (2) + 1;
7622 }
7623
7624 return false;
7625 }
7626
7627 switch (GET_MODE (XEXP (x, 0)))
7628 {
7629 case V8QImode:
7630 case V4HImode:
7631 case V2SImode:
7632 case V4QImode:
7633 case V2HImode:
7634 *total = COSTS_N_INSNS (1);
7635 return false;
7636
7637 default:
7638 gcc_unreachable ();
7639 }
7640 gcc_unreachable ();
7641
7642 case ZERO_EXTRACT:
7643 case SIGN_EXTRACT:
7644 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7645 return true;
7646
7647 case CONST_INT:
7648 if (const_ok_for_arm (INTVAL (x))
7649 || const_ok_for_arm (~INTVAL (x)))
7650 *total = COSTS_N_INSNS (1);
7651 else
7652 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7653 INTVAL (x), NULL_RTX,
7654 NULL_RTX, 0, 0));
7655 return true;
7656
7657 case CONST:
7658 case LABEL_REF:
7659 case SYMBOL_REF:
7660 *total = COSTS_N_INSNS (3);
7661 return true;
7662
7663 case HIGH:
7664 *total = COSTS_N_INSNS (1);
7665 return true;
7666
7667 case LO_SUM:
7668 *total = COSTS_N_INSNS (1);
7669 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7670 return true;
7671
7672 case CONST_DOUBLE:
7673 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7674 && (mode == SFmode || !TARGET_VFP_SINGLE))
7675 *total = COSTS_N_INSNS (1);
7676 else
7677 *total = COSTS_N_INSNS (4);
7678 return true;
7679
7680 case SET:
7681 /* The vec_extract patterns accept memory operands that require an
7682 address reload. Account for the cost of that reload to give the
7683 auto-inc-dec pass an incentive to try to replace them. */
7684 if (TARGET_NEON && MEM_P (SET_DEST (x))
7685 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
7686 {
7687 *total = rtx_cost (SET_DEST (x), code, 0, speed);
7688 if (!neon_vector_mem_operand (SET_DEST (x), 2))
7689 *total += COSTS_N_INSNS (1);
7690 return true;
7691 }
7692 /* Likewise for the vec_set patterns. */
7693 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
7694 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
7695 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
7696 {
7697 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
7698 *total = rtx_cost (mem, code, 0, speed);
7699 if (!neon_vector_mem_operand (mem, 2))
7700 *total += COSTS_N_INSNS (1);
7701 return true;
7702 }
7703 return false;
7704
7705 case UNSPEC:
7706 /* We cost this as high as our memory costs to allow this to
7707 be hoisted from loops. */
7708 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7709 {
7710 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7711 }
7712 return true;
7713
7714 case CONST_VECTOR:
7715 if (TARGET_NEON
7716 && TARGET_HARD_FLOAT
7717 && outer == SET
7718 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
7719 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
7720 *total = COSTS_N_INSNS (1);
7721 else
7722 *total = COSTS_N_INSNS (4);
7723 return true;
7724
7725 default:
7726 *total = COSTS_N_INSNS (4);
7727 return false;
7728 }
7729 }
7730
7731 /* Estimates the size cost of thumb1 instructions.
7732 For now most of the code is copied from thumb1_rtx_costs. We need more
7733    fine-grained tuning when we have more related test cases.  */
7734 static inline int
7735 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7736 {
7737 enum machine_mode mode = GET_MODE (x);
7738
7739 switch (code)
7740 {
7741 case ASHIFT:
7742 case ASHIFTRT:
7743 case LSHIFTRT:
7744 case ROTATERT:
7745 case PLUS:
7746 case MINUS:
7747 case COMPARE:
7748 case NEG:
7749 case NOT:
7750 return COSTS_N_INSNS (1);
7751
7752 case MULT:
7753 if (CONST_INT_P (XEXP (x, 1)))
7754 {
7755           /* The Thumb-1 mul instruction can't operate on a constant; we must
7756              load it into a register first.  */
7757 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7758 return COSTS_N_INSNS (1) + const_size;
7759 }
7760 return COSTS_N_INSNS (1);
7761
7762 case SET:
7763 return (COSTS_N_INSNS (1)
7764 + 4 * ((MEM_P (SET_SRC (x)))
7765 + MEM_P (SET_DEST (x))));
7766
7767 case CONST_INT:
7768 if (outer == SET)
7769 {
7770 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7771 return COSTS_N_INSNS (1);
7772 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7773 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7774 return COSTS_N_INSNS (2);
7775 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7776 if (thumb_shiftable_const (INTVAL (x)))
7777 return COSTS_N_INSNS (2);
7778 return COSTS_N_INSNS (3);
7779 }
7780 else if ((outer == PLUS || outer == COMPARE)
7781 && INTVAL (x) < 256 && INTVAL (x) > -256)
7782 return 0;
7783 else if ((outer == IOR || outer == XOR || outer == AND)
7784 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7785 return COSTS_N_INSNS (1);
7786 else if (outer == AND)
7787 {
7788 int i;
7789 /* This duplicates the tests in the andsi3 expander. */
7790 for (i = 9; i <= 31; i++)
7791 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7792 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7793 return COSTS_N_INSNS (2);
7794 }
7795 else if (outer == ASHIFT || outer == ASHIFTRT
7796 || outer == LSHIFTRT)
7797 return 0;
7798 return COSTS_N_INSNS (2);
7799
7800 case CONST:
7801 case CONST_DOUBLE:
7802 case LABEL_REF:
7803 case SYMBOL_REF:
7804 return COSTS_N_INSNS (3);
7805
7806 case UDIV:
7807 case UMOD:
7808 case DIV:
7809 case MOD:
7810 return 100;
7811
7812 case TRUNCATE:
7813 return 99;
7814
7815 case AND:
7816 case XOR:
7817 case IOR:
7818 /* XXX guess. */
7819 return 8;
7820
7821 case MEM:
7822 /* XXX another guess. */
7823 /* Memory costs quite a lot for the first word, but subsequent words
7824 load at the equivalent of a single insn each. */
7825 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7826 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7827 ? 4 : 0));
7828
7829 case IF_THEN_ELSE:
7830 /* XXX a guess. */
7831 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7832 return 14;
7833 return 2;
7834
7835 case ZERO_EXTEND:
7836 /* XXX still guessing. */
7837 switch (GET_MODE (XEXP (x, 0)))
7838 {
7839 case QImode:
7840 return (1 + (mode == DImode ? 4 : 0)
7841 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7842
7843 case HImode:
7844 return (4 + (mode == DImode ? 4 : 0)
7845 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7846
7847 case SImode:
7848 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
7849
7850 default:
7851 return 99;
7852 }
7853
7854 default:
7855 return 99;
7856 }
7857 }
7858
7859 /* RTX costs when optimizing for size. */
7860 static bool
7861 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7862 int *total)
7863 {
7864 enum machine_mode mode = GET_MODE (x);
7865 if (TARGET_THUMB1)
7866 {
7867 *total = thumb1_size_rtx_costs (x, code, outer_code);
7868 return true;
7869 }
7870
7871 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7872 switch (code)
7873 {
7874 case MEM:
7875 /* A memory access costs 1 insn if the mode is small, or the address is
7876 a single register, otherwise it costs one insn per word. */
7877 if (REG_P (XEXP (x, 0)))
7878 *total = COSTS_N_INSNS (1);
7879 else if (flag_pic
7880 && GET_CODE (XEXP (x, 0)) == PLUS
7881 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7882 /* This will be split into two instructions.
7883 See arm.md:calculate_pic_address. */
7884 *total = COSTS_N_INSNS (2);
7885 else
7886 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7887 return true;
7888
7889 case DIV:
7890 case MOD:
7891 case UDIV:
7892 case UMOD:
7893 /* Needs a libcall, so it costs about this. */
7894 *total = COSTS_N_INSNS (2);
7895 return false;
7896
7897 case ROTATE:
7898 if (mode == SImode && REG_P (XEXP (x, 1)))
7899 {
7900 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7901 return true;
7902 }
7903 /* Fall through */
7904 case ROTATERT:
7905 case ASHIFT:
7906 case LSHIFTRT:
7907 case ASHIFTRT:
7908 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
7909 {
7910 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7911 return true;
7912 }
7913 else if (mode == SImode)
7914 {
7915 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7916 /* Slightly disparage register shifts, but not by much. */
7917 if (!CONST_INT_P (XEXP (x, 1)))
7918 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7919 return true;
7920 }
7921
7922 /* Needs a libcall. */
7923 *total = COSTS_N_INSNS (2);
7924 return false;
7925
7926 case MINUS:
7927 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7928 && (mode == SFmode || !TARGET_VFP_SINGLE))
7929 {
7930 *total = COSTS_N_INSNS (1);
7931 return false;
7932 }
7933
7934 if (mode == SImode)
7935 {
7936 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7937 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7938
7939 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7940 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7941 || subcode1 == ROTATE || subcode1 == ROTATERT
7942 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7943 || subcode1 == ASHIFTRT)
7944 {
7945 /* It's just the cost of the two operands. */
7946 *total = 0;
7947 return false;
7948 }
7949
7950 *total = COSTS_N_INSNS (1);
7951 return false;
7952 }
7953
7954 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7955 return false;
7956
7957 case PLUS:
7958 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7959 && (mode == SFmode || !TARGET_VFP_SINGLE))
7960 {
7961 *total = COSTS_N_INSNS (1);
7962 return false;
7963 }
7964
7965 /* A shift as a part of ADD costs nothing. */
7966 if (GET_CODE (XEXP (x, 0)) == MULT
7967 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7968 {
7969 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7970 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7971 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7972 return true;
7973 }
7974
7975 /* Fall through */
7976 case AND: case XOR: case IOR:
7977 if (mode == SImode)
7978 {
7979 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7980
7981 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7982 || subcode == LSHIFTRT || subcode == ASHIFTRT
7983 || (code == AND && subcode == NOT))
7984 {
7985 /* It's just the cost of the two operands. */
7986 *total = 0;
7987 return false;
7988 }
7989 }
7990
7991 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7992 return false;
7993
7994 case MULT:
7995 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7996 return false;
7997
7998 case NEG:
7999 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8000 && (mode == SFmode || !TARGET_VFP_SINGLE))
8001 {
8002 *total = COSTS_N_INSNS (1);
8003 return false;
8004 }
8005
8006 /* Fall through */
8007 case NOT:
8008 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8009
8010 return false;
8011
8012 case IF_THEN_ELSE:
8013 *total = 0;
8014 return false;
8015
8016 case COMPARE:
8017 if (cc_register (XEXP (x, 0), VOIDmode))
8018         *total = 0;
8019 else
8020 *total = COSTS_N_INSNS (1);
8021 return false;
8022
8023 case ABS:
8024 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8025 && (mode == SFmode || !TARGET_VFP_SINGLE))
8026 *total = COSTS_N_INSNS (1);
8027 else
8028 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8029 return false;
8030
8031 case SIGN_EXTEND:
8032 case ZERO_EXTEND:
8033 return arm_rtx_costs_1 (x, outer_code, total, 0);
8034
8035 case CONST_INT:
8036 if (const_ok_for_arm (INTVAL (x)))
8037 /* A multiplication by a constant requires another instruction
8038 to load the constant to a register. */
8039 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8040 ? 1 : 0);
8041 else if (const_ok_for_arm (~INTVAL (x)))
8042 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8043 else if (const_ok_for_arm (-INTVAL (x)))
8044 {
8045 if (outer_code == COMPARE || outer_code == PLUS
8046 || outer_code == MINUS)
8047 *total = 0;
8048 else
8049 *total = COSTS_N_INSNS (1);
8050 }
8051 else
8052 *total = COSTS_N_INSNS (2);
8053 return true;
8054
8055 case CONST:
8056 case LABEL_REF:
8057 case SYMBOL_REF:
8058 *total = COSTS_N_INSNS (2);
8059 return true;
8060
8061 case CONST_DOUBLE:
8062 *total = COSTS_N_INSNS (4);
8063 return true;
8064
8065 case CONST_VECTOR:
8066 if (TARGET_NEON
8067 && TARGET_HARD_FLOAT
8068 && outer_code == SET
8069 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8070 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8071 *total = COSTS_N_INSNS (1);
8072 else
8073 *total = COSTS_N_INSNS (4);
8074 return true;
8075
8076 case HIGH:
8077 case LO_SUM:
8078 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8079 cost of these slightly. */
8080 *total = COSTS_N_INSNS (1) + 1;
8081 return true;
8082
8083 case SET:
8084 return false;
8085
8086 default:
8087 if (mode != VOIDmode)
8088 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8089 else
8090 	*total = COSTS_N_INSNS (4); /* Who knows?  */
8091 return false;
8092 }
8093 }
8094
8095 /* RTX costs: dispatch to the size costs or the tuning-specific speed costs.  */
8096 static bool
8097 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8098 int *total, bool speed)
8099 {
8100 if (!speed)
8101 return arm_size_rtx_costs (x, (enum rtx_code) code,
8102 (enum rtx_code) outer_code, total);
8103 else
8104 return current_tune->rtx_costs (x, (enum rtx_code) code,
8105 (enum rtx_code) outer_code,
8106 total, speed);
8107 }
8108
8109 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8110 supported on any "slowmul" cores, so it can be ignored. */
8111
8112 static bool
8113 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8114 int *total, bool speed)
8115 {
8116 enum machine_mode mode = GET_MODE (x);
8117
8118 if (TARGET_THUMB)
8119 {
8120 *total = thumb1_rtx_costs (x, code, outer_code);
8121 return true;
8122 }
8123
8124 switch (code)
8125 {
8126 case MULT:
8127 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8128 || mode == DImode)
8129 {
8130 *total = COSTS_N_INSNS (20);
8131 return false;
8132 }
8133
8134 if (CONST_INT_P (XEXP (x, 1)))
8135 {
8136 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8137 & (unsigned HOST_WIDE_INT) 0xffffffff);
8138 int cost, const_ok = const_ok_for_arm (i);
8139 int j, booth_unit_size;
8140
8141 /* Tune as appropriate. */
8142 cost = const_ok ? 4 : 8;
8143 booth_unit_size = 2;
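	  /* For example, the (valid) constant 0xff makes this loop iterate
	     four times, two bits per step, giving COSTS_N_INSNS (8); the
	     fast-multiply variant below retires eight bits per step and
	     would give COSTS_N_INSNS (5) for the same constant.  */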
8144 for (j = 0; i && j < 32; j += booth_unit_size)
8145 {
8146 i >>= booth_unit_size;
8147 cost++;
8148 }
8149
8150 *total = COSTS_N_INSNS (cost);
8151 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8152 return true;
8153 }
8154
8155 *total = COSTS_N_INSNS (20);
8156 return false;
8157
8158 default:
8159       return arm_rtx_costs_1 (x, outer_code, total, speed);
8160 }
8161 }
8162
8163
8164 /* RTX cost for cores with a fast multiply unit (M variants). */
8165
8166 static bool
8167 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8168 int *total, bool speed)
8169 {
8170 enum machine_mode mode = GET_MODE (x);
8171
8172 if (TARGET_THUMB1)
8173 {
8174 *total = thumb1_rtx_costs (x, code, outer_code);
8175 return true;
8176 }
8177
8178 /* ??? should thumb2 use different costs? */
8179 switch (code)
8180 {
8181 case MULT:
8182 /* There is no point basing this on the tuning, since it is always the
8183 fast variant if it exists at all. */
8184 if (mode == DImode
8185 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8186 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8187 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8188 {
8189 	  *total = COSTS_N_INSNS (2);
8190 return false;
8191 }
8192
8193
8194 if (mode == DImode)
8195 {
8196 *total = COSTS_N_INSNS (5);
8197 return false;
8198 }
8199
8200 if (CONST_INT_P (XEXP (x, 1)))
8201 {
8202 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8203 & (unsigned HOST_WIDE_INT) 0xffffffff);
8204 int cost, const_ok = const_ok_for_arm (i);
8205 int j, booth_unit_size;
8206
8207 /* Tune as appropriate. */
8208 cost = const_ok ? 4 : 8;
8209 booth_unit_size = 8;
8210 for (j = 0; i && j < 32; j += booth_unit_size)
8211 {
8212 i >>= booth_unit_size;
8213 cost++;
8214 }
8215
8216 	  *total = COSTS_N_INSNS (cost);
8217 return false;
8218 }
8219
8220 if (mode == SImode)
8221 {
8222 *total = COSTS_N_INSNS (4);
8223 return false;
8224 }
8225
8226 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8227 {
8228 if (TARGET_HARD_FLOAT
8229 && (mode == SFmode
8230 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8231 {
8232 *total = COSTS_N_INSNS (1);
8233 return false;
8234 }
8235 }
8236
8237 /* Requires a lib call */
8238 *total = COSTS_N_INSNS (20);
8239 return false;
8240
8241 default:
8242 return arm_rtx_costs_1 (x, outer_code, total, speed);
8243 }
8244 }
8245
8246
8247 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8248 so it can be ignored. */
8249
8250 static bool
8251 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8252 int *total, bool speed)
8253 {
8254 enum machine_mode mode = GET_MODE (x);
8255
8256 if (TARGET_THUMB)
8257 {
8258 *total = thumb1_rtx_costs (x, code, outer_code);
8259 return true;
8260 }
8261
8262 switch (code)
8263 {
8264 case COMPARE:
8265 if (GET_CODE (XEXP (x, 0)) != MULT)
8266 return arm_rtx_costs_1 (x, outer_code, total, speed);
8267
8268 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8269 will stall until the multiplication is complete. */
8270 *total = COSTS_N_INSNS (3);
8271 return false;
8272
8273 case MULT:
8274 /* There is no point basing this on the tuning, since it is always the
8275 fast variant if it exists at all. */
8276 if (mode == DImode
8277 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8278 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8279 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8280 {
8281 *total = COSTS_N_INSNS (2);
8282 return false;
8283 }
8284
8285
8286 if (mode == DImode)
8287 {
8288 *total = COSTS_N_INSNS (5);
8289 return false;
8290 }
8291
8292 if (CONST_INT_P (XEXP (x, 1)))
8293 {
8294 /* If operand 1 is a constant we can more accurately
8295 calculate the cost of the multiply. The multiplier can
8296 retire 15 bits on the first cycle and a further 12 on the
8297 second. We do, of course, have to load the constant into
8298 a register first. */
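	  /* For example, 0x12345678 has bits set above bit 14 and above
	     bit 26, so both masks below are nonzero and the result is
	     COSTS_N_INSNS (3): one cycle of overhead plus one extra cycle
	     for each additional chunk of bits the multiplier must retire.  */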
8299 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8300 /* There's a general overhead of one cycle. */
8301 int cost = 1;
8302 unsigned HOST_WIDE_INT masked_const;
8303
8304 if (i & 0x80000000)
8305 i = ~i;
8306
8307 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8308
8309 masked_const = i & 0xffff8000;
8310 if (masked_const != 0)
8311 {
8312 cost++;
8313 masked_const = i & 0xf8000000;
8314 if (masked_const != 0)
8315 cost++;
8316 }
8317 *total = COSTS_N_INSNS (cost);
8318 return false;
8319 }
8320
8321 if (mode == SImode)
8322 {
8323 *total = COSTS_N_INSNS (3);
8324 return false;
8325 }
8326
8327 /* Requires a lib call */
8328 *total = COSTS_N_INSNS (20);
8329 return false;
8330
8331 default:
8332 return arm_rtx_costs_1 (x, outer_code, total, speed);
8333 }
8334 }
8335
8336
8337 /* RTX costs for 9e (and later) cores. */
8338
8339 static bool
8340 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8341 int *total, bool speed)
8342 {
8343 enum machine_mode mode = GET_MODE (x);
8344
8345 if (TARGET_THUMB1)
8346 {
8347 switch (code)
8348 {
8349 case MULT:
8350 *total = COSTS_N_INSNS (3);
8351 return true;
8352
8353 default:
8354 *total = thumb1_rtx_costs (x, code, outer_code);
8355 return true;
8356 }
8357 }
8358
8359 switch (code)
8360 {
8361 case MULT:
8362 /* There is no point basing this on the tuning, since it is always the
8363 fast variant if it exists at all. */
8364 if (mode == DImode
8365 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8366 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8367 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8368 {
8369 *total = COSTS_N_INSNS (2);
8370 return false;
8371 }
8372
8373
8374 if (mode == DImode)
8375 {
8376 *total = COSTS_N_INSNS (5);
8377 return false;
8378 }
8379
8380 if (mode == SImode)
8381 {
8382 *total = COSTS_N_INSNS (2);
8383 return false;
8384 }
8385
8386 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8387 {
8388 if (TARGET_HARD_FLOAT
8389 && (mode == SFmode
8390 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8391 {
8392 *total = COSTS_N_INSNS (1);
8393 return false;
8394 }
8395 }
8396
8397 *total = COSTS_N_INSNS (20);
8398 return false;
8399
8400 default:
8401 return arm_rtx_costs_1 (x, outer_code, total, speed);
8402 }
8403 }
8404 /* All address computations that can be done are free, but rtx cost returns
8405 the same for practically all of them. So we weight the different types
8406 of address here in the order (most pref first):
8407 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8408 static inline int
8409 arm_arm_address_cost (rtx x)
8410 {
8411 enum rtx_code c = GET_CODE (x);
8412
8413 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8414 return 0;
8415 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8416 return 10;
8417
8418 if (c == PLUS)
8419 {
8420 if (CONST_INT_P (XEXP (x, 1)))
8421 return 2;
8422
8423 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8424 return 3;
8425
8426 return 4;
8427 }
8428
8429 return 6;
8430 }
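
/* As a concrete reading of the weights above: a pre/post-indexed address
   costs 0, reg+constant costs 2, reg plus a shifted register costs 3, any
   other reg+reg sum costs 4, a plain register costs 6, and a MEM, LABEL_REF
   or SYMBOL_REF address costs 10.  */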
8431
8432 static inline int
8433 arm_thumb_address_cost (rtx x)
8434 {
8435 enum rtx_code c = GET_CODE (x);
8436
8437 if (c == REG)
8438 return 1;
8439 if (c == PLUS
8440 && REG_P (XEXP (x, 0))
8441 && CONST_INT_P (XEXP (x, 1)))
8442 return 1;
8443
8444 return 2;
8445 }
8446
8447 static int
8448 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
8449 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
8450 {
8451 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8452 }
8453
8454 /* Adjust cost hook for XScale. */
8455 static bool
8456 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8457 {
8458 /* Some true dependencies can have a higher cost depending
8459 on precisely how certain input operands are used. */
8460 if (REG_NOTE_KIND(link) == 0
8461 && recog_memoized (insn) >= 0
8462 && recog_memoized (dep) >= 0)
8463 {
8464 int shift_opnum = get_attr_shift (insn);
8465 enum attr_type attr_type = get_attr_type (dep);
8466
8467 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8468 operand for INSN. If we have a shifted input operand and the
8469 instruction we depend on is another ALU instruction, then we may
8470 have to account for an additional stall. */
8471 if (shift_opnum != 0
8472 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8473 {
8474 rtx shifted_operand;
8475 int opno;
8476
8477 /* Get the shifted operand. */
8478 extract_insn (insn);
8479 shifted_operand = recog_data.operand[shift_opnum];
8480
8481 /* Iterate over all the operands in DEP. If we write an operand
8482 	     that overlaps with SHIFTED_OPERAND, then we have to increase the
8483 cost of this dependency. */
8484 extract_insn (dep);
8485 preprocess_constraints ();
8486 for (opno = 0; opno < recog_data.n_operands; opno++)
8487 {
8488 /* We can ignore strict inputs. */
8489 if (recog_data.operand_type[opno] == OP_IN)
8490 continue;
8491
8492 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8493 shifted_operand))
8494 {
8495 *cost = 2;
8496 return false;
8497 }
8498 }
8499 }
8500 }
8501 return true;
8502 }
8503
8504 /* Adjust cost hook for Cortex A9. */
8505 static bool
8506 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8507 {
8508 switch (REG_NOTE_KIND (link))
8509 {
8510 case REG_DEP_ANTI:
8511 *cost = 0;
8512 return false;
8513
8514 case REG_DEP_TRUE:
8515 case REG_DEP_OUTPUT:
8516 if (recog_memoized (insn) >= 0
8517 && recog_memoized (dep) >= 0)
8518 {
8519 if (GET_CODE (PATTERN (insn)) == SET)
8520 {
8521 if (GET_MODE_CLASS
8522 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8523 || GET_MODE_CLASS
8524 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8525 {
8526 enum attr_type attr_type_insn = get_attr_type (insn);
8527 enum attr_type attr_type_dep = get_attr_type (dep);
8528
8529 /* By default all dependencies of the form
8530 s0 = s0 <op> s1
8531 s0 = s0 <op> s2
8532 have an extra latency of 1 cycle because
8533 of the input and output dependency in this
8534 		     case.  However this gets modeled as a true
8535 dependency and hence all these checks. */
8536 if (REG_P (SET_DEST (PATTERN (insn)))
8537 && REG_P (SET_DEST (PATTERN (dep)))
8538 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8539 SET_DEST (PATTERN (dep))))
8540 {
8541 /* FMACS is a special case where the dependent
8542 instruction can be issued 3 cycles before
8543 the normal latency in case of an output
8544 dependency. */
8545 if ((attr_type_insn == TYPE_FMACS
8546 || attr_type_insn == TYPE_FMACD)
8547 && (attr_type_dep == TYPE_FMACS
8548 || attr_type_dep == TYPE_FMACD))
8549 {
8550 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8551 *cost = insn_default_latency (dep) - 3;
8552 else
8553 *cost = insn_default_latency (dep);
8554 return false;
8555 }
8556 else
8557 {
8558 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8559 *cost = insn_default_latency (dep) + 1;
8560 else
8561 *cost = insn_default_latency (dep);
8562 }
8563 return false;
8564 }
8565 }
8566 }
8567 }
8568 break;
8569
8570 default:
8571 gcc_unreachable ();
8572 }
8573
8574 return true;
8575 }
8576
8577 /* Adjust cost hook for FA726TE. */
8578 static bool
8579 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8580 {
8581   /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
8582      followed by a predicated one) has a penalty of 3.  */
8583 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8584 && recog_memoized (insn) >= 0
8585 && recog_memoized (dep) >= 0
8586 && get_attr_conds (dep) == CONDS_SET)
8587 {
8588 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8589 if (get_attr_conds (insn) == CONDS_USE
8590 && get_attr_type (insn) != TYPE_BRANCH)
8591 {
8592 *cost = 3;
8593 return false;
8594 }
8595
8596 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8597 || get_attr_conds (insn) == CONDS_USE)
8598 {
8599 *cost = 0;
8600 return false;
8601 }
8602 }
8603
8604 return true;
8605 }
8606
8607 /* Implement TARGET_REGISTER_MOVE_COST.
8608
8609 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8610    they are typically more expensive than a single memory access.  We set
8611 the cost to less than two memory accesses so that floating
8612 point to integer conversion does not go through memory. */
8613
8614 int
8615 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8616 reg_class_t from, reg_class_t to)
8617 {
8618 if (TARGET_32BIT)
8619 {
8620 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8621 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8622 return 15;
8623 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8624 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8625 return 4;
8626 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8627 return 20;
8628 else
8629 return 2;
8630 }
8631 else
8632 {
8633 if (from == HI_REGS || to == HI_REGS)
8634 return 4;
8635 else
8636 return 2;
8637 }
8638 }
8639
8640 /* Implement TARGET_MEMORY_MOVE_COST. */
8641
8642 int
8643 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8644 bool in ATTRIBUTE_UNUSED)
8645 {
8646 if (TARGET_32BIT)
8647 return 10;
8648 else
8649 {
8650 if (GET_MODE_SIZE (mode) < 4)
8651 return 8;
8652 else
8653 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
8654 }
8655 }
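
/* Taken together with arm_register_move_cost above: on 32-bit targets a
   memory access is a flat 10, so the VFP<->core register move cost of 15
   stays below the two accesses a round trip through memory would need.
   On Thumb-1, sub-word values cost 8, and an SImode value costs 8 when
   restricted to LO_REGS and 16 otherwise.  */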
8656
8657 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8658 It corrects the value of COST based on the relationship between
8659 INSN and DEP through the dependence LINK. It returns the new
8660 value. There is a per-core adjust_cost hook to adjust scheduler costs
8661 and the per-core hook can choose to completely override the generic
8662 adjust_cost function. Only put bits of code into arm_adjust_cost that
8663 are common across all cores. */
8664 static int
8665 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8666 {
8667 rtx i_pat, d_pat;
8668
8669 /* When generating Thumb-1 code, we want to place flag-setting operations
8670 close to a conditional branch which depends on them, so that we can
8671 omit the comparison. */
8672 if (TARGET_THUMB1
8673 && REG_NOTE_KIND (link) == 0
8674 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8675 && recog_memoized (dep) >= 0
8676 && get_attr_conds (dep) == CONDS_SET)
8677 return 0;
8678
8679 if (current_tune->sched_adjust_cost != NULL)
8680 {
8681 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8682 return cost;
8683 }
8684
8685 /* XXX Is this strictly true? */
8686 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8687 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8688 return 0;
8689
8690 /* Call insns don't incur a stall, even if they follow a load. */
8691 if (REG_NOTE_KIND (link) == 0
8692 && CALL_P (insn))
8693 return 1;
8694
8695 if ((i_pat = single_set (insn)) != NULL
8696 && MEM_P (SET_SRC (i_pat))
8697 && (d_pat = single_set (dep)) != NULL
8698 && MEM_P (SET_DEST (d_pat)))
8699 {
8700 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8701       /* This is a load after a store; there is no conflict if the load reads
8702 	 from a cached area.  Assume that loads from the stack and from the
8703 	 constant pool are cached, and that others will miss.  This is a
8704 	 hack.  */
8705
8706 if ((GET_CODE (src_mem) == SYMBOL_REF
8707 && CONSTANT_POOL_ADDRESS_P (src_mem))
8708 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8709 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8710 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8711 return 1;
8712 }
8713
8714 return cost;
8715 }
8716
8717 static int
8718 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8719 {
8720 if (TARGET_32BIT)
8721 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8722 else
8723 return (optimize > 0) ? 2 : 0;
8724 }
8725
8726 static int
8727 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8728 {
8729 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8730 }
8731
8732 static bool fp_consts_inited = false;
8733
8734 static REAL_VALUE_TYPE value_fp0;
8735
8736 static void
8737 init_fp_table (void)
8738 {
8739 REAL_VALUE_TYPE r;
8740
8741 r = REAL_VALUE_ATOF ("0", DFmode);
8742 value_fp0 = r;
8743 fp_consts_inited = true;
8744 }
8745
8746 /* Return TRUE if rtx X is a valid immediate FP constant. */
8747 int
8748 arm_const_double_rtx (rtx x)
8749 {
8750 REAL_VALUE_TYPE r;
8751
8752 if (!fp_consts_inited)
8753 init_fp_table ();
8754
8755 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8756 if (REAL_VALUE_MINUS_ZERO (r))
8757 return 0;
8758
8759 if (REAL_VALUES_EQUAL (r, value_fp0))
8760 return 1;
8761
8762 return 0;
8763 }
8764
8765 /* VFPv3 has a fairly wide range of representable immediates, formed from
8766 "quarter-precision" floating-point values. These can be evaluated using this
8767 formula (with ^ for exponentiation):
8768
8769 -1^s * n * 2^-r
8770
8771 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8772 16 <= n <= 31 and 0 <= r <= 7.
8773
8774 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8775
8776 - A (most-significant) is the sign bit.
8777 - BCD are the exponent (encoded as r XOR 3).
8778 - EFGH are the mantissa (encoded as n - 16).
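
   As a worked example, 1.0 = +16 * 2^-4, so s = 0, n = 16, r = 4 and the
   encoded byte is (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70.  The
   representable magnitudes thus range from 16 * 2^-7 = 0.125 up to
   31 * 2^0 = 31.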
8779 */
8780
8781 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8782 fconst[sd] instruction, or -1 if X isn't suitable. */
8783 static int
8784 vfp3_const_double_index (rtx x)
8785 {
8786 REAL_VALUE_TYPE r, m;
8787 int sign, exponent;
8788 unsigned HOST_WIDE_INT mantissa, mant_hi;
8789 unsigned HOST_WIDE_INT mask;
8790 HOST_WIDE_INT m1, m2;
8791 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8792
8793 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
8794 return -1;
8795
8796 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8797
8798 /* We can't represent these things, so detect them first. */
8799 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8800 return -1;
8801
8802 /* Extract sign, exponent and mantissa. */
8803 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8804 r = real_value_abs (&r);
8805 exponent = REAL_EXP (&r);
8806 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8807 highest (sign) bit, with a fixed binary point at bit point_pos.
8808 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8809 bits for the mantissa, this may fail (low bits would be lost). */
8810 real_ldexp (&m, &r, point_pos - exponent);
8811 REAL_VALUE_TO_INT (&m1, &m2, m);
8812 mantissa = m1;
8813 mant_hi = m2;
8814
8815 /* If there are bits set in the low part of the mantissa, we can't
8816 represent this value. */
8817 if (mantissa != 0)
8818 return -1;
8819
8820 /* Now make it so that mantissa contains the most-significant bits, and move
8821 the point_pos to indicate that the least-significant bits have been
8822 discarded. */
8823 point_pos -= HOST_BITS_PER_WIDE_INT;
8824 mantissa = mant_hi;
8825
8826 /* We can permit four significant bits of mantissa only, plus a high bit
8827 which is always 1. */
8828 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8829 if ((mantissa & mask) != 0)
8830 return -1;
8831
8832 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8833 mantissa >>= point_pos - 5;
8834
8835 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8836 floating-point immediate zero with Neon using an integer-zero load, but
8837 that case is handled elsewhere.) */
8838 if (mantissa == 0)
8839 return -1;
8840
8841 gcc_assert (mantissa >= 16 && mantissa <= 31);
8842
8843 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8844 normalized significands are in the range [1, 2). (Our mantissa is shifted
8845 left 4 places at this point relative to normalized IEEE754 values). GCC
8846 internally uses [0.5, 1) (see real.c), so the exponent returned from
8847 REAL_EXP must be altered. */
8848 exponent = 5 - exponent;
8849
8850 if (exponent < 0 || exponent > 7)
8851 return -1;
8852
8853 /* Sign, mantissa and exponent are now in the correct form to plug into the
8854 formula described in the comment above. */
8855 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8856 }
8857
8858 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8859 int
8860 vfp3_const_double_rtx (rtx x)
8861 {
8862 if (!TARGET_VFP3)
8863 return 0;
8864
8865 return vfp3_const_double_index (x) != -1;
8866 }
8867
8868 /* Recognize immediates which can be used in various Neon instructions. Legal
8869 immediates are described by the following table (for VMVN variants, the
8870 bitwise inverse of the constant shown is recognized. In either case, VMOV
8871 is output and the correct instruction to use for a given constant is chosen
8872 by the assembler). The constant shown is replicated across all elements of
8873 the destination vector.
8874
8875 insn elems variant constant (binary)
8876 ---- ----- ------- -----------------
8877 vmov i32 0 00000000 00000000 00000000 abcdefgh
8878 vmov i32 1 00000000 00000000 abcdefgh 00000000
8879 vmov i32 2 00000000 abcdefgh 00000000 00000000
8880 vmov i32 3 abcdefgh 00000000 00000000 00000000
8881 vmov i16 4 00000000 abcdefgh
8882 vmov i16 5 abcdefgh 00000000
8883 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8884 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8885 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8886 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8887 vmvn i16 10 00000000 abcdefgh
8888 vmvn i16 11 abcdefgh 00000000
8889 vmov i32 12 00000000 00000000 abcdefgh 11111111
8890 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8891 vmov i32 14 00000000 abcdefgh 11111111 11111111
8892 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8893 vmov i8 16 abcdefgh
8894 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8895 eeeeeeee ffffffff gggggggg hhhhhhhh
8896 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8897 vmov f32 19 00000000 00000000 00000000 00000000
8898
8899 For case 18, B = !b. Representable values are exactly those accepted by
8900 vfp3_const_double_index, but are output as floating-point numbers rather
8901 than indices.
8902
8903 For case 19, we will change it to vmov.i32 when assembling.
8904
8905 Variants 0-5 (inclusive) may also be used as immediates for the second
8906 operand of VORR/VBIC instructions.
8907
8908 The INVERSE argument causes the bitwise inverse of the given operand to be
8909 recognized instead (used for recognizing legal immediates for the VAND/VORN
8910 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8911 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8912 output, rather than the real insns vbic/vorr).
8913
8914 INVERSE makes no difference to the recognition of float vectors.
8915
8916 The return value is the variant of immediate as shown in the above table, or
8917 -1 if the given value doesn't match any of the listed patterns.
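
   As an added illustration: a V4SImode vector whose every element is
   0x0000ab00 matches variant 1 above (abcdefgh = 0xab, element width 32),
   and its bitwise inverse 0xffff54ff matches the corresponding VMVN
   variant 7.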
8918 */
8919 static int
8920 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8921 rtx *modconst, int *elementwidth)
8922 {
8923 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8924 matches = 1; \
8925 for (i = 0; i < idx; i += (STRIDE)) \
8926 if (!(TEST)) \
8927 matches = 0; \
8928 if (matches) \
8929 { \
8930 immtype = (CLASS); \
8931 elsize = (ELSIZE); \
8932 break; \
8933 }
8934
8935 unsigned int i, elsize = 0, idx = 0, n_elts;
8936 unsigned int innersize;
8937 unsigned char bytes[16];
8938 int immtype = -1, matches;
8939 unsigned int invmask = inverse ? 0xff : 0;
8940 bool vector = GET_CODE (op) == CONST_VECTOR;
8941
8942 if (vector)
8943 {
8944 n_elts = CONST_VECTOR_NUNITS (op);
8945 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8946 }
8947 else
8948 {
8949 n_elts = 1;
8950 if (mode == VOIDmode)
8951 mode = DImode;
8952 innersize = GET_MODE_SIZE (mode);
8953 }
8954
8955 /* Vectors of float constants. */
8956 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8957 {
8958 rtx el0 = CONST_VECTOR_ELT (op, 0);
8959 REAL_VALUE_TYPE r0;
8960
8961 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
8962 return -1;
8963
8964 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8965
8966 for (i = 1; i < n_elts; i++)
8967 {
8968 rtx elt = CONST_VECTOR_ELT (op, i);
8969 REAL_VALUE_TYPE re;
8970
8971 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8972
8973 if (!REAL_VALUES_EQUAL (r0, re))
8974 return -1;
8975 }
8976
8977 if (modconst)
8978 *modconst = CONST_VECTOR_ELT (op, 0);
8979
8980 if (elementwidth)
8981 *elementwidth = 0;
8982
8983 if (el0 == CONST0_RTX (GET_MODE (el0)))
8984 return 19;
8985 else
8986 return 18;
8987 }
8988
8989 /* Splat vector constant out into a byte vector. */
8990 for (i = 0; i < n_elts; i++)
8991 {
8992 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
8993 unsigned HOST_WIDE_INT elpart;
8994 unsigned int part, parts;
8995
8996 if (CONST_INT_P (el))
8997 {
8998 elpart = INTVAL (el);
8999 parts = 1;
9000 }
9001 else if (CONST_DOUBLE_P (el))
9002 {
9003 elpart = CONST_DOUBLE_LOW (el);
9004 parts = 2;
9005 }
9006 else
9007 gcc_unreachable ();
9008
9009 for (part = 0; part < parts; part++)
9010 {
9011 unsigned int byte;
9012 for (byte = 0; byte < innersize; byte++)
9013 {
9014 bytes[idx++] = (elpart & 0xff) ^ invmask;
9015 elpart >>= BITS_PER_UNIT;
9016 }
9017 if (CONST_DOUBLE_P (el))
9018 elpart = CONST_DOUBLE_HIGH (el);
9019 }
9020 }
9021
9022 /* Sanity check. */
9023 gcc_assert (idx == GET_MODE_SIZE (mode));
9024
9025 do
9026 {
9027 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
9028 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9029
9030 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9031 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9032
9033 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
9034 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9035
9036 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9037 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
9038
9039 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
9040
9041 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
9042
9043 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9044 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9045
9046 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9047 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9048
9049 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9050 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9051
9052 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9053 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
9054
9055 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
9056
9057 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
9058
9059 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9060 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9061
9062 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9063 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9064
9065 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9066 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9067
9068 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9069 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9070
9071 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9072
9073 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9074 && bytes[i] == bytes[(i + 8) % idx]);
9075 }
9076 while (0);
9077
9078 if (immtype == -1)
9079 return -1;
9080
9081 if (elementwidth)
9082 *elementwidth = elsize;
9083
9084 if (modconst)
9085 {
9086 unsigned HOST_WIDE_INT imm = 0;
9087
9088 /* Un-invert bytes of recognized vector, if necessary. */
9089 if (invmask != 0)
9090 for (i = 0; i < idx; i++)
9091 bytes[i] ^= invmask;
9092
9093 if (immtype == 17)
9094 {
9095 /* FIXME: Broken on 32-bit H_W_I hosts. */
9096 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9097
9098 for (i = 0; i < 8; i++)
9099 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9100 << (i * BITS_PER_UNIT);
9101
9102 *modconst = GEN_INT (imm);
9103 }
9104 else
9105 {
9106 unsigned HOST_WIDE_INT imm = 0;
9107
9108 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9109 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9110
9111 *modconst = GEN_INT (imm);
9112 }
9113 }
9114
9115 return immtype;
9116 #undef CHECK
9117 }
9118
 9119 /* Return TRUE if rtx OP is legal for use as either a Neon VMOV (or, implicitly,
9120 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9121 float elements), and a modified constant (whatever should be output for a
9122 VMOV) in *MODCONST. */
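/* Illustrative example (hypothetical values, not from the original comment):
   for a V4SImode CONST_VECTOR whose four elements are all 0xab, the bytes
   splat to the repeating pattern {0xab, 0, 0, 0}, which neon_valid_immediate
   above recognizes as immediate type 0 with a 32-bit element size.  This
   function would then return nonzero with *ELEMENTWIDTH == 32 and
   *MODCONST == (const_int 0xab), i.e. the constant can be emitted as
   "vmov.i32 <qd>, #0xab".  */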
9123
9124 int
9125 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9126 rtx *modconst, int *elementwidth)
9127 {
9128 rtx tmpconst;
9129 int tmpwidth;
9130 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9131
9132 if (retval == -1)
9133 return 0;
9134
9135 if (modconst)
9136 *modconst = tmpconst;
9137
9138 if (elementwidth)
9139 *elementwidth = tmpwidth;
9140
9141 return 1;
9142 }
9143
 9144 /* Return TRUE if rtx OP is legal for use in a VORR or VBIC instruction.  If
9145 the immediate is valid, write a constant suitable for using as an operand
9146 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9147 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9148
9149 int
9150 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9151 rtx *modconst, int *elementwidth)
9152 {
9153 rtx tmpconst;
9154 int tmpwidth;
9155 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9156
9157 if (retval < 0 || retval > 5)
9158 return 0;
9159
9160 if (modconst)
9161 *modconst = tmpconst;
9162
9163 if (elementwidth)
9164 *elementwidth = tmpwidth;
9165
9166 return 1;
9167 }
9168
9169 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9170 the immediate is valid, write a constant suitable for using as an operand
9171 to VSHR/VSHL to *MODCONST and the corresponding element width to
 9172    *ELEMENTWIDTH.  ISLEFTSHIFT determines whether this is a left or a right
 9173    shift, because the two forms have different immediate ranges.  */
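/* Illustrative example (hypothetical values): for a V8HImode CONST_VECTOR
   whose elements are all 3, the inner element size is 2 bytes, so MAXSHIFT
   below is 16.  The value 3 is then acceptable both as a left-shift
   immediate (range 0 to 15) and as a right-shift immediate (range 1 to 16),
   and *ELEMENTWIDTH is set to 16.  */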
9174
9175 int
9176 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9177 rtx *modconst, int *elementwidth,
9178 bool isleftshift)
9179 {
9180 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9181 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9182 unsigned HOST_WIDE_INT last_elt = 0;
9183 unsigned HOST_WIDE_INT maxshift;
9184
9185 /* Split vector constant out into a byte vector. */
9186 for (i = 0; i < n_elts; i++)
9187 {
9188 rtx el = CONST_VECTOR_ELT (op, i);
9189 unsigned HOST_WIDE_INT elpart;
9190
9191 if (CONST_INT_P (el))
9192 elpart = INTVAL (el);
9193 else if (CONST_DOUBLE_P (el))
9194 return 0;
9195 else
9196 gcc_unreachable ();
9197
9198 if (i != 0 && elpart != last_elt)
9199 return 0;
9200
9201 last_elt = elpart;
9202 }
9203
9204 /* Shift less than element size. */
9205 maxshift = innersize * 8;
9206
9207 if (isleftshift)
9208 {
9209 /* Left shift immediate value can be from 0 to <size>-1. */
9210 if (last_elt >= maxshift)
9211 return 0;
9212 }
9213 else
9214 {
9215 /* Right shift immediate value can be from 1 to <size>. */
9216 if (last_elt == 0 || last_elt > maxshift)
9217 return 0;
9218 }
9219
9220 if (elementwidth)
9221 *elementwidth = innersize * 8;
9222
9223 if (modconst)
9224 *modconst = CONST_VECTOR_ELT (op, 0);
9225
9226 return 1;
9227 }
9228
9229 /* Return a string suitable for output of Neon immediate logic operation
9230 MNEM. */
9231
9232 char *
9233 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9234 int inverse, int quad)
9235 {
9236 int width, is_valid;
9237 static char templ[40];
9238
9239 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9240
9241 gcc_assert (is_valid != 0);
9242
9243 if (quad)
9244 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9245 else
9246 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9247
9248 return templ;
9249 }
9250
9251 /* Return a string suitable for output of Neon immediate shift operation
9252 (VSHR or VSHL) MNEM. */
9253
9254 char *
9255 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9256 enum machine_mode mode, int quad,
9257 bool isleftshift)
9258 {
9259 int width, is_valid;
9260 static char templ[40];
9261
9262 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9263 gcc_assert (is_valid != 0);
9264
9265 if (quad)
9266 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9267 else
9268 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9269
9270 return templ;
9271 }
9272
9273 /* Output a sequence of pairwise operations to implement a reduction.
9274 NOTE: We do "too much work" here, because pairwise operations work on two
 9275    registers-worth of operands in one go.  Unfortunately, we do not believe those
 9276    extra calculations can be exploited to do the full operation in fewer steps.
9277 Although all vector elements of the result but the first are ignored, we
9278 actually calculate the same result in each of the elements. An alternative
9279 such as initially loading a vector with zero to use as each of the second
9280 operands would use up an additional register and take an extra instruction,
9281 for no particular gain. */
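/* Worked example (an illustration, not part of the original comment):
   reducing a V4SImode vector {a, b, c, d} with a pairwise-add REDUC takes
   two steps.  The loop starts at i == 2 and emits REDUC (t, op1, op1),
   producing {a+b, c+d, a+b, c+d}; the final iteration (i == 1) emits
   REDUC (op0, t, t), leaving a+b+c+d in every lane of OP0, including
   lane 0, which is the only one the caller actually needs.  */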
9282
9283 void
9284 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9285 rtx (*reduc) (rtx, rtx, rtx))
9286 {
9287 enum machine_mode inner = GET_MODE_INNER (mode);
9288 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9289 rtx tmpsum = op1;
9290
9291 for (i = parts / 2; i >= 1; i /= 2)
9292 {
9293 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9294 emit_insn (reduc (dest, tmpsum, tmpsum));
9295 tmpsum = dest;
9296 }
9297 }
9298
9299 /* If VALS is a vector constant that can be loaded into a register
9300 using VDUP, generate instructions to do so and return an RTX to
9301 assign to the register. Otherwise return NULL_RTX. */
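/* Illustrative example (hypothetical values): a V8QImode CONST_VECTOR with
   all eight elements equal to 5 causes the constant 5 to be copied into a
   core register, and the function returns (vec_duplicate:V8QI (reg:QI ...)),
   which the move patterns can emit as a single "vdup.8".  A vector such as
   {1, 2, 3, ...} is rejected and NULL_RTX is returned.  */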
9302
9303 static rtx
9304 neon_vdup_constant (rtx vals)
9305 {
9306 enum machine_mode mode = GET_MODE (vals);
9307 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9308 int n_elts = GET_MODE_NUNITS (mode);
9309 bool all_same = true;
9310 rtx x;
9311 int i;
9312
9313 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9314 return NULL_RTX;
9315
9316 for (i = 0; i < n_elts; ++i)
9317 {
9318 x = XVECEXP (vals, 0, i);
9319 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9320 all_same = false;
9321 }
9322
9323 if (!all_same)
9324 /* The elements are not all the same. We could handle repeating
9325 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9326 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9327 vdup.i16). */
9328 return NULL_RTX;
9329
9330 /* We can load this constant by using VDUP and a constant in a
9331 single ARM register. This will be cheaper than a vector
9332 load. */
9333
9334 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9335 return gen_rtx_VEC_DUPLICATE (mode, x);
9336 }
9337
9338 /* Generate code to load VALS, which is a PARALLEL containing only
9339 constants (for vec_init) or CONST_VECTOR, efficiently into a
9340 register. Returns an RTX to copy into the register, or NULL_RTX
9341 for a PARALLEL that can not be converted into a CONST_VECTOR. */
9342
9343 rtx
9344 neon_make_constant (rtx vals)
9345 {
9346 enum machine_mode mode = GET_MODE (vals);
9347 rtx target;
9348 rtx const_vec = NULL_RTX;
9349 int n_elts = GET_MODE_NUNITS (mode);
9350 int n_const = 0;
9351 int i;
9352
9353 if (GET_CODE (vals) == CONST_VECTOR)
9354 const_vec = vals;
9355 else if (GET_CODE (vals) == PARALLEL)
9356 {
9357 /* A CONST_VECTOR must contain only CONST_INTs and
9358 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9359 Only store valid constants in a CONST_VECTOR. */
9360 for (i = 0; i < n_elts; ++i)
9361 {
9362 rtx x = XVECEXP (vals, 0, i);
9363 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
9364 n_const++;
9365 }
9366 if (n_const == n_elts)
9367 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9368 }
9369 else
9370 gcc_unreachable ();
9371
9372 if (const_vec != NULL
9373 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9374 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9375 return const_vec;
9376 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9377 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9378 pipeline cycle; creating the constant takes one or two ARM
9379 pipeline cycles. */
9380 return target;
9381 else if (const_vec != NULL_RTX)
9382 /* Load from constant pool. On Cortex-A8 this takes two cycles
9383 (for either double or quad vectors). We can not take advantage
9384 of single-cycle VLD1 because we need a PC-relative addressing
9385 mode. */
9386 return const_vec;
9387 else
9388 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9389 We can not construct an initializer. */
9390 return NULL_RTX;
9391 }
9392
9393 /* Initialize vector TARGET to VALS. */
9394
9395 void
9396 neon_expand_vector_init (rtx target, rtx vals)
9397 {
9398 enum machine_mode mode = GET_MODE (target);
9399 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9400 int n_elts = GET_MODE_NUNITS (mode);
9401 int n_var = 0, one_var = -1;
9402 bool all_same = true;
9403 rtx x, mem;
9404 int i;
9405
9406 for (i = 0; i < n_elts; ++i)
9407 {
9408 x = XVECEXP (vals, 0, i);
9409 if (!CONSTANT_P (x))
9410 ++n_var, one_var = i;
9411
9412 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9413 all_same = false;
9414 }
9415
9416 if (n_var == 0)
9417 {
9418 rtx constant = neon_make_constant (vals);
9419 if (constant != NULL_RTX)
9420 {
9421 emit_move_insn (target, constant);
9422 return;
9423 }
9424 }
9425
9426 /* Splat a single non-constant element if we can. */
9427 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9428 {
9429 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9430 emit_insn (gen_rtx_SET (VOIDmode, target,
9431 gen_rtx_VEC_DUPLICATE (mode, x)));
9432 return;
9433 }
9434
9435 /* One field is non-constant. Load constant then overwrite varying
9436 field. This is more efficient than using the stack. */
9437 if (n_var == 1)
9438 {
9439 rtx copy = copy_rtx (vals);
9440 rtx index = GEN_INT (one_var);
9441
9442 /* Load constant part of vector, substitute neighboring value for
9443 varying element. */
9444 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9445 neon_expand_vector_init (target, copy);
9446
9447 /* Insert variable. */
9448 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9449 switch (mode)
9450 {
9451 case V8QImode:
9452 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9453 break;
9454 case V16QImode:
9455 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9456 break;
9457 case V4HImode:
9458 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9459 break;
9460 case V8HImode:
9461 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9462 break;
9463 case V2SImode:
9464 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9465 break;
9466 case V4SImode:
9467 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9468 break;
9469 case V2SFmode:
9470 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9471 break;
9472 case V4SFmode:
9473 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9474 break;
9475 case V2DImode:
9476 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9477 break;
9478 default:
9479 gcc_unreachable ();
9480 }
9481 return;
9482 }
9483
9484 /* Construct the vector in memory one field at a time
9485 and load the whole vector. */
9486 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9487 for (i = 0; i < n_elts; i++)
9488 emit_move_insn (adjust_address_nv (mem, inner_mode,
9489 i * GET_MODE_SIZE (inner_mode)),
9490 XVECEXP (vals, 0, i));
9491 emit_move_insn (target, mem);
9492 }
9493
9494 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9495 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9496 reported source locations are bogus. */
9497
9498 static void
9499 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9500 const char *err)
9501 {
9502 HOST_WIDE_INT lane;
9503
9504 gcc_assert (CONST_INT_P (operand));
9505
9506 lane = INTVAL (operand);
9507
9508 if (lane < low || lane >= high)
9509 error (err);
9510 }
9511
9512 /* Bounds-check lanes. */
9513
9514 void
9515 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9516 {
9517 bounds_check (operand, low, high, "lane out of range");
9518 }
9519
9520 /* Bounds-check constants. */
9521
9522 void
9523 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9524 {
9525 bounds_check (operand, low, high, "constant out of range");
9526 }
9527
9528 HOST_WIDE_INT
9529 neon_element_bits (enum machine_mode mode)
9530 {
9531 if (mode == DImode)
9532 return GET_MODE_BITSIZE (mode);
9533 else
9534 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9535 }
9536
9537 \f
9538 /* Predicates for `match_operand' and `match_operator'. */
9539
9540 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9541 WB is true if full writeback address modes are allowed and is false
9542 if limited writeback address modes (POST_INC and PRE_DEC) are
9543 allowed. */
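/* Illustrative examples (hypothetical addresses, not from the original
   comment): (mem (reg rN)) and (mem (plus (reg rN) (const_int 8))) are
   accepted, since the offset is a multiple of 4 in the range (-1024, 1024);
   an offset such as 1025 is rejected; and (mem (pre_inc (reg rN))) is only
   accepted when WB is true.  */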
9544
9545 int
9546 arm_coproc_mem_operand (rtx op, bool wb)
9547 {
9548 rtx ind;
9549
9550 /* Reject eliminable registers. */
9551 if (! (reload_in_progress || reload_completed)
9552 && ( reg_mentioned_p (frame_pointer_rtx, op)
9553 || reg_mentioned_p (arg_pointer_rtx, op)
9554 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9555 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9556 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9557 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9558 return FALSE;
9559
9560 /* Constants are converted into offsets from labels. */
9561 if (!MEM_P (op))
9562 return FALSE;
9563
9564 ind = XEXP (op, 0);
9565
9566 if (reload_completed
9567 && (GET_CODE (ind) == LABEL_REF
9568 || (GET_CODE (ind) == CONST
9569 && GET_CODE (XEXP (ind, 0)) == PLUS
9570 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9571 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9572 return TRUE;
9573
9574 /* Match: (mem (reg)). */
9575 if (REG_P (ind))
9576 return arm_address_register_rtx_p (ind, 0);
9577
 9578   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
9579 acceptable in any case (subject to verification by
9580 arm_address_register_rtx_p). We need WB to be true to accept
9581 PRE_INC and POST_DEC. */
9582 if (GET_CODE (ind) == POST_INC
9583 || GET_CODE (ind) == PRE_DEC
9584 || (wb
9585 && (GET_CODE (ind) == PRE_INC
9586 || GET_CODE (ind) == POST_DEC)))
9587 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9588
9589 if (wb
9590 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9591 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9592 && GET_CODE (XEXP (ind, 1)) == PLUS
9593 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9594 ind = XEXP (ind, 1);
9595
9596 /* Match:
9597 (plus (reg)
9598 (const)). */
9599 if (GET_CODE (ind) == PLUS
9600 && REG_P (XEXP (ind, 0))
9601 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9602 && CONST_INT_P (XEXP (ind, 1))
9603 && INTVAL (XEXP (ind, 1)) > -1024
9604 && INTVAL (XEXP (ind, 1)) < 1024
9605 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9606 return TRUE;
9607
9608 return FALSE;
9609 }
9610
 9611 /* Return TRUE if OP is a memory operand from/to which a vector can be
 9612    loaded or stored.  TYPE is one of the following values:
 9613     0 - Vector load/store (vldr)
9614 1 - Core registers (ldm)
9615 2 - Element/structure loads (vld1)
9616 */
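/* Illustrative examples (hypothetical addresses): for TYPE 0,
   (mem (plus (reg rN) (const_int 16))) is accepted, since VLDR/VSTR allow a
   small multiple-of-4 offset; for TYPE 2, essentially only a plain register
   address or a post-increment such as (mem (post_inc (reg rN))) is
   accepted, matching the VLD1/VST1 addressing modes.  */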
9617 int
9618 neon_vector_mem_operand (rtx op, int type)
9619 {
9620 rtx ind;
9621
9622 /* Reject eliminable registers. */
9623 if (! (reload_in_progress || reload_completed)
9624 && ( reg_mentioned_p (frame_pointer_rtx, op)
9625 || reg_mentioned_p (arg_pointer_rtx, op)
9626 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9627 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9628 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9629 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9630 return FALSE;
9631
9632 /* Constants are converted into offsets from labels. */
9633 if (!MEM_P (op))
9634 return FALSE;
9635
9636 ind = XEXP (op, 0);
9637
9638 if (reload_completed
9639 && (GET_CODE (ind) == LABEL_REF
9640 || (GET_CODE (ind) == CONST
9641 && GET_CODE (XEXP (ind, 0)) == PLUS
9642 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9643 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9644 return TRUE;
9645
9646 /* Match: (mem (reg)). */
9647 if (REG_P (ind))
9648 return arm_address_register_rtx_p (ind, 0);
9649
9650 /* Allow post-increment with Neon registers. */
9651 if ((type != 1 && GET_CODE (ind) == POST_INC)
9652 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9653 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9654
9655 /* FIXME: vld1 allows register post-modify. */
9656
9657 /* Match:
9658 (plus (reg)
9659 (const)). */
9660 if (type == 0
9661 && GET_CODE (ind) == PLUS
9662 && REG_P (XEXP (ind, 0))
9663 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9664 && CONST_INT_P (XEXP (ind, 1))
9665 && INTVAL (XEXP (ind, 1)) > -1024
9666 /* For quad modes, we restrict the constant offset to be slightly less
9667 than what the instruction format permits. We have no such constraint
9668 on double mode offsets. (This must match arm_legitimate_index_p.) */
9669 && (INTVAL (XEXP (ind, 1))
9670 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
9671 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9672 return TRUE;
9673
9674 return FALSE;
9675 }
9676
9677 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9678 type. */
9679 int
9680 neon_struct_mem_operand (rtx op)
9681 {
9682 rtx ind;
9683
9684 /* Reject eliminable registers. */
9685 if (! (reload_in_progress || reload_completed)
9686 && ( reg_mentioned_p (frame_pointer_rtx, op)
9687 || reg_mentioned_p (arg_pointer_rtx, op)
9688 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9689 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9690 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9691 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9692 return FALSE;
9693
9694 /* Constants are converted into offsets from labels. */
9695 if (!MEM_P (op))
9696 return FALSE;
9697
9698 ind = XEXP (op, 0);
9699
9700 if (reload_completed
9701 && (GET_CODE (ind) == LABEL_REF
9702 || (GET_CODE (ind) == CONST
9703 && GET_CODE (XEXP (ind, 0)) == PLUS
9704 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9705 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
9706 return TRUE;
9707
9708 /* Match: (mem (reg)). */
9709 if (REG_P (ind))
9710 return arm_address_register_rtx_p (ind, 0);
9711
9712 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9713 if (GET_CODE (ind) == POST_INC
9714 || GET_CODE (ind) == PRE_DEC)
9715 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9716
9717 return FALSE;
9718 }
9719
9720 /* Return true if X is a register that will be eliminated later on. */
9721 int
9722 arm_eliminable_register (rtx x)
9723 {
9724 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9725 || REGNO (x) == ARG_POINTER_REGNUM
9726 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9727 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9728 }
9729
 9730 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
9731 coprocessor registers. Otherwise return NO_REGS. */
9732
9733 enum reg_class
9734 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9735 {
9736 if (mode == HFmode)
9737 {
9738 if (!TARGET_NEON_FP16)
9739 return GENERAL_REGS;
9740 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9741 return NO_REGS;
9742 return GENERAL_REGS;
9743 }
9744
9745 /* The neon move patterns handle all legitimate vector and struct
9746 addresses. */
9747 if (TARGET_NEON
9748 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9749 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9750 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9751 || VALID_NEON_STRUCT_MODE (mode)))
9752 return NO_REGS;
9753
9754 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9755 return NO_REGS;
9756
9757 return GENERAL_REGS;
9758 }
9759
9760 /* Values which must be returned in the most-significant end of the return
9761 register. */
9762
9763 static bool
9764 arm_return_in_msb (const_tree valtype)
9765 {
9766 return (TARGET_AAPCS_BASED
9767 && BYTES_BIG_ENDIAN
9768 && (AGGREGATE_TYPE_P (valtype)
9769 || TREE_CODE (valtype) == COMPLEX_TYPE
9770 || FIXED_POINT_TYPE_P (valtype)));
9771 }
9772
9773 /* Return TRUE if X references a SYMBOL_REF. */
9774 int
9775 symbol_mentioned_p (rtx x)
9776 {
9777 const char * fmt;
9778 int i;
9779
9780 if (GET_CODE (x) == SYMBOL_REF)
9781 return 1;
9782
9783 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9784 are constant offsets, not symbols. */
9785 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9786 return 0;
9787
9788 fmt = GET_RTX_FORMAT (GET_CODE (x));
9789
9790 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9791 {
9792 if (fmt[i] == 'E')
9793 {
9794 int j;
9795
9796 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9797 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9798 return 1;
9799 }
9800 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9801 return 1;
9802 }
9803
9804 return 0;
9805 }
9806
9807 /* Return TRUE if X references a LABEL_REF. */
9808 int
9809 label_mentioned_p (rtx x)
9810 {
9811 const char * fmt;
9812 int i;
9813
9814 if (GET_CODE (x) == LABEL_REF)
9815 return 1;
9816
9817 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9818 instruction, but they are constant offsets, not symbols. */
9819 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9820 return 0;
9821
9822 fmt = GET_RTX_FORMAT (GET_CODE (x));
9823 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9824 {
9825 if (fmt[i] == 'E')
9826 {
9827 int j;
9828
9829 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9830 if (label_mentioned_p (XVECEXP (x, i, j)))
9831 return 1;
9832 }
9833 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9834 return 1;
9835 }
9836
9837 return 0;
9838 }
9839
9840 int
9841 tls_mentioned_p (rtx x)
9842 {
9843 switch (GET_CODE (x))
9844 {
9845 case CONST:
9846 return tls_mentioned_p (XEXP (x, 0));
9847
9848 case UNSPEC:
9849 if (XINT (x, 1) == UNSPEC_TLS)
9850 return 1;
9851
9852 default:
9853 return 0;
9854 }
9855 }
9856
9857 /* Must not copy any rtx that uses a pc-relative address. */
9858
9859 static int
 9860 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9861 {
9862 if (GET_CODE (*x) == UNSPEC
9863 && (XINT (*x, 1) == UNSPEC_PIC_BASE
9864 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
9865 return 1;
9866 return 0;
9867 }
9868
9869 static bool
9870 arm_cannot_copy_insn_p (rtx insn)
9871 {
9872 /* The tls call insn cannot be copied, as it is paired with a data
9873 word. */
9874 if (recog_memoized (insn) == CODE_FOR_tlscall)
9875 return true;
9876
9877 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9878 }
9879
9880 enum rtx_code
9881 minmax_code (rtx x)
9882 {
9883 enum rtx_code code = GET_CODE (x);
9884
9885 switch (code)
9886 {
9887 case SMAX:
9888 return GE;
9889 case SMIN:
9890 return LE;
9891 case UMIN:
9892 return LEU;
9893 case UMAX:
9894 return GEU;
9895 default:
9896 gcc_unreachable ();
9897 }
9898 }
9899
9900 /* Match pair of min/max operators that can be implemented via usat/ssat. */
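/* Worked example (hypothetical values, added for illustration): with
   HI_BOUND == 255 and LO_BOUND == 0, exact_log2 (255 + 1) == 8, so the pair
   matches an unsigned saturation and *MASK is set to 8 (a "usat #8"
   pattern); with HI_BOUND == 127 and LO_BOUND == -128, *MASK is likewise 8
   but *SIGNED_SAT is true (an "ssat #8" pattern).  */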
9901
9902 bool
9903 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
9904 int *mask, bool *signed_sat)
9905 {
9906 /* The high bound must be a power of two minus one. */
9907 int log = exact_log2 (INTVAL (hi_bound) + 1);
9908 if (log == -1)
9909 return false;
9910
9911 /* The low bound is either zero (for usat) or one less than the
9912 negation of the high bound (for ssat). */
9913 if (INTVAL (lo_bound) == 0)
9914 {
9915 if (mask)
9916 *mask = log;
9917 if (signed_sat)
9918 *signed_sat = false;
9919
9920 return true;
9921 }
9922
9923 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
9924 {
9925 if (mask)
9926 *mask = log + 1;
9927 if (signed_sat)
9928 *signed_sat = true;
9929
9930 return true;
9931 }
9932
9933 return false;
9934 }
9935
9936 /* Return 1 if memory locations are adjacent. */
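/* Illustrative example (hypothetical addresses): SImode references to
   [rN] and [rN, #4] use the same base register and offsets differing by
   exactly 4, so they can be treated as adjacent, subject to the
   arm_ld_sched special case below; references with different base
   registers, or whose offsets differ by anything other than 4, are not.  */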
9937 int
9938 adjacent_mem_locations (rtx a, rtx b)
9939 {
9940 /* We don't guarantee to preserve the order of these memory refs. */
9941 if (volatile_refs_p (a) || volatile_refs_p (b))
9942 return 0;
9943
9944 if ((REG_P (XEXP (a, 0))
9945 || (GET_CODE (XEXP (a, 0)) == PLUS
9946 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
9947 && (REG_P (XEXP (b, 0))
9948 || (GET_CODE (XEXP (b, 0)) == PLUS
9949 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
9950 {
9951 HOST_WIDE_INT val0 = 0, val1 = 0;
9952 rtx reg0, reg1;
9953 int val_diff;
9954
9955 if (GET_CODE (XEXP (a, 0)) == PLUS)
9956 {
9957 reg0 = XEXP (XEXP (a, 0), 0);
9958 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9959 }
9960 else
9961 reg0 = XEXP (a, 0);
9962
9963 if (GET_CODE (XEXP (b, 0)) == PLUS)
9964 {
9965 reg1 = XEXP (XEXP (b, 0), 0);
9966 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9967 }
9968 else
9969 reg1 = XEXP (b, 0);
9970
9971 /* Don't accept any offset that will require multiple
9972 instructions to handle, since this would cause the
9973 arith_adjacentmem pattern to output an overlong sequence. */
9974 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9975 return 0;
9976
9977 /* Don't allow an eliminable register: register elimination can make
9978 the offset too large. */
9979 if (arm_eliminable_register (reg0))
9980 return 0;
9981
9982 val_diff = val1 - val0;
9983
9984 if (arm_ld_sched)
9985 {
9986 /* If the target has load delay slots, then there's no benefit
9987 to using an ldm instruction unless the offset is zero and
9988 we are optimizing for size. */
9989 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9990 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9991 && (val_diff == 4 || val_diff == -4));
9992 }
9993
9994 return ((REGNO (reg0) == REGNO (reg1))
9995 && (val_diff == 4 || val_diff == -4));
9996 }
9997
9998 return 0;
9999 }
10000
10001 /* Return true if OP is a valid load or store multiple operation. LOAD is true
10002 for load operations, false for store operations. CONSECUTIVE is true
10003 if the register numbers in the operation must be consecutive in the register
 10004    bank.  RETURN_PC is true if the value is to be loaded into the PC.
10005 The pattern we are trying to match for load is:
10006 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
10007 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
10008 :
10009 :
10010 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
10011 ]
10012 where
10013 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
10014 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
10015 3. If consecutive is TRUE, then for kth register being loaded,
10016 REGNO (R_dk) = REGNO (R_d0) + k.
10017 The pattern for store is similar. */
10018 bool
10019 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
10020 bool consecutive, bool return_pc)
10021 {
10022 HOST_WIDE_INT count = XVECLEN (op, 0);
10023 rtx reg, mem, addr;
10024 unsigned regno;
10025 unsigned first_regno;
10026 HOST_WIDE_INT i = 1, base = 0, offset = 0;
10027 rtx elt;
10028 bool addr_reg_in_reglist = false;
10029 bool update = false;
10030 int reg_increment;
10031 int offset_adj;
10032 int regs_per_val;
10033
10034 /* If not in SImode, then registers must be consecutive
10035 (e.g., VLDM instructions for DFmode). */
10036 gcc_assert ((mode == SImode) || consecutive);
10037 /* Setting return_pc for stores is illegal. */
10038 gcc_assert (!return_pc || load);
10039
10040 /* Set up the increments and the regs per val based on the mode. */
10041 reg_increment = GET_MODE_SIZE (mode);
10042 regs_per_val = reg_increment / 4;
10043 offset_adj = return_pc ? 1 : 0;
10044
10045 if (count <= 1
10046 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
10047 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
10048 return false;
10049
10050 /* Check if this is a write-back. */
10051 elt = XVECEXP (op, 0, offset_adj);
10052 if (GET_CODE (SET_SRC (elt)) == PLUS)
10053 {
10054 i++;
10055 base = 1;
10056 update = true;
10057
10058 /* The offset adjustment must be the number of registers being
10059 popped times the size of a single register. */
10060 if (!REG_P (SET_DEST (elt))
10061 || !REG_P (XEXP (SET_SRC (elt), 0))
10062 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
10063 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
10064 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
10065 ((count - 1 - offset_adj) * reg_increment))
10066 return false;
10067 }
10068
10069 i = i + offset_adj;
10070 base = base + offset_adj;
10071 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10072 success depends on the type: VLDM can do just one reg,
10073 LDM must do at least two. */
10074 if ((count <= i) && (mode == SImode))
10075 return false;
10076
10077 elt = XVECEXP (op, 0, i - 1);
10078 if (GET_CODE (elt) != SET)
10079 return false;
10080
10081 if (load)
10082 {
10083 reg = SET_DEST (elt);
10084 mem = SET_SRC (elt);
10085 }
10086 else
10087 {
10088 reg = SET_SRC (elt);
10089 mem = SET_DEST (elt);
10090 }
10091
10092 if (!REG_P (reg) || !MEM_P (mem))
10093 return false;
10094
10095 regno = REGNO (reg);
10096 first_regno = regno;
10097 addr = XEXP (mem, 0);
10098 if (GET_CODE (addr) == PLUS)
10099 {
10100 if (!CONST_INT_P (XEXP (addr, 1)))
10101 return false;
10102
10103 offset = INTVAL (XEXP (addr, 1));
10104 addr = XEXP (addr, 0);
10105 }
10106
10107 if (!REG_P (addr))
10108 return false;
10109
10110 /* Don't allow SP to be loaded unless it is also the base register. It
10111 guarantees that SP is reset correctly when an LDM instruction
 10112    is interrupted.  Otherwise, we might end up with a corrupt stack.  */
10113 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10114 return false;
10115
10116 for (; i < count; i++)
10117 {
10118 elt = XVECEXP (op, 0, i);
10119 if (GET_CODE (elt) != SET)
10120 return false;
10121
10122 if (load)
10123 {
10124 reg = SET_DEST (elt);
10125 mem = SET_SRC (elt);
10126 }
10127 else
10128 {
10129 reg = SET_SRC (elt);
10130 mem = SET_DEST (elt);
10131 }
10132
10133 if (!REG_P (reg)
10134 || GET_MODE (reg) != mode
10135 || REGNO (reg) <= regno
10136 || (consecutive
10137 && (REGNO (reg) !=
10138 (unsigned int) (first_regno + regs_per_val * (i - base))))
10139 /* Don't allow SP to be loaded unless it is also the base register. It
10140 guarantees that SP is reset correctly when an LDM instruction
10141 is interrupted. Otherwise, we might end up with a corrupt stack. */
10142 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10143 || !MEM_P (mem)
10144 || GET_MODE (mem) != mode
10145 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10146 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10147 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10148 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10149 offset + (i - base) * reg_increment))
10150 && (!REG_P (XEXP (mem, 0))
10151 || offset + (i - base) * reg_increment != 0)))
10152 return false;
10153
10154 regno = REGNO (reg);
10155 if (regno == REGNO (addr))
10156 addr_reg_in_reglist = true;
10157 }
10158
10159 if (load)
10160 {
10161 if (update && addr_reg_in_reglist)
10162 return false;
10163
 10164       /* For Thumb-1, the address register is always modified - either by write-back
10165 or by explicit load. If the pattern does not describe an update,
10166 then the address register must be in the list of loaded registers. */
10167 if (TARGET_THUMB1)
10168 return update || addr_reg_in_reglist;
10169 }
10170
10171 return true;
10172 }
10173
10174 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10175 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10176 instruction. ADD_OFFSET is nonzero if the base address register needs
10177 to be modified with an add instruction before we can use it. */
10178
10179 static bool
10180 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10181 int nops, HOST_WIDE_INT add_offset)
10182 {
10183 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10184 if the offset isn't small enough. The reason 2 ldrs are faster
10185 is because these ARMs are able to do more than one cache access
10186 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10187 whilst the ARM8 has a double bandwidth cache. This means that
10188 these cores can do both an instruction fetch and a data fetch in
10189 a single cycle, so the trick of calculating the address into a
10190 scratch register (one of the result regs) and then doing a load
10191 multiple actually becomes slower (and no smaller in code size).
10192 That is the transformation
10193
10194 ldr rd1, [rbase + offset]
10195 ldr rd2, [rbase + offset + 4]
10196
10197 to
10198
10199 add rd1, rbase, offset
10200 ldmia rd1, {rd1, rd2}
10201
10202 produces worse code -- '3 cycles + any stalls on rd2' instead of
10203 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10204 access per cycle, the first sequence could never complete in less
10205 than 6 cycles, whereas the ldm sequence would only take 5 and
10206 would make better use of sequential accesses if not hitting the
10207 cache.
10208
10209 We cheat here and test 'arm_ld_sched' which we currently know to
10210 only be true for the ARM8, ARM9 and StrongARM. If this ever
10211 changes, then the test below needs to be reworked. */
10212 if (nops == 2 && arm_ld_sched && add_offset != 0)
10213 return false;
10214
10215 /* XScale has load-store double instructions, but they have stricter
10216 alignment requirements than load-store multiple, so we cannot
10217 use them.
10218
10219 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10220 the pipeline until completion.
10221
10222 NREGS CYCLES
10223 1 3
10224 2 4
10225 3 5
10226 4 6
10227
10228 An ldr instruction takes 1-3 cycles, but does not block the
10229 pipeline.
10230
10231 NREGS CYCLES
10232 1 1-3
10233 2 2-6
10234 3 3-9
10235 4 4-12
10236
10237 Best case ldr will always win. However, the more ldr instructions
10238 we issue, the less likely we are to be able to schedule them well.
10239 Using ldr instructions also increases code size.
10240
10241 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10242 for counts of 3 or 4 regs. */
10243 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10244 return false;
10245 return true;
10246 }
10247
10248 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10249 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10250 an array ORDER which describes the sequence to use when accessing the
10251 offsets that produces an ascending order. In this sequence, each
10252 offset must be larger by exactly 4 than the previous one. ORDER[0]
10253 must have been filled in with the lowest offset by the caller.
10254 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10255 we use to verify that ORDER produces an ascending order of registers.
10256 Return true if it was possible to construct such an order, false if
10257 not. */
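/* Worked example (hypothetical values, added for illustration): with
   UNSORTED_OFFSETS == {8, 0, 4, 12} and ORDER[0] == 1 (the index of the
   lowest offset), the loop fills ORDER with {1, 2, 0, 3}, i.e. offsets
   0, 4, 8, 12.  If UNSORTED_REGS were {3, 1, 2, 4}, that same ORDER visits
   the registers as 1, 2, 3, 4, which is ascending, so the function returns
   true.  */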
10258
10259 static bool
10260 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10261 int *unsorted_regs)
10262 {
10263 int i;
10264 for (i = 1; i < nops; i++)
10265 {
10266 int j;
10267
10268 order[i] = order[i - 1];
10269 for (j = 0; j < nops; j++)
10270 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10271 {
10272 /* We must find exactly one offset that is higher than the
10273 previous one by 4. */
10274 if (order[i] != order[i - 1])
10275 return false;
10276 order[i] = j;
10277 }
10278 if (order[i] == order[i - 1])
10279 return false;
10280 /* The register numbers must be ascending. */
10281 if (unsorted_regs != NULL
10282 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10283 return false;
10284 }
10285 return true;
10286 }
10287
10288 /* Used to determine in a peephole whether a sequence of load
10289 instructions can be changed into a load-multiple instruction.
10290 NOPS is the number of separate load instructions we are examining. The
10291 first NOPS entries in OPERANDS are the destination registers, the
10292 next NOPS entries are memory operands. If this function is
10293 successful, *BASE is set to the common base register of the memory
10294 accesses; *LOAD_OFFSET is set to the first memory location's offset
10295 from that base register.
10296 REGS is an array filled in with the destination register numbers.
10297 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
10298 insn numbers to an ascending order of stores. If CHECK_REGS is true,
10299 the sequence of registers in REGS matches the loads from ascending memory
10300 locations, and the function verifies that the register numbers are
10301 themselves ascending. If CHECK_REGS is false, the register numbers
10302 are stored in the order they are found in the operands. */
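/* Illustrative example (hypothetical operands): for the two-insn sequence
   "ldr r2, [r5, #4]; ldr r3, [r5, #8]" the memory references share base
   register r5, the offsets 4 and 8 are adjacent, and the destination
   register numbers ascend, so on ARM this returns 2 (the ldmib case) with
   REGS == {2, 3}, *BASE == 5 and *LOAD_OFFSET == 4, assuming
   multiple_operation_profitable_p agrees.  */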
10303 static int
10304 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10305 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10306 {
10307 int unsorted_regs[MAX_LDM_STM_OPS];
10308 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10309 int order[MAX_LDM_STM_OPS];
10310 rtx base_reg_rtx = NULL;
10311 int base_reg = -1;
10312 int i, ldm_case;
10313
10314 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10315 easily extended if required. */
10316 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10317
10318 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10319
10320 /* Loop over the operands and check that the memory references are
10321 suitable (i.e. immediate offsets from the same base register). At
10322 the same time, extract the target register, and the memory
10323 offsets. */
10324 for (i = 0; i < nops; i++)
10325 {
10326 rtx reg;
10327 rtx offset;
10328
10329 /* Convert a subreg of a mem into the mem itself. */
10330 if (GET_CODE (operands[nops + i]) == SUBREG)
10331 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10332
10333 gcc_assert (MEM_P (operands[nops + i]));
10334
10335 /* Don't reorder volatile memory references; it doesn't seem worth
10336 looking for the case where the order is ok anyway. */
10337 if (MEM_VOLATILE_P (operands[nops + i]))
10338 return 0;
10339
10340 offset = const0_rtx;
10341
10342 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10343 || (GET_CODE (reg) == SUBREG
10344 && REG_P (reg = SUBREG_REG (reg))))
10345 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10346 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10347 || (GET_CODE (reg) == SUBREG
10348 && REG_P (reg = SUBREG_REG (reg))))
10349 && (CONST_INT_P (offset
10350 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10351 {
10352 if (i == 0)
10353 {
10354 base_reg = REGNO (reg);
10355 base_reg_rtx = reg;
10356 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10357 return 0;
10358 }
10359 else if (base_reg != (int) REGNO (reg))
10360 /* Not addressed from the same base register. */
10361 return 0;
10362
10363 unsorted_regs[i] = (REG_P (operands[i])
10364 ? REGNO (operands[i])
10365 : REGNO (SUBREG_REG (operands[i])));
10366
10367 /* If it isn't an integer register, or if it overwrites the
10368 base register but isn't the last insn in the list, then
10369 we can't do this. */
10370 if (unsorted_regs[i] < 0
10371 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10372 || unsorted_regs[i] > 14
10373 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10374 return 0;
10375
10376 unsorted_offsets[i] = INTVAL (offset);
10377 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10378 order[0] = i;
10379 }
10380 else
10381 /* Not a suitable memory address. */
10382 return 0;
10383 }
10384
10385 /* All the useful information has now been extracted from the
10386 operands into unsorted_regs and unsorted_offsets; additionally,
10387 order[0] has been set to the lowest offset in the list. Sort
10388 the offsets into order, verifying that they are adjacent, and
10389 check that the register numbers are ascending. */
10390 if (!compute_offset_order (nops, unsorted_offsets, order,
10391 check_regs ? unsorted_regs : NULL))
10392 return 0;
10393
10394 if (saved_order)
10395 memcpy (saved_order, order, sizeof order);
10396
10397 if (base)
10398 {
10399 *base = base_reg;
10400
10401 for (i = 0; i < nops; i++)
10402 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10403
10404 *load_offset = unsorted_offsets[order[0]];
10405 }
10406
10407 if (TARGET_THUMB1
10408 && !peep2_reg_dead_p (nops, base_reg_rtx))
10409 return 0;
10410
10411 if (unsorted_offsets[order[0]] == 0)
10412 ldm_case = 1; /* ldmia */
10413 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10414 ldm_case = 2; /* ldmib */
10415 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10416 ldm_case = 3; /* ldmda */
10417 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10418 ldm_case = 4; /* ldmdb */
10419 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10420 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10421 ldm_case = 5;
10422 else
10423 return 0;
10424
10425 if (!multiple_operation_profitable_p (false, nops,
10426 ldm_case == 5
10427 ? unsorted_offsets[order[0]] : 0))
10428 return 0;
10429
10430 return ldm_case;
10431 }
10432
10433 /* Used to determine in a peephole whether a sequence of store instructions can
10434 be changed into a store-multiple instruction.
10435 NOPS is the number of separate store instructions we are examining.
10436 NOPS_TOTAL is the total number of instructions recognized by the peephole
10437 pattern.
10438 The first NOPS entries in OPERANDS are the source registers, the next
10439 NOPS entries are memory operands. If this function is successful, *BASE is
10440 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10441 to the first memory location's offset from that base register. REGS is an
10442 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10443 likewise filled with the corresponding rtx's.
10444 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
10445 numbers to an ascending order of stores.
10446 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10447 from ascending memory locations, and the function verifies that the register
10448 numbers are themselves ascending. If CHECK_REGS is false, the register
10449 numbers are stored in the order they are found in the operands. */
10450 static int
10451 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10452 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10453 HOST_WIDE_INT *load_offset, bool check_regs)
10454 {
10455 int unsorted_regs[MAX_LDM_STM_OPS];
10456 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10457 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10458 int order[MAX_LDM_STM_OPS];
10459 int base_reg = -1;
10460 rtx base_reg_rtx = NULL;
10461 int i, stm_case;
10462
 10463   /* Write-back of the base register is currently only supported for Thumb-1.  */
10464 int base_writeback = TARGET_THUMB1;
10465
10466 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10467 easily extended if required. */
10468 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10469
10470 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10471
10472 /* Loop over the operands and check that the memory references are
10473 suitable (i.e. immediate offsets from the same base register). At
10474 the same time, extract the target register, and the memory
10475 offsets. */
10476 for (i = 0; i < nops; i++)
10477 {
10478 rtx reg;
10479 rtx offset;
10480
10481 /* Convert a subreg of a mem into the mem itself. */
10482 if (GET_CODE (operands[nops + i]) == SUBREG)
10483 operands[nops + i] = alter_subreg (operands + (nops + i), true);
10484
10485 gcc_assert (MEM_P (operands[nops + i]));
10486
10487 /* Don't reorder volatile memory references; it doesn't seem worth
10488 looking for the case where the order is ok anyway. */
10489 if (MEM_VOLATILE_P (operands[nops + i]))
10490 return 0;
10491
10492 offset = const0_rtx;
10493
10494 if ((REG_P (reg = XEXP (operands[nops + i], 0))
10495 || (GET_CODE (reg) == SUBREG
10496 && REG_P (reg = SUBREG_REG (reg))))
10497 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10498 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
10499 || (GET_CODE (reg) == SUBREG
10500 && REG_P (reg = SUBREG_REG (reg))))
10501 && (CONST_INT_P (offset
10502 = XEXP (XEXP (operands[nops + i], 0), 1)))))
10503 {
10504 unsorted_reg_rtxs[i] = (REG_P (operands[i])
10505 ? operands[i] : SUBREG_REG (operands[i]));
10506 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10507
10508 if (i == 0)
10509 {
10510 base_reg = REGNO (reg);
10511 base_reg_rtx = reg;
10512 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10513 return 0;
10514 }
10515 else if (base_reg != (int) REGNO (reg))
10516 /* Not addressed from the same base register. */
10517 return 0;
10518
10519 /* If it isn't an integer register, then we can't do this. */
10520 if (unsorted_regs[i] < 0
10521 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10522 /* The effects are unpredictable if the base register is
10523 both updated and stored. */
10524 || (base_writeback && unsorted_regs[i] == base_reg)
10525 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10526 || unsorted_regs[i] > 14)
10527 return 0;
10528
10529 unsorted_offsets[i] = INTVAL (offset);
10530 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10531 order[0] = i;
10532 }
10533 else
10534 /* Not a suitable memory address. */
10535 return 0;
10536 }
10537
10538 /* All the useful information has now been extracted from the
10539 operands into unsorted_regs and unsorted_offsets; additionally,
10540 order[0] has been set to the lowest offset in the list. Sort
10541 the offsets into order, verifying that they are adjacent, and
10542 check that the register numbers are ascending. */
10543 if (!compute_offset_order (nops, unsorted_offsets, order,
10544 check_regs ? unsorted_regs : NULL))
10545 return 0;
10546
10547 if (saved_order)
10548 memcpy (saved_order, order, sizeof order);
10549
10550 if (base)
10551 {
10552 *base = base_reg;
10553
10554 for (i = 0; i < nops; i++)
10555 {
10556 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10557 if (reg_rtxs)
10558 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10559 }
10560
10561 *load_offset = unsorted_offsets[order[0]];
10562 }
10563
10564 if (TARGET_THUMB1
10565 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10566 return 0;
10567
10568 if (unsorted_offsets[order[0]] == 0)
10569 stm_case = 1; /* stmia */
10570 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10571 stm_case = 2; /* stmib */
10572 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10573 stm_case = 3; /* stmda */
10574 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10575 stm_case = 4; /* stmdb */
10576 else
10577 return 0;
10578
10579 if (!multiple_operation_profitable_p (false, nops, 0))
10580 return 0;
10581
10582 return stm_case;
10583 }
10584 \f
10585 /* Routines for use in generating RTL. */
10586
10587 /* Generate a load-multiple instruction. COUNT is the number of loads in
10588 the instruction; REGS and MEMS are arrays containing the operands.
10589 BASEREG is the base register to be used in addressing the memory operands.
10590 WBACK_OFFSET is nonzero if the instruction should update the base
10591 register. */
10592
10593 static rtx
10594 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10595 HOST_WIDE_INT wback_offset)
10596 {
10597 int i = 0, j;
10598 rtx result;
10599
10600 if (!multiple_operation_profitable_p (false, count, 0))
10601 {
10602 rtx seq;
10603
10604 start_sequence ();
10605
10606 for (i = 0; i < count; i++)
10607 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10608
10609 if (wback_offset != 0)
10610 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10611
10612 seq = get_insns ();
10613 end_sequence ();
10614
10615 return seq;
10616 }
10617
10618 result = gen_rtx_PARALLEL (VOIDmode,
10619 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10620 if (wback_offset != 0)
10621 {
10622 XVECEXP (result, 0, 0)
10623 = gen_rtx_SET (VOIDmode, basereg,
10624 plus_constant (Pmode, basereg, wback_offset));
10625 i = 1;
10626 count++;
10627 }
10628
10629 for (j = 0; i < count; i++, j++)
10630 XVECEXP (result, 0, i)
10631 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10632
10633 return result;
10634 }
10635
10636 /* Generate a store-multiple instruction. COUNT is the number of stores in
10637 the instruction; REGS and MEMS are arrays containing the operands.
10638 BASEREG is the base register to be used in addressing the memory operands.
10639 WBACK_OFFSET is nonzero if the instruction should update the base
10640 register. */
10641
10642 static rtx
10643 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10644 HOST_WIDE_INT wback_offset)
10645 {
10646 int i = 0, j;
10647 rtx result;
10648
10649 if (GET_CODE (basereg) == PLUS)
10650 basereg = XEXP (basereg, 0);
10651
10652 if (!multiple_operation_profitable_p (false, count, 0))
10653 {
10654 rtx seq;
10655
10656 start_sequence ();
10657
10658 for (i = 0; i < count; i++)
10659 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10660
10661 if (wback_offset != 0)
10662 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10663
10664 seq = get_insns ();
10665 end_sequence ();
10666
10667 return seq;
10668 }
10669
10670 result = gen_rtx_PARALLEL (VOIDmode,
10671 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10672 if (wback_offset != 0)
10673 {
10674 XVECEXP (result, 0, 0)
10675 = gen_rtx_SET (VOIDmode, basereg,
10676 plus_constant (Pmode, basereg, wback_offset));
10677 i = 1;
10678 count++;
10679 }
10680
10681 for (j = 0; i < count; i++, j++)
10682 XVECEXP (result, 0, i)
10683 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10684
10685 return result;
10686 }
10687
10688 /* Generate either a load-multiple or a store-multiple instruction. This
10689 function can be used in situations where we can start with a single MEM
10690 rtx and adjust its address upwards.
10691 COUNT is the number of operations in the instruction, not counting a
10692 possible update of the base register. REGS is an array containing the
10693 register operands.
10694 BASEREG is the base register to be used in addressing the memory operands,
10695 which are constructed from BASEMEM.
10696 WRITE_BACK specifies whether the generated instruction should include an
10697 update of the base register.
10698 OFFSETP is used to pass an offset to and from this function; this offset
 10699    is not used when constructing the address (instead BASEMEM should have an
 10700    appropriate offset in its address); it is used only for setting
 10701    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
10702
10703 static rtx
10704 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10705 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10706 {
10707 rtx mems[MAX_LDM_STM_OPS];
10708 HOST_WIDE_INT offset = *offsetp;
10709 int i;
10710
10711 gcc_assert (count <= MAX_LDM_STM_OPS);
10712
10713 if (GET_CODE (basereg) == PLUS)
10714 basereg = XEXP (basereg, 0);
10715
10716 for (i = 0; i < count; i++)
10717 {
10718 rtx addr = plus_constant (Pmode, basereg, i * 4);
10719 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10720 offset += 4;
10721 }
10722
10723 if (write_back)
10724 *offsetp = offset;
10725
10726 if (is_load)
10727 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10728 write_back ? 4 * count : 0);
10729 else
10730 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10731 write_back ? 4 * count : 0);
10732 }
10733
10734 rtx
10735 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10736 rtx basemem, HOST_WIDE_INT *offsetp)
10737 {
10738 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10739 offsetp);
10740 }
10741
10742 rtx
10743 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10744 rtx basemem, HOST_WIDE_INT *offsetp)
10745 {
10746 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10747 offsetp);
10748 }
10749
10750 /* Called from a peephole2 expander to turn a sequence of loads into an
10751 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10752 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10753 is true if we can reorder the registers because they are used commutatively
10754 subsequently.
10755 Returns true iff we could generate a new instruction. */
10756
10757 bool
10758 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10759 {
10760 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10761 rtx mems[MAX_LDM_STM_OPS];
10762 int i, j, base_reg;
10763 rtx base_reg_rtx;
10764 HOST_WIDE_INT offset;
10765 int write_back = FALSE;
10766 int ldm_case;
10767 rtx addr;
10768
10769 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10770 &base_reg, &offset, !sort_regs);
10771
10772 if (ldm_case == 0)
10773 return false;
10774
10775 if (sort_regs)
10776 for (i = 0; i < nops - 1; i++)
10777 for (j = i + 1; j < nops; j++)
10778 if (regs[i] > regs[j])
10779 {
10780 int t = regs[i];
10781 regs[i] = regs[j];
10782 regs[j] = t;
10783 }
10784 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10785
10786 if (TARGET_THUMB1)
10787 {
10788 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10789 gcc_assert (ldm_case == 1 || ldm_case == 5);
10790 write_back = TRUE;
10791 }
10792
10793 if (ldm_case == 5)
10794 {
10795 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10796 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10797 offset = 0;
10798 if (!TARGET_THUMB1)
10799 {
10800 base_reg = regs[0];
10801 base_reg_rtx = newbase;
10802 }
10803 }
10804
10805 for (i = 0; i < nops; i++)
10806 {
10807 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10808 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10809 SImode, addr, 0);
10810 }
10811 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10812 write_back ? offset + i * 4 : 0));
10813 return true;
10814 }
10815
10816 /* Called from a peephole2 expander to turn a sequence of stores into an
10817 STM instruction. OPERANDS are the operands found by the peephole matcher;
10818 NOPS indicates how many separate stores we are trying to combine.
10819 Returns true iff we could generate a new instruction. */
10820
10821 bool
10822 gen_stm_seq (rtx *operands, int nops)
10823 {
10824 int i;
10825 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10826 rtx mems[MAX_LDM_STM_OPS];
10827 int base_reg;
10828 rtx base_reg_rtx;
10829 HOST_WIDE_INT offset;
10830 int write_back = FALSE;
10831 int stm_case;
10832 rtx addr;
10833 bool base_reg_dies;
10834
10835 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10836 mem_order, &base_reg, &offset, true);
10837
10838 if (stm_case == 0)
10839 return false;
10840
10841 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10842
10843 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10844 if (TARGET_THUMB1)
10845 {
10846 gcc_assert (base_reg_dies);
10847 write_back = TRUE;
10848 }
10849
10850 if (stm_case == 5)
10851 {
10852 gcc_assert (base_reg_dies);
10853 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10854 offset = 0;
10855 }
10856
10857 addr = plus_constant (Pmode, base_reg_rtx, offset);
10858
10859 for (i = 0; i < nops; i++)
10860 {
10861 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10862 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10863 SImode, addr, 0);
10864 }
10865 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10866 write_back ? offset + i * 4 : 0));
10867 return true;
10868 }
10869
10870 /* Called from a peephole2 expander to turn a sequence of stores that are
10871 preceded by constant loads into an STM instruction. OPERANDS are the
10872 operands found by the peephole matcher; NOPS indicates how many
10873 separate stores we are trying to combine; there are 2 * NOPS
10874 instructions in the peephole.
10875 Returns true iff we could generate a new instruction. */
10876
10877 bool
10878 gen_const_stm_seq (rtx *operands, int nops)
10879 {
10880 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10881 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10882 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10883 rtx mems[MAX_LDM_STM_OPS];
10884 int base_reg;
10885 rtx base_reg_rtx;
10886 HOST_WIDE_INT offset;
10887 int write_back = FALSE;
10888 int stm_case;
10889 rtx addr;
10890 bool base_reg_dies;
10891 int i, j;
10892 HARD_REG_SET allocated;
10893
10894 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10895 mem_order, &base_reg, &offset, false);
10896
10897 if (stm_case == 0)
10898 return false;
10899
10900 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10901
10902 /* If the same register is used more than once, try to find a free
10903 register. */
10904 CLEAR_HARD_REG_SET (allocated);
10905 for (i = 0; i < nops; i++)
10906 {
10907 for (j = i + 1; j < nops; j++)
10908 if (regs[i] == regs[j])
10909 {
10910 rtx t = peep2_find_free_register (0, nops * 2,
10911 TARGET_THUMB1 ? "l" : "r",
10912 SImode, &allocated);
10913 if (t == NULL_RTX)
10914 return false;
10915 reg_rtxs[i] = t;
10916 regs[i] = REGNO (t);
10917 }
10918 }
10919
10920 /* Compute an ordering that maps the register numbers to an ascending
10921 sequence. */
10922 reg_order[0] = 0;
10923 for (i = 0; i < nops; i++)
10924 if (regs[i] < regs[reg_order[0]])
10925 reg_order[0] = i;
10926
10927 for (i = 1; i < nops; i++)
10928 {
10929 int this_order = reg_order[i - 1];
10930 for (j = 0; j < nops; j++)
10931 if (regs[j] > regs[reg_order[i - 1]]
10932 && (this_order == reg_order[i - 1]
10933 || regs[j] < regs[this_order]))
10934 this_order = j;
10935 reg_order[i] = this_order;
10936 }
10937
10938 /* Ensure that registers that must be live after the instruction end
10939 up with the correct value. */
10940 for (i = 0; i < nops; i++)
10941 {
10942 int this_order = reg_order[i];
10943 if ((this_order != mem_order[i]
10944 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10945 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10946 return false;
10947 }
10948
10949 /* Load the constants. */
10950 for (i = 0; i < nops; i++)
10951 {
10952 rtx op = operands[2 * nops + mem_order[i]];
10953 sorted_regs[i] = regs[reg_order[i]];
10954 emit_move_insn (reg_rtxs[reg_order[i]], op);
10955 }
10956
10957 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10958
10959 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10960 if (TARGET_THUMB1)
10961 {
10962 gcc_assert (base_reg_dies);
10963 write_back = TRUE;
10964 }
10965
10966 if (stm_case == 5)
10967 {
10968 gcc_assert (base_reg_dies);
10969 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10970 offset = 0;
10971 }
10972
10973 addr = plus_constant (Pmode, base_reg_rtx, offset);
10974
10975 for (i = 0; i < nops; i++)
10976 {
10977 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10978 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10979 SImode, addr, 0);
10980 }
10981 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10982 write_back ? offset + i * 4 : 0));
10983 return true;
10984 }
10985
10986 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10987 unaligned copies on processors which support unaligned semantics for those
10988 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10989 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10990 An interleave factor of 1 (the minimum) will perform no interleaving.
10991 Load/store multiple are used for aligned addresses where possible. */
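/* As a sketch (illustrative only), with INTERLEAVE_FACTOR == 2 and an
   unaligned source, each 8-byte chunk is copied roughly as

       ldr rA, [src]        @ unaligned-capable load
       ldr rB, [src, #4]
       str rA, [dst]
       str rB, [dst, #4]

   so the second load can issue before the first loaded value is needed by
   its store.  */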
10992
10993 static void
10994 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10995 HOST_WIDE_INT length,
10996 unsigned int interleave_factor)
10997 {
10998 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10999 int *regnos = XALLOCAVEC (int, interleave_factor);
11000 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
11001 HOST_WIDE_INT i, j;
11002 HOST_WIDE_INT remaining = length, words;
11003 rtx halfword_tmp = NULL, byte_tmp = NULL;
11004 rtx dst, src;
11005 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
11006 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
11007 HOST_WIDE_INT srcoffset, dstoffset;
11008 HOST_WIDE_INT src_autoinc, dst_autoinc;
11009 rtx mem, addr;
11010
11011 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
11012
11013 /* Use hard registers if we have aligned source or destination so we can use
11014 load/store multiple with contiguous registers. */
11015 if (dst_aligned || src_aligned)
11016 for (i = 0; i < interleave_factor; i++)
11017 regs[i] = gen_rtx_REG (SImode, i);
11018 else
11019 for (i = 0; i < interleave_factor; i++)
11020 regs[i] = gen_reg_rtx (SImode);
11021
11022 dst = copy_addr_to_reg (XEXP (dstbase, 0));
11023 src = copy_addr_to_reg (XEXP (srcbase, 0));
11024
11025 srcoffset = dstoffset = 0;
11026
11027 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
11028 For copying the last bytes we want to subtract this offset again. */
11029 src_autoinc = dst_autoinc = 0;
11030
11031 for (i = 0; i < interleave_factor; i++)
11032 regnos[i] = i;
11033
11034 /* Copy BLOCK_SIZE_BYTES chunks. */
11035
11036 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
11037 {
11038 /* Load words. */
11039 if (src_aligned && interleave_factor > 1)
11040 {
11041 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
11042 TRUE, srcbase, &srcoffset));
11043 src_autoinc += UNITS_PER_WORD * interleave_factor;
11044 }
11045 else
11046 {
11047 for (j = 0; j < interleave_factor; j++)
11048 {
11049 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
11050 - src_autoinc));
11051 mem = adjust_automodify_address (srcbase, SImode, addr,
11052 srcoffset + j * UNITS_PER_WORD);
11053 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11054 }
11055 srcoffset += block_size_bytes;
11056 }
11057
11058 /* Store words. */
11059 if (dst_aligned && interleave_factor > 1)
11060 {
11061 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
11062 TRUE, dstbase, &dstoffset));
11063 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11064 }
11065 else
11066 {
11067 for (j = 0; j < interleave_factor; j++)
11068 {
11069 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
11070 - dst_autoinc));
11071 mem = adjust_automodify_address (dstbase, SImode, addr,
11072 dstoffset + j * UNITS_PER_WORD);
11073 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11074 }
11075 dstoffset += block_size_bytes;
11076 }
11077
11078 remaining -= block_size_bytes;
11079 }
11080
11081 /* Copy any whole words left (note these aren't interleaved with any
11082 subsequent halfword/byte load/stores in the interests of simplicity). */
11083
11084 words = remaining / UNITS_PER_WORD;
11085
11086 gcc_assert (words < interleave_factor);
11087
11088 if (src_aligned && words > 1)
11089 {
11090 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11091 &srcoffset));
11092 src_autoinc += UNITS_PER_WORD * words;
11093 }
11094 else
11095 {
11096 for (j = 0; j < words; j++)
11097 {
11098 addr = plus_constant (Pmode, src,
11099 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11100 mem = adjust_automodify_address (srcbase, SImode, addr,
11101 srcoffset + j * UNITS_PER_WORD);
11102 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11103 }
11104 srcoffset += words * UNITS_PER_WORD;
11105 }
11106
11107 if (dst_aligned && words > 1)
11108 {
11109 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11110 &dstoffset));
11111 dst_autoinc += words * UNITS_PER_WORD;
11112 }
11113 else
11114 {
11115 for (j = 0; j < words; j++)
11116 {
11117 addr = plus_constant (Pmode, dst,
11118 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11119 mem = adjust_automodify_address (dstbase, SImode, addr,
11120 dstoffset + j * UNITS_PER_WORD);
11121 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11122 }
11123 dstoffset += words * UNITS_PER_WORD;
11124 }
11125
11126 remaining -= words * UNITS_PER_WORD;
11127
11128 gcc_assert (remaining < 4);
11129
11130 /* Copy a halfword if necessary. */
11131
11132 if (remaining >= 2)
11133 {
11134 halfword_tmp = gen_reg_rtx (SImode);
11135
11136 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11137 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11138 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11139
11140 /* Either write out immediately, or delay until we've loaded the last
11141 byte, depending on interleave factor. */
11142 if (interleave_factor == 1)
11143 {
11144 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11145 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11146 emit_insn (gen_unaligned_storehi (mem,
11147 gen_lowpart (HImode, halfword_tmp)));
11148 halfword_tmp = NULL;
11149 dstoffset += 2;
11150 }
11151
11152 remaining -= 2;
11153 srcoffset += 2;
11154 }
11155
11156 gcc_assert (remaining < 2);
11157
11158 /* Copy last byte. */
11159
11160 if ((remaining & 1) != 0)
11161 {
11162 byte_tmp = gen_reg_rtx (SImode);
11163
11164 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11165 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11166 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11167
11168 if (interleave_factor == 1)
11169 {
11170 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11171 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11172 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11173 byte_tmp = NULL;
11174 dstoffset++;
11175 }
11176
11177 remaining--;
11178 srcoffset++;
11179 }
11180
11181 /* Store last halfword if we haven't done so already. */
11182
11183 if (halfword_tmp)
11184 {
11185 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11186 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11187 emit_insn (gen_unaligned_storehi (mem,
11188 gen_lowpart (HImode, halfword_tmp)));
11189 dstoffset += 2;
11190 }
11191
11192 /* Likewise for last byte. */
11193
11194 if (byte_tmp)
11195 {
11196 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11197 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11198 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11199 dstoffset++;
11200 }
11201
11202 gcc_assert (remaining == 0 && srcoffset == dstoffset);
11203 }
11204
11205 /* From mips_adjust_block_mem:
11206
11207 Helper function for doing a loop-based block operation on memory
11208 reference MEM. Each iteration of the loop will operate on LENGTH
11209 bytes of MEM.
11210
11211 Create a new base register for use within the loop and point it to
11212 the start of MEM. Create a new memory reference that uses this
11213 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11214
11215 static void
11216 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11217 rtx *loop_mem)
11218 {
11219 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11220
11221 /* Although the new mem does not refer to a known location,
11222 it does keep up to LENGTH bytes of alignment. */
11223 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11224 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11225 }
11226
11227 /* From mips_block_move_loop:
11228
11229 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11230 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11231 the memory regions do not overlap. */
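/* A small worked example (illustrative): with LENGTH == 100 and
   BYTES_PER_ITER == 16, the loop below copies 96 bytes in six iterations and
   the remaining 4 bytes are handled by the straight-line tail copy.  */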
11232
11233 static void
11234 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11235 unsigned int interleave_factor,
11236 HOST_WIDE_INT bytes_per_iter)
11237 {
11238 rtx label, src_reg, dest_reg, final_src, test;
11239 HOST_WIDE_INT leftover;
11240
11241 leftover = length % bytes_per_iter;
11242 length -= leftover;
11243
11244 /* Create registers and memory references for use within the loop. */
11245 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11246 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11247
11248 /* Calculate the value that SRC_REG should have after the last iteration of
11249 the loop. */
11250 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11251 0, 0, OPTAB_WIDEN);
11252
11253 /* Emit the start of the loop. */
11254 label = gen_label_rtx ();
11255 emit_label (label);
11256
11257 /* Emit the loop body. */
11258 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11259 interleave_factor);
11260
11261 /* Move on to the next block. */
11262 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11263 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11264
11265 /* Emit the loop condition. */
11266 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11267 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11268
11269 /* Mop up any left-over bytes. */
11270 if (leftover)
11271 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11272 }
11273
11274 /* Emit a block move when either the source or destination is unaligned (not
11275 aligned to a four-byte boundary). This may need further tuning depending on
11276 core type, optimize_size setting, etc. */
11277
11278 static int
11279 arm_movmemqi_unaligned (rtx *operands)
11280 {
11281 HOST_WIDE_INT length = INTVAL (operands[2]);
11282
11283 if (optimize_size)
11284 {
11285 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11286 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11287 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11288 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11289 or dst_aligned though: allow more interleaving in those cases since the
11290 resulting code can be smaller. */
11291 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11292 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11293
11294 if (length > 12)
11295 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11296 interleave_factor, bytes_per_iter);
11297 else
11298 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11299 interleave_factor);
11300 }
11301 else
11302 {
11303 /* Note that the loop created by arm_block_move_unaligned_loop may be
11304 subject to loop unrolling, which makes tuning this condition a little
11305 redundant. */
11306 if (length > 32)
11307 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11308 else
11309 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11310 }
11311
11312 return 1;
11313 }
11314
11315 int
11316 arm_gen_movmemqi (rtx *operands)
11317 {
11318 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11319 HOST_WIDE_INT srcoffset, dstoffset;
11320 int i;
11321 rtx src, dst, srcbase, dstbase;
11322 rtx part_bytes_reg = NULL;
11323 rtx mem;
11324
11325 if (!CONST_INT_P (operands[2])
11326 || !CONST_INT_P (operands[3])
11327 || INTVAL (operands[2]) > 64)
11328 return 0;
11329
11330 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11331 return arm_movmemqi_unaligned (operands);
11332
11333 if (INTVAL (operands[3]) & 3)
11334 return 0;
11335
11336 dstbase = operands[0];
11337 srcbase = operands[1];
11338
11339 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11340 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11341
11342 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11343 out_words_to_go = INTVAL (operands[2]) / 4;
11344 last_bytes = INTVAL (operands[2]) & 3;
11345 dstoffset = srcoffset = 0;
11346
11347 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11348 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11349
11350 for (i = 0; in_words_to_go >= 2; i+=4)
11351 {
11352 if (in_words_to_go > 4)
11353 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11354 TRUE, srcbase, &srcoffset));
11355 else
11356 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11357 src, FALSE, srcbase,
11358 &srcoffset));
11359
11360 if (out_words_to_go)
11361 {
11362 if (out_words_to_go > 4)
11363 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11364 TRUE, dstbase, &dstoffset));
11365 else if (out_words_to_go != 1)
11366 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11367 out_words_to_go, dst,
11368 (last_bytes == 0
11369 ? FALSE : TRUE),
11370 dstbase, &dstoffset));
11371 else
11372 {
11373 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11374 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11375 if (last_bytes != 0)
11376 {
11377 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11378 dstoffset += 4;
11379 }
11380 }
11381 }
11382
11383 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11384 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11385 }
11386
11387 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11388 if (out_words_to_go)
11389 {
11390 rtx sreg;
11391
11392 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11393 sreg = copy_to_reg (mem);
11394
11395 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11396 emit_move_insn (mem, sreg);
11397 in_words_to_go--;
11398
11399 gcc_assert (!in_words_to_go); /* Sanity check */
11400 }
11401
11402 if (in_words_to_go)
11403 {
11404 gcc_assert (in_words_to_go > 0);
11405
11406 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11407 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11408 }
11409
11410 gcc_assert (!last_bytes || part_bytes_reg);
11411
11412 if (BYTES_BIG_ENDIAN && last_bytes)
11413 {
11414 rtx tmp = gen_reg_rtx (SImode);
11415
11416 /* The bytes we want are in the top end of the word. */
11417 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11418 GEN_INT (8 * (4 - last_bytes))));
11419 part_bytes_reg = tmp;
11420
11421 while (last_bytes)
11422 {
11423 mem = adjust_automodify_address (dstbase, QImode,
11424 plus_constant (Pmode, dst,
11425 last_bytes - 1),
11426 dstoffset + last_bytes - 1);
11427 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11428
11429 if (--last_bytes)
11430 {
11431 tmp = gen_reg_rtx (SImode);
11432 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11433 part_bytes_reg = tmp;
11434 }
11435 }
11436
11437 }
11438 else
11439 {
11440 if (last_bytes > 1)
11441 {
11442 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11443 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11444 last_bytes -= 2;
11445 if (last_bytes)
11446 {
11447 rtx tmp = gen_reg_rtx (SImode);
11448 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11449 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11450 part_bytes_reg = tmp;
11451 dstoffset += 2;
11452 }
11453 }
11454
11455 if (last_bytes)
11456 {
11457 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11458 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11459 }
11460 }
11461
11462 return 1;
11463 }
11464
11465 /* Select a dominance comparison mode if possible for a test of the general
11466 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11467 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11468 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11469 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11470 In all cases OP will be either EQ or NE, but we don't need to know which
11471 here. If we are unable to support a dominance comparison we return
11472 CC mode. This will then fail to match for the RTL expressions that
11473 generate this call. */
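/* A worked example (illustrative): for a test such as
   (ne (ior (eq a b) (eq c d)) (const_int 0)), i.e. "a == b || c == d",
   both sub-comparisons are EQ and COND_OR is DOM_CC_X_OR_Y, so the code
   below returns CC_DEQmode; the conditional-compare patterns can then
   evaluate the second comparison only when the first one fails.  */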
11474 enum machine_mode
11475 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11476 {
11477 enum rtx_code cond1, cond2;
11478 int swapped = 0;
11479
11480 /* Currently we will probably get the wrong result if the individual
11481 comparisons are not simple. This also ensures that it is safe to
11482 reverse a comparison if necessary. */
11483 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11484 != CCmode)
11485 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11486 != CCmode))
11487 return CCmode;
11488
11489 /* The if_then_else variant of this tests the second condition if the
11490 first passes, but is true if the first fails. Reverse the first
11491 condition to get a true "inclusive-or" expression. */
11492 if (cond_or == DOM_CC_NX_OR_Y)
11493 cond1 = reverse_condition (cond1);
11494
11495 /* If the comparisons are not equal, and one doesn't dominate the other,
11496 then we can't do this. */
11497 if (cond1 != cond2
11498 && !comparison_dominates_p (cond1, cond2)
11499 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11500 return CCmode;
11501
11502 if (swapped)
11503 {
11504 enum rtx_code temp = cond1;
11505 cond1 = cond2;
11506 cond2 = temp;
11507 }
11508
11509 switch (cond1)
11510 {
11511 case EQ:
11512 if (cond_or == DOM_CC_X_AND_Y)
11513 return CC_DEQmode;
11514
11515 switch (cond2)
11516 {
11517 case EQ: return CC_DEQmode;
11518 case LE: return CC_DLEmode;
11519 case LEU: return CC_DLEUmode;
11520 case GE: return CC_DGEmode;
11521 case GEU: return CC_DGEUmode;
11522 default: gcc_unreachable ();
11523 }
11524
11525 case LT:
11526 if (cond_or == DOM_CC_X_AND_Y)
11527 return CC_DLTmode;
11528
11529 switch (cond2)
11530 {
11531 case LT:
11532 return CC_DLTmode;
11533 case LE:
11534 return CC_DLEmode;
11535 case NE:
11536 return CC_DNEmode;
11537 default:
11538 gcc_unreachable ();
11539 }
11540
11541 case GT:
11542 if (cond_or == DOM_CC_X_AND_Y)
11543 return CC_DGTmode;
11544
11545 switch (cond2)
11546 {
11547 case GT:
11548 return CC_DGTmode;
11549 case GE:
11550 return CC_DGEmode;
11551 case NE:
11552 return CC_DNEmode;
11553 default:
11554 gcc_unreachable ();
11555 }
11556
11557 case LTU:
11558 if (cond_or == DOM_CC_X_AND_Y)
11559 return CC_DLTUmode;
11560
11561 switch (cond2)
11562 {
11563 case LTU:
11564 return CC_DLTUmode;
11565 case LEU:
11566 return CC_DLEUmode;
11567 case NE:
11568 return CC_DNEmode;
11569 default:
11570 gcc_unreachable ();
11571 }
11572
11573 case GTU:
11574 if (cond_or == DOM_CC_X_AND_Y)
11575 return CC_DGTUmode;
11576
11577 switch (cond2)
11578 {
11579 case GTU:
11580 return CC_DGTUmode;
11581 case GEU:
11582 return CC_DGEUmode;
11583 case NE:
11584 return CC_DNEmode;
11585 default:
11586 gcc_unreachable ();
11587 }
11588
11589 /* The remaining cases only occur when both comparisons are the
11590 same. */
11591 case NE:
11592 gcc_assert (cond1 == cond2);
11593 return CC_DNEmode;
11594
11595 case LE:
11596 gcc_assert (cond1 == cond2);
11597 return CC_DLEmode;
11598
11599 case GE:
11600 gcc_assert (cond1 == cond2);
11601 return CC_DGEmode;
11602
11603 case LEU:
11604 gcc_assert (cond1 == cond2);
11605 return CC_DLEUmode;
11606
11607 case GEU:
11608 gcc_assert (cond1 == cond2);
11609 return CC_DGEUmode;
11610
11611 default:
11612 gcc_unreachable ();
11613 }
11614 }
11615
11616 enum machine_mode
11617 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11618 {
11619 /* All floating point compares return CCFP if it is an equality
11620 comparison, and CCFPE otherwise. */
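  /* For example, an SFmode equality test selects CCFPmode (a quiet compare
     suffices), whereas an ordered relation such as LT selects CCFPEmode so
     that the comparison may raise an exception on a NaN.  (Illustrative
     only; the switch below is the authoritative mapping.)  */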
11621 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11622 {
11623 switch (op)
11624 {
11625 case EQ:
11626 case NE:
11627 case UNORDERED:
11628 case ORDERED:
11629 case UNLT:
11630 case UNLE:
11631 case UNGT:
11632 case UNGE:
11633 case UNEQ:
11634 case LTGT:
11635 return CCFPmode;
11636
11637 case LT:
11638 case LE:
11639 case GT:
11640 case GE:
11641 return CCFPEmode;
11642
11643 default:
11644 gcc_unreachable ();
11645 }
11646 }
11647
11648 /* A compare with a shifted operand. Because of canonicalization, the
11649 comparison will have to be swapped when we emit the assembler. */
11650 if (GET_MODE (y) == SImode
11651 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11652 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11653 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11654 || GET_CODE (x) == ROTATERT))
11655 return CC_SWPmode;
11656
11657 /* This operation is performed swapped, but since we only rely on the Z
11658 flag we don't need an additional mode. */
11659 if (GET_MODE (y) == SImode
11660 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11661 && GET_CODE (x) == NEG
11662 && (op == EQ || op == NE))
11663 return CC_Zmode;
11664
11665 /* This is a special case that is used by combine to allow a
11666 comparison of a shifted byte load to be split into a zero-extend
11667 followed by a comparison of the shifted integer (only valid for
11668 equalities and unsigned inequalities). */
11669 if (GET_MODE (x) == SImode
11670 && GET_CODE (x) == ASHIFT
11671 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
11672 && GET_CODE (XEXP (x, 0)) == SUBREG
11673 && MEM_P (SUBREG_REG (XEXP (x, 0)))
11674 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11675 && (op == EQ || op == NE
11676 || op == GEU || op == GTU || op == LTU || op == LEU)
11677 && CONST_INT_P (y))
11678 return CC_Zmode;
11679
11680 /* A construct for a conditional compare: if the false arm contains
11681 0, then both conditions must be true; otherwise either condition
11682 must be true. Not all conditions are possible, so CCmode is
11683 returned if it can't be done. */
11684 if (GET_CODE (x) == IF_THEN_ELSE
11685 && (XEXP (x, 2) == const0_rtx
11686 || XEXP (x, 2) == const1_rtx)
11687 && COMPARISON_P (XEXP (x, 0))
11688 && COMPARISON_P (XEXP (x, 1)))
11689 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11690 INTVAL (XEXP (x, 2)));
11691
11692 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11693 if (GET_CODE (x) == AND
11694 && (op == EQ || op == NE)
11695 && COMPARISON_P (XEXP (x, 0))
11696 && COMPARISON_P (XEXP (x, 1)))
11697 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11698 DOM_CC_X_AND_Y);
11699
11700 if (GET_CODE (x) == IOR
11701 && (op == EQ || op == NE)
11702 && COMPARISON_P (XEXP (x, 0))
11703 && COMPARISON_P (XEXP (x, 1)))
11704 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11705 DOM_CC_X_OR_Y);
11706
11707 /* An operation (on Thumb) where we want to test for a single bit.
11708 This is done by shifting that bit up into the top bit of a
11709 scratch register; we can then branch on the sign bit. */
11710 if (TARGET_THUMB1
11711 && GET_MODE (x) == SImode
11712 && (op == EQ || op == NE)
11713 && GET_CODE (x) == ZERO_EXTRACT
11714 && XEXP (x, 1) == const1_rtx)
11715 return CC_Nmode;
11716
11717 /* An operation that sets the condition codes as a side-effect: the
11718 V flag is not set correctly, so we can only use comparisons where
11719 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11720 instead.) */
11721 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11722 if (GET_MODE (x) == SImode
11723 && y == const0_rtx
11724 && (op == EQ || op == NE || op == LT || op == GE)
11725 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11726 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11727 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11728 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11729 || GET_CODE (x) == LSHIFTRT
11730 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11731 || GET_CODE (x) == ROTATERT
11732 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11733 return CC_NOOVmode;
11734
11735 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11736 return CC_Zmode;
11737
11738 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11739 && GET_CODE (x) == PLUS
11740 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11741 return CC_Cmode;
11742
11743 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11744 {
11745 switch (op)
11746 {
11747 case EQ:
11748 case NE:
11749 /* A DImode comparison against zero can be implemented by
11750 or'ing the two halves together. */
11751 if (y == const0_rtx)
11752 return CC_Zmode;
11753
11754 /* We can do an equality test in three Thumb instructions. */
11755 if (!TARGET_32BIT)
11756 return CC_Zmode;
11757
11758 /* FALLTHROUGH */
11759
11760 case LTU:
11761 case LEU:
11762 case GTU:
11763 case GEU:
11764 /* DImode unsigned comparisons can be implemented by cmp +
11765 cmpeq without a scratch register. Not worth doing in
11766 Thumb-2. */
11767 if (TARGET_32BIT)
11768 return CC_CZmode;
11769
11770 /* FALLTHROUGH */
11771
11772 case LT:
11773 case LE:
11774 case GT:
11775 case GE:
11776 /* DImode signed and unsigned comparisons can be implemented
11777 by cmp + sbcs with a scratch register, but that does not
11778 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11779 gcc_assert (op != EQ && op != NE);
11780 return CC_NCVmode;
11781
11782 default:
11783 gcc_unreachable ();
11784 }
11785 }
11786
11787 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
11788 return GET_MODE (x);
11789
11790 return CCmode;
11791 }
11792
11793 /* X and Y are two things to compare using CODE. Emit the compare insn and
11794 return the rtx for register 0 in the proper mode. FP means this is a
11795 floating point compare: I don't think that it is needed on the arm. */
11796 rtx
11797 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
11798 {
11799 enum machine_mode mode;
11800 rtx cc_reg;
11801 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11802
11803 /* We might have X as a constant, Y as a register because of the predicates
11804 used for cmpdi. If so, force X to a register here. */
11805 if (dimode_comparison && !REG_P (x))
11806 x = force_reg (DImode, x);
11807
11808 mode = SELECT_CC_MODE (code, x, y);
11809 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11810
11811 if (dimode_comparison
11812 && mode != CC_CZmode)
11813 {
11814 rtx clobber, set;
11815
11816 /* To compare two non-zero values for equality, XOR them and
11817 then compare against zero. Not used for ARM mode; there
11818 CC_CZmode is cheaper. */
11819 if (mode == CC_Zmode && y != const0_rtx)
11820 {
11821 gcc_assert (!reload_completed);
11822 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11823 y = const0_rtx;
11824 }
11825
11826 /* A scratch register is required. */
11827 if (reload_completed)
11828 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
11829 else
11830 scratch = gen_rtx_SCRATCH (SImode);
11831
11832 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11833 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11834 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11835 }
11836 else
11837 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11838
11839 return cc_reg;
11840 }
11841
11842 /* Generate a sequence of insns that will generate the correct return
11843 address mask depending on the physical architecture that the program
11844 is running on. */
11845 rtx
11846 arm_gen_return_addr_mask (void)
11847 {
11848 rtx reg = gen_reg_rtx (Pmode);
11849
11850 emit_insn (gen_return_addr_mask (reg));
11851 return reg;
11852 }
11853
11854 void
11855 arm_reload_in_hi (rtx *operands)
11856 {
11857 rtx ref = operands[1];
11858 rtx base, scratch;
11859 HOST_WIDE_INT offset = 0;
11860
11861 if (GET_CODE (ref) == SUBREG)
11862 {
11863 offset = SUBREG_BYTE (ref);
11864 ref = SUBREG_REG (ref);
11865 }
11866
11867 if (REG_P (ref))
11868 {
11869 /* We have a pseudo which has been spilt onto the stack; there
11870 are two cases here: the first where there is a simple
11871 stack-slot replacement and a second where the stack-slot is
11872 out of range, or is used as a subreg. */
11873 if (reg_equiv_mem (REGNO (ref)))
11874 {
11875 ref = reg_equiv_mem (REGNO (ref));
11876 base = find_replacement (&XEXP (ref, 0));
11877 }
11878 else
11879 /* The slot is out of range, or was dressed up in a SUBREG. */
11880 base = reg_equiv_address (REGNO (ref));
11881 }
11882 else
11883 base = find_replacement (&XEXP (ref, 0));
11884
11885 /* Handle the case where the address is too complex to be offset by 1. */
11886 if (GET_CODE (base) == MINUS
11887 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
11888 {
11889 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11890
11891 emit_set_insn (base_plus, base);
11892 base = base_plus;
11893 }
11894 else if (GET_CODE (base) == PLUS)
11895 {
11896 /* The addend must be CONST_INT, or we would have dealt with it above. */
11897 HOST_WIDE_INT hi, lo;
11898
11899 offset += INTVAL (XEXP (base, 1));
11900 base = XEXP (base, 0);
11901
11902 /* Rework the address into a legal sequence of insns. */
11903 /* Valid range for lo is -4095 -> 4095 */
11904 lo = (offset >= 0
11905 ? (offset & 0xfff)
11906 : -((-offset) & 0xfff));
11907
11908 /* Corner case, if lo is the max offset then we would be out of range
11909 once we have added the additional 1 below, so bump the msb into the
11910 pre-loading insn(s). */
11911 if (lo == 4095)
11912 lo &= 0x7ff;
11913
11914 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11915 ^ (HOST_WIDE_INT) 0x80000000)
11916 - (HOST_WIDE_INT) 0x80000000);
11917
11918 gcc_assert (hi + lo == offset);
11919
11920 if (hi != 0)
11921 {
11922 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11923
11924 /* Get the base address; addsi3 knows how to handle constants
11925 that require more than one insn. */
11926 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11927 base = base_plus;
11928 offset = lo;
11929 }
11930 }
11931
11932 /* Operands[2] may overlap operands[0] (though it won't overlap
11933 operands[1]); that's why we asked for a DImode reg -- so we can
11934 use the half that does not overlap. */
11935 if (REGNO (operands[2]) == REGNO (operands[0]))
11936 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11937 else
11938 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11939
11940 emit_insn (gen_zero_extendqisi2 (scratch,
11941 gen_rtx_MEM (QImode,
11942 plus_constant (Pmode, base,
11943 offset))));
11944 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11945 gen_rtx_MEM (QImode,
11946 plus_constant (Pmode, base,
11947 offset + 1))));
11948 if (!BYTES_BIG_ENDIAN)
11949 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11950 gen_rtx_IOR (SImode,
11951 gen_rtx_ASHIFT
11952 (SImode,
11953 gen_rtx_SUBREG (SImode, operands[0], 0),
11954 GEN_INT (8)),
11955 scratch));
11956 else
11957 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11958 gen_rtx_IOR (SImode,
11959 gen_rtx_ASHIFT (SImode, scratch,
11960 GEN_INT (8)),
11961 gen_rtx_SUBREG (SImode, operands[0], 0)));
11962 }
11963
11964 /* Handle storing a half-word to memory during reload by synthesizing as two
11965 byte stores. Take care not to clobber the input values until after we
11966 have moved them somewhere safe. This code assumes that if the DImode
11967 scratch in operands[2] overlaps either the input value or output address
11968 in some way, then that value must die in this insn (we absolutely need
11969 two scratch registers for some corner cases). */
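/* The sequence emitted is roughly (little-endian case, illustrative only):

       strb  <outval>,  [base, #offset]
       lsr   <scratch>, <outval>, #8
       strb  <scratch>, [base, #offset + 1]

   with the order of the two byte stores swapped for big-endian targets.  */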
11970 void
11971 arm_reload_out_hi (rtx *operands)
11972 {
11973 rtx ref = operands[0];
11974 rtx outval = operands[1];
11975 rtx base, scratch;
11976 HOST_WIDE_INT offset = 0;
11977
11978 if (GET_CODE (ref) == SUBREG)
11979 {
11980 offset = SUBREG_BYTE (ref);
11981 ref = SUBREG_REG (ref);
11982 }
11983
11984 if (REG_P (ref))
11985 {
11986 /* We have a pseudo which has been spilt onto the stack; there
11987 are two cases here: the first where there is a simple
11988 stack-slot replacement and a second where the stack-slot is
11989 out of range, or is used as a subreg. */
11990 if (reg_equiv_mem (REGNO (ref)))
11991 {
11992 ref = reg_equiv_mem (REGNO (ref));
11993 base = find_replacement (&XEXP (ref, 0));
11994 }
11995 else
11996 /* The slot is out of range, or was dressed up in a SUBREG. */
11997 base = reg_equiv_address (REGNO (ref));
11998 }
11999 else
12000 base = find_replacement (&XEXP (ref, 0));
12001
12002 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
12003
12004 /* Handle the case where the address is too complex to be offset by 1. */
12005 if (GET_CODE (base) == MINUS
12006 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
12007 {
12008 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12009
12010 /* Be careful not to destroy OUTVAL. */
12011 if (reg_overlap_mentioned_p (base_plus, outval))
12012 {
12013 /* Updating base_plus might destroy outval, see if we can
12014 swap the scratch and base_plus. */
12015 if (!reg_overlap_mentioned_p (scratch, outval))
12016 {
12017 rtx tmp = scratch;
12018 scratch = base_plus;
12019 base_plus = tmp;
12020 }
12021 else
12022 {
12023 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12024
12025 /* Be conservative and copy OUTVAL into the scratch now,
12026 this should only be necessary if outval is a subreg
12027 of something larger than a word. */
12028 /* XXX Might this clobber base? I can't see how it can,
12029 since scratch is known to overlap with OUTVAL, and
12030 must be wider than a word. */
12031 emit_insn (gen_movhi (scratch_hi, outval));
12032 outval = scratch_hi;
12033 }
12034 }
12035
12036 emit_set_insn (base_plus, base);
12037 base = base_plus;
12038 }
12039 else if (GET_CODE (base) == PLUS)
12040 {
12041 /* The addend must be CONST_INT, or we would have dealt with it above. */
12042 HOST_WIDE_INT hi, lo;
12043
12044 offset += INTVAL (XEXP (base, 1));
12045 base = XEXP (base, 0);
12046
12047 /* Rework the address into a legal sequence of insns. */
12048 /* Valid range for lo is -4095 -> 4095 */
12049 lo = (offset >= 0
12050 ? (offset & 0xfff)
12051 : -((-offset) & 0xfff));
12052
12053 /* Corner case, if lo is the max offset then we would be out of range
12054 once we have added the additional 1 below, so bump the msb into the
12055 pre-loading insn(s). */
12056 if (lo == 4095)
12057 lo &= 0x7ff;
12058
12059 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12060 ^ (HOST_WIDE_INT) 0x80000000)
12061 - (HOST_WIDE_INT) 0x80000000);
12062
12063 gcc_assert (hi + lo == offset);
12064
12065 if (hi != 0)
12066 {
12067 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12068
12069 /* Be careful not to destroy OUTVAL. */
12070 if (reg_overlap_mentioned_p (base_plus, outval))
12071 {
12072 /* Updating base_plus might destroy outval, see if we
12073 can swap the scratch and base_plus. */
12074 if (!reg_overlap_mentioned_p (scratch, outval))
12075 {
12076 rtx tmp = scratch;
12077 scratch = base_plus;
12078 base_plus = tmp;
12079 }
12080 else
12081 {
12082 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12083
12084 /* Be conservative and copy outval into scratch now,
12085 this should only be necessary if outval is a
12086 subreg of something larger than a word. */
12087 /* XXX Might this clobber base? I can't see how it
12088 can, since scratch is known to overlap with
12089 outval. */
12090 emit_insn (gen_movhi (scratch_hi, outval));
12091 outval = scratch_hi;
12092 }
12093 }
12094
12095 /* Get the base address; addsi3 knows how to handle constants
12096 that require more than one insn. */
12097 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12098 base = base_plus;
12099 offset = lo;
12100 }
12101 }
12102
12103 if (BYTES_BIG_ENDIAN)
12104 {
12105 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12106 plus_constant (Pmode, base,
12107 offset + 1)),
12108 gen_lowpart (QImode, outval)));
12109 emit_insn (gen_lshrsi3 (scratch,
12110 gen_rtx_SUBREG (SImode, outval, 0),
12111 GEN_INT (8)));
12112 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12113 offset)),
12114 gen_lowpart (QImode, scratch)));
12115 }
12116 else
12117 {
12118 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12119 offset)),
12120 gen_lowpart (QImode, outval)));
12121 emit_insn (gen_lshrsi3 (scratch,
12122 gen_rtx_SUBREG (SImode, outval, 0),
12123 GEN_INT (8)));
12124 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12125 plus_constant (Pmode, base,
12126 offset + 1)),
12127 gen_lowpart (QImode, scratch)));
12128 }
12129 }
12130
12131 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12132 (padded to the size of a word) should be passed in a register. */
12133
12134 static bool
12135 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12136 {
12137 if (TARGET_AAPCS_BASED)
12138 return must_pass_in_stack_var_size (mode, type);
12139 else
12140 return must_pass_in_stack_var_size_or_pad (mode, type);
12141 }
12142
12143
12144 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12145 Return true if an argument passed on the stack should be padded upwards,
12146 i.e. if the least-significant byte has useful data.
12147 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12148 aggregate types are placed in the lowest memory address. */
12149
12150 bool
12151 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12152 {
12153 if (!TARGET_AAPCS_BASED)
12154 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12155
12156 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12157 return false;
12158
12159 return true;
12160 }
12161
12162
12163 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12164 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12165 register has useful data, and return the opposite if the most
12166 significant byte does. */
12167
12168 bool
12169 arm_pad_reg_upward (enum machine_mode mode,
12170 tree type, int first ATTRIBUTE_UNUSED)
12171 {
12172 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12173 {
12174 /* For AAPCS, small aggregates, small fixed-point types,
12175 and small complex types are always padded upwards. */
12176 if (type)
12177 {
12178 if ((AGGREGATE_TYPE_P (type)
12179 || TREE_CODE (type) == COMPLEX_TYPE
12180 || FIXED_POINT_TYPE_P (type))
12181 && int_size_in_bytes (type) <= 4)
12182 return true;
12183 }
12184 else
12185 {
12186 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12187 && GET_MODE_SIZE (mode) <= 4)
12188 return true;
12189 }
12190 }
12191
12192 /* Otherwise, use default padding. */
12193 return !BYTES_BIG_ENDIAN;
12194 }
12195
12196 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
12197 assuming that the address in the base register is word aligned. */
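/* Illustrative values (not exhaustive): in Thumb-2 the offset must be a
   multiple of 4 in the range -1020..1020, so 260 is accepted; in ARM mode
   the range is only -255..255, so the same offset is rejected.  */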
12198 bool
12199 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
12200 {
12201 HOST_WIDE_INT max_offset;
12202
12203 /* Offset must be a multiple of 4 in Thumb-2 mode. */
12204 if (TARGET_THUMB2 && ((offset & 3) != 0))
12205 return false;
12206
12207 if (TARGET_THUMB2)
12208 max_offset = 1020;
12209 else if (TARGET_ARM)
12210 max_offset = 255;
12211 else
12212 return false;
12213
12214 return ((offset <= max_offset) && (offset >= -max_offset));
12215 }
12216
12217 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
12218 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
12219 Assumes that the address in the base register RN is word aligned. Pattern
12220 guarantees that both memory accesses use the same base register, that the
12221 offsets are constants within the valid range, and that the gap between them is 4.
12222 If reload is complete, also check that the registers are legal. WBACK indicates
12223 whether the address is updated. LOAD indicates whether the access is a load. */
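/* A worked example (illustrative only, assuming no write-back and a base
   register other than PC): in ARM mode the pair r1/r2 is rejected because
   the first destination register must be even, while r2/r3 with an offset
   of 8 is accepted; in Thumb-2 there is no even/odd restriction, but SP or
   PC as a destination register is rejected.  */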
12224 bool
12225 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
12226 bool wback, bool load)
12227 {
12228 unsigned int t, t2, n;
12229
12230 if (!reload_completed)
12231 return true;
12232
12233 if (!offset_ok_for_ldrd_strd (offset))
12234 return false;
12235
12236 t = REGNO (rt);
12237 t2 = REGNO (rt2);
12238 n = REGNO (rn);
12239
12240 if ((TARGET_THUMB2)
12241 && ((wback && (n == t || n == t2))
12242 || (t == SP_REGNUM)
12243 || (t == PC_REGNUM)
12244 || (t2 == SP_REGNUM)
12245 || (t2 == PC_REGNUM)
12246 || (!load && (n == PC_REGNUM))
12247 || (load && (t == t2))
12248 /* Triggers Cortex-M3 LDRD errata. */
12249 || (!wback && load && fix_cm3_ldrd && (n == t))))
12250 return false;
12251
12252 if ((TARGET_ARM)
12253 && ((wback && (n == t || n == t2))
12254 || (t2 == PC_REGNUM)
12255 || (t % 2 != 0) /* First destination register is not even. */
12256 || (t2 != t + 1)
12257 /* PC can be used as base register (for offset addressing only),
12258 but it is deprecated. */
12259 || (n == PC_REGNUM)))
12260 return false;
12261
12262 return true;
12263 }
12264
12265 \f
12266 /* Print a symbolic form of X to the debug file, F. */
12267 static void
12268 arm_print_value (FILE *f, rtx x)
12269 {
12270 switch (GET_CODE (x))
12271 {
12272 case CONST_INT:
12273 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12274 return;
12275
12276 case CONST_DOUBLE:
12277 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12278 return;
12279
12280 case CONST_VECTOR:
12281 {
12282 int i;
12283
12284 fprintf (f, "<");
12285 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12286 {
12287 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12288 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12289 fputc (',', f);
12290 }
12291 fprintf (f, ">");
12292 }
12293 return;
12294
12295 case CONST_STRING:
12296 fprintf (f, "\"%s\"", XSTR (x, 0));
12297 return;
12298
12299 case SYMBOL_REF:
12300 fprintf (f, "`%s'", XSTR (x, 0));
12301 return;
12302
12303 case LABEL_REF:
12304 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12305 return;
12306
12307 case CONST:
12308 arm_print_value (f, XEXP (x, 0));
12309 return;
12310
12311 case PLUS:
12312 arm_print_value (f, XEXP (x, 0));
12313 fprintf (f, "+");
12314 arm_print_value (f, XEXP (x, 1));
12315 return;
12316
12317 case PC:
12318 fprintf (f, "pc");
12319 return;
12320
12321 default:
12322 fprintf (f, "????");
12323 return;
12324 }
12325 }
12326 \f
12327 /* Routines for manipulation of the constant pool. */
12328
12329 /* Arm instructions cannot load a large constant directly into a
12330 register; they have to come from a pc relative load. The constant
12331 must therefore be placed in the addressable range of the pc
12332 relative load. Depending on the precise pc relative load
12333 instruction the range is somewhere between 256 bytes and 4k. This
12334 means that we often have to dump a constant inside a function, and
12335 generate code to branch around it.
12336
12337 It is important to minimize this, since the branches will slow
12338 things down and make the code larger.
12339
12340 Normally we can hide the table after an existing unconditional
12341 branch so that there is no interruption of the flow, but in the
12342 worst case the code looks like this:
12343
12344 ldr rn, L1
12345 ...
12346 b L2
12347 align
12348 L1: .long value
12349 L2:
12350 ...
12351
12352 ldr rn, L3
12353 ...
12354 b L4
12355 align
12356 L3: .long value
12357 L4:
12358 ...
12359
12360 We fix this by performing a scan after scheduling, which notices
12361 which instructions need to have their operands fetched from the
12362 constant table and builds the table.
12363
12364 The algorithm starts by building a table of all the constants that
12365 need fixing up and all the natural barriers in the function (places
12366 where a constant table can be dropped without breaking the flow).
12367 For each fixup we note how far the pc-relative replacement will be
12368 able to reach and the offset of the instruction into the function.
12369
12370 Having built the table we then group the fixes together to form
12371 tables that are as large as possible (subject to addressing
12372 constraints) and emit each table of constants after the last
12373 barrier that is within range of all the instructions in the group.
12374 If a group does not contain a barrier, then we forcibly create one
12375 by inserting a jump instruction into the flow. Once the table has
12376 been inserted, the insns are then modified to reference the
12377 relevant entry in the pool.
12378
12379 Possible enhancements to the algorithm (not implemented) are:
12380
12381 1) For some processors and object formats, there may be benefit in
12382 aligning the pools to the start of cache lines; this alignment
12383 would need to be taken into account when calculating addressability
12384 of a pool. */
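/* As a rough illustration (the numbers are approximate and depend on the
   exact instruction): an ARM-mode "ldr rn, [pc, #imm]" can reach about 4K
   bytes forward, so a fix recorded at address 0x100 gets a max_address of
   roughly 0x1100; any barrier before that point which is also within range
   of the other fixes in the group can host the shared constant pool.  */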
12385
12386 /* These typedefs are located at the start of this file, so that
12387 they can be used in the prototypes there. This comment is to
12388 remind readers of that fact so that the following structures
12389 can be understood more easily.
12390
12391 typedef struct minipool_node Mnode;
12392 typedef struct minipool_fixup Mfix; */
12393
12394 struct minipool_node
12395 {
12396 /* Doubly linked chain of entries. */
12397 Mnode * next;
12398 Mnode * prev;
12399 /* The maximum offset into the code at which this entry can be placed. While
12400 pushing fixes for forward references, all entries are sorted in order
12401 of increasing max_address. */
12402 HOST_WIDE_INT max_address;
12403 /* Similarly for an entry inserted for a backwards ref. */
12404 HOST_WIDE_INT min_address;
12405 /* The number of fixes referencing this entry. This can become zero
12406 if we "unpush" an entry. In this case we ignore the entry when we
12407 come to emit the code. */
12408 int refcount;
12409 /* The offset from the start of the minipool. */
12410 HOST_WIDE_INT offset;
12411 /* The value in the table. */
12412 rtx value;
12413 /* The mode of value. */
12414 enum machine_mode mode;
12415 /* The size of the value. With iWMMXt enabled
12416 sizes > 4 also imply an alignment of 8 bytes. */
12417 int fix_size;
12418 };
12419
12420 struct minipool_fixup
12421 {
12422 Mfix * next;
12423 rtx insn;
12424 HOST_WIDE_INT address;
12425 rtx * loc;
12426 enum machine_mode mode;
12427 int fix_size;
12428 rtx value;
12429 Mnode * minipool;
12430 HOST_WIDE_INT forwards;
12431 HOST_WIDE_INT backwards;
12432 };
12433
12434 /* Fixes less than a word need padding out to a word boundary. */
12435 #define MINIPOOL_FIX_SIZE(mode) \
12436 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
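/* For example, a QImode or HImode fix still occupies 4 bytes in the pool,
   while a DImode fix occupies 8.  */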
12437
12438 static Mnode * minipool_vector_head;
12439 static Mnode * minipool_vector_tail;
12440 static rtx minipool_vector_label;
12441 static int minipool_pad;
12442
12443 /* The linked list of all minipool fixes required for this function. */
12444 Mfix * minipool_fix_head;
12445 Mfix * minipool_fix_tail;
12446 /* The fix entry for the current minipool, once it has been placed. */
12447 Mfix * minipool_barrier;
12448
12449 /* Determines if INSN is the start of a jump table. Returns the end
12450 of the TABLE or NULL_RTX. */
12451 static rtx
12452 is_jump_table (rtx insn)
12453 {
12454 rtx table;
12455
12456 if (jump_to_label_p (insn)
12457 && ((table = next_real_insn (JUMP_LABEL (insn)))
12458 == next_real_insn (insn))
12459 && table != NULL
12460 && JUMP_P (table)
12461 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12462 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12463 return table;
12464
12465 return NULL_RTX;
12466 }
12467
12468 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12469 #define JUMP_TABLES_IN_TEXT_SECTION 0
12470 #endif
12471
12472 static HOST_WIDE_INT
12473 get_jump_table_size (rtx insn)
12474 {
12475 /* ADDR_VECs only take room if read-only data goes into the text
12476 section. */
12477 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12478 {
12479 rtx body = PATTERN (insn);
12480 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12481 HOST_WIDE_INT size;
12482 HOST_WIDE_INT modesize;
12483
12484 modesize = GET_MODE_SIZE (GET_MODE (body));
12485 size = modesize * XVECLEN (body, elt);
12486 switch (modesize)
12487 {
12488 case 1:
12489 /* Round up size of TBB table to a halfword boundary. */
12490 size = (size + 1) & ~(HOST_WIDE_INT)1;
12491 break;
12492 case 2:
12493 /* No padding necessary for TBH. */
12494 break;
12495 case 4:
12496 /* Add two bytes for alignment on Thumb. */
12497 if (TARGET_THUMB)
12498 size += 2;
12499 break;
12500 default:
12501 gcc_unreachable ();
12502 }
12503 return size;
12504 }
12505
12506 return 0;
12507 }
12508
12509 /* Return the maximum amount of padding that will be inserted before
12510 label LABEL. */
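/* E.g. for a label aligned to 8 bytes on Thumb (minimum insn size 2),
   up to 6 bytes of padding may be inserted before it.  */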
12511
12512 static HOST_WIDE_INT
12513 get_label_padding (rtx label)
12514 {
12515 HOST_WIDE_INT align, min_insn_size;
12516
12517 align = 1 << label_to_alignment (label);
12518 min_insn_size = TARGET_THUMB ? 2 : 4;
12519 return align > min_insn_size ? align - min_insn_size : 0;
12520 }
12521
12522 /* Move a minipool fix MP from its current location to before MAX_MP.
12523 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12524 constraints may need updating. */
12525 static Mnode *
12526 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12527 HOST_WIDE_INT max_address)
12528 {
12529 /* The code below assumes these are different. */
12530 gcc_assert (mp != max_mp);
12531
12532 if (max_mp == NULL)
12533 {
12534 if (max_address < mp->max_address)
12535 mp->max_address = max_address;
12536 }
12537 else
12538 {
12539 if (max_address > max_mp->max_address - mp->fix_size)
12540 mp->max_address = max_mp->max_address - mp->fix_size;
12541 else
12542 mp->max_address = max_address;
12543
12544 /* Unlink MP from its current position. Since max_mp is non-null,
12545 mp->prev must be non-null. */
12546 mp->prev->next = mp->next;
12547 if (mp->next != NULL)
12548 mp->next->prev = mp->prev;
12549 else
12550 minipool_vector_tail = mp->prev;
12551
12552 /* Re-insert it before MAX_MP. */
12553 mp->next = max_mp;
12554 mp->prev = max_mp->prev;
12555 max_mp->prev = mp;
12556
12557 if (mp->prev != NULL)
12558 mp->prev->next = mp;
12559 else
12560 minipool_vector_head = mp;
12561 }
12562
12563 /* Save the new entry. */
12564 max_mp = mp;
12565
12566 /* Scan over the preceding entries and adjust their addresses as
12567 required. */
12568 while (mp->prev != NULL
12569 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12570 {
12571 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12572 mp = mp->prev;
12573 }
12574
12575 return max_mp;
12576 }
12577
12578 /* Add a constant to the minipool for a forward reference. Returns the
12579 node added or NULL if the constant will not fit in this pool. */
12580 static Mnode *
12581 add_minipool_forward_ref (Mfix *fix)
12582 {
12583 /* If set, max_mp is the first pool_entry that has a lower
12584 constraint than the one we are trying to add. */
12585 Mnode * max_mp = NULL;
12586 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12587 Mnode * mp;
12588
12589 /* If the minipool starts before the end of FIX->INSN then this FIX
12590 cannot be placed into the current pool. Furthermore, adding the
12591 new constant pool entry may cause the pool to start FIX_SIZE bytes
12592 earlier. */
12593 if (minipool_vector_head &&
12594 (fix->address + get_attr_length (fix->insn)
12595 >= minipool_vector_head->max_address - fix->fix_size))
12596 return NULL;
12597
12598 /* Scan the pool to see if a constant with the same value has
12599 already been added. While we are doing this, also note the
12600 location where we must insert the constant if it doesn't already
12601 exist. */
12602 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12603 {
12604 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12605 && fix->mode == mp->mode
12606 && (!LABEL_P (fix->value)
12607 || (CODE_LABEL_NUMBER (fix->value)
12608 == CODE_LABEL_NUMBER (mp->value)))
12609 && rtx_equal_p (fix->value, mp->value))
12610 {
12611 /* More than one fix references this entry. */
12612 mp->refcount++;
12613 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12614 }
12615
12616 /* Note the insertion point if necessary. */
12617 if (max_mp == NULL
12618 && mp->max_address > max_address)
12619 max_mp = mp;
12620
12621 /* If we are inserting an 8-byte aligned quantity and
12622 we have not already found an insertion point, then
12623 make sure that all such 8-byte aligned quantities are
12624 placed at the start of the pool. */
12625 if (ARM_DOUBLEWORD_ALIGN
12626 && max_mp == NULL
12627 && fix->fix_size >= 8
12628 && mp->fix_size < 8)
12629 {
12630 max_mp = mp;
12631 max_address = mp->max_address;
12632 }
12633 }
12634
12635 /* The value is not currently in the minipool, so we need to create
12636 a new entry for it. If MAX_MP is NULL, the entry will be put on
12637 the end of the list since the placement is less constrained than
12638 any existing entry. Otherwise, we insert the new fix before
12639 MAX_MP and, if necessary, adjust the constraints on the other
12640 entries. */
12641 mp = XNEW (Mnode);
12642 mp->fix_size = fix->fix_size;
12643 mp->mode = fix->mode;
12644 mp->value = fix->value;
12645 mp->refcount = 1;
12646 /* Not yet required for a backwards ref. */
12647 mp->min_address = -65536;
12648
12649 if (max_mp == NULL)
12650 {
12651 mp->max_address = max_address;
12652 mp->next = NULL;
12653 mp->prev = minipool_vector_tail;
12654
12655 if (mp->prev == NULL)
12656 {
12657 minipool_vector_head = mp;
12658 minipool_vector_label = gen_label_rtx ();
12659 }
12660 else
12661 mp->prev->next = mp;
12662
12663 minipool_vector_tail = mp;
12664 }
12665 else
12666 {
12667 if (max_address > max_mp->max_address - mp->fix_size)
12668 mp->max_address = max_mp->max_address - mp->fix_size;
12669 else
12670 mp->max_address = max_address;
12671
12672 mp->next = max_mp;
12673 mp->prev = max_mp->prev;
12674 max_mp->prev = mp;
12675 if (mp->prev != NULL)
12676 mp->prev->next = mp;
12677 else
12678 minipool_vector_head = mp;
12679 }
12680
12681 /* Save the new entry. */
12682 max_mp = mp;
12683
12684 /* Scan over the preceding entries and adjust their addresses as
12685 required. */
12686 while (mp->prev != NULL
12687 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12688 {
12689 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12690 mp = mp->prev;
12691 }
12692
12693 return max_mp;
12694 }
12695
12696 static Mnode *
12697 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12698 HOST_WIDE_INT min_address)
12699 {
12700 HOST_WIDE_INT offset;
12701
12702 /* The code below assumes these are different. */
12703 gcc_assert (mp != min_mp);
12704
12705 if (min_mp == NULL)
12706 {
12707 if (min_address > mp->min_address)
12708 mp->min_address = min_address;
12709 }
12710 else
12711 {
12712 /* We will adjust this below if it is too loose. */
12713 mp->min_address = min_address;
12714
12715 /* Unlink MP from its current position. Since min_mp is non-null,
12716 mp->next must be non-null. */
12717 mp->next->prev = mp->prev;
12718 if (mp->prev != NULL)
12719 mp->prev->next = mp->next;
12720 else
12721 minipool_vector_head = mp->next;
12722
12723 /* Reinsert it after MIN_MP. */
12724 mp->prev = min_mp;
12725 mp->next = min_mp->next;
12726 min_mp->next = mp;
12727 if (mp->next != NULL)
12728 mp->next->prev = mp;
12729 else
12730 minipool_vector_tail = mp;
12731 }
12732
12733 min_mp = mp;
12734
12735 offset = 0;
12736 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12737 {
12738 mp->offset = offset;
12739 if (mp->refcount > 0)
12740 offset += mp->fix_size;
12741
12742 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12743 mp->next->min_address = mp->min_address + mp->fix_size;
12744 }
12745
12746 return min_mp;
12747 }
12748
12749 /* Add a constant to the minipool for a backward reference. Returns the
12750 node added or NULL if the constant will not fit in this pool.
12751
12752 Note that the code for insertion for a backwards reference can be
12753 somewhat confusing because the calculated offsets for each fix do
12754 not take into account the size of the pool (which is still under
12755 construction). */
12756 static Mnode *
12757 add_minipool_backward_ref (Mfix *fix)
12758 {
12759 /* If set, min_mp is the last pool_entry that has a lower constraint
12760 than the one we are trying to add. */
12761 Mnode *min_mp = NULL;
12762 /* This can be negative, since it is only a constraint. */
12763 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12764 Mnode *mp;
12765
12766 /* If we can't reach the current pool from this insn, or if we can't
12767 insert this entry at the end of the pool without pushing other
12768 fixes out of range, then we don't try. This ensures that we
12769 can't fail later on. */
12770 if (min_address >= minipool_barrier->address
12771 || (minipool_vector_tail->min_address + fix->fix_size
12772 >= minipool_barrier->address))
12773 return NULL;
12774
12775 /* Scan the pool to see if a constant with the same value has
12776 already been added. While we are doing this, also note the
12777 location where we must insert the constant if it doesn't already
12778 exist. */
12779 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12780 {
12781 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12782 && fix->mode == mp->mode
12783 && (!LABEL_P (fix->value)
12784 || (CODE_LABEL_NUMBER (fix->value)
12785 == CODE_LABEL_NUMBER (mp->value)))
12786 && rtx_equal_p (fix->value, mp->value)
12787 /* Check that there is enough slack to move this entry to the
12788 end of the table (this is conservative). */
12789 && (mp->max_address
12790 > (minipool_barrier->address
12791 + minipool_vector_tail->offset
12792 + minipool_vector_tail->fix_size)))
12793 {
12794 mp->refcount++;
12795 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12796 }
12797
12798 if (min_mp != NULL)
12799 mp->min_address += fix->fix_size;
12800 else
12801 {
12802 /* Note the insertion point if necessary. */
12803 if (mp->min_address < min_address)
12804 {
12805 /* For now, we do not allow the insertion of 8-byte alignment
12806 requiring nodes anywhere but at the start of the pool. */
12807 if (ARM_DOUBLEWORD_ALIGN
12808 && fix->fix_size >= 8 && mp->fix_size < 8)
12809 return NULL;
12810 else
12811 min_mp = mp;
12812 }
12813 else if (mp->max_address
12814 < minipool_barrier->address + mp->offset + fix->fix_size)
12815 {
12816 /* Inserting before this entry would push the fix beyond
12817 its maximum address (which can happen if we have
12818 re-located a forwards fix); force the new fix to come
12819 after it. */
12820 if (ARM_DOUBLEWORD_ALIGN
12821 && fix->fix_size >= 8 && mp->fix_size < 8)
12822 return NULL;
12823 else
12824 {
12825 min_mp = mp;
12826 min_address = mp->min_address + fix->fix_size;
12827 }
12828 }
12829 /* Do not insert a non-8-byte aligned quantity before 8-byte
12830 aligned quantities. */
12831 else if (ARM_DOUBLEWORD_ALIGN
12832 && fix->fix_size < 8
12833 && mp->fix_size >= 8)
12834 {
12835 min_mp = mp;
12836 min_address = mp->min_address + fix->fix_size;
12837 }
12838 }
12839 }
12840
12841 /* We need to create a new entry. */
12842 mp = XNEW (Mnode);
12843 mp->fix_size = fix->fix_size;
12844 mp->mode = fix->mode;
12845 mp->value = fix->value;
12846 mp->refcount = 1;
12847 mp->max_address = minipool_barrier->address + 65536;
12848
12849 mp->min_address = min_address;
12850
12851 if (min_mp == NULL)
12852 {
12853 mp->prev = NULL;
12854 mp->next = minipool_vector_head;
12855
12856 if (mp->next == NULL)
12857 {
12858 minipool_vector_tail = mp;
12859 minipool_vector_label = gen_label_rtx ();
12860 }
12861 else
12862 mp->next->prev = mp;
12863
12864 minipool_vector_head = mp;
12865 }
12866 else
12867 {
12868 mp->next = min_mp->next;
12869 mp->prev = min_mp;
12870 min_mp->next = mp;
12871
12872 if (mp->next != NULL)
12873 mp->next->prev = mp;
12874 else
12875 minipool_vector_tail = mp;
12876 }
12877
12878 /* Save the new entry. */
12879 min_mp = mp;
12880
12881 if (mp->prev)
12882 mp = mp->prev;
12883 else
12884 mp->offset = 0;
12885
12886 /* Scan over the following entries and adjust their offsets. */
12887 while (mp->next != NULL)
12888 {
12889 if (mp->next->min_address < mp->min_address + mp->fix_size)
12890 mp->next->min_address = mp->min_address + mp->fix_size;
12891
12892 if (mp->refcount)
12893 mp->next->offset = mp->offset + mp->fix_size;
12894 else
12895 mp->next->offset = mp->offset;
12896
12897 mp = mp->next;
12898 }
12899
12900 return min_mp;
12901 }
12902
12903 static void
12904 assign_minipool_offsets (Mfix *barrier)
12905 {
12906 HOST_WIDE_INT offset = 0;
12907 Mnode *mp;
12908
12909 minipool_barrier = barrier;
12910
12911 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12912 {
12913 mp->offset = offset;
12914
12915 if (mp->refcount > 0)
12916 offset += mp->fix_size;
12917 }
12918 }
12919
12920 /* Output the literal table. */
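/* As an illustrative sketch only (the labels and values below are
   hypothetical, not taken from this file's output verbatim), a dumped
   minipool ends up in the assembly as an aligned block of data directives:

	.align	2
   .L42:				@ minipool_vector_label
	.word	0x12345678		@ a 4-byte entry (consttable_4)
	.word	.L7			@ e.g. a code label used as data

   Entries whose refcount has dropped to zero are simply skipped when the
   pool is written out.  */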
12921 static void
12922 dump_minipool (rtx scan)
12923 {
12924 Mnode * mp;
12925 Mnode * nmp;
12926 int align64 = 0;
12927
12928 if (ARM_DOUBLEWORD_ALIGN)
12929 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12930 if (mp->refcount > 0 && mp->fix_size >= 8)
12931 {
12932 align64 = 1;
12933 break;
12934 }
12935
12936 if (dump_file)
12937 fprintf (dump_file,
12938 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12939 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12940
12941 scan = emit_label_after (gen_label_rtx (), scan);
12942 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12943 scan = emit_label_after (minipool_vector_label, scan);
12944
12945 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12946 {
12947 if (mp->refcount > 0)
12948 {
12949 if (dump_file)
12950 {
12951 fprintf (dump_file,
12952 ";; Offset %u, min %ld, max %ld ",
12953 (unsigned) mp->offset, (unsigned long) mp->min_address,
12954 (unsigned long) mp->max_address);
12955 arm_print_value (dump_file, mp->value);
12956 fputc ('\n', dump_file);
12957 }
12958
12959 switch (mp->fix_size)
12960 {
12961 #ifdef HAVE_consttable_1
12962 case 1:
12963 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12964 break;
12965
12966 #endif
12967 #ifdef HAVE_consttable_2
12968 case 2:
12969 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12970 break;
12971
12972 #endif
12973 #ifdef HAVE_consttable_4
12974 case 4:
12975 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12976 break;
12977
12978 #endif
12979 #ifdef HAVE_consttable_8
12980 case 8:
12981 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12982 break;
12983
12984 #endif
12985 #ifdef HAVE_consttable_16
12986 case 16:
12987 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12988 break;
12989
12990 #endif
12991 default:
12992 gcc_unreachable ();
12993 }
12994 }
12995
12996 nmp = mp->next;
12997 free (mp);
12998 }
12999
13000 minipool_vector_head = minipool_vector_tail = NULL;
13001 scan = emit_insn_after (gen_consttable_end (), scan);
13002 scan = emit_barrier_after (scan);
13003 }
13004
13005 /* Return the cost of forcibly inserting a barrier after INSN. */
13006 static int
13007 arm_barrier_cost (rtx insn)
13008 {
13009 /* Basing the location of the pool on the loop depth is preferable,
13010 but at the moment, the basic block information seems to be
13011 corrupted by this stage of the compilation. */
13012 int base_cost = 50;
13013 rtx next = next_nonnote_insn (insn);
13014
13015 if (next != NULL && LABEL_P (next))
13016 base_cost -= 20;
13017
13018 switch (GET_CODE (insn))
13019 {
13020 case CODE_LABEL:
13021 /* It will always be better to place the table before the label, rather
13022 than after it. */
13023 return 50;
13024
13025 case INSN:
13026 case CALL_INSN:
13027 return base_cost;
13028
13029 case JUMP_INSN:
13030 return base_cost - 10;
13031
13032 default:
13033 return base_cost + 10;
13034 }
13035 }
13036
13037 /* Find the best place in the insn stream in the range
13038 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
13039 Create the barrier by inserting a jump and add a new fix entry for
13040 it. */
13041 static Mfix *
13042 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
13043 {
13044 HOST_WIDE_INT count = 0;
13045 rtx barrier;
13046 rtx from = fix->insn;
13047 /* The instruction after which we will insert the jump. */
13048 rtx selected = NULL;
13049 int selected_cost;
13050 /* The address at which the jump instruction will be placed. */
13051 HOST_WIDE_INT selected_address;
13052 Mfix * new_fix;
13053 HOST_WIDE_INT max_count = max_address - fix->address;
13054 rtx label = gen_label_rtx ();
13055
13056 selected_cost = arm_barrier_cost (from);
13057 selected_address = fix->address;
13058
13059 while (from && count < max_count)
13060 {
13061 rtx tmp;
13062 int new_cost;
13063
13064 /* This code shouldn't have been called if there was a natural barrier
13065 within range. */
13066 gcc_assert (!BARRIER_P (from));
13067
13068 /* Count the length of this insn. This must stay in sync with the
13069 code that pushes minipool fixes. */
13070 if (LABEL_P (from))
13071 count += get_label_padding (from);
13072 else
13073 count += get_attr_length (from);
13074
13075 /* If there is a jump table, add its length. */
13076 tmp = is_jump_table (from);
13077 if (tmp != NULL)
13078 {
13079 count += get_jump_table_size (tmp);
13080
13081 /* Jump tables aren't in a basic block, so base the cost on
13082 the dispatch insn. If we select this location, we will
13083 still put the pool after the table. */
13084 new_cost = arm_barrier_cost (from);
13085
13086 if (count < max_count
13087 && (!selected || new_cost <= selected_cost))
13088 {
13089 selected = tmp;
13090 selected_cost = new_cost;
13091 selected_address = fix->address + count;
13092 }
13093
13094 /* Continue after the dispatch table. */
13095 from = NEXT_INSN (tmp);
13096 continue;
13097 }
13098
13099 new_cost = arm_barrier_cost (from);
13100
13101 if (count < max_count
13102 && (!selected || new_cost <= selected_cost))
13103 {
13104 selected = from;
13105 selected_cost = new_cost;
13106 selected_address = fix->address + count;
13107 }
13108
13109 from = NEXT_INSN (from);
13110 }
13111
13112 /* Make sure that we found a place to insert the jump. */
13113 gcc_assert (selected);
13114
13115 /* Make sure we do not split a call and its corresponding
13116 CALL_ARG_LOCATION note. */
13117 if (CALL_P (selected))
13118 {
13119 rtx next = NEXT_INSN (selected);
13120 if (next && NOTE_P (next)
13121 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
13122 selected = next;
13123 }
13124
13125 /* Create a new JUMP_INSN that branches around a barrier. */
13126 from = emit_jump_insn_after (gen_jump (label), selected);
13127 JUMP_LABEL (from) = label;
13128 barrier = emit_barrier_after (from);
13129 emit_label_after (label, barrier);
13130
13131 /* Create a minipool barrier entry for the new barrier. */
13132 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
13133 new_fix->insn = barrier;
13134 new_fix->address = selected_address;
13135 new_fix->next = fix->next;
13136 fix->next = new_fix;
13137
13138 return new_fix;
13139 }
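
/* For illustration (the label name is hypothetical): after create_fix_barrier
   the insn stream around the selected point looks roughly like

	<selected insn>
	b	.LP0		@ new jump around the future pool
	<barrier>
   .LP0:
	<following insns>

   and dump_minipool later places the literal data between the barrier and
   .LP0, so execution skips over it.  */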
13140
13141 /* Record that there is a natural barrier in the insn stream at
13142 ADDRESS. */
13143 static void
13144 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13145 {
13146 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13147
13148 fix->insn = insn;
13149 fix->address = address;
13150
13151 fix->next = NULL;
13152 if (minipool_fix_head != NULL)
13153 minipool_fix_tail->next = fix;
13154 else
13155 minipool_fix_head = fix;
13156
13157 minipool_fix_tail = fix;
13158 }
13159
13160 /* Record INSN, which will need fixing up to load a value from the
13161 minipool. ADDRESS is the offset of the insn since the start of the
13162 function; LOC is a pointer to the part of the insn which requires
13163 fixing; VALUE is the constant that must be loaded, which is of type
13164 MODE. */
13165 static void
13166 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13167 enum machine_mode mode, rtx value)
13168 {
13169 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13170
13171 fix->insn = insn;
13172 fix->address = address;
13173 fix->loc = loc;
13174 fix->mode = mode;
13175 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13176 fix->value = value;
13177 fix->forwards = get_attr_pool_range (insn);
13178 fix->backwards = get_attr_neg_pool_range (insn);
13179 fix->minipool = NULL;
13180
13181 /* If an insn doesn't have a range defined for it, then it isn't
13182 expecting to be reworked by this code. Better to stop now than
13183 to generate duff assembly code. */
13184 gcc_assert (fix->forwards || fix->backwards);
13185
13186 /* If an entry requires 8-byte alignment then assume all constant pools
13187 require 4 bytes of padding. Trying to do this later on a per-pool
13188 basis is awkward because existing pool entries have to be modified. */
13189 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13190 minipool_pad = 4;
13191
13192 if (dump_file)
13193 {
13194 fprintf (dump_file,
13195 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13196 GET_MODE_NAME (mode),
13197 INSN_UID (insn), (unsigned long) address,
13198 -1 * (long)fix->backwards, (long)fix->forwards);
13199 arm_print_value (dump_file, fix->value);
13200 fprintf (dump_file, "\n");
13201 }
13202
13203 /* Add it to the chain of fixes. */
13204 fix->next = NULL;
13205
13206 if (minipool_fix_head != NULL)
13207 minipool_fix_tail->next = fix;
13208 else
13209 minipool_fix_head = fix;
13210
13211 minipool_fix_tail = fix;
13212 }
13213
13214 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13215 Returns the number of insns needed, or 99 if we don't know how to
13216 do it. */
13217 int
13218 arm_const_double_inline_cost (rtx val)
13219 {
13220 rtx lowpart, highpart;
13221 enum machine_mode mode;
13222
13223 mode = GET_MODE (val);
13224
13225 if (mode == VOIDmode)
13226 mode = DImode;
13227
13228 gcc_assert (GET_MODE_SIZE (mode) == 8);
13229
13230 lowpart = gen_lowpart (SImode, val);
13231 highpart = gen_highpart_mode (SImode, mode, val);
13232
13233 gcc_assert (CONST_INT_P (lowpart));
13234 gcc_assert (CONST_INT_P (highpart));
13235
13236 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13237 NULL_RTX, NULL_RTX, 0, 0)
13238 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13239 NULL_RTX, NULL_RTX, 0, 0));
13240 }
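
/* A rough illustration (the exact counts depend on the target options and
   on arm_gen_constant): a DImode value such as 0x0000000200000001 splits
   into the halves 0x2 and 0x1, each loadable with a single MOV, giving a
   cost of 2; a value like 0x1234567812345678 needs several instructions
   per half and therefore a much higher cost.  */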
13241
13242 /* Return true if it is worthwhile to split a 64-bit constant into two
13243 32-bit operations. This is the case if optimizing for size, or
13244 if we have load delay slots, or if one 32-bit part can be done with
13245 a single data operation. */
13246 bool
13247 arm_const_double_by_parts (rtx val)
13248 {
13249 enum machine_mode mode = GET_MODE (val);
13250 rtx part;
13251
13252 if (optimize_size || arm_ld_sched)
13253 return true;
13254
13255 if (mode == VOIDmode)
13256 mode = DImode;
13257
13258 part = gen_highpart_mode (SImode, mode, val);
13259
13260 gcc_assert (CONST_INT_P (part));
13261
13262 if (const_ok_for_arm (INTVAL (part))
13263 || const_ok_for_arm (~INTVAL (part)))
13264 return true;
13265
13266 part = gen_lowpart (SImode, val);
13267
13268 gcc_assert (CONST_INT_P (part));
13269
13270 if (const_ok_for_arm (INTVAL (part))
13271 || const_ok_for_arm (~INTVAL (part)))
13272 return true;
13273
13274 return false;
13275 }
13276
13277 /* Return true if it is possible to inline both the high and low parts
13278 of a 64-bit constant into 32-bit data processing instructions. */
13279 bool
13280 arm_const_double_by_immediates (rtx val)
13281 {
13282 enum machine_mode mode = GET_MODE (val);
13283 rtx part;
13284
13285 if (mode == VOIDmode)
13286 mode = DImode;
13287
13288 part = gen_highpart_mode (SImode, mode, val);
13289
13290 gcc_assert (CONST_INT_P (part));
13291
13292 if (!const_ok_for_arm (INTVAL (part)))
13293 return false;
13294
13295 part = gen_lowpart (SImode, val);
13296
13297 gcc_assert (CONST_INT_P (part));
13298
13299 if (!const_ok_for_arm (INTVAL (part)))
13300 return false;
13301
13302 return true;
13303 }
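
/* Illustrative values only: a 64-bit constant whose halves are, say, 0x45
   and 0x1 qualifies, because each half is a valid 8-bit rotated ARM
   immediate; a constant with a half such as 0x12345678 does not, since
   that value cannot be encoded as a data-processing immediate.  */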
13304
13305 /* Scan INSN and note any of its operands that need fixing.
13306 If DO_PUSHES is false we do not actually push any of the fixups
13307 needed. */
13308 static void
13309 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13310 {
13311 int opno;
13312
13313 extract_insn (insn);
13314
13315 if (!constrain_operands (1))
13316 fatal_insn_not_found (insn);
13317
13318 if (recog_data.n_alternatives == 0)
13319 return;
13320
13321 /* Fill in recog_op_alt with information about the constraints of
13322 this insn. */
13323 preprocess_constraints ();
13324
13325 for (opno = 0; opno < recog_data.n_operands; opno++)
13326 {
13327 /* Things we need to fix can only occur in inputs. */
13328 if (recog_data.operand_type[opno] != OP_IN)
13329 continue;
13330
13331 /* If this alternative is a memory reference, then any mention
13332 of constants in this alternative is really to fool reload
13333 into allowing us to accept one there. We need to fix them up
13334 now so that we output the right code. */
13335 if (recog_op_alt[opno][which_alternative].memory_ok)
13336 {
13337 rtx op = recog_data.operand[opno];
13338
13339 if (CONSTANT_P (op))
13340 {
13341 if (do_pushes)
13342 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13343 recog_data.operand_mode[opno], op);
13344 }
13345 else if (MEM_P (op)
13346 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13347 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13348 {
13349 if (do_pushes)
13350 {
13351 rtx cop = avoid_constant_pool_reference (op);
13352
13353 /* Casting the address of something to a mode narrower
13354 than a word can cause avoid_constant_pool_reference()
13355 to return the pool reference itself. That's no good to
13356 us here. Let's just hope that we can use the
13357 constant pool value directly. */
13358 if (op == cop)
13359 cop = get_pool_constant (XEXP (op, 0));
13360
13361 push_minipool_fix (insn, address,
13362 recog_data.operand_loc[opno],
13363 recog_data.operand_mode[opno], cop);
13364 }
13365
13366 }
13367 }
13368 }
13369
13370 return;
13371 }
13372
13373 /* Convert instructions to their cc-clobbering variant if possible, since
13374 that allows us to use smaller encodings. */
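/* For example (illustrative assembly, not emitted directly by this
   function): when the condition flags are dead at this point, the
   three-operand Thumb-2 form

	add	r0, r1, r2	@ 32-bit encoding, flags untouched

   can be replaced by its flag-setting counterpart

	adds	r0, r1, r2	@ 16-bit encoding, flags clobbered

   which is what adding the CC_REGNUM clobber to the pattern allows the
   output patterns to select.  */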
13375
13376 static void
13377 thumb2_reorg (void)
13378 {
13379 basic_block bb;
13380 regset_head live;
13381
13382 INIT_REG_SET (&live);
13383
13384 /* We are freeing block_for_insn in the toplev to keep compatibility
13385 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13386 compute_bb_for_insn ();
13387 df_analyze ();
13388
13389 FOR_EACH_BB (bb)
13390 {
13391 rtx insn;
13392
13393 COPY_REG_SET (&live, DF_LR_OUT (bb));
13394 df_simulate_initialize_backwards (bb, &live);
13395 FOR_BB_INSNS_REVERSE (bb, insn)
13396 {
13397 if (NONJUMP_INSN_P (insn)
13398 && !REGNO_REG_SET_P (&live, CC_REGNUM)
13399 && GET_CODE (PATTERN (insn)) == SET)
13400 {
13401 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
13402 rtx pat = PATTERN (insn);
13403 rtx dst = XEXP (pat, 0);
13404 rtx src = XEXP (pat, 1);
13405 rtx op0 = NULL_RTX, op1 = NULL_RTX;
13406
13407 if (!OBJECT_P (src))
13408 op0 = XEXP (src, 0);
13409
13410 if (BINARY_P (src))
13411 op1 = XEXP (src, 1);
13412
13413 if (low_register_operand (dst, SImode))
13414 {
13415 switch (GET_CODE (src))
13416 {
13417 case PLUS:
13418 /* Adding two registers and storing the result
13419 in the first source is already a 16-bit
13420 operation. */
13421 if (rtx_equal_p (dst, op0)
13422 && register_operand (op1, SImode))
13423 break;
13424
13425 if (low_register_operand (op0, SImode))
13426 {
13427 /* ADDS <Rd>,<Rn>,<Rm> */
13428 if (low_register_operand (op1, SImode))
13429 action = CONV;
13430 /* ADDS <Rdn>,#<imm8> */
13431 /* SUBS <Rdn>,#<imm8> */
13432 else if (rtx_equal_p (dst, op0)
13433 && CONST_INT_P (op1)
13434 && IN_RANGE (INTVAL (op1), -255, 255))
13435 action = CONV;
13436 /* ADDS <Rd>,<Rn>,#<imm3> */
13437 /* SUBS <Rd>,<Rn>,#<imm3> */
13438 else if (CONST_INT_P (op1)
13439 && IN_RANGE (INTVAL (op1), -7, 7))
13440 action = CONV;
13441 }
13442 break;
13443
13444 case MINUS:
13445 /* RSBS <Rd>,<Rn>,#0
13446 Not handled here: see NEG below. */
13447 /* SUBS <Rd>,<Rn>,#<imm3>
13448 SUBS <Rdn>,#<imm8>
13449 Not handled here: see PLUS above. */
13450 /* SUBS <Rd>,<Rn>,<Rm> */
13451 if (low_register_operand (op0, SImode)
13452 && low_register_operand (op1, SImode))
13453 action = CONV;
13454 break;
13455
13456 case MULT:
13457 /* MULS <Rdm>,<Rn>,<Rdm>
13458 As an exception to the rule, this is only used
13459 when optimizing for size since MULS is slow on all
13460 known implementations. We do not even want to use
13461 MULS in cold code, if optimizing for speed, so we
13462 test the global flag here. */
13463 if (!optimize_size)
13464 break;
13465 /* else fall through. */
13466 case AND:
13467 case IOR:
13468 case XOR:
13469 /* ANDS <Rdn>,<Rm> */
13470 if (rtx_equal_p (dst, op0)
13471 && low_register_operand (op1, SImode))
13472 action = CONV;
13473 else if (rtx_equal_p (dst, op1)
13474 && low_register_operand (op0, SImode))
13475 action = SWAP_CONV;
13476 break;
13477
13478 case ASHIFTRT:
13479 case ASHIFT:
13480 case LSHIFTRT:
13481 /* ASRS <Rdn>,<Rm> */
13482 /* LSRS <Rdn>,<Rm> */
13483 /* LSLS <Rdn>,<Rm> */
13484 if (rtx_equal_p (dst, op0)
13485 && low_register_operand (op1, SImode))
13486 action = CONV;
13487 /* ASRS <Rd>,<Rm>,#<imm5> */
13488 /* LSRS <Rd>,<Rm>,#<imm5> */
13489 /* LSLS <Rd>,<Rm>,#<imm5> */
13490 else if (low_register_operand (op0, SImode)
13491 && CONST_INT_P (op1)
13492 && IN_RANGE (INTVAL (op1), 0, 31))
13493 action = CONV;
13494 break;
13495
13496 case ROTATERT:
13497 /* RORS <Rdn>,<Rm> */
13498 if (rtx_equal_p (dst, op0)
13499 && low_register_operand (op1, SImode))
13500 action = CONV;
13501 break;
13502
13503 case NOT:
13504 case NEG:
13505 /* MVNS <Rd>,<Rm> */
13506 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
13507 if (low_register_operand (op0, SImode))
13508 action = CONV;
13509 break;
13510
13511 case CONST_INT:
13512 /* MOVS <Rd>,#<imm8> */
13513 if (CONST_INT_P (src)
13514 && IN_RANGE (INTVAL (src), 0, 255))
13515 action = CONV;
13516 break;
13517
13518 case REG:
13519 /* MOVS and MOV<c> with registers have different
13520 encodings, so are not relevant here. */
13521 break;
13522
13523 default:
13524 break;
13525 }
13526 }
13527
13528 if (action != SKIP)
13529 {
13530 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13531 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13532 rtvec vec;
13533
13534 if (action == SWAP_CONV)
13535 {
13536 src = copy_rtx (src);
13537 XEXP (src, 0) = op1;
13538 XEXP (src, 1) = op0;
13539 pat = gen_rtx_SET (VOIDmode, dst, src);
13540 vec = gen_rtvec (2, pat, clobber);
13541 }
13542 else /* action == CONV */
13543 vec = gen_rtvec (2, pat, clobber);
13544
13545 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13546 INSN_CODE (insn) = -1;
13547 }
13548 }
13549
13550 if (NONDEBUG_INSN_P (insn))
13551 df_simulate_one_insn_backwards (bb, insn, &live);
13552 }
13553 }
13554
13555 CLEAR_REG_SET (&live);
13556 }
13557
13558 /* GCC puts the pool in the wrong place for ARM, since we can only
13559 load addresses a limited distance around the pc. We do some
13560 special munging to move the constant pool values to the correct
13561 point in the code. */
13562 static void
13563 arm_reorg (void)
13564 {
13565 rtx insn;
13566 HOST_WIDE_INT address = 0;
13567 Mfix * fix;
13568
13569 if (TARGET_THUMB2)
13570 thumb2_reorg ();
13571
13572 /* Ensure all insns that must be split have been split at this point.
13573 Otherwise, the pool placement code below may compute incorrect
13574 insn lengths. Note that when optimizing, all insns have already
13575 been split at this point. */
13576 if (!optimize)
13577 split_all_insns_noflow ();
13578
13579 minipool_fix_head = minipool_fix_tail = NULL;
13580
13581 /* The first insn must always be a note, or the code below won't
13582 scan it properly. */
13583 insn = get_insns ();
13584 gcc_assert (NOTE_P (insn));
13585 minipool_pad = 0;
13586
13587 /* Scan all the insns and record the operands that will need fixing. */
13588 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13589 {
13590 if (BARRIER_P (insn))
13591 push_minipool_barrier (insn, address);
13592 else if (INSN_P (insn))
13593 {
13594 rtx table;
13595
13596 note_invalid_constants (insn, address, true);
13597 address += get_attr_length (insn);
13598
13599 /* If the insn is a vector jump, add the size of the table
13600 and skip the table. */
13601 if ((table = is_jump_table (insn)) != NULL)
13602 {
13603 address += get_jump_table_size (table);
13604 insn = table;
13605 }
13606 }
13607 else if (LABEL_P (insn))
13608 /* Add the worst-case padding due to alignment. We don't add
13609 the _current_ padding because the minipool insertions
13610 themselves might change it. */
13611 address += get_label_padding (insn);
13612 }
13613
13614 fix = minipool_fix_head;
13615
13616 /* Now scan the fixups and perform the required changes. */
13617 while (fix)
13618 {
13619 Mfix * ftmp;
13620 Mfix * fdel;
13621 Mfix * last_added_fix;
13622 Mfix * last_barrier = NULL;
13623 Mfix * this_fix;
13624
13625 /* Skip any further barriers before the next fix. */
13626 while (fix && BARRIER_P (fix->insn))
13627 fix = fix->next;
13628
13629 /* No more fixes. */
13630 if (fix == NULL)
13631 break;
13632
13633 last_added_fix = NULL;
13634
13635 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13636 {
13637 if (BARRIER_P (ftmp->insn))
13638 {
13639 if (ftmp->address >= minipool_vector_head->max_address)
13640 break;
13641
13642 last_barrier = ftmp;
13643 }
13644 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13645 break;
13646
13647 last_added_fix = ftmp; /* Keep track of the last fix added. */
13648 }
13649
13650 /* If we found a barrier, drop back to that; any fixes that we
13651 could have reached but come after the barrier will now go in
13652 the next mini-pool. */
13653 if (last_barrier != NULL)
13654 {
13655 /* Reduce the refcount for those fixes that won't go into this
13656 pool after all. */
13657 for (fdel = last_barrier->next;
13658 fdel && fdel != ftmp;
13659 fdel = fdel->next)
13660 {
13661 fdel->minipool->refcount--;
13662 fdel->minipool = NULL;
13663 }
13664
13665 ftmp = last_barrier;
13666 }
13667 else
13668 {
13669 /* ftmp is the first fix that we can't fit into this pool and
13670 there are no natural barriers that we could use. Insert a
13671 new barrier in the code somewhere between the previous
13672 fix and this one, and arrange to jump around it. */
13673 HOST_WIDE_INT max_address;
13674
13675 /* The last item on the list of fixes must be a barrier, so
13676 we can never run off the end of the list of fixes without
13677 last_barrier being set. */
13678 gcc_assert (ftmp);
13679
13680 max_address = minipool_vector_head->max_address;
13681 /* Check that there isn't another fix that is in range that
13682 we couldn't fit into this pool because the pool was
13683 already too large: we need to put the pool before such an
13684 instruction. The pool itself may come just after the
13685 fix because create_fix_barrier also allows space for a
13686 jump instruction. */
13687 if (ftmp->address < max_address)
13688 max_address = ftmp->address + 1;
13689
13690 last_barrier = create_fix_barrier (last_added_fix, max_address);
13691 }
13692
13693 assign_minipool_offsets (last_barrier);
13694
13695 while (ftmp)
13696 {
13697 if (!BARRIER_P (ftmp->insn)
13698 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13699 == NULL))
13700 break;
13701
13702 ftmp = ftmp->next;
13703 }
13704
13705 /* Scan over the fixes we have identified for this pool, fixing them
13706 up and adding the constants to the pool itself. */
13707 for (this_fix = fix; this_fix && ftmp != this_fix;
13708 this_fix = this_fix->next)
13709 if (!BARRIER_P (this_fix->insn))
13710 {
13711 rtx addr
13712 = plus_constant (Pmode,
13713 gen_rtx_LABEL_REF (VOIDmode,
13714 minipool_vector_label),
13715 this_fix->minipool->offset);
13716 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13717 }
13718
13719 dump_minipool (last_barrier->insn);
13720 fix = ftmp;
13721 }
13722
13723 /* From now on we must synthesize any constants that we can't handle
13724 directly. This can happen if the RTL gets split during final
13725 instruction generation. */
13726 after_arm_reorg = 1;
13727
13728 /* Free the minipool memory. */
13729 obstack_free (&minipool_obstack, minipool_startobj);
13730 }
13731 \f
13732 /* Routines to output assembly language. */
13733
13734 /* If the rtx is the correct value then return the string of the number.
13735 In this way we can ensure that valid double constants are generated even
13736 when cross compiling. */
13737 const char *
13738 fp_immediate_constant (rtx x)
13739 {
13740 REAL_VALUE_TYPE r;
13741
13742 if (!fp_consts_inited)
13743 init_fp_table ();
13744
13745 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13746
13747 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
13748 return "0";
13749 }
13750
13751 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13752 static const char *
13753 fp_const_from_val (REAL_VALUE_TYPE *r)
13754 {
13755 if (!fp_consts_inited)
13756 init_fp_table ();
13757
13758 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
13759 return "0";
13760 }
13761
13762 /* OPERANDS[0] is the entire list of insns that constitute the pop,
13763 OPERANDS[1] is the base register, RETURN_PC is true iff a return insn
13764 is in the list, UPDATE is true iff the list contains an explicit
13765 update of the base register. */
13766 void
13767 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
13768 bool update)
13769 {
13770 int i;
13771 char pattern[100];
13772 int offset;
13773 const char *conditional;
13774 int num_saves = XVECLEN (operands[0], 0);
13775 unsigned int regno;
13776 unsigned int regno_base = REGNO (operands[1]);
13777
13778 offset = 0;
13779 offset += update ? 1 : 0;
13780 offset += return_pc ? 1 : 0;
13781
13782 /* Is the base register in the list? */
13783 for (i = offset; i < num_saves; i++)
13784 {
13785 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
13786 /* If SP is in the list, then the base register must be SP. */
13787 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
13788 /* If base register is in the list, there must be no explicit update. */
13789 if (regno == regno_base)
13790 gcc_assert (!update);
13791 }
13792
13793 conditional = reverse ? "%?%D0" : "%?%d0";
13794 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
13795 {
13796 /* Output pop (not stmfd) because it has a shorter encoding. */
13797 gcc_assert (update);
13798 sprintf (pattern, "pop%s\t{", conditional);
13799 }
13800 else
13801 {
13802 /* Output ldmfd when the base register is SP, otherwise output ldmia.
13803 It's just a convention; their semantics are identical. */
13804 if (regno_base == SP_REGNUM)
13805 sprintf (pattern, "ldm%sfd\t", conditional);
13806 else if (TARGET_UNIFIED_ASM)
13807 sprintf (pattern, "ldmia%s\t", conditional);
13808 else
13809 sprintf (pattern, "ldm%sia\t", conditional);
13810
13811 strcat (pattern, reg_names[regno_base]);
13812 if (update)
13813 strcat (pattern, "!, {");
13814 else
13815 strcat (pattern, ", {");
13816 }
13817
13818 /* Output the first destination register. */
13819 strcat (pattern,
13820 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
13821
13822 /* Output the rest of the destination registers. */
13823 for (i = offset + 1; i < num_saves; i++)
13824 {
13825 strcat (pattern, ", ");
13826 strcat (pattern,
13827 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
13828 }
13829
13830 strcat (pattern, "}");
13831
13832 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
13833 strcat (pattern, "^");
13834
13835 output_asm_insn (pattern, &cond);
13836 }
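
/* Illustrative output only (the register sets are hypothetical): with SP
   as the base and unified syntax the routine above builds strings such as

	pop	{r4, r5, r7, pc}

   while for another base register with writeback it builds

	ldmia	r6!, {r0, r1, r2}

   both subject to the conditional prefix derived from operand 0.  */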
13837
13838
13839 /* Output the assembly for a store multiple. */
13840
13841 const char *
13842 vfp_output_fstmd (rtx * operands)
13843 {
13844 char pattern[100];
13845 int p;
13846 int base;
13847 int i;
13848
13849 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13850 p = strlen (pattern);
13851
13852 gcc_assert (REG_P (operands[1]));
13853
13854 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13855 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13856 {
13857 p += sprintf (&pattern[p], ", d%d", base + i);
13858 }
13859 strcpy (&pattern[p], "}");
13860
13861 output_asm_insn (pattern, operands);
13862 return "";
13863 }
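
/* For illustration (registers hypothetical): with D8 as the base register
   and a three-register block the string assembled above is

	fstmfdd	sp!, {d8, d9, d10}

   i.e. a descending-stack store multiple of double-precision registers.  */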
13864
13865
13866 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13867 number of bytes pushed. */
13868
13869 static int
13870 vfp_emit_fstmd (int base_reg, int count)
13871 {
13872 rtx par;
13873 rtx dwarf;
13874 rtx tmp, reg;
13875 int i;
13876
13877 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
13878 register pairs are stored by a store multiple insn. We avoid this
13879 by pushing an extra pair. */
13880 if (count == 2 && !arm_arch6)
13881 {
13882 if (base_reg == LAST_VFP_REGNUM - 3)
13883 base_reg -= 2;
13884 count++;
13885 }
13886
13887 /* FSTMD may not store more than 16 doubleword registers at once. Split
13888 larger stores into multiple parts (up to a maximum of two, in
13889 practice). */
13890 if (count > 16)
13891 {
13892 int saved;
13893 /* NOTE: base_reg is an internal register number, so each D register
13894 counts as 2. */
13895 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13896 saved += vfp_emit_fstmd (base_reg, 16);
13897 return saved;
13898 }
13899
13900 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13901 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13902
13903 reg = gen_rtx_REG (DFmode, base_reg);
13904 base_reg += 2;
13905
13906 XVECEXP (par, 0, 0)
13907 = gen_rtx_SET (VOIDmode,
13908 gen_frame_mem
13909 (BLKmode,
13910 gen_rtx_PRE_MODIFY (Pmode,
13911 stack_pointer_rtx,
13912 plus_constant
13913 (Pmode, stack_pointer_rtx,
13914 - (count * 8)))
13915 ),
13916 gen_rtx_UNSPEC (BLKmode,
13917 gen_rtvec (1, reg),
13918 UNSPEC_PUSH_MULT));
13919
13920 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13921 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
13922 RTX_FRAME_RELATED_P (tmp) = 1;
13923 XVECEXP (dwarf, 0, 0) = tmp;
13924
13925 tmp = gen_rtx_SET (VOIDmode,
13926 gen_frame_mem (DFmode, stack_pointer_rtx),
13927 reg);
13928 RTX_FRAME_RELATED_P (tmp) = 1;
13929 XVECEXP (dwarf, 0, 1) = tmp;
13930
13931 for (i = 1; i < count; i++)
13932 {
13933 reg = gen_rtx_REG (DFmode, base_reg);
13934 base_reg += 2;
13935 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13936
13937 tmp = gen_rtx_SET (VOIDmode,
13938 gen_frame_mem (DFmode,
13939 plus_constant (Pmode,
13940 stack_pointer_rtx,
13941 i * 8)),
13942 reg);
13943 RTX_FRAME_RELATED_P (tmp) = 1;
13944 XVECEXP (dwarf, 0, i + 1) = tmp;
13945 }
13946
13947 par = emit_insn (par);
13948 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13949 RTX_FRAME_RELATED_P (par) = 1;
13950
13951 return count * 8;
13952 }
13953
13954 /* Emit a call instruction with pattern PAT. ADDR is the address of
13955 the call target. */
13956
13957 void
13958 arm_emit_call_insn (rtx pat, rtx addr)
13959 {
13960 rtx insn;
13961
13962 insn = emit_call_insn (pat);
13963
13964 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13965 If the call might use such an entry, add a use of the PIC register
13966 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13967 if (TARGET_VXWORKS_RTP
13968 && flag_pic
13969 && GET_CODE (addr) == SYMBOL_REF
13970 && (SYMBOL_REF_DECL (addr)
13971 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13972 : !SYMBOL_REF_LOCAL_P (addr)))
13973 {
13974 require_pic_register ();
13975 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13976 }
13977 }
13978
13979 /* Output a 'call' insn. */
13980 const char *
13981 output_call (rtx *operands)
13982 {
13983 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13984
13985 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13986 if (REGNO (operands[0]) == LR_REGNUM)
13987 {
13988 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13989 output_asm_insn ("mov%?\t%0, %|lr", operands);
13990 }
13991
13992 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13993
13994 if (TARGET_INTERWORK || arm_arch4t)
13995 output_asm_insn ("bx%?\t%0", operands);
13996 else
13997 output_asm_insn ("mov%?\t%|pc, %0", operands);
13998
13999 return "";
14000 }
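
/* Illustrative output (the operand register is hypothetical): on a
   pre-ARMv5 target a call through r3 is emitted as

	mov	lr, pc
	bx	r3		@ or "mov pc, r3" without interworking/ARMv4T

   with the extra "mov ip, lr" prefix only when the target register is LR
   itself.  */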
14001
14002 /* Output a 'call' insn that is a reference in memory. This is
14003 disabled for ARMv5; we prefer a blx instead because otherwise
14004 there's a significant performance overhead. */
14005 const char *
14006 output_call_mem (rtx *operands)
14007 {
14008 gcc_assert (!arm_arch5);
14009 if (TARGET_INTERWORK)
14010 {
14011 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14012 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14013 output_asm_insn ("bx%?\t%|ip", operands);
14014 }
14015 else if (regno_use_in (LR_REGNUM, operands[0]))
14016 {
14017 /* LR is used in the memory address. We load the address in the
14018 first instruction. It's safe to use IP as the target of the
14019 load since the call will kill it anyway. */
14020 output_asm_insn ("ldr%?\t%|ip, %0", operands);
14021 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14022 if (arm_arch4t)
14023 output_asm_insn ("bx%?\t%|ip", operands);
14024 else
14025 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
14026 }
14027 else
14028 {
14029 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
14030 output_asm_insn ("ldr%?\t%|pc, %0", operands);
14031 }
14032
14033 return "";
14034 }
14035
14036
14037 /* Output a move from arm registers to arm registers of a long double
14038 OPERANDS[0] is the destination.
14039 OPERANDS[1] is the source. */
14040 const char *
14041 output_mov_long_double_arm_from_arm (rtx *operands)
14042 {
14043 /* We have to be careful here because the two might overlap. */
14044 int dest_start = REGNO (operands[0]);
14045 int src_start = REGNO (operands[1]);
14046 rtx ops[2];
14047 int i;
14048
14049 if (dest_start < src_start)
14050 {
14051 for (i = 0; i < 3; i++)
14052 {
14053 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14054 ops[1] = gen_rtx_REG (SImode, src_start + i);
14055 output_asm_insn ("mov%?\t%0, %1", ops);
14056 }
14057 }
14058 else
14059 {
14060 for (i = 2; i >= 0; i--)
14061 {
14062 ops[0] = gen_rtx_REG (SImode, dest_start + i);
14063 ops[1] = gen_rtx_REG (SImode, src_start + i);
14064 output_asm_insn ("mov%?\t%0, %1", ops);
14065 }
14066 }
14067
14068 return "";
14069 }
14070
14071 void
14072 arm_emit_movpair (rtx dest, rtx src)
14073 {
14074 /* If the src is an immediate, simplify it. */
14075 if (CONST_INT_P (src))
14076 {
14077 HOST_WIDE_INT val = INTVAL (src);
14078 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
14079 if ((val >> 16) & 0x0000ffff)
14080 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
14081 GEN_INT (16)),
14082 GEN_INT ((val >> 16) & 0x0000ffff));
14083 return;
14084 }
14085 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
14086 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
14087 }
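
/* A sketch of the expected expansion (the assembly is illustrative; the
   actual instructions come from the movsi patterns this expands into):

	arm_emit_movpair (reg, GEN_INT (0x12345678))
	  ->	movw	rN, #0x5678
		movt	rN, #0x1234

	arm_emit_movpair (reg, <symbol_ref "foo">)
	  ->	movw	rN, #:lower16:foo
		movt	rN, #:upper16:foo  */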
14088
14089 /* Output a move between double words. It must be REG<-MEM
14090 or MEM<-REG. */
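/* For illustration (operands hypothetical): a DImode load from a simple
   register address is normally emitted as either

	ldrd	r0, [r4]	@ when TARGET_LDRD
   or
	ldmia	r4, {r0, r1}	@ otherwise

   with the more involved cases below handling autoincrement and offset
   addresses.  */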
14091 const char *
14092 output_move_double (rtx *operands, bool emit, int *count)
14093 {
14094 enum rtx_code code0 = GET_CODE (operands[0]);
14095 enum rtx_code code1 = GET_CODE (operands[1]);
14096 rtx otherops[3];
14097 if (count)
14098 *count = 1;
14099
14100 /* The only case when this might happen is when
14101 you are looking at the length of a DImode instruction
14102 that has an invalid constant in it. */
14103 if (code0 == REG && code1 != MEM)
14104 {
14105 gcc_assert (!emit);
14106 *count = 2;
14107 return "";
14108 }
14109
14110 if (code0 == REG)
14111 {
14112 unsigned int reg0 = REGNO (operands[0]);
14113
14114 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
14115
14116 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
14117
14118 switch (GET_CODE (XEXP (operands[1], 0)))
14119 {
14120 case REG:
14121
14122 if (emit)
14123 {
14124 if (TARGET_LDRD
14125 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
14126 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
14127 else
14128 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14129 }
14130 break;
14131
14132 case PRE_INC:
14133 gcc_assert (TARGET_LDRD);
14134 if (emit)
14135 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
14136 break;
14137
14138 case PRE_DEC:
14139 if (emit)
14140 {
14141 if (TARGET_LDRD)
14142 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
14143 else
14144 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
14145 }
14146 break;
14147
14148 case POST_INC:
14149 if (emit)
14150 {
14151 if (TARGET_LDRD)
14152 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
14153 else
14154 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14155 }
14156 break;
14157
14158 case POST_DEC:
14159 gcc_assert (TARGET_LDRD);
14160 if (emit)
14161 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14162 break;
14163
14164 case PRE_MODIFY:
14165 case POST_MODIFY:
14166 /* Autoincrement addressing modes should never have overlapping
14167 base and destination registers, and overlapping index registers
14168 are already prohibited, so this doesn't need to worry about
14169 fix_cm3_ldrd. */
14170 otherops[0] = operands[0];
14171 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14172 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14173
14174 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14175 {
14176 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14177 {
14178 /* Registers overlap so split out the increment. */
14179 if (emit)
14180 {
14181 output_asm_insn ("add%?\t%1, %1, %2", otherops);
14182 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14183 }
14184 if (count)
14185 *count = 2;
14186 }
14187 else
14188 {
14189 /* Use a single insn if we can.
14190 FIXME: IWMMXT allows offsets larger than ldrd can
14191 handle, fix these up with a pair of ldr. */
14192 if (TARGET_THUMB2
14193 || !CONST_INT_P (otherops[2])
14194 || (INTVAL (otherops[2]) > -256
14195 && INTVAL (otherops[2]) < 256))
14196 {
14197 if (emit)
14198 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14199 }
14200 else
14201 {
14202 if (emit)
14203 {
14204 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14205 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14206 }
14207 if (count)
14208 *count = 2;
14209
14210 }
14211 }
14212 }
14213 else
14214 {
14215 /* Use a single insn if we can.
14216 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14217 fix these up with a pair of ldr. */
14218 if (TARGET_THUMB2
14219 || !CONST_INT_P (otherops[2])
14220 || (INTVAL (otherops[2]) > -256
14221 && INTVAL (otherops[2]) < 256))
14222 {
14223 if (emit)
14224 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14225 }
14226 else
14227 {
14228 if (emit)
14229 {
14230 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14231 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14232 }
14233 if (count)
14234 *count = 2;
14235 }
14236 }
14237 break;
14238
14239 case LABEL_REF:
14240 case CONST:
14241 /* We might be able to use ldrd %0, %1 here. However, the range is
14242 different from that of ldr/adr, and it is broken on some ARMv7-M
14243 implementations. */
14244 /* Use the second register of the pair to avoid problematic
14245 overlap. */
14246 otherops[1] = operands[1];
14247 if (emit)
14248 output_asm_insn ("adr%?\t%0, %1", otherops);
14249 operands[1] = otherops[0];
14250 if (emit)
14251 {
14252 if (TARGET_LDRD)
14253 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14254 else
14255 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14256 }
14257
14258 if (count)
14259 *count = 2;
14260 break;
14261
14262 /* ??? This needs checking for thumb2. */
14263 default:
14264 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14265 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14266 {
14267 otherops[0] = operands[0];
14268 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14269 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14270
14271 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14272 {
14273 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14274 {
14275 switch ((int) INTVAL (otherops[2]))
14276 {
14277 case -8:
14278 if (emit)
14279 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14280 return "";
14281 case -4:
14282 if (TARGET_THUMB2)
14283 break;
14284 if (emit)
14285 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14286 return "";
14287 case 4:
14288 if (TARGET_THUMB2)
14289 break;
14290 if (emit)
14291 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14292 return "";
14293 }
14294 }
14295 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
14296 operands[1] = otherops[0];
14297 if (TARGET_LDRD
14298 && (REG_P (otherops[2])
14299 || TARGET_THUMB2
14300 || (CONST_INT_P (otherops[2])
14301 && INTVAL (otherops[2]) > -256
14302 && INTVAL (otherops[2]) < 256)))
14303 {
14304 if (reg_overlap_mentioned_p (operands[0],
14305 otherops[2]))
14306 {
14307 rtx tmp;
14308 /* Swap base and index registers over to
14309 avoid a conflict. */
14310 tmp = otherops[1];
14311 otherops[1] = otherops[2];
14312 otherops[2] = tmp;
14313 }
14314 /* If both registers conflict, it will usually
14315 have been fixed by a splitter. */
14316 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14317 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14318 {
14319 if (emit)
14320 {
14321 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14322 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14323 }
14324 if (count)
14325 *count = 2;
14326 }
14327 else
14328 {
14329 otherops[0] = operands[0];
14330 if (emit)
14331 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14332 }
14333 return "";
14334 }
14335
14336 if (CONST_INT_P (otherops[2]))
14337 {
14338 if (emit)
14339 {
14340 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14341 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14342 else
14343 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14344 }
14345 }
14346 else
14347 {
14348 if (emit)
14349 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14350 }
14351 }
14352 else
14353 {
14354 if (emit)
14355 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14356 }
14357
14358 if (count)
14359 *count = 2;
14360
14361 if (TARGET_LDRD)
14362 return "ldr%(d%)\t%0, [%1]";
14363
14364 return "ldm%(ia%)\t%1, %M0";
14365 }
14366 else
14367 {
14368 otherops[1] = adjust_address (operands[1], SImode, 4);
14369 /* Take care of overlapping base/data reg. */
14370 if (reg_mentioned_p (operands[0], operands[1]))
14371 {
14372 if (emit)
14373 {
14374 output_asm_insn ("ldr%?\t%0, %1", otherops);
14375 output_asm_insn ("ldr%?\t%0, %1", operands);
14376 }
14377 if (count)
14378 *count = 2;
14379
14380 }
14381 else
14382 {
14383 if (emit)
14384 {
14385 output_asm_insn ("ldr%?\t%0, %1", operands);
14386 output_asm_insn ("ldr%?\t%0, %1", otherops);
14387 }
14388 if (count)
14389 *count = 2;
14390 }
14391 }
14392 }
14393 }
14394 else
14395 {
14396 /* Constraints should ensure this. */
14397 gcc_assert (code0 == MEM && code1 == REG);
14398 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14399
14400 switch (GET_CODE (XEXP (operands[0], 0)))
14401 {
14402 case REG:
14403 if (emit)
14404 {
14405 if (TARGET_LDRD)
14406 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14407 else
14408 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14409 }
14410 break;
14411
14412 case PRE_INC:
14413 gcc_assert (TARGET_LDRD);
14414 if (emit)
14415 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14416 break;
14417
14418 case PRE_DEC:
14419 if (emit)
14420 {
14421 if (TARGET_LDRD)
14422 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14423 else
14424 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14425 }
14426 break;
14427
14428 case POST_INC:
14429 if (emit)
14430 {
14431 if (TARGET_LDRD)
14432 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14433 else
14434 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14435 }
14436 break;
14437
14438 case POST_DEC:
14439 gcc_assert (TARGET_LDRD);
14440 if (emit)
14441 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14442 break;
14443
14444 case PRE_MODIFY:
14445 case POST_MODIFY:
14446 otherops[0] = operands[1];
14447 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14448 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14449
14450 /* IWMMXT allows offsets larger than ldrd can handle,
14451 fix these up with a pair of ldr. */
14452 if (!TARGET_THUMB2
14453 && CONST_INT_P (otherops[2])
14454 && (INTVAL(otherops[2]) <= -256
14455 || INTVAL(otherops[2]) >= 256))
14456 {
14457 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14458 {
14459 if (emit)
14460 {
14461 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14462 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14463 }
14464 if (count)
14465 *count = 2;
14466 }
14467 else
14468 {
14469 if (emit)
14470 {
14471 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14472 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14473 }
14474 if (count)
14475 *count = 2;
14476 }
14477 }
14478 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14479 {
14480 if (emit)
14481 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14482 }
14483 else
14484 {
14485 if (emit)
14486 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14487 }
14488 break;
14489
14490 case PLUS:
14491 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14492 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
14493 {
14494 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14495 {
14496 case -8:
14497 if (emit)
14498 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14499 return "";
14500
14501 case -4:
14502 if (TARGET_THUMB2)
14503 break;
14504 if (emit)
14505 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14506 return "";
14507
14508 case 4:
14509 if (TARGET_THUMB2)
14510 break;
14511 if (emit)
14512 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14513 return "";
14514 }
14515 }
14516 if (TARGET_LDRD
14517 && (REG_P (otherops[2])
14518 || TARGET_THUMB2
14519 || (CONST_INT_P (otherops[2])
14520 && INTVAL (otherops[2]) > -256
14521 && INTVAL (otherops[2]) < 256)))
14522 {
14523 otherops[0] = operands[1];
14524 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14525 if (emit)
14526 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14527 return "";
14528 }
14529 /* Fall through */
14530
14531 default:
14532 otherops[0] = adjust_address (operands[0], SImode, 4);
14533 otherops[1] = operands[1];
14534 if (emit)
14535 {
14536 output_asm_insn ("str%?\t%1, %0", operands);
14537 output_asm_insn ("str%?\t%H1, %0", otherops);
14538 }
14539 if (count)
14540 *count = 2;
14541 }
14542 }
14543
14544 return "";
14545 }
14546
14547 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14548 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14549
14550 const char *
14551 output_move_quad (rtx *operands)
14552 {
14553 if (REG_P (operands[0]))
14554 {
14555 /* Load, or reg->reg move. */
14556
14557 if (MEM_P (operands[1]))
14558 {
14559 switch (GET_CODE (XEXP (operands[1], 0)))
14560 {
14561 case REG:
14562 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14563 break;
14564
14565 case LABEL_REF:
14566 case CONST:
14567 output_asm_insn ("adr%?\t%0, %1", operands);
14568 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14569 break;
14570
14571 default:
14572 gcc_unreachable ();
14573 }
14574 }
14575 else
14576 {
14577 rtx ops[2];
14578 int dest, src, i;
14579
14580 gcc_assert (REG_P (operands[1]));
14581
14582 dest = REGNO (operands[0]);
14583 src = REGNO (operands[1]);
14584
14585 /* This seems pretty dumb, but hopefully GCC won't try to do it
14586 very often. */
14587 if (dest < src)
14588 for (i = 0; i < 4; i++)
14589 {
14590 ops[0] = gen_rtx_REG (SImode, dest + i);
14591 ops[1] = gen_rtx_REG (SImode, src + i);
14592 output_asm_insn ("mov%?\t%0, %1", ops);
14593 }
14594 else
14595 for (i = 3; i >= 0; i--)
14596 {
14597 ops[0] = gen_rtx_REG (SImode, dest + i);
14598 ops[1] = gen_rtx_REG (SImode, src + i);
14599 output_asm_insn ("mov%?\t%0, %1", ops);
14600 }
14601 }
14602 }
14603 else
14604 {
14605 gcc_assert (MEM_P (operands[0]));
14606 gcc_assert (REG_P (operands[1]));
14607 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14608
14609 switch (GET_CODE (XEXP (operands[0], 0)))
14610 {
14611 case REG:
14612 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14613 break;
14614
14615 default:
14616 gcc_unreachable ();
14617 }
14618 }
14619
14620 return "";
14621 }
14622
14623 /* Output a VFP load or store instruction. */
14624
14625 const char *
14626 output_move_vfp (rtx *operands)
14627 {
14628 rtx reg, mem, addr, ops[2];
14629 int load = REG_P (operands[0]);
14630 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14631 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14632 const char *templ;
14633 char buff[50];
14634 enum machine_mode mode;
14635
14636 reg = operands[!load];
14637 mem = operands[load];
14638
14639 mode = GET_MODE (reg);
14640
14641 gcc_assert (REG_P (reg));
14642 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14643 gcc_assert (mode == SFmode
14644 || mode == DFmode
14645 || mode == SImode
14646 || mode == DImode
14647 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14648 gcc_assert (MEM_P (mem));
14649
14650 addr = XEXP (mem, 0);
14651
14652 switch (GET_CODE (addr))
14653 {
14654 case PRE_DEC:
14655 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14656 ops[0] = XEXP (addr, 0);
14657 ops[1] = reg;
14658 break;
14659
14660 case POST_INC:
14661 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14662 ops[0] = XEXP (addr, 0);
14663 ops[1] = reg;
14664 break;
14665
14666 default:
14667 templ = "f%s%c%%?\t%%%s0, %%1%s";
14668 ops[0] = reg;
14669 ops[1] = mem;
14670 break;
14671 }
14672
14673 sprintf (buff, templ,
14674 load ? "ld" : "st",
14675 dp ? 'd' : 's',
14676 dp ? "P" : "",
14677 integer_p ? "\t%@ int" : "");
14678 output_asm_insn (buff, ops);
14679
14680 return "";
14681 }
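/* For illustration only (mnemonics assume the pre-UAL VFP spellings used
   by the templates above): a DFmode load from a plain address such as
   (mem (reg r0)) takes the default case and prints roughly
       fldd    d7, [r0]
   while a DFmode store through (pre_dec sp) uses the PRE_DEC case and
   prints roughly
       fstmdbd sp!, {d8}
   with a trailing "@ int" comment appended for integer-mode operands.  */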
14682
14683 /* Output a Neon double-word or quad-word load or store, or a load
14684 or store for larger structure modes.
14685
14686 WARNING: The ordering of elements is weird in big-endian mode,
14687 because the EABI requires that vectors stored in memory appear
14688 as though they were stored by a VSTM instruction.
14689 GCC RTL defines element ordering based on in-memory order.
14690 This can be different from the architectural ordering of elements
14691 within a NEON register. The intrinsics defined in arm_neon.h use the
14692 NEON register element ordering, not the GCC RTL element ordering.
14693
14694 For example, the in-memory ordering of a big-endian quadword
14695 vector with 16-bit elements when stored from register pair {d0,d1}
14696 will be (lowest address first, d0[N] is NEON register element N):
14697
14698 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14699
14700 When necessary, quadword registers (dN, dN+1) are moved to ARM
14701 registers starting at rN, in the order:
14702
14703 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14704
14705 So that STM/LDM can be used on vectors in ARM registers, and the
14706 same memory layout will result as if VSTM/VLDM were used.
14707
14708 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
14709 possible, which allows use of appropriate alignment tags.
14710 Note that the choice of "64" is independent of the actual vector
14711 element size; this size simply ensures that the behavior is
14712 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
14713
14714 Due to limitations of those instructions, use of VST1.64/VLD1.64
14715 is not possible if:
14716 - the address contains PRE_DEC, or
14717 - the mode refers to more than 4 double-word registers
14718
14719 In those cases, it would be possible to replace VSTM/VLDM by a
14720 sequence of instructions; this is not currently implemented since
14721 this is not certain to actually improve performance. */
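/* A rough illustration of the cases below (the exact text depends on the
   '%h', '%A' and '%m' output modifiers and on the alignment recorded on
   the MEM): a quad-word load from (mem (reg r0)) takes the default case
   and emits something like "vld1.64 {d16-d17}, [r0:64]"; a structure mode
   needing more than four D registers falls back to "vldmia r0, {...}"; and
   a (plus ...) or (label_ref ...) address is split into one vldr/vstr per
   doubleword, with any register that overlaps the address done last.  */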
14722
14723 const char *
14724 output_move_neon (rtx *operands)
14725 {
14726 rtx reg, mem, addr, ops[2];
14727 int regno, nregs, load = REG_P (operands[0]);
14728 const char *templ;
14729 char buff[50];
14730 enum machine_mode mode;
14731
14732 reg = operands[!load];
14733 mem = operands[load];
14734
14735 mode = GET_MODE (reg);
14736
14737 gcc_assert (REG_P (reg));
14738 regno = REGNO (reg);
14739 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
14740 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14741 || NEON_REGNO_OK_FOR_QUAD (regno));
14742 gcc_assert (VALID_NEON_DREG_MODE (mode)
14743 || VALID_NEON_QREG_MODE (mode)
14744 || VALID_NEON_STRUCT_MODE (mode));
14745 gcc_assert (MEM_P (mem));
14746
14747 addr = XEXP (mem, 0);
14748
14749 /* Strip off const from addresses like (const (plus (...))). */
14750 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14751 addr = XEXP (addr, 0);
14752
14753 switch (GET_CODE (addr))
14754 {
14755 case POST_INC:
14756 /* We have to use vldm / vstm for too-large modes. */
14757 if (nregs > 4)
14758 {
14759 templ = "v%smia%%?\t%%0!, %%h1";
14760 ops[0] = XEXP (addr, 0);
14761 }
14762 else
14763 {
14764 templ = "v%s1.64\t%%h1, %%A0";
14765 ops[0] = mem;
14766 }
14767 ops[1] = reg;
14768 break;
14769
14770 case PRE_DEC:
14771 /* We have to use vldm / vstm in this case, since there is no
14772 pre-decrement form of the vld1 / vst1 instructions. */
14773 templ = "v%smdb%%?\t%%0!, %%h1";
14774 ops[0] = XEXP (addr, 0);
14775 ops[1] = reg;
14776 break;
14777
14778 case POST_MODIFY:
14779 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14780 gcc_unreachable ();
14781
14782 case LABEL_REF:
14783 case PLUS:
14784 {
14785 int i;
14786 int overlap = -1;
14787 for (i = 0; i < nregs; i++)
14788 {
14789 /* We're only using DImode here because it's a convenient size. */
14790 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14791 ops[1] = adjust_address (mem, DImode, 8 * i);
14792 if (reg_overlap_mentioned_p (ops[0], mem))
14793 {
14794 gcc_assert (overlap == -1);
14795 overlap = i;
14796 }
14797 else
14798 {
14799 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14800 output_asm_insn (buff, ops);
14801 }
14802 }
14803 if (overlap != -1)
14804 {
14805 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14806 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14807 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14808 output_asm_insn (buff, ops);
14809 }
14810
14811 return "";
14812 }
14813
14814 default:
14815 /* We have to use vldm / vstm for too-large modes. */
14816 if (nregs > 4)
14817 templ = "v%smia%%?\t%%m0, %%h1";
14818 else
14819 templ = "v%s1.64\t%%h1, %%A0";
14820
14821 ops[0] = mem;
14822 ops[1] = reg;
14823 }
14824
14825 sprintf (buff, templ, load ? "ld" : "st");
14826 output_asm_insn (buff, ops);
14827
14828 return "";
14829 }
14830
14831 /* Compute and return the length of neon_mov<mode>, where <mode> is
14832 one of VSTRUCT modes: EI, OI, CI or XI. */
14833 int
14834 arm_attr_length_move_neon (rtx insn)
14835 {
14836 rtx reg, mem, addr;
14837 int load;
14838 enum machine_mode mode;
14839
14840 extract_insn_cached (insn);
14841
14842 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14843 {
14844 mode = GET_MODE (recog_data.operand[0]);
14845 switch (mode)
14846 {
14847 case EImode:
14848 case OImode:
14849 return 8;
14850 case CImode:
14851 return 12;
14852 case XImode:
14853 return 16;
14854 default:
14855 gcc_unreachable ();
14856 }
14857 }
14858
14859 load = REG_P (recog_data.operand[0]);
14860 reg = recog_data.operand[!load];
14861 mem = recog_data.operand[load];
14862
14863 gcc_assert (MEM_P (mem));
14864
14865 mode = GET_MODE (reg);
14866 addr = XEXP (mem, 0);
14867
14868 /* Strip off const from addresses like (const (plus (...))). */
14869 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14870 addr = XEXP (addr, 0);
14871
14872 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14873 {
14874 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14875 return insns * 4;
14876 }
14877 else
14878 return 4;
14879 }
14880
14881 /* Return nonzero if the address of INSN's memory operand is a plain register
14882 or a register plus an immediate offset. Otherwise, return zero. */
14883
14884 int
14885 arm_address_offset_is_imm (rtx insn)
14886 {
14887 rtx mem, addr;
14888
14889 extract_insn_cached (insn);
14890
14891 if (REG_P (recog_data.operand[0]))
14892 return 0;
14893
14894 mem = recog_data.operand[0];
14895
14896 gcc_assert (MEM_P (mem));
14897
14898 addr = XEXP (mem, 0);
14899
14900 if (REG_P (addr)
14901 || (GET_CODE (addr) == PLUS
14902 && REG_P (XEXP (addr, 0))
14903 && CONST_INT_P (XEXP (addr, 1))))
14904 return 1;
14905 else
14906 return 0;
14907 }
14908
14909 /* Output an ADD r, s, #n where n may be too big for one instruction.
14910 If adding zero to one register, output nothing. */
14911 const char *
14912 output_add_immediate (rtx *operands)
14913 {
14914 HOST_WIDE_INT n = INTVAL (operands[2]);
14915
14916 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14917 {
14918 if (n < 0)
14919 output_multi_immediate (operands,
14920 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14921 -n);
14922 else
14923 output_multi_immediate (operands,
14924 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14925 n);
14926 }
14927
14928 return "";
14929 }
14930
14931 /* Output a multiple immediate operation.
14932 OPERANDS is the vector of operands referred to in the output patterns.
14933 INSTR1 is the output pattern to use for the first constant.
14934 INSTR2 is the output pattern to use for subsequent constants.
14935 IMMED_OP is the index of the constant slot in OPERANDS.
14936 N is the constant value. */
14937 static const char *
14938 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14939 int immed_op, HOST_WIDE_INT n)
14940 {
14941 #if HOST_BITS_PER_WIDE_INT > 32
14942 n &= 0xffffffff;
14943 #endif
14944
14945 if (n == 0)
14946 {
14947 /* Quick and easy output. */
14948 operands[immed_op] = const0_rtx;
14949 output_asm_insn (instr1, operands);
14950 }
14951 else
14952 {
14953 int i;
14954 const char * instr = instr1;
14955
14956 /* Note that n is never zero here (which would give no output). */
14957 for (i = 0; i < 32; i += 2)
14958 {
14959 if (n & (3 << i))
14960 {
14961 operands[immed_op] = GEN_INT (n & (255 << i));
14962 output_asm_insn (instr, operands);
14963 instr = instr2;
14964 i += 6;
14965 }
14966 }
14967 }
14968
14969 return "";
14970 }
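/* Worked example (illustrative only): a call such as
       output_add_immediate with operands {r0, r1, #0x12340}
   reaches this function with n = 0x12340; the loop above first emits
       add     r0, r1, #0x2340
   (0x2340 is 0x8d rotated into position, a valid 8-bit immediate field)
   and then, after skipping ahead, emits
       add     r0, r0, #0x10000
   for the remaining bits.  */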
14971
14972 /* Return the name of a shifter operation. */
14973 static const char *
14974 arm_shift_nmem (enum rtx_code code)
14975 {
14976 switch (code)
14977 {
14978 case ASHIFT:
14979 return ARM_LSL_NAME;
14980
14981 case ASHIFTRT:
14982 return "asr";
14983
14984 case LSHIFTRT:
14985 return "lsr";
14986
14987 case ROTATERT:
14988 return "ror";
14989
14990 default:
14991 gcc_unreachable ();
14992 }
14993 }
14994
14995 /* Return the appropriate ARM instruction for the operation code.
14996 The returned result should not be overwritten. OP is the rtx of the
14997 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14998 was shifted. */
14999 const char *
15000 arithmetic_instr (rtx op, int shift_first_arg)
15001 {
15002 switch (GET_CODE (op))
15003 {
15004 case PLUS:
15005 return "add";
15006
15007 case MINUS:
15008 return shift_first_arg ? "rsb" : "sub";
15009
15010 case IOR:
15011 return "orr";
15012
15013 case XOR:
15014 return "eor";
15015
15016 case AND:
15017 return "and";
15018
15019 case ASHIFT:
15020 case ASHIFTRT:
15021 case LSHIFTRT:
15022 case ROTATERT:
15023 return arm_shift_nmem (GET_CODE (op));
15024
15025 default:
15026 gcc_unreachable ();
15027 }
15028 }
15029
15030 /* Ensure valid constant shifts and return the appropriate shift mnemonic
15031 for the operation code. The returned result should not be overwritten.
15032 OP is the rtx of the shift operation.
15033 On exit, *AMOUNTP will be -1 if the shift is by a register; otherwise it
15034 will hold the constant shift amount. */
15035 static const char *
15036 shift_op (rtx op, HOST_WIDE_INT *amountp)
15037 {
15038 const char * mnem;
15039 enum rtx_code code = GET_CODE (op);
15040
15041 switch (GET_CODE (XEXP (op, 1)))
15042 {
15043 case REG:
15044 case SUBREG:
15045 *amountp = -1;
15046 break;
15047
15048 case CONST_INT:
15049 *amountp = INTVAL (XEXP (op, 1));
15050 break;
15051
15052 default:
15053 gcc_unreachable ();
15054 }
15055
15056 switch (code)
15057 {
15058 case ROTATE:
15059 gcc_assert (*amountp != -1);
15060 *amountp = 32 - *amountp;
15061 code = ROTATERT;
15062
15063 /* Fall through. */
15064
15065 case ASHIFT:
15066 case ASHIFTRT:
15067 case LSHIFTRT:
15068 case ROTATERT:
15069 mnem = arm_shift_nmem (code);
15070 break;
15071
15072 case MULT:
15073 /* We never have to worry about the amount being other than a
15074 power of 2, since this case can never be reloaded from a reg. */
15075 gcc_assert (*amountp != -1);
15076 *amountp = int_log2 (*amountp);
15077 return ARM_LSL_NAME;
15078
15079 default:
15080 gcc_unreachable ();
15081 }
15082
15083 if (*amountp != -1)
15084 {
15085 /* This is not 100% correct, but follows from the desire to merge
15086 multiplication by a power of 2 with the recognizer for a
15087 shift. >=32 is not a valid shift for "lsl", so we must try to
15088 output a shift that produces the correct arithmetical result.
15089 Using lsr #32 is identical except for the fact that the carry bit
15090 is not set correctly if we set the flags; but we never use the
15091 carry bit from such an operation, so we can ignore that. */
15092 if (code == ROTATERT)
15093 /* Rotate is just modulo 32. */
15094 *amountp &= 31;
15095 else if (*amountp != (*amountp & 31))
15096 {
15097 if (code == ASHIFT)
15098 mnem = "lsr";
15099 *amountp = 32;
15100 }
15101
15102 /* Shifts of 0 are no-ops. */
15103 if (*amountp == 0)
15104 return NULL;
15105 }
15106
15107 return mnem;
15108 }
15109
15110 /* Obtain the shift count for POWER, which must be a power of two. */
15111
15112 static HOST_WIDE_INT
15113 int_log2 (HOST_WIDE_INT power)
15114 {
15115 HOST_WIDE_INT shift = 0;
15116
15117 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
15118 {
15119 gcc_assert (shift <= 31);
15120 shift++;
15121 }
15122
15123 return shift;
15124 }
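/* For example, int_log2 (8) == 3 and int_log2 (1) == 0; the assertion
   above catches arguments with no set bit in positions 0-31.  */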
15125
15126 /* Output a .ascii pseudo-op, keeping track of lengths; we split long
15127 strings because /bin/as is horribly restrictive. The judgement about
15128 whether or not each character is 'printable' (and can be output as
15129 is) or not (and must be printed with an octal escape) must be made
15130 with reference to the *host* character set -- the situation is
15131 similar to that discussed in the comments above pp_c_char in
15132 c-pretty-print.c. */
15133
15134 #define MAX_ASCII_LEN 51
15135
15136 void
15137 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
15138 {
15139 int i;
15140 int len_so_far = 0;
15141
15142 fputs ("\t.ascii\t\"", stream);
15143
15144 for (i = 0; i < len; i++)
15145 {
15146 int c = p[i];
15147
15148 if (len_so_far >= MAX_ASCII_LEN)
15149 {
15150 fputs ("\"\n\t.ascii\t\"", stream);
15151 len_so_far = 0;
15152 }
15153
15154 if (ISPRINT (c))
15155 {
15156 if (c == '\\' || c == '\"')
15157 {
15158 putc ('\\', stream);
15159 len_so_far++;
15160 }
15161 putc (c, stream);
15162 len_so_far++;
15163 }
15164 else
15165 {
15166 fprintf (stream, "\\%03o", c);
15167 len_so_far += 4;
15168 }
15169 }
15170
15171 fputs ("\"\n", stream);
15172 }
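/* For illustration: the three bytes 'a', '"', 0x07 would be emitted as
       .ascii  "a\"\007"
   and a string whose printed form exceeds MAX_ASCII_LEN characters is
   split across several .ascii directives.  */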
15173 \f
15174 /* Compute the register save mask for registers 0 through 12
15175 inclusive. This code is used by arm_compute_save_reg_mask. */
15176
15177 static unsigned long
15178 arm_compute_save_reg0_reg12_mask (void)
15179 {
15180 unsigned long func_type = arm_current_func_type ();
15181 unsigned long save_reg_mask = 0;
15182 unsigned int reg;
15183
15184 if (IS_INTERRUPT (func_type))
15185 {
15186 unsigned int max_reg;
15187 /* Interrupt functions must not corrupt any registers,
15188 even call clobbered ones. If this is a leaf function
15189 we can just examine the registers used by the RTL, but
15190 otherwise we have to assume that whatever function is
15191 called might clobber anything, and so we have to save
15192 all the call-clobbered registers as well. */
15193 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15194 /* FIQ handlers have registers r8 - r12 banked, so
15195 we only need to check r0 - r7. Normal ISRs only
15196 bank r14 and r15, so we must check up to r12.
15197 r13 is the stack pointer which is always preserved,
15198 so we do not need to consider it here. */
15199 max_reg = 7;
15200 else
15201 max_reg = 12;
15202
15203 for (reg = 0; reg <= max_reg; reg++)
15204 if (df_regs_ever_live_p (reg)
15205 || (! crtl->is_leaf && call_used_regs[reg]))
15206 save_reg_mask |= (1 << reg);
15207
15208 /* Also save the pic base register if necessary. */
15209 if (flag_pic
15210 && !TARGET_SINGLE_PIC_BASE
15211 && arm_pic_register != INVALID_REGNUM
15212 && crtl->uses_pic_offset_table)
15213 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15214 }
15215 else if (IS_VOLATILE (func_type))
15216 {
15217 /* For noreturn functions we historically omitted register saves
15218 altogether. However this really messes up debugging. As a
15219 compromise save just the frame pointers. Combined with the link
15220 register saved elsewhere this should be sufficient to get
15221 a backtrace. */
15222 if (frame_pointer_needed)
15223 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15224 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15225 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15226 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15227 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15228 }
15229 else
15230 {
15231 /* In the normal case we only need to save those registers
15232 which are call saved and which are used by this function. */
15233 for (reg = 0; reg <= 11; reg++)
15234 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15235 save_reg_mask |= (1 << reg);
15236
15237 /* Handle the frame pointer as a special case. */
15238 if (frame_pointer_needed)
15239 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15240
15241 /* If we aren't loading the PIC register,
15242 don't stack it even though it may be live. */
15243 if (flag_pic
15244 && !TARGET_SINGLE_PIC_BASE
15245 && arm_pic_register != INVALID_REGNUM
15246 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15247 || crtl->uses_pic_offset_table))
15248 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15249
15250 /* The prologue will copy SP into R0, so save it. */
15251 if (IS_STACKALIGN (func_type))
15252 save_reg_mask |= 1;
15253 }
15254
15255 /* Save registers so the exception handler can modify them. */
15256 if (crtl->calls_eh_return)
15257 {
15258 unsigned int i;
15259
15260 for (i = 0; ; i++)
15261 {
15262 reg = EH_RETURN_DATA_REGNO (i);
15263 if (reg == INVALID_REGNUM)
15264 break;
15265 save_reg_mask |= 1 << reg;
15266 }
15267 }
15268
15269 return save_reg_mask;
15270 }
15271
15272
15273 /* Compute the number of bytes used to store the static chain register on the
15274 stack, above the stack frame. We need to know this accurately to get the
15275 alignment of the rest of the stack frame correct. */
15276
15277 static int arm_compute_static_chain_stack_bytes (void)
15278 {
15279 unsigned long func_type = arm_current_func_type ();
15280 int static_chain_stack_bytes = 0;
15281
15282 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
15283 && IS_NESTED (func_type)
15284 && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15285 static_chain_stack_bytes = 4;
15286
15287 return static_chain_stack_bytes;
15288 }
15289
15290
15291 /* Compute a bit mask of which registers need to be
15292 saved on the stack for the current function.
15293 This is used by arm_get_frame_offsets, which may add extra registers. */
15294
15295 static unsigned long
15296 arm_compute_save_reg_mask (void)
15297 {
15298 unsigned int save_reg_mask = 0;
15299 unsigned long func_type = arm_current_func_type ();
15300 unsigned int reg;
15301
15302 if (IS_NAKED (func_type))
15303 /* This should never really happen. */
15304 return 0;
15305
15306 /* If we are creating a stack frame, then we must save the frame pointer,
15307 IP (which will hold the old stack pointer), LR and the PC. */
15308 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15309 save_reg_mask |=
15310 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15311 | (1 << IP_REGNUM)
15312 | (1 << LR_REGNUM)
15313 | (1 << PC_REGNUM);
15314
15315 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15316
15317 /* Decide if we need to save the link register.
15318 Interrupt routines have their own banked link register,
15319 so they never need to save it.
15320 Otherwise if we do not use the link register we do not need to save
15321 it. If we are pushing other registers onto the stack however, we
15322 can save an instruction in the epilogue by pushing the link register
15323 now and then popping it back into the PC. This incurs extra memory
15324 accesses though, so we only do it when optimizing for size, and only
15325 if we know that we will not need a fancy return sequence. */
15326 if (df_regs_ever_live_p (LR_REGNUM)
15327 || (save_reg_mask
15328 && optimize_size
15329 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15330 && !crtl->calls_eh_return))
15331 save_reg_mask |= 1 << LR_REGNUM;
15332
15333 if (cfun->machine->lr_save_eliminated)
15334 save_reg_mask &= ~ (1 << LR_REGNUM);
15335
15336 if (TARGET_REALLY_IWMMXT
15337 && ((bit_count (save_reg_mask)
15338 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15339 arm_compute_static_chain_stack_bytes())
15340 ) % 2) != 0)
15341 {
15342 /* The total number of registers that are going to be pushed
15343 onto the stack is odd. We need to ensure that the stack
15344 is 64-bit aligned before we start to save iWMMXt registers,
15345 and also before we start to create locals. (A local variable
15346 might be a double or long long which we will load/store using
15347 an iWMMXt instruction). Therefore we need to push another
15348 ARM register, so that the stack will be 64-bit aligned. We
15349 try to avoid using the arg registers (r0 -r3) as they might be
15350 used to pass values in a tail call. */
15351 for (reg = 4; reg <= 12; reg++)
15352 if ((save_reg_mask & (1 << reg)) == 0)
15353 break;
15354
15355 if (reg <= 12)
15356 save_reg_mask |= (1 << reg);
15357 else
15358 {
15359 cfun->machine->sibcall_blocked = 1;
15360 save_reg_mask |= (1 << 3);
15361 }
15362 }
15363
15364 /* We may need to push an additional register for use initializing the
15365 PIC base register. */
15366 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15367 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15368 {
15369 reg = thumb_find_work_register (1 << 4);
15370 if (!call_used_regs[reg])
15371 save_reg_mask |= (1 << reg);
15372 }
15373
15374 return save_reg_mask;
15375 }
15376
15377
15378 /* Compute a bit mask of which registers need to be
15379 saved on the stack for the current function. */
15380 static unsigned long
15381 thumb1_compute_save_reg_mask (void)
15382 {
15383 unsigned long mask;
15384 unsigned reg;
15385
15386 mask = 0;
15387 for (reg = 0; reg < 12; reg ++)
15388 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15389 mask |= 1 << reg;
15390
15391 if (flag_pic
15392 && !TARGET_SINGLE_PIC_BASE
15393 && arm_pic_register != INVALID_REGNUM
15394 && crtl->uses_pic_offset_table)
15395 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15396
15397 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15398 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15399 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15400
15401 /* LR will also be pushed if any lo regs are pushed. */
15402 if (mask & 0xff || thumb_force_lr_save ())
15403 mask |= (1 << LR_REGNUM);
15404
15405 /* Make sure we have a low work register if we need one.
15406 We will need one if we are going to push a high register,
15407 but we are not currently intending to push a low register. */
15408 if ((mask & 0xff) == 0
15409 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15410 {
15411 /* Use thumb_find_work_register to choose which register
15412 we will use. If the register is live then we will
15413 have to push it. Use LAST_LO_REGNUM as our fallback
15414 choice for the register to select. */
15415 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15416 /* Make sure the register returned by thumb_find_work_register is
15417 not part of the return value. */
15418 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15419 reg = LAST_LO_REGNUM;
15420
15421 if (! call_used_regs[reg])
15422 mask |= 1 << reg;
15423 }
15424
15425 /* The 504 below is 8 bytes less than 512 because there are two possible
15426 alignment words. We can't tell here if they will be present or not so we
15427 have to play it safe and assume that they are. */
15428 if ((CALLER_INTERWORKING_SLOT_SIZE +
15429 ROUND_UP_WORD (get_frame_size ()) +
15430 crtl->outgoing_args_size) >= 504)
15431 {
15432 /* This is the same as the code in thumb1_expand_prologue() which
15433 determines which register to use for stack decrement. */
15434 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15435 if (mask & (1 << reg))
15436 break;
15437
15438 if (reg > LAST_LO_REGNUM)
15439 {
15440 /* Make sure we have a register available for stack decrement. */
15441 mask |= 1 << LAST_LO_REGNUM;
15442 }
15443 }
15444
15445 return mask;
15446 }
15447
15448
15449 /* Return the number of bytes required to save VFP registers. */
15450 static int
15451 arm_get_vfp_saved_size (void)
15452 {
15453 unsigned int regno;
15454 int count;
15455 int saved;
15456
15457 saved = 0;
15458 /* Space for saved VFP registers. */
15459 if (TARGET_HARD_FLOAT && TARGET_VFP)
15460 {
15461 count = 0;
15462 for (regno = FIRST_VFP_REGNUM;
15463 regno < LAST_VFP_REGNUM;
15464 regno += 2)
15465 {
15466 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15467 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15468 {
15469 if (count > 0)
15470 {
15471 /* Workaround ARM10 VFPr1 bug. */
15472 if (count == 2 && !arm_arch6)
15473 count++;
15474 saved += count * 8;
15475 }
15476 count = 0;
15477 }
15478 else
15479 count++;
15480 }
15481 if (count > 0)
15482 {
15483 if (count == 2 && !arm_arch6)
15484 count++;
15485 saved += count * 8;
15486 }
15487 }
15488 return saved;
15489 }
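/* Rough example: if only d8 and d9 are live across calls, the scan above
   accumulates count == 2 and adds 16 bytes; on pre-ARMv6 cores the ARM10
   VFPr1 workaround pads that 2-register block to 3, giving 24 bytes.  */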
15490
15491
15492 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15493 everything bar the final return instruction. If SIMPLE_RETURN is true,
15494 then do not output the epilogue, because it has already been emitted in RTL. */
15495 const char *
15496 output_return_instruction (rtx operand, bool really_return, bool reverse,
15497 bool simple_return)
15498 {
15499 char conditional[10];
15500 char instr[100];
15501 unsigned reg;
15502 unsigned long live_regs_mask;
15503 unsigned long func_type;
15504 arm_stack_offsets *offsets;
15505
15506 func_type = arm_current_func_type ();
15507
15508 if (IS_NAKED (func_type))
15509 return "";
15510
15511 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15512 {
15513 /* If this function was declared non-returning, and we have
15514 found a tail call, then we have to trust that the called
15515 function won't return. */
15516 if (really_return)
15517 {
15518 rtx ops[2];
15519
15520 /* Otherwise, trap an attempted return by aborting. */
15521 ops[0] = operand;
15522 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15523 : "abort");
15524 assemble_external_libcall (ops[1]);
15525 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15526 }
15527
15528 return "";
15529 }
15530
15531 gcc_assert (!cfun->calls_alloca || really_return);
15532
15533 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15534
15535 cfun->machine->return_used_this_function = 1;
15536
15537 offsets = arm_get_frame_offsets ();
15538 live_regs_mask = offsets->saved_regs_mask;
15539
15540 if (!simple_return && live_regs_mask)
15541 {
15542 const char * return_reg;
15543
15544 /* If we do not have any special requirements for function exit
15545 (e.g. interworking) then we can load the return address
15546 directly into the PC. Otherwise we must load it into LR. */
15547 if (really_return
15548 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15549 return_reg = reg_names[PC_REGNUM];
15550 else
15551 return_reg = reg_names[LR_REGNUM];
15552
15553 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15554 {
15555 /* There are three possible reasons for the IP register
15556 being saved. 1) a stack frame was created, in which case
15557 IP contains the old stack pointer, or 2) an ISR routine
15558 corrupted it, or 3) it was saved to align the stack on
15559 iWMMXt. In case 1, restore IP into SP, otherwise just
15560 restore IP. */
15561 if (frame_pointer_needed)
15562 {
15563 live_regs_mask &= ~ (1 << IP_REGNUM);
15564 live_regs_mask |= (1 << SP_REGNUM);
15565 }
15566 else
15567 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15568 }
15569
15570 /* On some ARM architectures it is faster to use LDR rather than
15571 LDM to load a single register. On other architectures, the
15572 cost is the same. In 26 bit mode, or for exception handlers,
15573 we have to use LDM to load the PC so that the CPSR is also
15574 restored. */
15575 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15576 if (live_regs_mask == (1U << reg))
15577 break;
15578
15579 if (reg <= LAST_ARM_REGNUM
15580 && (reg != LR_REGNUM
15581 || ! really_return
15582 || ! IS_INTERRUPT (func_type)))
15583 {
15584 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15585 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15586 }
15587 else
15588 {
15589 char *p;
15590 int first = 1;
15591
15592 /* Generate the load multiple instruction to restore the
15593 registers. Note we can get here, even if
15594 frame_pointer_needed is true, but only if sp already
15595 points to the base of the saved core registers. */
15596 if (live_regs_mask & (1 << SP_REGNUM))
15597 {
15598 unsigned HOST_WIDE_INT stack_adjust;
15599
15600 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15601 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15602
15603 if (stack_adjust && arm_arch5 && TARGET_ARM)
15604 if (TARGET_UNIFIED_ASM)
15605 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15606 else
15607 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15608 else
15609 {
15610 /* If we can't use ldmib (SA110 bug),
15611 then try to pop r3 instead. */
15612 if (stack_adjust)
15613 live_regs_mask |= 1 << 3;
15614
15615 if (TARGET_UNIFIED_ASM)
15616 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15617 else
15618 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15619 }
15620 }
15621 else
15622 if (TARGET_UNIFIED_ASM)
15623 sprintf (instr, "pop%s\t{", conditional);
15624 else
15625 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15626
15627 p = instr + strlen (instr);
15628
15629 for (reg = 0; reg <= SP_REGNUM; reg++)
15630 if (live_regs_mask & (1 << reg))
15631 {
15632 int l = strlen (reg_names[reg]);
15633
15634 if (first)
15635 first = 0;
15636 else
15637 {
15638 memcpy (p, ", ", 2);
15639 p += 2;
15640 }
15641
15642 memcpy (p, "%|", 2);
15643 memcpy (p + 2, reg_names[reg], l);
15644 p += l + 2;
15645 }
15646
15647 if (live_regs_mask & (1 << LR_REGNUM))
15648 {
15649 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15650 /* If returning from an interrupt, restore the CPSR. */
15651 if (IS_INTERRUPT (func_type))
15652 strcat (p, "^");
15653 }
15654 else
15655 strcpy (p, "}");
15656 }
15657
15658 output_asm_insn (instr, & operand);
15659
15660 /* See if we need to generate an extra instruction to
15661 perform the actual function return. */
15662 if (really_return
15663 && func_type != ARM_FT_INTERWORKED
15664 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15665 {
15666 /* The return has already been handled
15667 by loading the LR into the PC. */
15668 return "";
15669 }
15670 }
15671
15672 if (really_return)
15673 {
15674 switch ((int) ARM_FUNC_TYPE (func_type))
15675 {
15676 case ARM_FT_ISR:
15677 case ARM_FT_FIQ:
15678 /* ??? This is wrong for unified assembly syntax. */
15679 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15680 break;
15681
15682 case ARM_FT_INTERWORKED:
15683 sprintf (instr, "bx%s\t%%|lr", conditional);
15684 break;
15685
15686 case ARM_FT_EXCEPTION:
15687 /* ??? This is wrong for unified assembly syntax. */
15688 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15689 break;
15690
15691 default:
15692 /* Use bx if it's available. */
15693 if (arm_arch5 || arm_arch4t)
15694 sprintf (instr, "bx%s\t%%|lr", conditional);
15695 else
15696 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15697 break;
15698 }
15699
15700 output_asm_insn (instr, & operand);
15701 }
15702
15703 return "";
15704 }
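/* By way of illustration (the exact sequence depends on the saved register
   mask, the function type and the target options): a typical ARM-state
   function that pushed {r4, r5, fp, lr} and has no special return
   requirements restores everything and returns with a single
       ldmfd   sp!, {r4, r5, fp, pc}
   while a function with no saved registers falls through to the final
   switch and returns with "bx lr" (or "mov pc, lr" on pre-v4t cores).  */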
15705
15706 /* Write the function name into the code section, directly preceding
15707 the function prologue.
15708
15709 Code will be output similar to this:
15710 t0
15711 .ascii "arm_poke_function_name", 0
15712 .align
15713 t1
15714 .word 0xff000000 + (t1 - t0)
15715 arm_poke_function_name
15716 mov ip, sp
15717 stmfd sp!, {fp, ip, lr, pc}
15718 sub fp, ip, #4
15719
15720 When performing a stack backtrace, code can inspect the value
15721 of 'pc' stored at 'fp' + 0. If the trace function then looks
15722 at location pc - 12 and the top 8 bits are set, then we know
15723 that there is a function name embedded immediately preceding this
15724 location, whose length is ((pc[-3]) & ~0xff000000).
15725
15726 We assume that pc is declared as a pointer to an unsigned long.
15727
15728 It is of no benefit to output the function name if we are assembling
15729 a leaf function. These function types will not contain a stack
15730 backtrace structure, therefore it is not possible to determine the
15731 function name. */
15732 void
15733 arm_poke_function_name (FILE *stream, const char *name)
15734 {
15735 unsigned long alignlength;
15736 unsigned long length;
15737 rtx x;
15738
15739 length = strlen (name) + 1;
15740 alignlength = ROUND_UP_WORD (length);
15741
15742 ASM_OUTPUT_ASCII (stream, name, length);
15743 ASM_OUTPUT_ALIGN (stream, 2);
15744 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15745 assemble_aligned_integer (UNITS_PER_WORD, x);
15746 }
15747
15748 /* Place some comments into the assembler stream
15749 describing the current function. */
15750 static void
15751 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15752 {
15753 unsigned long func_type;
15754
15755 /* ??? Do we want to print some of the below anyway? */
15756 if (TARGET_THUMB1)
15757 return;
15758
15759 /* Sanity check. */
15760 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15761
15762 func_type = arm_current_func_type ();
15763
15764 switch ((int) ARM_FUNC_TYPE (func_type))
15765 {
15766 default:
15767 case ARM_FT_NORMAL:
15768 break;
15769 case ARM_FT_INTERWORKED:
15770 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15771 break;
15772 case ARM_FT_ISR:
15773 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15774 break;
15775 case ARM_FT_FIQ:
15776 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15777 break;
15778 case ARM_FT_EXCEPTION:
15779 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15780 break;
15781 }
15782
15783 if (IS_NAKED (func_type))
15784 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15785
15786 if (IS_VOLATILE (func_type))
15787 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15788
15789 if (IS_NESTED (func_type))
15790 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15791 if (IS_STACKALIGN (func_type))
15792 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15793
15794 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15795 crtl->args.size,
15796 crtl->args.pretend_args_size, frame_size);
15797
15798 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15799 frame_pointer_needed,
15800 cfun->machine->uses_anonymous_args);
15801
15802 if (cfun->machine->lr_save_eliminated)
15803 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15804
15805 if (crtl->calls_eh_return)
15806 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15807
15808 }
15809
15810 static void
15811 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15812 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15813 {
15814 arm_stack_offsets *offsets;
15815
15816 if (TARGET_THUMB1)
15817 {
15818 int regno;
15819
15820 /* Emit any call-via-reg trampolines that are needed for v4t support
15821 of call_reg and call_value_reg type insns. */
15822 for (regno = 0; regno < LR_REGNUM; regno++)
15823 {
15824 rtx label = cfun->machine->call_via[regno];
15825
15826 if (label != NULL)
15827 {
15828 switch_to_section (function_section (current_function_decl));
15829 targetm.asm_out.internal_label (asm_out_file, "L",
15830 CODE_LABEL_NUMBER (label));
15831 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15832 }
15833 }
15834
15835 /* ??? Probably not safe to set this here, since it assumes that a
15836 function will be emitted as assembly immediately after we generate
15837 RTL for it. This does not happen for inline functions. */
15838 cfun->machine->return_used_this_function = 0;
15839 }
15840 else /* TARGET_32BIT */
15841 {
15842 /* We need to take into account any stack-frame rounding. */
15843 offsets = arm_get_frame_offsets ();
15844
15845 gcc_assert (!use_return_insn (FALSE, NULL)
15846 || (cfun->machine->return_used_this_function != 0)
15847 || offsets->saved_regs == offsets->outgoing_args
15848 || frame_pointer_needed);
15849
15850 /* Reset the ARM-specific per-function variables. */
15851 after_arm_reorg = 0;
15852 }
15853 }
15854
15855 /* Generate and emit a pattern that will be recognized as an STRD pattern. If an
15856 even number of registers is being pushed, multiple STRD patterns are created
15857 for all register pairs. If an odd number of registers is pushed, emit a
15858 combination of STRDs and a single STR for the prologue saves. */
15859 static void
15860 thumb2_emit_strd_push (unsigned long saved_regs_mask)
15861 {
15862 int num_regs = 0;
15863 int i, j;
15864 rtx par = NULL_RTX;
15865 rtx insn = NULL_RTX;
15866 rtx dwarf = NULL_RTX;
15867 rtx tmp, reg, tmp1;
15868
15869 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15870 if (saved_regs_mask & (1 << i))
15871 num_regs++;
15872
15873 gcc_assert (num_regs && num_regs <= 16);
15874
15875 /* Pre-decrement the stack pointer, based on there being num_regs 4-byte
15876 registers to push. */
15877 tmp = gen_rtx_SET (VOIDmode,
15878 stack_pointer_rtx,
15879 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
15880 RTX_FRAME_RELATED_P (tmp) = 1;
15881 insn = emit_insn (tmp);
15882
15883 /* Create sequence for DWARF info. */
15884 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
15885
15886 /* RTLs cannot be shared, hence create new copy for dwarf. */
15887 tmp1 = gen_rtx_SET (VOIDmode,
15888 stack_pointer_rtx,
15889 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
15890 RTX_FRAME_RELATED_P (tmp1) = 1;
15891 XVECEXP (dwarf, 0, 0) = tmp1;
15892
15893 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
15894 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
15895
15896 /* Var j iterates over all the registers to gather all the registers in
15897 saved_regs_mask. Var i gives index of register R_j in stack frame.
15898 A PARALLEL RTX of register-pair is created here, so that pattern for
15899 STRD can be matched. If num_regs is odd, 1st register will be pushed
15900 using STR and remaining registers will be pushed with STRD in pairs.
15901 If num_regs is even, all registers are pushed with STRD in pairs.
15902 Hence, skip first element for odd num_regs. */
15903 for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--)
15904 if (saved_regs_mask & (1 << j))
15905 {
15906 /* Create RTX for store. New RTX is created for dwarf as
15907 they are not sharable. */
15908 reg = gen_rtx_REG (SImode, j);
15909 tmp = gen_rtx_SET (SImode,
15910 gen_frame_mem
15911 (SImode,
15912 plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
15913 reg);
15914
15915 tmp1 = gen_rtx_SET (SImode,
15916 gen_frame_mem
15917 (SImode,
15918 plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
15919 reg);
15920 RTX_FRAME_RELATED_P (tmp) = 1;
15921 RTX_FRAME_RELATED_P (tmp1) = 1;
15922
15923 if (((i - (num_regs % 2)) % 2) == 1)
15924 /* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to
15925 be created. Hence create it first. The STRD pattern we are
15926 generating is :
15927 [ (SET (MEM (PLUS (SP) (NUM))) (reg_t1))
15928 (SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ]
15929 where the target registers need not be consecutive. */
15930 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
15931
15932 /* Register R_j is added to the PARALLEL RTX. If (i - (num_regs % 2)) is
15933 even, reg_j is added as the 0th element, and if it is odd, reg_j is
15934 added as the 1st element of the STRD pattern shown above. */
15935 XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp;
15936 XVECEXP (dwarf, 0, (i + 1)) = tmp1;
15937
15938 if (((i - (num_regs % 2)) % 2) == 0)
15939 /* When (i - (num_regs % 2)) is even, RTXs for both the registers
15940 to be loaded are generated in above given STRD pattern, and the
15941 pattern can be emitted now. */
15942 emit_insn (par);
15943
15944 i--;
15945 }
15946
15947 if ((num_regs % 2) == 1)
15948 {
15949 /* If odd number of registers are pushed, generate STR pattern to store
15950 lone register. */
15951 for (; (saved_regs_mask & (1 << j)) == 0; j--);
15952
15953 tmp1 = gen_frame_mem (SImode, plus_constant (Pmode,
15954 stack_pointer_rtx, 4 * i));
15955 reg = gen_rtx_REG (SImode, j);
15956 tmp = gen_rtx_SET (SImode, tmp1, reg);
15957 RTX_FRAME_RELATED_P (tmp) = 1;
15958
15959 emit_insn (tmp);
15960
15961 tmp1 = gen_rtx_SET (SImode,
15962 gen_frame_mem
15963 (SImode,
15964 plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
15965 reg);
15966 RTX_FRAME_RELATED_P (tmp1) = 1;
15967 XVECEXP (dwarf, 0, (i + 1)) = tmp1;
15968 }
15969
15970 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
15971 RTX_FRAME_RELATED_P (insn) = 1;
15972 return;
15973 }
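/* A sketch of the effect (register numbers are only an example, and the
   final mnemonics come from the matching patterns in the .md files): for
   saved_regs_mask covering {r4, r5, r6} the code above emits RTL that
   typically assembles to
       sub     sp, sp, #12
       strd    r5, r6, [sp, #4]
       str     r4, [sp]
   i.e. registers are paired into STRDs from the highest number downwards
   and, for an odd count, the left-over register is stored with STR.  */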
15974
15975 /* Generate and emit an insn that we will recognize as a push_multi.
15976 Unfortunately, since this insn does not reflect very well the actual
15977 semantics of the operation, we need to annotate the insn for the benefit
15978 of DWARF2 frame unwind information. */
15979 static rtx
15980 emit_multi_reg_push (unsigned long mask)
15981 {
15982 int num_regs = 0;
15983 int num_dwarf_regs;
15984 int i, j;
15985 rtx par;
15986 rtx dwarf;
15987 int dwarf_par_index;
15988 rtx tmp, reg;
15989
15990 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15991 if (mask & (1 << i))
15992 num_regs++;
15993
15994 gcc_assert (num_regs && num_regs <= 16);
15995
15996 /* We don't record the PC in the dwarf frame information. */
15997 num_dwarf_regs = num_regs;
15998 if (mask & (1 << PC_REGNUM))
15999 num_dwarf_regs--;
16000
16001 /* For the body of the insn we are going to generate an UNSPEC in
16002 parallel with several USEs. This allows the insn to be recognized
16003 by the push_multi pattern in the arm.md file.
16004
16005 The body of the insn looks something like this:
16006
16007 (parallel [
16008 (set (mem:BLK (pre_modify:SI (reg:SI sp)
16009 (const_int:SI <num>)))
16010 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
16011 (use (reg:SI XX))
16012 (use (reg:SI YY))
16013 ...
16014 ])
16015
16016 For the frame note however, we try to be more explicit and actually
16017 show each register being stored into the stack frame, plus a (single)
16018 decrement of the stack pointer. We do it this way in order to be
16019 friendly to the stack unwinding code, which only wants to see a single
16020 stack decrement per instruction. The RTL we generate for the note looks
16021 something like this:
16022
16023 (sequence [
16024 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
16025 (set (mem:SI (reg:SI sp)) (reg:SI r4))
16026 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
16027 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
16028 ...
16029 ])
16030
16031 FIXME: In an ideal world the PRE_MODIFY would not exist and
16032 instead we'd have a parallel expression detailing all
16033 the stores to the various memory addresses so that debug
16034 information is more up-to-date. Remember however while writing
16035 this to take care of the constraints with the push instruction.
16036
16037 Note also that this has to be taken care of for the VFP registers.
16038
16039 For more see PR43399. */
16040
16041 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
16042 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
16043 dwarf_par_index = 1;
16044
16045 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16046 {
16047 if (mask & (1 << i))
16048 {
16049 reg = gen_rtx_REG (SImode, i);
16050
16051 XVECEXP (par, 0, 0)
16052 = gen_rtx_SET (VOIDmode,
16053 gen_frame_mem
16054 (BLKmode,
16055 gen_rtx_PRE_MODIFY (Pmode,
16056 stack_pointer_rtx,
16057 plus_constant
16058 (Pmode, stack_pointer_rtx,
16059 -4 * num_regs))
16060 ),
16061 gen_rtx_UNSPEC (BLKmode,
16062 gen_rtvec (1, reg),
16063 UNSPEC_PUSH_MULT));
16064
16065 if (i != PC_REGNUM)
16066 {
16067 tmp = gen_rtx_SET (VOIDmode,
16068 gen_frame_mem (SImode, stack_pointer_rtx),
16069 reg);
16070 RTX_FRAME_RELATED_P (tmp) = 1;
16071 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
16072 dwarf_par_index++;
16073 }
16074
16075 break;
16076 }
16077 }
16078
16079 for (j = 1, i++; j < num_regs; i++)
16080 {
16081 if (mask & (1 << i))
16082 {
16083 reg = gen_rtx_REG (SImode, i);
16084
16085 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
16086
16087 if (i != PC_REGNUM)
16088 {
16089 tmp
16090 = gen_rtx_SET (VOIDmode,
16091 gen_frame_mem
16092 (SImode,
16093 plus_constant (Pmode, stack_pointer_rtx,
16094 4 * j)),
16095 reg);
16096 RTX_FRAME_RELATED_P (tmp) = 1;
16097 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
16098 }
16099
16100 j++;
16101 }
16102 }
16103
16104 par = emit_insn (par);
16105
16106 tmp = gen_rtx_SET (VOIDmode,
16107 stack_pointer_rtx,
16108 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
16109 RTX_FRAME_RELATED_P (tmp) = 1;
16110 XVECEXP (dwarf, 0, 0) = tmp;
16111
16112 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
16113
16114 return par;
16115 }
16116
16117 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
16118 SAVED_REGS_MASK shows which registers need to be restored.
16119
16120 Unfortunately, since this insn does not reflect very well the actual
16121 semantics of the operation, we need to annotate the insn for the benefit
16122 of DWARF2 frame unwind information. */
16123 static void
16124 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
16125 {
16126 int num_regs = 0;
16127 int i, j;
16128 rtx par;
16129 rtx dwarf = NULL_RTX;
16130 rtx tmp, reg;
16131 bool return_in_pc;
16132 int offset_adj;
16133 int emit_update;
16134
16135 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
16136 offset_adj = return_in_pc ? 1 : 0;
16137 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16138 if (saved_regs_mask & (1 << i))
16139 num_regs++;
16140
16141 gcc_assert (num_regs && num_regs <= 16);
16142
16143 /* If SP is in reglist, then we don't emit SP update insn. */
16144 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
16145
16146 /* The parallel needs to hold num_regs SETs
16147 and one SET for the stack update. */
16148 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
16149
16150 if (return_in_pc)
16151 {
16152 tmp = ret_rtx;
16153 XVECEXP (par, 0, 0) = tmp;
16154 }
16155
16156 if (emit_update)
16157 {
16158 /* Increment the stack pointer, based on there being
16159 num_regs 4-byte registers to restore. */
16160 tmp = gen_rtx_SET (VOIDmode,
16161 stack_pointer_rtx,
16162 plus_constant (Pmode,
16163 stack_pointer_rtx,
16164 4 * num_regs));
16165 RTX_FRAME_RELATED_P (tmp) = 1;
16166 XVECEXP (par, 0, offset_adj) = tmp;
16167 }
16168
16169 /* Now restore every reg, which may include PC. */
16170 for (j = 0, i = 0; j < num_regs; i++)
16171 if (saved_regs_mask & (1 << i))
16172 {
16173 reg = gen_rtx_REG (SImode, i);
16174 tmp = gen_rtx_SET (VOIDmode,
16175 reg,
16176 gen_frame_mem
16177 (SImode,
16178 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
16179 RTX_FRAME_RELATED_P (tmp) = 1;
16180 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
16181
16182 /* We need to maintain a sequence for DWARF info too. As the DWARF info
16183 should not include the PC, skip it. */
16184 if (i != PC_REGNUM)
16185 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16186
16187 j++;
16188 }
16189
16190 if (return_in_pc)
16191 par = emit_jump_insn (par);
16192 else
16193 par = emit_insn (par);
16194
16195 REG_NOTES (par) = dwarf;
16196 }
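/* For example (a sketch only): popping {r4, r5, pc} builds a PARALLEL
   holding the return, the SP increment by 12 and three register loads,
   which the pop_multi pattern typically renders as
       ldmfd   sp!, {r4, r5, pc}  */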
16197
16198 /* Generate and emit an insn pattern that we will recognize as a pop_multi
16199 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
16200
16201 Unfortunately, since this insn does not reflect very well the actual
16202 semantics of the operation, we need to annotate the insn for the benefit
16203 of DWARF2 frame unwind information. */
16204 static void
16205 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
16206 {
16207 int i, j;
16208 rtx par;
16209 rtx dwarf = NULL_RTX;
16210 rtx tmp, reg;
16211
16212 gcc_assert (num_regs && num_regs <= 32);
16213
16214 /* Workaround ARM10 VFPr1 bug. */
16215 if (num_regs == 2 && !arm_arch6)
16216 {
16217 if (first_reg == 15)
16218 first_reg--;
16219
16220 num_regs++;
16221 }
16222
16223 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
16224 there could be up to 32 D-registers to restore.
16225 If there are more than 16 D-registers, make two recursive calls,
16226 each of which emits one pop_multi instruction. */
16227 if (num_regs > 16)
16228 {
16229 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
16230 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
16231 return;
16232 }
16233
16234 /* The parallel needs to hold num_regs SETs
16235 and one SET for the stack update. */
16236 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
16237
16238 /* Increment the stack pointer, based on there being
16239 num_regs 8-byte registers to restore. */
16240 tmp = gen_rtx_SET (VOIDmode,
16241 base_reg,
16242 plus_constant (Pmode, base_reg, 8 * num_regs));
16243 RTX_FRAME_RELATED_P (tmp) = 1;
16244 XVECEXP (par, 0, 0) = tmp;
16245
16246 /* Now show every reg that will be restored, using a SET for each. */
16247 for (j = 0, i=first_reg; j < num_regs; i += 2)
16248 {
16249 reg = gen_rtx_REG (DFmode, i);
16250
16251 tmp = gen_rtx_SET (VOIDmode,
16252 reg,
16253 gen_frame_mem
16254 (DFmode,
16255 plus_constant (Pmode, base_reg, 8 * j)));
16256 RTX_FRAME_RELATED_P (tmp) = 1;
16257 XVECEXP (par, 0, j + 1) = tmp;
16258
16259 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16260
16261 j++;
16262 }
16263
16264 par = emit_insn (par);
16265 REG_NOTES (par) = dwarf;
16266 }
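/* For example (illustrative only): restoring d8-d11 relative to BASE_REG
   builds one PARALLEL that advances BASE_REG by 32 bytes and reloads the
   four double registers, typically rendered as something like
       vldmia  sp!, {d8-d11}
   when BASE_REG is the stack pointer.  */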
16267
16268 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
16269 even number of registers is being popped, multiple LDRD patterns are created
16270 for all register pairs. If an odd number of registers is popped, the last
16271 register is loaded using an LDR pattern. */
16272 static void
16273 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
16274 {
16275 int num_regs = 0;
16276 int i, j;
16277 rtx par = NULL_RTX;
16278 rtx dwarf = NULL_RTX;
16279 rtx tmp, reg, tmp1;
16280 bool return_in_pc;
16281
16282 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
16283 for (i = 0; i <= LAST_ARM_REGNUM; i++)
16284 if (saved_regs_mask & (1 << i))
16285 num_regs++;
16286
16287 gcc_assert (num_regs && num_regs <= 16);
16288
16289 /* We cannot generate an LDRD for PC. Hence, reduce the count if PC is
16290 to be popped. So, if num_regs was even it now becomes odd, and we can
16291 generate a pop with PC. If num_regs was odd, it now becomes even, and
16292 an LDR with return can be generated for PC. */
16293 if (return_in_pc)
16294 num_regs--;
16295
16296 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
16297
16298 /* Var j iterates over all the registers to gather all the registers in
16299 saved_regs_mask. Var i gives index of saved registers in stack frame.
16300 A PARALLEL RTX of register-pair is created here, so that pattern for
16301 LDRD can be matched. As PC is always last register to be popped, and
16302 we have already decremented num_regs if PC, we don't have to worry
16303 about PC in this loop. */
16304 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
16305 if (saved_regs_mask & (1 << j))
16306 {
16307 /* Create RTX for memory load. */
16308 reg = gen_rtx_REG (SImode, j);
16309 tmp = gen_rtx_SET (SImode,
16310 reg,
16311 gen_frame_mem (SImode,
16312 plus_constant (Pmode,
16313 stack_pointer_rtx, 4 * i)));
16314 RTX_FRAME_RELATED_P (tmp) = 1;
16315
16316 if (i % 2 == 0)
16317 {
16318 /* When saved-register index (i) is even, the RTX to be emitted is
16319 yet to be created. Hence create it first. The LDRD pattern we
16320 are generating is :
16321 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
16322 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
16323 where target registers need not be consecutive. */
16324 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16325 dwarf = NULL_RTX;
16326 }
16327
16328 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
16329 added as the 0th element, and if i is odd, it is added as the 1st element
16330 of the LDRD pattern shown above. */
16331 XVECEXP (par, 0, (i % 2)) = tmp;
16332 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16333
16334 if ((i % 2) == 1)
16335 {
16336 /* When saved-register index (i) is odd, RTXs for both the registers
16337 to be loaded are generated in above given LDRD pattern, and the
16338 pattern can be emitted now. */
16339 par = emit_insn (par);
16340 REG_NOTES (par) = dwarf;
16341 }
16342
16343 i++;
16344 }
16345
16346 /* If the number of registers popped is odd and return_in_pc is false, or
16347 the number of registers is even and return_in_pc is true, the last
16348 register is popped using LDR. It can be PC as well. Hence, adjust the
16349 stack first and then use LDR with post-increment. */
16350
16351 /* Increment the stack pointer, based on there being
16352 num_regs 4-byte registers to restore. */
16353 tmp = gen_rtx_SET (VOIDmode,
16354 stack_pointer_rtx,
16355 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
16356 RTX_FRAME_RELATED_P (tmp) = 1;
16357 emit_insn (tmp);
16358
16359 dwarf = NULL_RTX;
16360
16361 if (((num_regs % 2) == 1 && !return_in_pc)
16362 || ((num_regs % 2) == 0 && return_in_pc))
16363 {
16364 /* Scan for the single register to be popped. Skip until the saved
16365 register is found. */
16366 for (; (saved_regs_mask & (1 << j)) == 0; j++);
16367
16368 /* Gen LDR with post increment here. */
16369 tmp1 = gen_rtx_MEM (SImode,
16370 gen_rtx_POST_INC (SImode,
16371 stack_pointer_rtx));
16372 set_mem_alias_set (tmp1, get_frame_alias_set ());
16373
16374 reg = gen_rtx_REG (SImode, j);
16375 tmp = gen_rtx_SET (SImode, reg, tmp1);
16376 RTX_FRAME_RELATED_P (tmp) = 1;
16377 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16378
16379 if (return_in_pc)
16380 {
16381 /* If return_in_pc, j must be PC_REGNUM. */
16382 gcc_assert (j == PC_REGNUM);
16383 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
16384 XVECEXP (par, 0, 0) = ret_rtx;
16385 XVECEXP (par, 0, 1) = tmp;
16386 par = emit_jump_insn (par);
16387 }
16388 else
16389 {
16390 par = emit_insn (tmp);
16391 }
16392
16393 REG_NOTES (par) = dwarf;
16394 }
16395 else if ((num_regs % 2) == 1 && return_in_pc)
16396 {
16397 /* Two registers are left to be popped: one core register and PC. Generate
16398 the pattern pop_multiple_with_stack_update_and_return to pop them via PC. */
16399 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
16400 }
16401
16402 return;
16403 }
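/* A sketch of the effect (register numbers illustrative; final mnemonics
   come from the .md patterns): for saved_regs_mask covering {r4, r5, pc}
   the code above typically produces
       ldrd    r4, r5, [sp]
       add     sp, sp, #8
       ldr     pc, [sp], #4
   whereas {r4, r5, r6, pc} pairs r4/r5 into the LDRD and then pops the
   remaining {r6, pc} via arm_emit_multi_reg_pop.  */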
16404
16405 /* Calculate the size of the return value that is passed in registers. */
16406 static unsigned
16407 arm_size_return_regs (void)
16408 {
16409 enum machine_mode mode;
16410
16411 if (crtl->return_rtx != 0)
16412 mode = GET_MODE (crtl->return_rtx);
16413 else
16414 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16415
16416 return GET_MODE_SIZE (mode);
16417 }
16418
16419 /* Return true if the current function needs to save/restore LR. */
16420 static bool
16421 thumb_force_lr_save (void)
16422 {
16423 return !cfun->machine->lr_save_eliminated
16424 && (!leaf_function_p ()
16425 || thumb_far_jump_used_p ()
16426 || df_regs_ever_live_p (LR_REGNUM));
16427 }
16428
16429
16430 /* Return true if r3 is used by any of the tail call insns in the
16431 current function. */
16432 static bool
16433 any_sibcall_uses_r3 (void)
16434 {
16435 edge_iterator ei;
16436 edge e;
16437
16438 if (!crtl->tail_call_emit)
16439 return false;
16440 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16441 if (e->flags & EDGE_SIBCALL)
16442 {
16443 rtx call = BB_END (e->src);
16444 if (!CALL_P (call))
16445 call = prev_nonnote_nondebug_insn (call);
16446 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16447 if (find_regno_fusage (call, USE, 3))
16448 return true;
16449 }
16450 return false;
16451 }
16452
16453
16454 /* Compute the distance from register FROM to register TO.
16455 These can be the arg pointer (26), the soft frame pointer (25),
16456 the stack pointer (13) or the hard frame pointer (11).
16457 In thumb mode r7 is used as the soft frame pointer, if needed.
16458 Typical stack layout looks like this:
16459
16460 old stack pointer -> | |
16461 ----
16462 | | \
16463 | | saved arguments for
16464 | | vararg functions
16465 | | /
16466 --
16467 hard FP & arg pointer -> | | \
16468 | | stack
16469 | | frame
16470 | | /
16471 --
16472 | | \
16473 | | call saved
16474 | | registers
16475 soft frame pointer -> | | /
16476 --
16477 | | \
16478 | | local
16479 | | variables
16480 locals base pointer -> | | /
16481 --
16482 | | \
16483 | | outgoing
16484 | | arguments
16485 current stack pointer -> | | /
16486 --
16487
16488 For a given function some or all of these stack components
16489 may not be needed, giving rise to the possibility of
16490 eliminating some of the registers.
16491
16492 The values returned by this function must reflect the behavior
16493 of arm_expand_prologue() and arm_compute_save_reg_mask().
16494
16495 The sign of the number returned reflects the direction of stack
16496 growth, so the values are positive for all eliminations except
16497 from the soft frame pointer to the hard frame pointer.
16498
16499 SFP may point just inside the local variables block to ensure correct
16500 alignment. */
16501
16502
16503 /* Calculate stack offsets. These are used to calculate register elimination
16504 offsets and in prologue/epilogue code. Also calculates which registers
16505 should be saved. */
16506
16507 static arm_stack_offsets *
16508 arm_get_frame_offsets (void)
16509 {
16510 struct arm_stack_offsets *offsets;
16511 unsigned long func_type;
16512 int leaf;
16513 int saved;
16514 int core_saved;
16515 HOST_WIDE_INT frame_size;
16516 int i;
16517
16518 offsets = &cfun->machine->stack_offsets;
16519
16520 /* We need to know if we are a leaf function. Unfortunately, it
16521 is possible to be called after start_sequence has been called,
16522 which causes get_insns to return the insns for the sequence,
16523 not the function, which will cause leaf_function_p to return
16524 the incorrect result.
16525
16526 To work around this we cache the result; we only need to know about leaf functions once reload has completed, and the
16527 frame size cannot be changed after that time, so we can safely
16528 use the cached value. */
16529
16530 if (reload_completed)
16531 return offsets;
16532
16533 /* Initially this is the size of the local variables. It will be translated
16534 into an offset once we have determined the size of preceding data. */
16535 frame_size = ROUND_UP_WORD (get_frame_size ());
16536
16537 leaf = leaf_function_p ();
16538
16539 /* Space for variadic functions. */
16540 offsets->saved_args = crtl->args.pretend_args_size;
16541
16542 /* In Thumb mode this is incorrect, but never used. */
16543 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16544 arm_compute_static_chain_stack_bytes();
16545
16546 if (TARGET_32BIT)
16547 {
16548 unsigned int regno;
16549
16550 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16551 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16552 saved = core_saved;
16553
16554 /* We know that SP will be doubleword aligned on entry, and we must
16555 preserve that condition at any subroutine call. We also require the
16556 soft frame pointer to be doubleword aligned. */
16557
16558 if (TARGET_REALLY_IWMMXT)
16559 {
16560 /* Check for the call-saved iWMMXt registers. */
16561 for (regno = FIRST_IWMMXT_REGNUM;
16562 regno <= LAST_IWMMXT_REGNUM;
16563 regno++)
16564 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16565 saved += 8;
16566 }
16567
16568 func_type = arm_current_func_type ();
16569 /* Space for saved VFP registers. */
16570 if (! IS_VOLATILE (func_type)
16571 && TARGET_HARD_FLOAT && TARGET_VFP)
16572 saved += arm_get_vfp_saved_size ();
16573 }
16574 else /* TARGET_THUMB1 */
16575 {
16576 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16577 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16578 saved = core_saved;
16579 if (TARGET_BACKTRACE)
16580 saved += 16;
16581 }
16582
16583 /* Saved registers include the stack frame. */
16584 offsets->saved_regs = offsets->saved_args + saved +
16585 arm_compute_static_chain_stack_bytes();
16586 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
16587 /* A leaf function does not need any stack alignment if it has nothing
16588 on the stack. */
16589 if (leaf && frame_size == 0
16590 /* However if it calls alloca(), we have a dynamically allocated
16591 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16592 && ! cfun->calls_alloca)
16593 {
16594 offsets->outgoing_args = offsets->soft_frame;
16595 offsets->locals_base = offsets->soft_frame;
16596 return offsets;
16597 }
16598
16599 /* Ensure SFP has the correct alignment. */
16600 if (ARM_DOUBLEWORD_ALIGN
16601 && (offsets->soft_frame & 7))
16602 {
16603 offsets->soft_frame += 4;
16604 /* Try to align stack by pushing an extra reg. Don't bother doing this
16605 when there is a stack frame as the alignment will be rolled into
16606 the normal stack adjustment. */
16607 if (frame_size + crtl->outgoing_args_size == 0)
16608 {
16609 int reg = -1;
16610
16611 /* If it is safe to use r3, then do so. This sometimes
16612 generates better code on Thumb-2 by avoiding the need to
16613 use 32-bit push/pop instructions. */
16614 if (! any_sibcall_uses_r3 ()
16615 && arm_size_return_regs () <= 12
16616 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16617 {
16618 reg = 3;
16619 }
16620 else
16621 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16622 {
16623 /* Avoid fixed registers; they may be changed at
16624 arbitrary times so it's unsafe to restore them
16625 during the epilogue. */
16626 if (!fixed_regs[i]
16627 && (offsets->saved_regs_mask & (1 << i)) == 0)
16628 {
16629 reg = i;
16630 break;
16631 }
16632 }
16633
16634 if (reg != -1)
16635 {
16636 offsets->saved_regs += 4;
16637 offsets->saved_regs_mask |= (1 << reg);
16638 }
16639 }
16640 }
16641
16642 offsets->locals_base = offsets->soft_frame + frame_size;
16643 offsets->outgoing_args = (offsets->locals_base
16644 + crtl->outgoing_args_size);
16645
16646 if (ARM_DOUBLEWORD_ALIGN)
16647 {
16648 /* Ensure SP remains doubleword aligned. */
16649 if (offsets->outgoing_args & 7)
16650 offsets->outgoing_args += 4;
16651 gcc_assert (!(offsets->outgoing_args & 7));
16652 }
16653
16654 return offsets;
16655 }
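/* Editor's note: the following is a minimal standalone sketch (not part of
   GCC, never compiled) of the bookkeeping performed by arm_get_frame_offsets
   above, using hypothetical numbers: no pretend args, five core registers
   saved (r4-r7, lr), 20 bytes of locals, 8 bytes of outgoing arguments,
   doubleword stack alignment, and CALLER_INTERWORKING_SLOT_SIZE assumed to
   be zero.  It omits the "push an extra register" alignment trick.  */
#if 0   /* Illustrative sketch only -- never compiled.  */
#include <stdio.h>

#define ROUND_UP_WORD(X)  (((X) + 3) & ~3)   /* word rounding, as assumed above */

int
main (void)
{
  int saved_args, saved, frame_size, outgoing;
  int saved_regs, soft_frame, locals_base, outgoing_args;

  saved_args    = 0;                      /* crtl->args.pretend_args_size */
  saved         = 5 * 4;                  /* r4-r7 + lr saved             */
  frame_size    = ROUND_UP_WORD (20);     /* local variables              */
  outgoing      = 8;                      /* outgoing argument bytes      */

  saved_regs    = saved_args + saved;          /* 20 */
  soft_frame    = saved_regs;                  /* interworking slot assumed 0 */
  if (soft_frame & 7)                          /* keep SFP doubleword aligned */
    soft_frame += 4;                           /* 20 -> 24 */
  locals_base   = soft_frame + frame_size;     /* 24 + 20 = 44 */
  outgoing_args = locals_base + outgoing;      /* 52 */
  if (outgoing_args & 7)                       /* keep SP doubleword aligned  */
    outgoing_args += 4;                        /* 52 -> 56 */

  printf ("saved_regs=%d soft_frame=%d locals_base=%d outgoing_args=%d\n",
	  saved_regs, soft_frame, locals_base, outgoing_args);
  return 0;
}
#endif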
16656
16657
16658 /* Calculate the relative offsets for the different stack pointers. Positive
16659 offsets are in the direction of stack growth. */
16660
16661 HOST_WIDE_INT
16662 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16663 {
16664 arm_stack_offsets *offsets;
16665
16666 offsets = arm_get_frame_offsets ();
16667
16668 /* OK, now we have enough information to compute the distances.
16669 There must be an entry in these switch tables for each pair
16670 of registers in ELIMINABLE_REGS, even if some of the entries
16671 seem to be redundant or useless. */
16672 switch (from)
16673 {
16674 case ARG_POINTER_REGNUM:
16675 switch (to)
16676 {
16677 case THUMB_HARD_FRAME_POINTER_REGNUM:
16678 return 0;
16679
16680 case FRAME_POINTER_REGNUM:
16681 /* This is the reverse of the soft frame pointer
16682 to hard frame pointer elimination below. */
16683 return offsets->soft_frame - offsets->saved_args;
16684
16685 case ARM_HARD_FRAME_POINTER_REGNUM:
16686 /* This is only non-zero in the case where the static chain register
16687 is stored above the frame. */
16688 return offsets->frame - offsets->saved_args - 4;
16689
16690 case STACK_POINTER_REGNUM:
16691 /* If nothing has been pushed on the stack at all
16692 then this will return -4. This *is* correct! */
16693 return offsets->outgoing_args - (offsets->saved_args + 4);
16694
16695 default:
16696 gcc_unreachable ();
16697 }
16698 gcc_unreachable ();
16699
16700 case FRAME_POINTER_REGNUM:
16701 switch (to)
16702 {
16703 case THUMB_HARD_FRAME_POINTER_REGNUM:
16704 return 0;
16705
16706 case ARM_HARD_FRAME_POINTER_REGNUM:
16707 /* The hard frame pointer points to the top entry in the
16708 stack frame. The soft frame pointer points to the bottom entry
16709 in the stack frame. If there is no stack frame at all,
16710 then they are identical. */
16711
16712 return offsets->frame - offsets->soft_frame;
16713
16714 case STACK_POINTER_REGNUM:
16715 return offsets->outgoing_args - offsets->soft_frame;
16716
16717 default:
16718 gcc_unreachable ();
16719 }
16720 gcc_unreachable ();
16721
16722 default:
16723 /* You cannot eliminate from the stack pointer.
16724 In theory you could eliminate from the hard frame
16725 pointer to the stack pointer, but this will never
16726 happen, since if a stack frame is not needed the
16727 hard frame pointer will never be used. */
16728 gcc_unreachable ();
16729 }
16730 }
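/* Editor's note: a worked example (hypothetical numbers, not from GCC) of the
   eliminations above, continuing the sketch that follows arm_get_frame_offsets:
   saved_args = 0, soft_frame = 24, outgoing_args = 56, no APCS frame.  Then
   ARG_POINTER -> FRAME_POINTER gives soft_frame - saved_args = 24,
   ARG_POINTER -> STACK_POINTER gives outgoing_args - (saved_args + 4) = 52,
   and FRAME_POINTER -> STACK_POINTER gives outgoing_args - soft_frame = 32,
   all positive because they follow the direction of stack growth.  */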
16731
16732 /* Given FROM and TO register numbers, say whether this elimination is
16733 allowed. Frame pointer elimination is automatically handled.
16734
16735 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16736 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16737 pointer, we must eliminate FRAME_POINTER_REGNUM into
16738 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16739 ARG_POINTER_REGNUM. */
16740
16741 bool
16742 arm_can_eliminate (const int from, const int to)
16743 {
16744 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16745 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16746 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16747 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
16748 true);
16749 }
16750
16751 /* Emit RTL to save coprocessor registers on function entry. Returns the
16752 number of bytes pushed. */
16753
16754 static int
16755 arm_save_coproc_regs(void)
16756 {
16757 int saved_size = 0;
16758 unsigned reg;
16759 unsigned start_reg;
16760 rtx insn;
16761
16762 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16763 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16764 {
16765 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16766 insn = gen_rtx_MEM (V2SImode, insn);
16767 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16768 RTX_FRAME_RELATED_P (insn) = 1;
16769 saved_size += 8;
16770 }
16771
16772 if (TARGET_HARD_FLOAT && TARGET_VFP)
16773 {
16774 start_reg = FIRST_VFP_REGNUM;
16775
16776 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16777 {
16778 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16779 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16780 {
16781 if (start_reg != reg)
16782 saved_size += vfp_emit_fstmd (start_reg,
16783 (reg - start_reg) / 2);
16784 start_reg = reg + 2;
16785 }
16786 }
16787 if (start_reg != reg)
16788 saved_size += vfp_emit_fstmd (start_reg,
16789 (reg - start_reg) / 2);
16790 }
16791 return saved_size;
16792 }
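/* Editor's note: the VFP loop above coalesces runs of consecutive live
   D registers so that each run becomes a single FSTMD.  The following is a
   minimal standalone sketch (not part of GCC, never compiled) of that
   run-finding pattern, using hypothetical liveness data.  */
#if 0   /* Illustrative sketch only -- never compiled.  */
#include <stdio.h>

int
main (void)
{
  /* Hypothetical liveness of 16 single-precision slots: d1, d2 and d4 live. */
  int live[16] = { 0,0, 1,1, 1,1, 0,0, 1,1, 0,0, 0,0, 0,0 };
  int start = 0, reg;

  for (reg = 0; reg < 16; reg += 2)
    if (!live[reg] && !live[reg + 1])
      {
	if (start != reg)
	  printf ("fstmd d%d-d%d (%d double regs)\n",
		  start / 2, (reg - 2) / 2, (reg - start) / 2);
	start = reg + 2;
      }
  if (start != reg)
    printf ("fstmd d%d-d%d (%d double regs)\n",
	    start / 2, (reg - 2) / 2, (reg - start) / 2);
  return 0;
}
#endif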
16793
16794
16795 /* Set the Thumb frame pointer from the stack pointer. */
16796
16797 static void
16798 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16799 {
16800 HOST_WIDE_INT amount;
16801 rtx insn, dwarf;
16802
16803 amount = offsets->outgoing_args - offsets->locals_base;
16804 if (amount < 1024)
16805 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16806 stack_pointer_rtx, GEN_INT (amount)));
16807 else
16808 {
16809 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16810 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16811 expects the first two operands to be the same. */
16812 if (TARGET_THUMB2)
16813 {
16814 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16815 stack_pointer_rtx,
16816 hard_frame_pointer_rtx));
16817 }
16818 else
16819 {
16820 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16821 hard_frame_pointer_rtx,
16822 stack_pointer_rtx));
16823 }
16824 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16825 plus_constant (Pmode, stack_pointer_rtx, amount));
16826 RTX_FRAME_RELATED_P (dwarf) = 1;
16827 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16828 }
16829
16830 RTX_FRAME_RELATED_P (insn) = 1;
16831 }
16832
16833 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16834 function. */
16835 void
16836 arm_expand_prologue (void)
16837 {
16838 rtx amount;
16839 rtx insn;
16840 rtx ip_rtx;
16841 unsigned long live_regs_mask;
16842 unsigned long func_type;
16843 int fp_offset = 0;
16844 int saved_pretend_args = 0;
16845 int saved_regs = 0;
16846 unsigned HOST_WIDE_INT args_to_push;
16847 arm_stack_offsets *offsets;
16848
16849 func_type = arm_current_func_type ();
16850
16851 /* Naked functions don't have prologues. */
16852 if (IS_NAKED (func_type))
16853 return;
16854
16855 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16856 args_to_push = crtl->args.pretend_args_size;
16857
16858 /* Compute which registers we will have to save onto the stack. */
16859 offsets = arm_get_frame_offsets ();
16860 live_regs_mask = offsets->saved_regs_mask;
16861
16862 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16863
16864 if (IS_STACKALIGN (func_type))
16865 {
16866 rtx r0, r1;
16867
16868 /* Handle a word-aligned stack pointer. We generate the following:
16869
16870 mov r0, sp
16871 bic r1, r0, #7
16872 mov sp, r1
16873 <save and restore r0 in normal prologue/epilogue>
16874 mov sp, r0
16875 bx lr
16876
16877 The unwinder doesn't need to know about the stack realignment.
16878 Just tell it we saved SP in r0. */
16879 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16880
16881 r0 = gen_rtx_REG (SImode, 0);
16882 r1 = gen_rtx_REG (SImode, 1);
16883
16884 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16885 RTX_FRAME_RELATED_P (insn) = 1;
16886 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16887
16888 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16889
16890 /* ??? The CFA changes here, which may cause GDB to conclude that it
16891 has entered a different function. That said, the unwind info is
16892 correct, individually, before and after this instruction because
16893 we've described the save of SP, which will override the default
16894 handling of SP as restoring from the CFA. */
16895 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16896 }
16897
16898 /* For APCS frames, if the IP register is clobbered when
16899 creating the frame, save that register in a special
16900 way. */
16901 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16902 {
16903 if (IS_INTERRUPT (func_type))
16904 {
16905 /* Interrupt functions must not corrupt any registers.
16906 Creating a frame pointer however, corrupts the IP
16907 register, so we must push it first. */
16908 emit_multi_reg_push (1 << IP_REGNUM);
16909
16910 /* Do not set RTX_FRAME_RELATED_P on this insn.
16911 The dwarf stack unwinding code only wants to see one
16912 stack decrement per function, and this is not it. If
16913 this instruction is labeled as being part of the frame
16914 creation sequence then dwarf2out_frame_debug_expr will
16915 die when it encounters the assignment of IP to FP
16916 later on, since the use of SP here establishes SP as
16917 the CFA register and not IP.
16918
16919 Anyway this instruction is not really part of the stack
16920 frame creation although it is part of the prologue. */
16921 }
16922 else if (IS_NESTED (func_type))
16923 {
16924 /* The static chain register is the same as the IP register,
16925 which is used as a scratch register during stack frame creation.
16926 To get around this, we need to find somewhere to store IP
16927 whilst the frame is being created. We try the following
16928 places in order:
16929
16930 1. The last argument register.
16931 2. A slot on the stack above the frame. (This only
16932 works if the function is not a varargs function).
16933 3. Register r3, after pushing the argument registers
16934 onto the stack.
16935
16936 Note - we only need to tell the dwarf2 backend about the SP
16937 adjustment in the second variant; the static chain register
16938 doesn't need to be unwound, as it doesn't contain a value
16939 inherited from the caller. */
16940
16941 if (df_regs_ever_live_p (3) == false)
16942 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16943 else if (args_to_push == 0)
16944 {
16945 rtx dwarf;
16946
16947 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16948 saved_regs += 4;
16949
16950 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16951 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16952 fp_offset = 4;
16953
16954 /* Just tell the dwarf backend that we adjusted SP. */
16955 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16956 plus_constant (Pmode, stack_pointer_rtx,
16957 -fp_offset));
16958 RTX_FRAME_RELATED_P (insn) = 1;
16959 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16960 }
16961 else
16962 {
16963 /* Store the args on the stack. */
16964 if (cfun->machine->uses_anonymous_args)
16965 insn = emit_multi_reg_push
16966 ((0xf0 >> (args_to_push / 4)) & 0xf);
16967 else
16968 insn = emit_insn
16969 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16970 GEN_INT (- args_to_push)));
16971
16972 RTX_FRAME_RELATED_P (insn) = 1;
16973
16974 saved_pretend_args = 1;
16975 fp_offset = args_to_push;
16976 args_to_push = 0;
16977
16978 /* Now reuse r3 to preserve IP. */
16979 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16980 }
16981 }
16982
16983 insn = emit_set_insn (ip_rtx,
16984 plus_constant (Pmode, stack_pointer_rtx,
16985 fp_offset));
16986 RTX_FRAME_RELATED_P (insn) = 1;
16987 }
16988
16989 if (args_to_push)
16990 {
16991 /* Push the argument registers, or reserve space for them. */
16992 if (cfun->machine->uses_anonymous_args)
16993 insn = emit_multi_reg_push
16994 ((0xf0 >> (args_to_push / 4)) & 0xf);
16995 else
16996 insn = emit_insn
16997 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16998 GEN_INT (- args_to_push)));
16999 RTX_FRAME_RELATED_P (insn) = 1;
17000 }
17001
17002 /* If this is an interrupt service routine, and the link register
17003 is going to be pushed, and we're not generating an extra
17004 push of IP (needed when a frame is needed and the frame layout is APCS),
17005 subtracting four from LR now will mean that the function return
17006 can be done with a single instruction. */
17007 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
17008 && (live_regs_mask & (1 << LR_REGNUM)) != 0
17009 && !(frame_pointer_needed && TARGET_APCS_FRAME)
17010 && TARGET_ARM)
17011 {
17012 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
17013
17014 emit_set_insn (lr, plus_constant (SImode, lr, -4));
17015 }
17016
17017 if (live_regs_mask)
17018 {
17019 saved_regs += bit_count (live_regs_mask) * 4;
17020 if (optimize_size && !frame_pointer_needed
17021 && saved_regs == offsets->saved_regs - offsets->saved_args)
17022 {
17023 /* If no coprocessor registers are being pushed and we don't have
17024 to worry about a frame pointer then push extra registers to
17025 create the stack frame. This is done in a way that does not
17026 alter the frame layout, so is independent of the epilogue. */
17027 int n;
17028 int frame;
17029 n = 0;
17030 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
17031 n++;
17032 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
17033 if (frame && n * 4 >= frame)
17034 {
17035 n = frame / 4;
17036 live_regs_mask |= (1 << n) - 1;
17037 saved_regs += frame;
17038 }
17039 }
17040
17041 if (current_tune->prefer_ldrd_strd
17042 && !optimize_function_for_size_p (cfun))
17043 {
17044 if (TARGET_THUMB2)
17045 {
17046 thumb2_emit_strd_push (live_regs_mask);
17047 }
17048 else
17049 {
17050 insn = emit_multi_reg_push (live_regs_mask);
17051 RTX_FRAME_RELATED_P (insn) = 1;
17052 }
17053 }
17054 else
17055 {
17056 insn = emit_multi_reg_push (live_regs_mask);
17057 RTX_FRAME_RELATED_P (insn) = 1;
17058 }
17059 }
17060
17061 if (! IS_VOLATILE (func_type))
17062 saved_regs += arm_save_coproc_regs ();
17063
17064 if (frame_pointer_needed && TARGET_ARM)
17065 {
17066 /* Create the new frame pointer. */
17067 if (TARGET_APCS_FRAME)
17068 {
17069 insn = GEN_INT (-(4 + args_to_push + fp_offset));
17070 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
17071 RTX_FRAME_RELATED_P (insn) = 1;
17072
17073 if (IS_NESTED (func_type))
17074 {
17075 /* Recover the static chain register. */
17076 if (!df_regs_ever_live_p (3)
17077 || saved_pretend_args)
17078 insn = gen_rtx_REG (SImode, 3);
17079 else /* if (crtl->args.pretend_args_size == 0) */
17080 {
17081 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
17082 insn = gen_frame_mem (SImode, insn);
17083 }
17084 emit_set_insn (ip_rtx, insn);
17085 /* Add a USE to stop propagate_one_insn() from barfing. */
17086 emit_insn (gen_force_register_use (ip_rtx));
17087 }
17088 }
17089 else
17090 {
17091 insn = GEN_INT (saved_regs - 4);
17092 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
17093 stack_pointer_rtx, insn));
17094 RTX_FRAME_RELATED_P (insn) = 1;
17095 }
17096 }
17097
17098 if (flag_stack_usage_info)
17099 current_function_static_stack_size
17100 = offsets->outgoing_args - offsets->saved_args;
17101
17102 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
17103 {
17104 /* This add can produce multiple insns for a large constant, so we
17105 need to get tricky. */
17106 rtx last = get_last_insn ();
17107
17108 amount = GEN_INT (offsets->saved_args + saved_regs
17109 - offsets->outgoing_args);
17110
17111 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17112 amount));
17113 do
17114 {
17115 last = last ? NEXT_INSN (last) : get_insns ();
17116 RTX_FRAME_RELATED_P (last) = 1;
17117 }
17118 while (last != insn);
17119
17120 /* If the frame pointer is needed, emit a special barrier that
17121 will prevent the scheduler from moving stores to the frame
17122 before the stack adjustment. */
17123 if (frame_pointer_needed)
17124 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
17125 hard_frame_pointer_rtx));
17126 }
17127
17128
17129 if (frame_pointer_needed && TARGET_THUMB2)
17130 thumb_set_frame_pointer (offsets);
17131
17132 if (flag_pic && arm_pic_register != INVALID_REGNUM)
17133 {
17134 unsigned long mask;
17135
17136 mask = live_regs_mask;
17137 mask &= THUMB2_WORK_REGS;
17138 if (!IS_NESTED (func_type))
17139 mask |= (1 << IP_REGNUM);
17140 arm_load_pic_register (mask);
17141 }
17142
17143 /* If we are profiling, make sure no instructions are scheduled before
17144 the call to mcount. Similarly if the user has requested no
17145 scheduling in the prolog. Similarly if we want non-call exceptions
17146 using the EABI unwinder, to prevent faulting instructions from being
17147 swapped with a stack adjustment. */
17148 if (crtl->profile || !TARGET_SCHED_PROLOG
17149 || (arm_except_unwind_info (&global_options) == UI_TARGET
17150 && cfun->can_throw_non_call_exceptions))
17151 emit_insn (gen_blockage ());
17152
17153 /* If the link register is being kept alive, with the return address in it,
17154 then make sure that it does not get reused by the ce2 pass. */
17155 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
17156 cfun->machine->lr_save_eliminated = 1;
17157 }
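/* Editor's note: a minimal standalone sketch (not part of GCC, never
   compiled) of the (0xf0 >> (args_to_push / 4)) & 0xf expression used twice
   above for variadic functions: pushing N bytes of pretend arguments selects
   the last N/4 of the argument registers r0-r3.  */
#if 0   /* Illustrative sketch only -- never compiled.  */
#include <stdio.h>

int
main (void)
{
  int args_to_push, r;

  for (args_to_push = 4; args_to_push <= 16; args_to_push += 4)
    {
      unsigned mask = (0xf0 >> (args_to_push / 4)) & 0xf;

      printf ("args_to_push=%2d  mask=0x%x  {", args_to_push, mask);
      for (r = 0; r < 4; r++)
	if (mask & (1u << r))
	  printf (" r%d", r);          /* 4 -> {r3}, 8 -> {r2,r3}, ... */
      printf (" }\n");
    }
  return 0;
}
#endif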
17158 \f
17159 /* Print condition code to STREAM. Helper function for arm_print_operand. */
17160 static void
17161 arm_print_condition (FILE *stream)
17162 {
17163 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
17164 {
17165 /* Branch conversion is not implemented for Thumb-2. */
17166 if (TARGET_THUMB)
17167 {
17168 output_operand_lossage ("predicated Thumb instruction");
17169 return;
17170 }
17171 if (current_insn_predicate != NULL)
17172 {
17173 output_operand_lossage
17174 ("predicated instruction in conditional sequence");
17175 return;
17176 }
17177
17178 fputs (arm_condition_codes[arm_current_cc], stream);
17179 }
17180 else if (current_insn_predicate)
17181 {
17182 enum arm_cond_code code;
17183
17184 if (TARGET_THUMB1)
17185 {
17186 output_operand_lossage ("predicated Thumb instruction");
17187 return;
17188 }
17189
17190 code = get_arm_condition_code (current_insn_predicate);
17191 fputs (arm_condition_codes[code], stream);
17192 }
17193 }
17194
17195
17196 /* If CODE is 'd', then X is a condition operand and the instruction
17197 should only be executed if the condition is true.
17198 If CODE is 'D', then X is a condition operand and the instruction
17199 should only be executed if the condition is false: however, if the mode
17200 of the comparison is CCFPEmode, then always execute the instruction -- we
17201 do this because in these circumstances !GE does not necessarily imply LT;
17202 in these cases the instruction pattern will take care to make sure that
17203 an instruction containing %d will follow, thereby undoing the effects of
17204 doing this instruction unconditionally.
17205 If CODE is 'N' then X is a floating point operand that must be negated
17206 before output.
17207 If CODE is 'B' then output a bitwise inverted value of X (a const int).
17208 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
17209 static void
17210 arm_print_operand (FILE *stream, rtx x, int code)
17211 {
17212 switch (code)
17213 {
17214 case '@':
17215 fputs (ASM_COMMENT_START, stream);
17216 return;
17217
17218 case '_':
17219 fputs (user_label_prefix, stream);
17220 return;
17221
17222 case '|':
17223 fputs (REGISTER_PREFIX, stream);
17224 return;
17225
17226 case '?':
17227 arm_print_condition (stream);
17228 return;
17229
17230 case '(':
17231 /* Nothing in unified syntax, otherwise the current condition code. */
17232 if (!TARGET_UNIFIED_ASM)
17233 arm_print_condition (stream);
17234 break;
17235
17236 case ')':
17237 /* The current condition code in unified syntax, otherwise nothing. */
17238 if (TARGET_UNIFIED_ASM)
17239 arm_print_condition (stream);
17240 break;
17241
17242 case '.':
17243 /* The current condition code for a condition code setting instruction.
17244 Preceded by 's' in unified syntax, otherwise followed by 's'. */
17245 if (TARGET_UNIFIED_ASM)
17246 {
17247 fputc('s', stream);
17248 arm_print_condition (stream);
17249 }
17250 else
17251 {
17252 arm_print_condition (stream);
17253 fputc('s', stream);
17254 }
17255 return;
17256
17257 case '!':
17258 /* If the instruction is conditionally executed then print
17259 the current condition code, otherwise print 's'. */
17260 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
17261 if (current_insn_predicate)
17262 arm_print_condition (stream);
17263 else
17264 fputc('s', stream);
17265 break;
17266
17267 /* %# is a "break" sequence. It doesn't output anything, but is used to
17268 separate e.g. operand numbers from following text, if that text consists
17269 of further digits which we don't want to be part of the operand
17270 number. */
17271 case '#':
17272 return;
17273
17274 case 'N':
17275 {
17276 REAL_VALUE_TYPE r;
17277 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17278 r = real_value_negate (&r);
17279 fprintf (stream, "%s", fp_const_from_val (&r));
17280 }
17281 return;
17282
17283 /* An integer or symbol address without a preceding # sign. */
17284 case 'c':
17285 switch (GET_CODE (x))
17286 {
17287 case CONST_INT:
17288 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
17289 break;
17290
17291 case SYMBOL_REF:
17292 output_addr_const (stream, x);
17293 break;
17294
17295 case CONST:
17296 if (GET_CODE (XEXP (x, 0)) == PLUS
17297 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
17298 {
17299 output_addr_const (stream, x);
17300 break;
17301 }
17302 /* Fall through. */
17303
17304 default:
17305 output_operand_lossage ("Unsupported operand for code '%c'", code);
17306 }
17307 return;
17308
17309 /* An integer that we want to print in HEX. */
17310 case 'x':
17311 switch (GET_CODE (x))
17312 {
17313 case CONST_INT:
17314 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
17315 break;
17316
17317 default:
17318 output_operand_lossage ("Unsupported operand for code '%c'", code);
17319 }
17320 return;
17321
17322 case 'B':
17323 if (CONST_INT_P (x))
17324 {
17325 HOST_WIDE_INT val;
17326 val = ARM_SIGN_EXTEND (~INTVAL (x));
17327 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
17328 }
17329 else
17330 {
17331 putc ('~', stream);
17332 output_addr_const (stream, x);
17333 }
17334 return;
17335
17336 case 'L':
17337 /* The low 16 bits of an immediate constant. */
17338 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
17339 return;
17340
17341 case 'i':
17342 fprintf (stream, "%s", arithmetic_instr (x, 1));
17343 return;
17344
17345 case 'I':
17346 fprintf (stream, "%s", arithmetic_instr (x, 0));
17347 return;
17348
17349 case 'S':
17350 {
17351 HOST_WIDE_INT val;
17352 const char *shift;
17353
17354 if (!shift_operator (x, SImode))
17355 {
17356 output_operand_lossage ("invalid shift operand");
17357 break;
17358 }
17359
17360 shift = shift_op (x, &val);
17361
17362 if (shift)
17363 {
17364 fprintf (stream, ", %s ", shift);
17365 if (val == -1)
17366 arm_print_operand (stream, XEXP (x, 1), 0);
17367 else
17368 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
17369 }
17370 }
17371 return;
17372
17373 /* An explanation of the 'Q', 'R' and 'H' register operands:
17374
17375 In a pair of registers containing a DI or DF value the 'Q'
17376 operand returns the register number of the register containing
17377 the least significant part of the value. The 'R' operand returns
17378 the register number of the register containing the most
17379 significant part of the value.
17380
17381 The 'H' operand returns the higher of the two register numbers.
17382 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
17383 same as the 'Q' operand, since the most significant part of the
17384 value is held in the lower number register. The reverse is true
17385 on systems where WORDS_BIG_ENDIAN is false.
17386
17387 The purpose of these operands is to distinguish between cases
17388 where the endian-ness of the values is important (for example
17389 when they are added together), and cases where the endian-ness
17390 is irrelevant, but the order of register operations is important.
17391 For example when loading a value from memory into a register
17392 pair, the endian-ness does not matter. Provided that the value
17393 from the lower memory address is put into the lower numbered
17394 register, and the value from the higher address is put into the
17395 higher numbered register, the load will work regardless of whether
17396 the value being loaded is big-wordian or little-wordian. The
17397 order of the two register loads can matter however, if the address
17398 of the memory location is actually held in one of the registers
17399 being overwritten by the load.
17400
17401 The 'Q' and 'R' constraints are also available for 64-bit
17402 constants. */
17403 case 'Q':
17404 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17405 {
17406 rtx part = gen_lowpart (SImode, x);
17407 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17408 return;
17409 }
17410
17411 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17412 {
17413 output_operand_lossage ("invalid operand for code '%c'", code);
17414 return;
17415 }
17416
17417 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17418 return;
17419
17420 case 'R':
17421 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
17422 {
17423 enum machine_mode mode = GET_MODE (x);
17424 rtx part;
17425
17426 if (mode == VOIDmode)
17427 mode = DImode;
17428 part = gen_highpart_mode (SImode, mode, x);
17429 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17430 return;
17431 }
17432
17433 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17434 {
17435 output_operand_lossage ("invalid operand for code '%c'", code);
17436 return;
17437 }
17438
17439 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17440 return;
17441
17442 case 'H':
17443 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17444 {
17445 output_operand_lossage ("invalid operand for code '%c'", code);
17446 return;
17447 }
17448
17449 asm_fprintf (stream, "%r", REGNO (x) + 1);
17450 return;
17451
17452 case 'J':
17453 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17454 {
17455 output_operand_lossage ("invalid operand for code '%c'", code);
17456 return;
17457 }
17458
17459 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17460 return;
17461
17462 case 'K':
17463 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
17464 {
17465 output_operand_lossage ("invalid operand for code '%c'", code);
17466 return;
17467 }
17468
17469 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17470 return;
17471
17472 case 'm':
17473 asm_fprintf (stream, "%r",
17474 REG_P (XEXP (x, 0))
17475 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17476 return;
17477
17478 case 'M':
17479 asm_fprintf (stream, "{%r-%r}",
17480 REGNO (x),
17481 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17482 return;
17483
17484 /* Like 'M', but writing doubleword vector registers, for use by Neon
17485 insns. */
17486 case 'h':
17487 {
17488 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17489 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17490 if (numregs == 1)
17491 asm_fprintf (stream, "{d%d}", regno);
17492 else
17493 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17494 }
17495 return;
17496
17497 case 'd':
17498 /* CONST_TRUE_RTX means always -- that's the default. */
17499 if (x == const_true_rtx)
17500 return;
17501
17502 if (!COMPARISON_P (x))
17503 {
17504 output_operand_lossage ("invalid operand for code '%c'", code);
17505 return;
17506 }
17507
17508 fputs (arm_condition_codes[get_arm_condition_code (x)],
17509 stream);
17510 return;
17511
17512 case 'D':
17513 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17514 want to do that. */
17515 if (x == const_true_rtx)
17516 {
17517 output_operand_lossage ("instruction never executed");
17518 return;
17519 }
17520 if (!COMPARISON_P (x))
17521 {
17522 output_operand_lossage ("invalid operand for code '%c'", code);
17523 return;
17524 }
17525
17526 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17527 (get_arm_condition_code (x))],
17528 stream);
17529 return;
17530
17531 case 's':
17532 case 'V':
17533 case 'W':
17534 case 'X':
17535 case 'Y':
17536 case 'Z':
17537 /* Former Maverick support, removed after GCC-4.7. */
17538 output_operand_lossage ("obsolete Maverick format code '%c'", code);
17539 return;
17540
17541 case 'U':
17542 if (!REG_P (x)
17543 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17544 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17545 /* Bad value for wCG register number. */
17546 {
17547 output_operand_lossage ("invalid operand for code '%c'", code);
17548 return;
17549 }
17550
17551 else
17552 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17553 return;
17554
17555 /* Print an iWMMXt control register name. */
17556 case 'w':
17557 if (!CONST_INT_P (x)
17558 || INTVAL (x) < 0
17559 || INTVAL (x) >= 16)
17560 /* Bad value for wC register number. */
17561 {
17562 output_operand_lossage ("invalid operand for code '%c'", code);
17563 return;
17564 }
17565
17566 else
17567 {
17568 static const char * wc_reg_names [16] =
17569 {
17570 "wCID", "wCon", "wCSSF", "wCASF",
17571 "wC4", "wC5", "wC6", "wC7",
17572 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17573 "wC12", "wC13", "wC14", "wC15"
17574 };
17575
17576 fputs (wc_reg_names [INTVAL (x)], stream);
17577 }
17578 return;
17579
17580 /* Print the high single-precision register of a VFP double-precision
17581 register. */
17582 case 'p':
17583 {
17584 int mode = GET_MODE (x);
17585 int regno;
17586
17587 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
17588 {
17589 output_operand_lossage ("invalid operand for code '%c'", code);
17590 return;
17591 }
17592
17593 regno = REGNO (x);
17594 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17595 {
17596 output_operand_lossage ("invalid operand for code '%c'", code);
17597 return;
17598 }
17599
17600 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17601 }
17602 return;
17603
17604 /* Print a VFP/Neon double precision or quad precision register name. */
17605 case 'P':
17606 case 'q':
17607 {
17608 int mode = GET_MODE (x);
17609 int is_quad = (code == 'q');
17610 int regno;
17611
17612 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17613 {
17614 output_operand_lossage ("invalid operand for code '%c'", code);
17615 return;
17616 }
17617
17618 if (!REG_P (x)
17619 || !IS_VFP_REGNUM (REGNO (x)))
17620 {
17621 output_operand_lossage ("invalid operand for code '%c'", code);
17622 return;
17623 }
17624
17625 regno = REGNO (x);
17626 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17627 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17628 {
17629 output_operand_lossage ("invalid operand for code '%c'", code);
17630 return;
17631 }
17632
17633 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17634 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17635 }
17636 return;
17637
17638 /* These two codes print the low/high doubleword register of a Neon quad
17639 register, respectively. For pair-structure types, can also print
17640 low/high quadword registers. */
17641 case 'e':
17642 case 'f':
17643 {
17644 int mode = GET_MODE (x);
17645 int regno;
17646
17647 if ((GET_MODE_SIZE (mode) != 16
17648 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
17649 {
17650 output_operand_lossage ("invalid operand for code '%c'", code);
17651 return;
17652 }
17653
17654 regno = REGNO (x);
17655 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17656 {
17657 output_operand_lossage ("invalid operand for code '%c'", code);
17658 return;
17659 }
17660
17661 if (GET_MODE_SIZE (mode) == 16)
17662 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17663 + (code == 'f' ? 1 : 0));
17664 else
17665 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17666 + (code == 'f' ? 1 : 0));
17667 }
17668 return;
17669
17670 /* Print a VFPv3 floating-point constant, represented as an integer
17671 index. */
17672 case 'G':
17673 {
17674 int index = vfp3_const_double_index (x);
17675 gcc_assert (index != -1);
17676 fprintf (stream, "%d", index);
17677 }
17678 return;
17679
17680 /* Print bits representing opcode features for Neon.
17681
17682 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17683 and polynomials as unsigned.
17684
17685 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17686
17687 Bit 2 is 1 for rounding functions, 0 otherwise. */
17688
17689 /* Identify the type as 's', 'u', 'p' or 'f'. */
17690 case 'T':
17691 {
17692 HOST_WIDE_INT bits = INTVAL (x);
17693 fputc ("uspf"[bits & 3], stream);
17694 }
17695 return;
17696
17697 /* Likewise, but signed and unsigned integers are both 'i'. */
17698 case 'F':
17699 {
17700 HOST_WIDE_INT bits = INTVAL (x);
17701 fputc ("iipf"[bits & 3], stream);
17702 }
17703 return;
17704
17705 /* As for 'T', but emit 'u' instead of 'p'. */
17706 case 't':
17707 {
17708 HOST_WIDE_INT bits = INTVAL (x);
17709 fputc ("usuf"[bits & 3], stream);
17710 }
17711 return;
17712
17713 /* Bit 2: rounding (vs none). */
17714 case 'O':
17715 {
17716 HOST_WIDE_INT bits = INTVAL (x);
17717 fputs ((bits & 4) != 0 ? "r" : "", stream);
17718 }
17719 return;
17720
17721 /* Memory operand for vld1/vst1 instruction. */
17722 case 'A':
17723 {
17724 rtx addr;
17725 bool postinc = FALSE;
17726 unsigned align, memsize, align_bits;
17727
17728 gcc_assert (MEM_P (x));
17729 addr = XEXP (x, 0);
17730 if (GET_CODE (addr) == POST_INC)
17731 {
17732 postinc = 1;
17733 addr = XEXP (addr, 0);
17734 }
17735 asm_fprintf (stream, "[%r", REGNO (addr));
17736
17737 /* We know the alignment of this access, so we can emit a hint in the
17738 instruction (for some alignments) as an aid to the memory subsystem
17739 of the target. */
17740 align = MEM_ALIGN (x) >> 3;
17741 memsize = MEM_SIZE (x);
17742
17743 /* Only certain alignment specifiers are supported by the hardware. */
17744 if (memsize == 32 && (align % 32) == 0)
17745 align_bits = 256;
17746 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
17747 align_bits = 128;
17748 else if (memsize >= 8 && (align % 8) == 0)
17749 align_bits = 64;
17750 else
17751 align_bits = 0;
17752
17753 if (align_bits != 0)
17754 asm_fprintf (stream, ":%d", align_bits);
17755
17756 asm_fprintf (stream, "]");
17757
17758 if (postinc)
17759 fputs("!", stream);
17760 }
17761 return;
17762
17763 case 'C':
17764 {
17765 rtx addr;
17766
17767 gcc_assert (MEM_P (x));
17768 addr = XEXP (x, 0);
17769 gcc_assert (REG_P (addr));
17770 asm_fprintf (stream, "[%r]", REGNO (addr));
17771 }
17772 return;
17773
17774 /* Translate an S register number into a D register number and element index. */
17775 case 'y':
17776 {
17777 int mode = GET_MODE (x);
17778 int regno;
17779
17780 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
17781 {
17782 output_operand_lossage ("invalid operand for code '%c'", code);
17783 return;
17784 }
17785
17786 regno = REGNO (x);
17787 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17788 {
17789 output_operand_lossage ("invalid operand for code '%c'", code);
17790 return;
17791 }
17792
17793 regno = regno - FIRST_VFP_REGNUM;
17794 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17795 }
17796 return;
17797
17798 case 'v':
17799 gcc_assert (CONST_DOUBLE_P (x));
17800 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17801 return;
17802
17803 /* Register specifier for vld1.16/vst1.16. Translate the S register
17804 number into a D register number and element index. */
17805 case 'z':
17806 {
17807 int mode = GET_MODE (x);
17808 int regno;
17809
17810 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
17811 {
17812 output_operand_lossage ("invalid operand for code '%c'", code);
17813 return;
17814 }
17815
17816 regno = REGNO (x);
17817 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17818 {
17819 output_operand_lossage ("invalid operand for code '%c'", code);
17820 return;
17821 }
17822
17823 regno = regno - FIRST_VFP_REGNUM;
17824 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17825 }
17826 return;
17827
17828 default:
17829 if (x == 0)
17830 {
17831 output_operand_lossage ("missing operand");
17832 return;
17833 }
17834
17835 switch (GET_CODE (x))
17836 {
17837 case REG:
17838 asm_fprintf (stream, "%r", REGNO (x));
17839 break;
17840
17841 case MEM:
17842 output_memory_reference_mode = GET_MODE (x);
17843 output_address (XEXP (x, 0));
17844 break;
17845
17846 case CONST_DOUBLE:
17847 if (TARGET_NEON)
17848 {
17849 char fpstr[20];
17850 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17851 sizeof (fpstr), 0, 1);
17852 fprintf (stream, "#%s", fpstr);
17853 }
17854 else
17855 fprintf (stream, "#%s", fp_immediate_constant (x));
17856 break;
17857
17858 default:
17859 gcc_assert (GET_CODE (x) != NEG);
17860 fputc ('#', stream);
17861 if (GET_CODE (x) == HIGH)
17862 {
17863 fputs (":lower16:", stream);
17864 x = XEXP (x, 0);
17865 }
17866
17867 output_addr_const (stream, x);
17868 break;
17869 }
17870 }
17871 }
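/* Editor's note: a hypothetical example of the register-pair codes handled
   above.  For a DImode value living in the pair r4/r5 on a target where
   WORDS_BIG_ENDIAN is false, '%Q' prints r4 (the low word), '%R' prints r5
   (the high word) and '%H' prints r5 (the higher register number).  With
   WORDS_BIG_ENDIAN true, '%Q' prints r5 and '%R' prints r4, while '%H'
   still prints r5.  */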
17872 \f
17873 /* Target hook for printing a memory address. */
17874 static void
17875 arm_print_operand_address (FILE *stream, rtx x)
17876 {
17877 if (TARGET_32BIT)
17878 {
17879 int is_minus = GET_CODE (x) == MINUS;
17880
17881 if (REG_P (x))
17882 asm_fprintf (stream, "[%r]", REGNO (x));
17883 else if (GET_CODE (x) == PLUS || is_minus)
17884 {
17885 rtx base = XEXP (x, 0);
17886 rtx index = XEXP (x, 1);
17887 HOST_WIDE_INT offset = 0;
17888 if (!REG_P (base)
17889 || (REG_P (index) && REGNO (index) == SP_REGNUM))
17890 {
17891 /* Ensure that BASE is a register
17892 (one of them must be).
17893 Also ensure that SP is not used as an index register. */
17894 rtx temp = base;
17895 base = index;
17896 index = temp;
17897 }
17898 switch (GET_CODE (index))
17899 {
17900 case CONST_INT:
17901 offset = INTVAL (index);
17902 if (is_minus)
17903 offset = -offset;
17904 asm_fprintf (stream, "[%r, #%wd]",
17905 REGNO (base), offset);
17906 break;
17907
17908 case REG:
17909 asm_fprintf (stream, "[%r, %s%r]",
17910 REGNO (base), is_minus ? "-" : "",
17911 REGNO (index));
17912 break;
17913
17914 case MULT:
17915 case ASHIFTRT:
17916 case LSHIFTRT:
17917 case ASHIFT:
17918 case ROTATERT:
17919 {
17920 asm_fprintf (stream, "[%r, %s%r",
17921 REGNO (base), is_minus ? "-" : "",
17922 REGNO (XEXP (index, 0)));
17923 arm_print_operand (stream, index, 'S');
17924 fputs ("]", stream);
17925 break;
17926 }
17927
17928 default:
17929 gcc_unreachable ();
17930 }
17931 }
17932 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17933 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17934 {
17935 extern enum machine_mode output_memory_reference_mode;
17936
17937 gcc_assert (REG_P (XEXP (x, 0)));
17938
17939 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17940 asm_fprintf (stream, "[%r, #%s%d]!",
17941 REGNO (XEXP (x, 0)),
17942 GET_CODE (x) == PRE_DEC ? "-" : "",
17943 GET_MODE_SIZE (output_memory_reference_mode));
17944 else
17945 asm_fprintf (stream, "[%r], #%s%d",
17946 REGNO (XEXP (x, 0)),
17947 GET_CODE (x) == POST_DEC ? "-" : "",
17948 GET_MODE_SIZE (output_memory_reference_mode));
17949 }
17950 else if (GET_CODE (x) == PRE_MODIFY)
17951 {
17952 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17953 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
17954 asm_fprintf (stream, "#%wd]!",
17955 INTVAL (XEXP (XEXP (x, 1), 1)));
17956 else
17957 asm_fprintf (stream, "%r]!",
17958 REGNO (XEXP (XEXP (x, 1), 1)));
17959 }
17960 else if (GET_CODE (x) == POST_MODIFY)
17961 {
17962 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17963 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
17964 asm_fprintf (stream, "#%wd",
17965 INTVAL (XEXP (XEXP (x, 1), 1)));
17966 else
17967 asm_fprintf (stream, "%r",
17968 REGNO (XEXP (XEXP (x, 1), 1)));
17969 }
17970 else output_addr_const (stream, x);
17971 }
17972 else
17973 {
17974 if (REG_P (x))
17975 asm_fprintf (stream, "[%r]", REGNO (x));
17976 else if (GET_CODE (x) == POST_INC)
17977 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17978 else if (GET_CODE (x) == PLUS)
17979 {
17980 gcc_assert (REG_P (XEXP (x, 0)));
17981 if (CONST_INT_P (XEXP (x, 1)))
17982 asm_fprintf (stream, "[%r, #%wd]",
17983 REGNO (XEXP (x, 0)),
17984 INTVAL (XEXP (x, 1)));
17985 else
17986 asm_fprintf (stream, "[%r, %r]",
17987 REGNO (XEXP (x, 0)),
17988 REGNO (XEXP (x, 1)));
17989 }
17990 else
17991 output_addr_const (stream, x);
17992 }
17993 }
17994 \f
17995 /* Target hook for indicating whether a punctuation character for
17996 TARGET_PRINT_OPERAND is valid. */
17997 static bool
17998 arm_print_operand_punct_valid_p (unsigned char code)
17999 {
18000 return (code == '@' || code == '|' || code == '.'
18001 || code == '(' || code == ')' || code == '#'
18002 || (TARGET_32BIT && (code == '?'))
18003 || (TARGET_THUMB2 && (code == '!'))
18004 || (TARGET_THUMB && (code == '_')));
18005 }
18006 \f
18007 /* Target hook for assembling integer objects. The ARM version needs to
18008 handle word-sized values specially. */
18009 static bool
18010 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
18011 {
18012 enum machine_mode mode;
18013
18014 if (size == UNITS_PER_WORD && aligned_p)
18015 {
18016 fputs ("\t.word\t", asm_out_file);
18017 output_addr_const (asm_out_file, x);
18018
18019 /* Mark symbols as position independent. We only do this in the
18020 .text segment, not in the .data segment. */
18021 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
18022 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
18023 {
18024 /* See legitimize_pic_address for an explanation of the
18025 TARGET_VXWORKS_RTP check. */
18026 if (TARGET_VXWORKS_RTP
18027 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
18028 fputs ("(GOT)", asm_out_file);
18029 else
18030 fputs ("(GOTOFF)", asm_out_file);
18031 }
18032 fputc ('\n', asm_out_file);
18033 return true;
18034 }
18035
18036 mode = GET_MODE (x);
18037
18038 if (arm_vector_mode_supported_p (mode))
18039 {
18040 int i, units;
18041
18042 gcc_assert (GET_CODE (x) == CONST_VECTOR);
18043
18044 units = CONST_VECTOR_NUNITS (x);
18045 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
18046
18047 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
18048 for (i = 0; i < units; i++)
18049 {
18050 rtx elt = CONST_VECTOR_ELT (x, i);
18051 assemble_integer
18052 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
18053 }
18054 else
18055 for (i = 0; i < units; i++)
18056 {
18057 rtx elt = CONST_VECTOR_ELT (x, i);
18058 REAL_VALUE_TYPE rval;
18059
18060 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
18061
18062 assemble_real
18063 (rval, GET_MODE_INNER (mode),
18064 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
18065 }
18066
18067 return true;
18068 }
18069
18070 return default_assemble_integer (x, size, aligned_p);
18071 }
18072
18073 static void
18074 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
18075 {
18076 section *s;
18077
18078 if (!TARGET_AAPCS_BASED)
18079 {
18080 (is_ctor ?
18081 default_named_section_asm_out_constructor
18082 : default_named_section_asm_out_destructor) (symbol, priority);
18083 return;
18084 }
18085
18086 /* Put these in the .init_array section, using a special relocation. */
18087 if (priority != DEFAULT_INIT_PRIORITY)
18088 {
18089 char buf[18];
18090 sprintf (buf, "%s.%.5u",
18091 is_ctor ? ".init_array" : ".fini_array",
18092 priority);
18093 s = get_section (buf, SECTION_WRITE, NULL_TREE);
18094 }
18095 else if (is_ctor)
18096 s = ctors_section;
18097 else
18098 s = dtors_section;
18099
18100 switch_to_section (s);
18101 assemble_align (POINTER_SIZE);
18102 fputs ("\t.word\t", asm_out_file);
18103 output_addr_const (asm_out_file, symbol);
18104 fputs ("(target1)\n", asm_out_file);
18105 }
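/* Editor's note: a worked example of the priority handling above.  A
   constructor registered with priority 42 lands in a section named
   ".init_array.00042"; the "%.5u" keeps the numeric suffix fixed width so
   that lexicographic section-name order matches numeric priority order.
   The default priority falls back to the generic ctors_section /
   dtors_section instead.  */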
18106
18107 /* Add a function to the list of static constructors. */
18108
18109 static void
18110 arm_elf_asm_constructor (rtx symbol, int priority)
18111 {
18112 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
18113 }
18114
18115 /* Add a function to the list of static destructors. */
18116
18117 static void
18118 arm_elf_asm_destructor (rtx symbol, int priority)
18119 {
18120 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
18121 }
18122 \f
18123 /* A finite state machine takes care of noticing whether or not instructions
18124 can be conditionally executed, thus decreasing execution time and code
18125 size by deleting branch instructions. The fsm is controlled by
18126 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
18127
18128 /* The states of the fsm controlling condition codes are:
18129 0: normal, do nothing special
18130 1: make ASM_OUTPUT_OPCODE not output this instruction
18131 2: make ASM_OUTPUT_OPCODE not output this instruction
18132 3: make instructions conditional
18133 4: make instructions conditional
18134
18135 State transitions (state->state by whom under condition):
18136 0 -> 1 final_prescan_insn if the `target' is a label
18137 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
18138 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
18139 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
18140 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
18141 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
18142 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
18143 (the target insn is arm_target_insn).
18144
18145 If the jump clobbers the conditions then we use states 2 and 4.
18146
18147 A similar thing can be done with conditional return insns.
18148
18149 XXX In case the `target' is an unconditional branch, this conditionalising
18150 of the instructions always reduces code size, but not always execution
18151 time. But then, I want to reduce the code size to somewhere near what
18152 /bin/cc produces. */
18153
18154 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
18155 instructions. When a COND_EXEC instruction is seen the subsequent
18156 instructions are scanned so that multiple conditional instructions can be
18157 combined into a single IT block. arm_condexec_count and arm_condexec_mask
18158 specify the length and true/false mask for the IT block. These will be
18159 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
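/* Editor's note: a hypothetical illustration of the transformation driven
   by this FSM.  For source like "if (a == 0) b = 1;" (with a in r0 and b in
   r1, both hypothetical) the compiler would normally emit a conditional
   branch around the assignment:

	cmp	r0, #0
	bne	.L1
	mov	r1, #1
   .L1:

   Once final_prescan_insn spots the branch (state 0 -> 1) and
   ASM_OUTPUT_OPCODE suppresses it (state 1 -> 3), the intervening
   instruction is printed with the inverse condition instead:

	cmp	r0, #0
	moveq	r1, #1

   saving both the branch and the label.  */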
18160
18161 /* Returns the index of the ARM condition code string in
18162 `arm_condition_codes', or ARM_NV if the comparison is invalid.
18163 COMPARISON should be an rtx like `(eq (...) (...))'. */
18164
18165 enum arm_cond_code
18166 maybe_get_arm_condition_code (rtx comparison)
18167 {
18168 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
18169 enum arm_cond_code code;
18170 enum rtx_code comp_code = GET_CODE (comparison);
18171
18172 if (GET_MODE_CLASS (mode) != MODE_CC)
18173 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
18174 XEXP (comparison, 1));
18175
18176 switch (mode)
18177 {
18178 case CC_DNEmode: code = ARM_NE; goto dominance;
18179 case CC_DEQmode: code = ARM_EQ; goto dominance;
18180 case CC_DGEmode: code = ARM_GE; goto dominance;
18181 case CC_DGTmode: code = ARM_GT; goto dominance;
18182 case CC_DLEmode: code = ARM_LE; goto dominance;
18183 case CC_DLTmode: code = ARM_LT; goto dominance;
18184 case CC_DGEUmode: code = ARM_CS; goto dominance;
18185 case CC_DGTUmode: code = ARM_HI; goto dominance;
18186 case CC_DLEUmode: code = ARM_LS; goto dominance;
18187 case CC_DLTUmode: code = ARM_CC;
18188
18189 dominance:
18190 if (comp_code == EQ)
18191 return ARM_INVERSE_CONDITION_CODE (code);
18192 if (comp_code == NE)
18193 return code;
18194 return ARM_NV;
18195
18196 case CC_NOOVmode:
18197 switch (comp_code)
18198 {
18199 case NE: return ARM_NE;
18200 case EQ: return ARM_EQ;
18201 case GE: return ARM_PL;
18202 case LT: return ARM_MI;
18203 default: return ARM_NV;
18204 }
18205
18206 case CC_Zmode:
18207 switch (comp_code)
18208 {
18209 case NE: return ARM_NE;
18210 case EQ: return ARM_EQ;
18211 default: return ARM_NV;
18212 }
18213
18214 case CC_Nmode:
18215 switch (comp_code)
18216 {
18217 case NE: return ARM_MI;
18218 case EQ: return ARM_PL;
18219 default: return ARM_NV;
18220 }
18221
18222 case CCFPEmode:
18223 case CCFPmode:
18224 /* We can handle all cases except UNEQ and LTGT. */
18225 switch (comp_code)
18226 {
18227 case GE: return ARM_GE;
18228 case GT: return ARM_GT;
18229 case LE: return ARM_LS;
18230 case LT: return ARM_MI;
18231 case NE: return ARM_NE;
18232 case EQ: return ARM_EQ;
18233 case ORDERED: return ARM_VC;
18234 case UNORDERED: return ARM_VS;
18235 case UNLT: return ARM_LT;
18236 case UNLE: return ARM_LE;
18237 case UNGT: return ARM_HI;
18238 case UNGE: return ARM_PL;
18239 /* UNEQ and LTGT do not have a representation. */
18240 case UNEQ: /* Fall through. */
18241 case LTGT: /* Fall through. */
18242 default: return ARM_NV;
18243 }
18244
18245 case CC_SWPmode:
18246 switch (comp_code)
18247 {
18248 case NE: return ARM_NE;
18249 case EQ: return ARM_EQ;
18250 case GE: return ARM_LE;
18251 case GT: return ARM_LT;
18252 case LE: return ARM_GE;
18253 case LT: return ARM_GT;
18254 case GEU: return ARM_LS;
18255 case GTU: return ARM_CC;
18256 case LEU: return ARM_CS;
18257 case LTU: return ARM_HI;
18258 default: return ARM_NV;
18259 }
18260
18261 case CC_Cmode:
18262 switch (comp_code)
18263 {
18264 case LTU: return ARM_CS;
18265 case GEU: return ARM_CC;
18266 default: return ARM_NV;
18267 }
18268
18269 case CC_CZmode:
18270 switch (comp_code)
18271 {
18272 case NE: return ARM_NE;
18273 case EQ: return ARM_EQ;
18274 case GEU: return ARM_CS;
18275 case GTU: return ARM_HI;
18276 case LEU: return ARM_LS;
18277 case LTU: return ARM_CC;
18278 default: return ARM_NV;
18279 }
18280
18281 case CC_NCVmode:
18282 switch (comp_code)
18283 {
18284 case GE: return ARM_GE;
18285 case LT: return ARM_LT;
18286 case GEU: return ARM_CS;
18287 case LTU: return ARM_CC;
18288 default: return ARM_NV;
18289 }
18290
18291 case CCmode:
18292 switch (comp_code)
18293 {
18294 case NE: return ARM_NE;
18295 case EQ: return ARM_EQ;
18296 case GE: return ARM_GE;
18297 case GT: return ARM_GT;
18298 case LE: return ARM_LE;
18299 case LT: return ARM_LT;
18300 case GEU: return ARM_CS;
18301 case GTU: return ARM_HI;
18302 case LEU: return ARM_LS;
18303 case LTU: return ARM_CC;
18304 default: return ARM_NV;
18305 }
18306
18307 default: gcc_unreachable ();
18308 }
18309 }
18310
18311 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
18312 static enum arm_cond_code
18313 get_arm_condition_code (rtx comparison)
18314 {
18315 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
18316 gcc_assert (code != ARM_NV);
18317 return code;
18318 }
18319
18320 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
18321 instructions. */
18322 void
18323 thumb2_final_prescan_insn (rtx insn)
18324 {
18325 rtx first_insn = insn;
18326 rtx body = PATTERN (insn);
18327 rtx predicate;
18328 enum arm_cond_code code;
18329 int n;
18330 int mask;
18331
18332 /* Remove the previous insn from the count of insns to be output. */
18333 if (arm_condexec_count)
18334 arm_condexec_count--;
18335
18336 /* Nothing to do if we are already inside a conditional block. */
18337 if (arm_condexec_count)
18338 return;
18339
18340 if (GET_CODE (body) != COND_EXEC)
18341 return;
18342
18343 /* Conditional jumps are implemented directly. */
18344 if (JUMP_P (insn))
18345 return;
18346
18347 predicate = COND_EXEC_TEST (body);
18348 arm_current_cc = get_arm_condition_code (predicate);
18349
18350 n = get_attr_ce_count (insn);
18351 arm_condexec_count = 1;
18352 arm_condexec_mask = (1 << n) - 1;
18353 arm_condexec_masklen = n;
18354 /* See if subsequent instructions can be combined into the same block. */
18355 for (;;)
18356 {
18357 insn = next_nonnote_insn (insn);
18358
18359 /* Jumping into the middle of an IT block is illegal, so a label or
18360 barrier terminates the block. */
18361 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
18362 break;
18363
18364 body = PATTERN (insn);
18365 /* USE and CLOBBER aren't really insns, so just skip them. */
18366 if (GET_CODE (body) == USE
18367 || GET_CODE (body) == CLOBBER)
18368 continue;
18369
18370 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
18371 if (GET_CODE (body) != COND_EXEC)
18372 break;
18373 /* Allow up to 4 conditionally executed instructions in a block. */
18374 n = get_attr_ce_count (insn);
18375 if (arm_condexec_masklen + n > 4)
18376 break;
18377
18378 predicate = COND_EXEC_TEST (body);
18379 code = get_arm_condition_code (predicate);
18380 mask = (1 << n) - 1;
18381 if (arm_current_cc == code)
18382 arm_condexec_mask |= (mask << arm_condexec_masklen);
18383 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
18384 break;
18385
18386 arm_condexec_count++;
18387 arm_condexec_masklen += n;
18388
18389 /* A jump must be the last instruction in a conditional block. */
18390 if (JUMP_P (insn))
18391 break;
18392 }
18393 /* Restore recog_data (getting the attributes of other insns can
18394 destroy this array, but final.c assumes that it remains intact
18395 across this call). */
18396 extract_constrain_insn_cached (first_insn);
18397 }
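/* A sketch of the bookkeeping above (illustrative only; assumes each
   COND_EXEC insn has a ce_count of 1): if the first insn of a block is
   predicated on EQ and the next on the inverse condition NE, the code sets
   arm_current_cc = ARM_EQ, arm_condexec_mask = 0b1 and
   arm_condexec_masklen = 1, then leaves the NE insn's mask bit clear and
   bumps the length to 2.  The pending IT prefix for this state is emitted
   by thumb2_asm_output_opcode below.  */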
18398
18399 void
18400 arm_final_prescan_insn (rtx insn)
18401 {
18402 /* BODY will hold the body of INSN. */
18403 rtx body = PATTERN (insn);
18404
18405 /* This will be 1 if trying to repeat the trick, and things need to be
18406 reversed if it appears to fail. */
18407 int reverse = 0;
18408
18409 /* If we start with a return insn, we only succeed if we find another one. */
18410 int seeking_return = 0;
18411 enum rtx_code return_code = UNKNOWN;
18412
18413 /* START_INSN will hold the insn from where we start looking. This is the
18414 first insn after the following code_label if REVERSE is true. */
18415 rtx start_insn = insn;
18416
18417 /* If in state 4, check if the target branch is reached, in order to
18418 change back to state 0. */
18419 if (arm_ccfsm_state == 4)
18420 {
18421 if (insn == arm_target_insn)
18422 {
18423 arm_target_insn = NULL;
18424 arm_ccfsm_state = 0;
18425 }
18426 return;
18427 }
18428
18429 /* If in state 3, it is possible to repeat the trick, if this insn is an
18430 unconditional branch to a label, and immediately following this branch
18431 is the previous target label which is only used once, and the label this
18432 branch jumps to is not too far off. */
18433 if (arm_ccfsm_state == 3)
18434 {
18435 if (simplejump_p (insn))
18436 {
18437 start_insn = next_nonnote_insn (start_insn);
18438 if (BARRIER_P (start_insn))
18439 {
18440 /* XXX Isn't this always a barrier? */
18441 start_insn = next_nonnote_insn (start_insn);
18442 }
18443 if (LABEL_P (start_insn)
18444 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18445 && LABEL_NUSES (start_insn) == 1)
18446 reverse = TRUE;
18447 else
18448 return;
18449 }
18450 else if (ANY_RETURN_P (body))
18451 {
18452 start_insn = next_nonnote_insn (start_insn);
18453 if (BARRIER_P (start_insn))
18454 start_insn = next_nonnote_insn (start_insn);
18455 if (LABEL_P (start_insn)
18456 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18457 && LABEL_NUSES (start_insn) == 1)
18458 {
18459 reverse = TRUE;
18460 seeking_return = 1;
18461 return_code = GET_CODE (body);
18462 }
18463 else
18464 return;
18465 }
18466 else
18467 return;
18468 }
18469
18470 gcc_assert (!arm_ccfsm_state || reverse);
18471 if (!JUMP_P (insn))
18472 return;
18473
18474 /* This jump might be paralleled with a clobber of the condition codes;
18475 the jump should always come first.  */
18476 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18477 body = XVECEXP (body, 0, 0);
18478
18479 if (reverse
18480 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18481 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18482 {
18483 int insns_skipped;
18484 int fail = FALSE, succeed = FALSE;
18485 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18486 int then_not_else = TRUE;
18487 rtx this_insn = start_insn, label = 0;
18488
18489 /* Register the insn jumped to. */
18490 if (reverse)
18491 {
18492 if (!seeking_return)
18493 label = XEXP (SET_SRC (body), 0);
18494 }
18495 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18496 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18497 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18498 {
18499 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18500 then_not_else = FALSE;
18501 }
18502 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18503 {
18504 seeking_return = 1;
18505 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18506 }
18507 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18508 {
18509 seeking_return = 1;
18510 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18511 then_not_else = FALSE;
18512 }
18513 else
18514 gcc_unreachable ();
18515
18516 /* See how many insns this branch skips, and what kind of insns. If all
18517 insns are okay, and the label or unconditional branch to the same
18518 label is not too far away, succeed. */
18519 for (insns_skipped = 0;
18520 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18521 {
18522 rtx scanbody;
18523
18524 this_insn = next_nonnote_insn (this_insn);
18525 if (!this_insn)
18526 break;
18527
18528 switch (GET_CODE (this_insn))
18529 {
18530 case CODE_LABEL:
18531 /* Succeed if it is the target label, otherwise fail since
18532 control falls in from somewhere else. */
18533 if (this_insn == label)
18534 {
18535 arm_ccfsm_state = 1;
18536 succeed = TRUE;
18537 }
18538 else
18539 fail = TRUE;
18540 break;
18541
18542 case BARRIER:
18543 /* Succeed if the following insn is the target label.
18544 Otherwise fail.
18545 If return insns are used then the last insn in a function
18546 will be a barrier. */
18547 this_insn = next_nonnote_insn (this_insn);
18548 if (this_insn && this_insn == label)
18549 {
18550 arm_ccfsm_state = 1;
18551 succeed = TRUE;
18552 }
18553 else
18554 fail = TRUE;
18555 break;
18556
18557 case CALL_INSN:
18558 /* The AAPCS says that conditional calls should not be
18559 used since they make interworking inefficient (the
18560 linker can't transform BL<cond> into BLX). That's
18561 only a problem if the machine has BLX. */
18562 if (arm_arch5)
18563 {
18564 fail = TRUE;
18565 break;
18566 }
18567
18568 /* Succeed if the following insn is the target label, or
18569 if the following two insns are a barrier and the
18570 target label. */
18571 this_insn = next_nonnote_insn (this_insn);
18572 if (this_insn && BARRIER_P (this_insn))
18573 this_insn = next_nonnote_insn (this_insn);
18574
18575 if (this_insn && this_insn == label
18576 && insns_skipped < max_insns_skipped)
18577 {
18578 arm_ccfsm_state = 1;
18579 succeed = TRUE;
18580 }
18581 else
18582 fail = TRUE;
18583 break;
18584
18585 case JUMP_INSN:
18586 /* If this is an unconditional branch to the same label, succeed.
18587 If it is to another label, do nothing. If it is conditional,
18588 fail. */
18589 /* XXX Probably, the tests for SET and the PC are
18590 unnecessary. */
18591
18592 scanbody = PATTERN (this_insn);
18593 if (GET_CODE (scanbody) == SET
18594 && GET_CODE (SET_DEST (scanbody)) == PC)
18595 {
18596 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18597 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18598 {
18599 arm_ccfsm_state = 2;
18600 succeed = TRUE;
18601 }
18602 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18603 fail = TRUE;
18604 }
18605 /* Fail if a conditional return is undesirable (e.g. on a
18606 StrongARM), but still allow this if optimizing for size. */
18607 else if (GET_CODE (scanbody) == return_code
18608 && !use_return_insn (TRUE, NULL)
18609 && !optimize_size)
18610 fail = TRUE;
18611 else if (GET_CODE (scanbody) == return_code)
18612 {
18613 arm_ccfsm_state = 2;
18614 succeed = TRUE;
18615 }
18616 else if (GET_CODE (scanbody) == PARALLEL)
18617 {
18618 switch (get_attr_conds (this_insn))
18619 {
18620 case CONDS_NOCOND:
18621 break;
18622 default:
18623 fail = TRUE;
18624 break;
18625 }
18626 }
18627 else
18628 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18629
18630 break;
18631
18632 case INSN:
18633 /* Instructions using or affecting the condition codes make it
18634 fail. */
18635 scanbody = PATTERN (this_insn);
18636 if (!(GET_CODE (scanbody) == SET
18637 || GET_CODE (scanbody) == PARALLEL)
18638 || get_attr_conds (this_insn) != CONDS_NOCOND)
18639 fail = TRUE;
18640 break;
18641
18642 default:
18643 break;
18644 }
18645 }
18646 if (succeed)
18647 {
18648 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18649 arm_target_label = CODE_LABEL_NUMBER (label);
18650 else
18651 {
18652 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18653
18654 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18655 {
18656 this_insn = next_nonnote_insn (this_insn);
18657 gcc_assert (!this_insn
18658 || (!BARRIER_P (this_insn)
18659 && !LABEL_P (this_insn)));
18660 }
18661 if (!this_insn)
18662 {
18663 /* Oh, dear!  We ran off the end; give up.  */
18664 extract_constrain_insn_cached (insn);
18665 arm_ccfsm_state = 0;
18666 arm_target_insn = NULL;
18667 return;
18668 }
18669 arm_target_insn = this_insn;
18670 }
18671
18672 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18673 what it was. */
18674 if (!reverse)
18675 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18676
18677 if (reverse || then_not_else)
18678 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18679 }
18680
18681 /* Restore recog_data (getting the attributes of other insns can
18682 destroy this array, but final.c assumes that it remains intact
18683 across this call).  */
18684 extract_constrain_insn_cached (insn);
18685 }
18686 }
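/* A sketch of the transformation this state machine enables (illustrative
   only; register numbers are arbitrary).  Given a fragment such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
	mov	r2, #0
   .L1:

   the conditional branch is seen to skip only a couple of safe insns, so
   final is told to predicate them instead of branching:

	cmp	r0, #0
	addne	r1, r1, #1
	movne	r2, #0

   arm_current_cc is set to the condition under which the skipped insns
   execute (here NE, the inverse of the branch condition), and
   arm_target_label / arm_target_insn record where unconditional output
   resumes.  */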
18687
18688 /* Output IT instructions. */
18689 void
18690 thumb2_asm_output_opcode (FILE * stream)
18691 {
18692 char buff[5];
18693 int n;
18694
18695 if (arm_condexec_mask)
18696 {
18697 for (n = 0; n < arm_condexec_masklen; n++)
18698 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18699 buff[n] = 0;
18700 asm_fprintf(stream, "i%s\t%s\n\t", buff,
18701 arm_condition_codes[arm_current_cc]);
18702 arm_condexec_mask = 0;
18703 }
18704 }
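/* For reference (a sketch, not an exhaustive table): with
   arm_current_cc == ARM_EQ, a mask/masklen of 0b1/1 prints "it eq",
   0b11/2 prints "itt eq" and 0b01/2 prints "ite eq"; bit n of the mask
   selects 't' (same condition) or 'e' (inverse condition) for instruction
   n of the block, counting from zero.  */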
18705
18706 /* Returns true if REGNO is a valid register
18707 for holding a quantity of type MODE. */
18708 int
18709 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18710 {
18711 if (GET_MODE_CLASS (mode) == MODE_CC)
18712 return (regno == CC_REGNUM
18713 || (TARGET_HARD_FLOAT && TARGET_VFP
18714 && regno == VFPCC_REGNUM));
18715
18716 if (TARGET_THUMB1)
18717 /* For the Thumb we only allow values bigger than SImode in
18718 registers 0 - 6, so that there is always a second low
18719 register available to hold the upper part of the value.
18720 We probably ought to ensure that the register is the
18721 start of an even-numbered register pair.  */
18722 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18723
18724 if (TARGET_HARD_FLOAT && TARGET_VFP
18725 && IS_VFP_REGNUM (regno))
18726 {
18727 if (mode == SFmode || mode == SImode)
18728 return VFP_REGNO_OK_FOR_SINGLE (regno);
18729
18730 if (mode == DFmode)
18731 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18732
18733 /* VFP registers can hold HFmode values, but there is no point in
18734 putting them there unless we have hardware conversion insns. */
18735 if (mode == HFmode)
18736 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18737
18738 if (TARGET_NEON)
18739 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18740 || (VALID_NEON_QREG_MODE (mode)
18741 && NEON_REGNO_OK_FOR_QUAD (regno))
18742 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18743 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18744 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18745 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18746 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18747
18748 return FALSE;
18749 }
18750
18751 if (TARGET_REALLY_IWMMXT)
18752 {
18753 if (IS_IWMMXT_GR_REGNUM (regno))
18754 return mode == SImode;
18755
18756 if (IS_IWMMXT_REGNUM (regno))
18757 return VALID_IWMMXT_REG_MODE (mode);
18758 }
18759
18760 /* We allow almost any value to be stored in the general registers.
18761 Restrict doubleword quantities to even register pairs so that we can
18762 use ldrd. Do not allow very large Neon structure opaque modes in
18763 general registers; they would use too many. */
18764 if (regno <= LAST_ARM_REGNUM)
18765 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18766 && ARM_NUM_REGS (mode) <= 4;
18767
18768 if (regno == FRAME_POINTER_REGNUM
18769 || regno == ARG_POINTER_REGNUM)
18770 /* We only allow integers in the fake hard registers. */
18771 return GET_MODE_CLASS (mode) == MODE_INT;
18772
18773 return FALSE;
18774 }
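/* For example (an illustrative note, not an additional rule): with
   TARGET_LDRD a DImode value occupies two words, so it is accepted in r0
   or r2 but rejected in r1, keeping doubleword quantities on even register
   pairs for ldrd/strd; a CImode NEON structure needs six core registers
   and is therefore never placed in them (ARM_NUM_REGS gives 6 > 4).  */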
18775
18776 /* Implement MODES_TIEABLE_P. */
18777
18778 bool
18779 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18780 {
18781 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18782 return true;
18783
18784 /* We specifically want to allow elements of "structure" modes to
18785 be tieable to the structure. This more general condition allows
18786 other rarer situations too. */
18787 if (TARGET_NEON
18788 && (VALID_NEON_DREG_MODE (mode1)
18789 || VALID_NEON_QREG_MODE (mode1)
18790 || VALID_NEON_STRUCT_MODE (mode1))
18791 && (VALID_NEON_DREG_MODE (mode2)
18792 || VALID_NEON_QREG_MODE (mode2)
18793 || VALID_NEON_STRUCT_MODE (mode2)))
18794 return true;
18795
18796 return false;
18797 }
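/* For example (illustrative only): with TARGET_NEON, V4SImode (one quad
   vector) is tieable with OImode (an opaque structure mode covering four
   D registers), so a vector held inside such a structure can be accessed
   in place without first copying the structure value.  */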
18798
18799 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
18800 not used in arm mode. */
18801
18802 enum reg_class
18803 arm_regno_class (int regno)
18804 {
18805 if (TARGET_THUMB1)
18806 {
18807 if (regno == STACK_POINTER_REGNUM)
18808 return STACK_REG;
18809 if (regno == CC_REGNUM)
18810 return CC_REG;
18811 if (regno < 8)
18812 return LO_REGS;
18813 return HI_REGS;
18814 }
18815
18816 if (TARGET_THUMB2 && regno < 8)
18817 return LO_REGS;
18818
18819 if ( regno <= LAST_ARM_REGNUM
18820 || regno == FRAME_POINTER_REGNUM
18821 || regno == ARG_POINTER_REGNUM)
18822 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18823
18824 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18825 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18826
18827 if (IS_VFP_REGNUM (regno))
18828 {
18829 if (regno <= D7_VFP_REGNUM)
18830 return VFP_D0_D7_REGS;
18831 else if (regno <= LAST_LO_VFP_REGNUM)
18832 return VFP_LO_REGS;
18833 else
18834 return VFP_HI_REGS;
18835 }
18836
18837 if (IS_IWMMXT_REGNUM (regno))
18838 return IWMMXT_REGS;
18839
18840 if (IS_IWMMXT_GR_REGNUM (regno))
18841 return IWMMXT_GR_REGS;
18842
18843 return NO_REGS;
18844 }
18845
18846 /* Handle a special case when computing the offset
18847 of an argument from the frame pointer. */
18848 int
18849 arm_debugger_arg_offset (int value, rtx addr)
18850 {
18851 rtx insn;
18852
18853 /* We are only interested if dbxout_parms() failed to compute the offset. */
18854 if (value != 0)
18855 return 0;
18856
18857 /* We can only cope with the case where the address is held in a register. */
18858 if (!REG_P (addr))
18859 return 0;
18860
18861 /* If we are using the frame pointer to point at the argument, then
18862 an offset of 0 is correct. */
18863 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18864 return 0;
18865
18866 /* If we are using the stack pointer to point at the
18867 argument, then an offset of 0 is correct. */
18868 /* ??? Check this is consistent with thumb2 frame layout. */
18869 if ((TARGET_THUMB || !frame_pointer_needed)
18870 && REGNO (addr) == SP_REGNUM)
18871 return 0;
18872
18873 /* Oh dear. The argument is pointed to by a register rather
18874 than being held in a register, or being stored at a known
18875 offset from the frame pointer. Since GDB only understands
18876 those two kinds of argument we must translate the address
18877 held in the register into an offset from the frame pointer.
18878 We do this by searching through the insns for the function
18879 looking to see where this register gets its value. If the
18880 register is initialized from the frame pointer plus an offset
18881 then we are in luck and we can continue, otherwise we give up.
18882
18883 This code is exercised by producing debugging information
18884 for a function with arguments like this:
18885
18886 double func (double a, double b, int c, double d) {return d;}
18887
18888 Without this code the stab for parameter 'd' will be set to
18889 an offset of 0 from the frame pointer, rather than 8. */
18890
18891 /* The if() statement says:
18892
18893 If the insn is a normal instruction
18894 and if the insn is setting the value in a register
18895 and if the register being set is the register holding the address of the argument
18896 and if the address is computed by an addition
18897 that involves adding to a register
18898 which is the frame pointer
18899 a constant integer
18900
18901 then... */
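/* For instance (a sketch only; the exact insn will vary), when ADDR is
   register r4 the loop below would match an insn of the form

     (set (reg:SI r4)
	  (plus:SI (reg:SI fp) (const_int 8)))

   and report 8 as the argument's offset from the frame pointer.  */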
18902
18903 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18904 {
18905 if ( NONJUMP_INSN_P (insn)
18906 && GET_CODE (PATTERN (insn)) == SET
18907 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18908 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18909 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
18910 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18911 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
18912 )
18913 {
18914 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18915
18916 break;
18917 }
18918 }
18919
18920 if (value == 0)
18921 {
18922 debug_rtx (addr);
18923 warning (0, "unable to compute real location of stacked parameter");
18924 value = 8; /* XXX magic hack */
18925 }
18926
18927 return value;
18928 }
18929 \f
18930 typedef enum {
18931 T_V8QI,
18932 T_V4HI,
18933 T_V2SI,
18934 T_V2SF,
18935 T_DI,
18936 T_V16QI,
18937 T_V8HI,
18938 T_V4SI,
18939 T_V4SF,
18940 T_V2DI,
18941 T_TI,
18942 T_EI,
18943 T_OI,
18944 T_MAX /* Size of enum. Keep last. */
18945 } neon_builtin_type_mode;
18946
18947 #define TYPE_MODE_BIT(X) (1 << (X))
18948
18949 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18950 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18951 | TYPE_MODE_BIT (T_DI))
18952 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18953 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18954 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18955
18956 #define v8qi_UP T_V8QI
18957 #define v4hi_UP T_V4HI
18958 #define v2si_UP T_V2SI
18959 #define v2sf_UP T_V2SF
18960 #define di_UP T_DI
18961 #define v16qi_UP T_V16QI
18962 #define v8hi_UP T_V8HI
18963 #define v4si_UP T_V4SI
18964 #define v4sf_UP T_V4SF
18965 #define v2di_UP T_V2DI
18966 #define ti_UP T_TI
18967 #define ei_UP T_EI
18968 #define oi_UP T_OI
18969
18970 #define UP(X) X##_UP
18971
18972 typedef enum {
18973 NEON_BINOP,
18974 NEON_TERNOP,
18975 NEON_UNOP,
18976 NEON_GETLANE,
18977 NEON_SETLANE,
18978 NEON_CREATE,
18979 NEON_DUP,
18980 NEON_DUPLANE,
18981 NEON_COMBINE,
18982 NEON_SPLIT,
18983 NEON_LANEMUL,
18984 NEON_LANEMULL,
18985 NEON_LANEMULH,
18986 NEON_LANEMAC,
18987 NEON_SCALARMUL,
18988 NEON_SCALARMULL,
18989 NEON_SCALARMULH,
18990 NEON_SCALARMAC,
18991 NEON_CONVERT,
18992 NEON_FIXCONV,
18993 NEON_SELECT,
18994 NEON_RESULTPAIR,
18995 NEON_REINTERP,
18996 NEON_VTBL,
18997 NEON_VTBX,
18998 NEON_LOAD1,
18999 NEON_LOAD1LANE,
19000 NEON_STORE1,
19001 NEON_STORE1LANE,
19002 NEON_LOADSTRUCT,
19003 NEON_LOADSTRUCTLANE,
19004 NEON_STORESTRUCT,
19005 NEON_STORESTRUCTLANE,
19006 NEON_LOGICBINOP,
19007 NEON_SHIFTINSERT,
19008 NEON_SHIFTIMM,
19009 NEON_SHIFTACC
19010 } neon_itype;
19011
19012 typedef struct {
19013 const char *name;
19014 const neon_itype itype;
19015 const neon_builtin_type_mode mode;
19016 const enum insn_code code;
19017 unsigned int fcode;
19018 } neon_builtin_datum;
19019
19020 #define CF(N,X) CODE_FOR_neon_##N##X
19021
19022 #define VAR1(T, N, A) \
19023 {#N, NEON_##T, UP (A), CF (N, A), 0}
19024 #define VAR2(T, N, A, B) \
19025 VAR1 (T, N, A), \
19026 {#N, NEON_##T, UP (B), CF (N, B), 0}
19027 #define VAR3(T, N, A, B, C) \
19028 VAR2 (T, N, A, B), \
19029 {#N, NEON_##T, UP (C), CF (N, C), 0}
19030 #define VAR4(T, N, A, B, C, D) \
19031 VAR3 (T, N, A, B, C), \
19032 {#N, NEON_##T, UP (D), CF (N, D), 0}
19033 #define VAR5(T, N, A, B, C, D, E) \
19034 VAR4 (T, N, A, B, C, D), \
19035 {#N, NEON_##T, UP (E), CF (N, E), 0}
19036 #define VAR6(T, N, A, B, C, D, E, F) \
19037 VAR5 (T, N, A, B, C, D, E), \
19038 {#N, NEON_##T, UP (F), CF (N, F), 0}
19039 #define VAR7(T, N, A, B, C, D, E, F, G) \
19040 VAR6 (T, N, A, B, C, D, E, F), \
19041 {#N, NEON_##T, UP (G), CF (N, G), 0}
19042 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
19043 VAR7 (T, N, A, B, C, D, E, F, G), \
19044 {#N, NEON_##T, UP (H), CF (N, H), 0}
19045 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
19046 VAR8 (T, N, A, B, C, D, E, F, G, H), \
19047 {#N, NEON_##T, UP (I), CF (N, I), 0}
19048 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
19049 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
19050 {#N, NEON_##T, UP (J), CF (N, J), 0}
19051
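/* As an illustration of the expansion (a sketch, using an entry that
   appears in the table below):

     VAR2 (TERNOP, vfma, v2sf, v4sf)

   expands to the two initializers

     {"vfma", NEON_TERNOP, T_V2SF, CODE_FOR_neon_vfmav2sf, 0},
     {"vfma", NEON_TERNOP, T_V4SF, CODE_FOR_neon_vfmav4sf, 0}

   with the fcode field filled in later by arm_init_neon_builtins.  */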
19052 /* The mode entries in the following table correspond to the "key" type of the
19053 instruction variant, i.e. equivalent to that which would be specified after
19054 the assembler mnemonic, which usually refers to the last vector operand.
19055 (Signed/unsigned/polynomial types are not differentiated, though; they
19056 are all mapped onto the same mode for a given element size.)  The modes
19057 listed per instruction should be the same as those defined for that
19058 instruction's pattern in neon.md. */
19059
19060 static neon_builtin_datum neon_builtin_data[] =
19061 {
19062 VAR10 (BINOP, vadd,
19063 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19064 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
19065 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
19066 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19067 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19068 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
19069 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19070 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19071 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
19072 VAR2 (TERNOP, vfma, v2sf, v4sf),
19073 VAR2 (TERNOP, vfms, v2sf, v4sf),
19074 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19075 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
19076 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
19077 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
19078 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
19079 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
19080 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
19081 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
19082 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
19083 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
19084 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
19085 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
19086 VAR2 (BINOP, vqdmull, v4hi, v2si),
19087 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19088 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19089 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19090 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
19091 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
19092 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
19093 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19094 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19095 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19096 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
19097 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19098 VAR10 (BINOP, vsub,
19099 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19100 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
19101 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
19102 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19103 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19104 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
19105 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19106 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19107 VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19108 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19109 VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19110 VAR2 (BINOP, vcage, v2sf, v4sf),
19111 VAR2 (BINOP, vcagt, v2sf, v4sf),
19112 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19113 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19114 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
19115 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19116 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
19117 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19118 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19119 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
19120 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19121 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19122 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
19123 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
19124 VAR2 (BINOP, vrecps, v2sf, v4sf),
19125 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
19126 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19127 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
19128 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19129 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19130 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19131 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19132 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19133 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19134 VAR2 (UNOP, vcnt, v8qi, v16qi),
19135 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
19136 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
19137 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
19138 /* FIXME: vget_lane supports more variants than this! */
19139 VAR10 (GETLANE, vget_lane,
19140 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19141 VAR10 (SETLANE, vset_lane,
19142 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19143 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
19144 VAR10 (DUP, vdup_n,
19145 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19146 VAR10 (DUPLANE, vdup_lane,
19147 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19148 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
19149 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
19150 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
19151 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
19152 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
19153 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
19154 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
19155 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19156 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19157 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
19158 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
19159 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19160 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
19161 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
19162 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19163 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19164 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
19165 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
19166 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19167 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
19168 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
19169 VAR10 (BINOP, vext,
19170 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19171 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19172 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
19173 VAR2 (UNOP, vrev16, v8qi, v16qi),
19174 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
19175 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
19176 VAR10 (SELECT, vbsl,
19177 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19178 VAR1 (VTBL, vtbl1, v8qi),
19179 VAR1 (VTBL, vtbl2, v8qi),
19180 VAR1 (VTBL, vtbl3, v8qi),
19181 VAR1 (VTBL, vtbl4, v8qi),
19182 VAR1 (VTBX, vtbx1, v8qi),
19183 VAR1 (VTBX, vtbx2, v8qi),
19184 VAR1 (VTBX, vtbx3, v8qi),
19185 VAR1 (VTBX, vtbx4, v8qi),
19186 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19187 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19188 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
19189 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
19190 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
19191 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
19192 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
19193 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
19194 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
19195 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
19196 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
19197 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
19198 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
19199 VAR10 (LOAD1, vld1,
19200 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19201 VAR10 (LOAD1LANE, vld1_lane,
19202 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19203 VAR10 (LOAD1, vld1_dup,
19204 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19205 VAR10 (STORE1, vst1,
19206 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19207 VAR10 (STORE1LANE, vst1_lane,
19208 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19209 VAR9 (LOADSTRUCT,
19210 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19211 VAR7 (LOADSTRUCTLANE, vld2_lane,
19212 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19213 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
19214 VAR9 (STORESTRUCT, vst2,
19215 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19216 VAR7 (STORESTRUCTLANE, vst2_lane,
19217 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19218 VAR9 (LOADSTRUCT,
19219 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19220 VAR7 (LOADSTRUCTLANE, vld3_lane,
19221 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19222 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
19223 VAR9 (STORESTRUCT, vst3,
19224 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19225 VAR7 (STORESTRUCTLANE, vst3_lane,
19226 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19227 VAR9 (LOADSTRUCT, vld4,
19228 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19229 VAR7 (LOADSTRUCTLANE, vld4_lane,
19230 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19231 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
19232 VAR9 (STORESTRUCT, vst4,
19233 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
19234 VAR7 (STORESTRUCTLANE, vst4_lane,
19235 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
19236 VAR10 (LOGICBINOP, vand,
19237 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19238 VAR10 (LOGICBINOP, vorr,
19239 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19240 VAR10 (BINOP, veor,
19241 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19242 VAR10 (LOGICBINOP, vbic,
19243 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
19244 VAR10 (LOGICBINOP, vorn,
19245 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
19246 };
19247
19248 #undef CF
19249 #undef VAR1
19250 #undef VAR2
19251 #undef VAR3
19252 #undef VAR4
19253 #undef VAR5
19254 #undef VAR6
19255 #undef VAR7
19256 #undef VAR8
19257 #undef VAR9
19258 #undef VAR10
19259
19260 /* Neon defines builtins from ARM_BUILTIN_NEON_BASE upwards, though they don't
19261 have symbolic names defined here (which would require too much duplication).
19262 FIXME? */
19263 enum arm_builtins
19264 {
19265 ARM_BUILTIN_GETWCGR0,
19266 ARM_BUILTIN_GETWCGR1,
19267 ARM_BUILTIN_GETWCGR2,
19268 ARM_BUILTIN_GETWCGR3,
19269
19270 ARM_BUILTIN_SETWCGR0,
19271 ARM_BUILTIN_SETWCGR1,
19272 ARM_BUILTIN_SETWCGR2,
19273 ARM_BUILTIN_SETWCGR3,
19274
19275 ARM_BUILTIN_WZERO,
19276
19277 ARM_BUILTIN_WAVG2BR,
19278 ARM_BUILTIN_WAVG2HR,
19279 ARM_BUILTIN_WAVG2B,
19280 ARM_BUILTIN_WAVG2H,
19281
19282 ARM_BUILTIN_WACCB,
19283 ARM_BUILTIN_WACCH,
19284 ARM_BUILTIN_WACCW,
19285
19286 ARM_BUILTIN_WMACS,
19287 ARM_BUILTIN_WMACSZ,
19288 ARM_BUILTIN_WMACU,
19289 ARM_BUILTIN_WMACUZ,
19290
19291 ARM_BUILTIN_WSADB,
19292 ARM_BUILTIN_WSADBZ,
19293 ARM_BUILTIN_WSADH,
19294 ARM_BUILTIN_WSADHZ,
19295
19296 ARM_BUILTIN_WALIGNI,
19297 ARM_BUILTIN_WALIGNR0,
19298 ARM_BUILTIN_WALIGNR1,
19299 ARM_BUILTIN_WALIGNR2,
19300 ARM_BUILTIN_WALIGNR3,
19301
19302 ARM_BUILTIN_TMIA,
19303 ARM_BUILTIN_TMIAPH,
19304 ARM_BUILTIN_TMIABB,
19305 ARM_BUILTIN_TMIABT,
19306 ARM_BUILTIN_TMIATB,
19307 ARM_BUILTIN_TMIATT,
19308
19309 ARM_BUILTIN_TMOVMSKB,
19310 ARM_BUILTIN_TMOVMSKH,
19311 ARM_BUILTIN_TMOVMSKW,
19312
19313 ARM_BUILTIN_TBCSTB,
19314 ARM_BUILTIN_TBCSTH,
19315 ARM_BUILTIN_TBCSTW,
19316
19317 ARM_BUILTIN_WMADDS,
19318 ARM_BUILTIN_WMADDU,
19319
19320 ARM_BUILTIN_WPACKHSS,
19321 ARM_BUILTIN_WPACKWSS,
19322 ARM_BUILTIN_WPACKDSS,
19323 ARM_BUILTIN_WPACKHUS,
19324 ARM_BUILTIN_WPACKWUS,
19325 ARM_BUILTIN_WPACKDUS,
19326
19327 ARM_BUILTIN_WADDB,
19328 ARM_BUILTIN_WADDH,
19329 ARM_BUILTIN_WADDW,
19330 ARM_BUILTIN_WADDSSB,
19331 ARM_BUILTIN_WADDSSH,
19332 ARM_BUILTIN_WADDSSW,
19333 ARM_BUILTIN_WADDUSB,
19334 ARM_BUILTIN_WADDUSH,
19335 ARM_BUILTIN_WADDUSW,
19336 ARM_BUILTIN_WSUBB,
19337 ARM_BUILTIN_WSUBH,
19338 ARM_BUILTIN_WSUBW,
19339 ARM_BUILTIN_WSUBSSB,
19340 ARM_BUILTIN_WSUBSSH,
19341 ARM_BUILTIN_WSUBSSW,
19342 ARM_BUILTIN_WSUBUSB,
19343 ARM_BUILTIN_WSUBUSH,
19344 ARM_BUILTIN_WSUBUSW,
19345
19346 ARM_BUILTIN_WAND,
19347 ARM_BUILTIN_WANDN,
19348 ARM_BUILTIN_WOR,
19349 ARM_BUILTIN_WXOR,
19350
19351 ARM_BUILTIN_WCMPEQB,
19352 ARM_BUILTIN_WCMPEQH,
19353 ARM_BUILTIN_WCMPEQW,
19354 ARM_BUILTIN_WCMPGTUB,
19355 ARM_BUILTIN_WCMPGTUH,
19356 ARM_BUILTIN_WCMPGTUW,
19357 ARM_BUILTIN_WCMPGTSB,
19358 ARM_BUILTIN_WCMPGTSH,
19359 ARM_BUILTIN_WCMPGTSW,
19360
19361 ARM_BUILTIN_TEXTRMSB,
19362 ARM_BUILTIN_TEXTRMSH,
19363 ARM_BUILTIN_TEXTRMSW,
19364 ARM_BUILTIN_TEXTRMUB,
19365 ARM_BUILTIN_TEXTRMUH,
19366 ARM_BUILTIN_TEXTRMUW,
19367 ARM_BUILTIN_TINSRB,
19368 ARM_BUILTIN_TINSRH,
19369 ARM_BUILTIN_TINSRW,
19370
19371 ARM_BUILTIN_WMAXSW,
19372 ARM_BUILTIN_WMAXSH,
19373 ARM_BUILTIN_WMAXSB,
19374 ARM_BUILTIN_WMAXUW,
19375 ARM_BUILTIN_WMAXUH,
19376 ARM_BUILTIN_WMAXUB,
19377 ARM_BUILTIN_WMINSW,
19378 ARM_BUILTIN_WMINSH,
19379 ARM_BUILTIN_WMINSB,
19380 ARM_BUILTIN_WMINUW,
19381 ARM_BUILTIN_WMINUH,
19382 ARM_BUILTIN_WMINUB,
19383
19384 ARM_BUILTIN_WMULUM,
19385 ARM_BUILTIN_WMULSM,
19386 ARM_BUILTIN_WMULUL,
19387
19388 ARM_BUILTIN_PSADBH,
19389 ARM_BUILTIN_WSHUFH,
19390
19391 ARM_BUILTIN_WSLLH,
19392 ARM_BUILTIN_WSLLW,
19393 ARM_BUILTIN_WSLLD,
19394 ARM_BUILTIN_WSRAH,
19395 ARM_BUILTIN_WSRAW,
19396 ARM_BUILTIN_WSRAD,
19397 ARM_BUILTIN_WSRLH,
19398 ARM_BUILTIN_WSRLW,
19399 ARM_BUILTIN_WSRLD,
19400 ARM_BUILTIN_WRORH,
19401 ARM_BUILTIN_WRORW,
19402 ARM_BUILTIN_WRORD,
19403 ARM_BUILTIN_WSLLHI,
19404 ARM_BUILTIN_WSLLWI,
19405 ARM_BUILTIN_WSLLDI,
19406 ARM_BUILTIN_WSRAHI,
19407 ARM_BUILTIN_WSRAWI,
19408 ARM_BUILTIN_WSRADI,
19409 ARM_BUILTIN_WSRLHI,
19410 ARM_BUILTIN_WSRLWI,
19411 ARM_BUILTIN_WSRLDI,
19412 ARM_BUILTIN_WRORHI,
19413 ARM_BUILTIN_WRORWI,
19414 ARM_BUILTIN_WRORDI,
19415
19416 ARM_BUILTIN_WUNPCKIHB,
19417 ARM_BUILTIN_WUNPCKIHH,
19418 ARM_BUILTIN_WUNPCKIHW,
19419 ARM_BUILTIN_WUNPCKILB,
19420 ARM_BUILTIN_WUNPCKILH,
19421 ARM_BUILTIN_WUNPCKILW,
19422
19423 ARM_BUILTIN_WUNPCKEHSB,
19424 ARM_BUILTIN_WUNPCKEHSH,
19425 ARM_BUILTIN_WUNPCKEHSW,
19426 ARM_BUILTIN_WUNPCKEHUB,
19427 ARM_BUILTIN_WUNPCKEHUH,
19428 ARM_BUILTIN_WUNPCKEHUW,
19429 ARM_BUILTIN_WUNPCKELSB,
19430 ARM_BUILTIN_WUNPCKELSH,
19431 ARM_BUILTIN_WUNPCKELSW,
19432 ARM_BUILTIN_WUNPCKELUB,
19433 ARM_BUILTIN_WUNPCKELUH,
19434 ARM_BUILTIN_WUNPCKELUW,
19435
19436 ARM_BUILTIN_WABSB,
19437 ARM_BUILTIN_WABSH,
19438 ARM_BUILTIN_WABSW,
19439
19440 ARM_BUILTIN_WADDSUBHX,
19441 ARM_BUILTIN_WSUBADDHX,
19442
19443 ARM_BUILTIN_WABSDIFFB,
19444 ARM_BUILTIN_WABSDIFFH,
19445 ARM_BUILTIN_WABSDIFFW,
19446
19447 ARM_BUILTIN_WADDCH,
19448 ARM_BUILTIN_WADDCW,
19449
19450 ARM_BUILTIN_WAVG4,
19451 ARM_BUILTIN_WAVG4R,
19452
19453 ARM_BUILTIN_WMADDSX,
19454 ARM_BUILTIN_WMADDUX,
19455
19456 ARM_BUILTIN_WMADDSN,
19457 ARM_BUILTIN_WMADDUN,
19458
19459 ARM_BUILTIN_WMULWSM,
19460 ARM_BUILTIN_WMULWUM,
19461
19462 ARM_BUILTIN_WMULWSMR,
19463 ARM_BUILTIN_WMULWUMR,
19464
19465 ARM_BUILTIN_WMULWL,
19466
19467 ARM_BUILTIN_WMULSMR,
19468 ARM_BUILTIN_WMULUMR,
19469
19470 ARM_BUILTIN_WQMULM,
19471 ARM_BUILTIN_WQMULMR,
19472
19473 ARM_BUILTIN_WQMULWM,
19474 ARM_BUILTIN_WQMULWMR,
19475
19476 ARM_BUILTIN_WADDBHUSM,
19477 ARM_BUILTIN_WADDBHUSL,
19478
19479 ARM_BUILTIN_WQMIABB,
19480 ARM_BUILTIN_WQMIABT,
19481 ARM_BUILTIN_WQMIATB,
19482 ARM_BUILTIN_WQMIATT,
19483
19484 ARM_BUILTIN_WQMIABBN,
19485 ARM_BUILTIN_WQMIABTN,
19486 ARM_BUILTIN_WQMIATBN,
19487 ARM_BUILTIN_WQMIATTN,
19488
19489 ARM_BUILTIN_WMIABB,
19490 ARM_BUILTIN_WMIABT,
19491 ARM_BUILTIN_WMIATB,
19492 ARM_BUILTIN_WMIATT,
19493
19494 ARM_BUILTIN_WMIABBN,
19495 ARM_BUILTIN_WMIABTN,
19496 ARM_BUILTIN_WMIATBN,
19497 ARM_BUILTIN_WMIATTN,
19498
19499 ARM_BUILTIN_WMIAWBB,
19500 ARM_BUILTIN_WMIAWBT,
19501 ARM_BUILTIN_WMIAWTB,
19502 ARM_BUILTIN_WMIAWTT,
19503
19504 ARM_BUILTIN_WMIAWBBN,
19505 ARM_BUILTIN_WMIAWBTN,
19506 ARM_BUILTIN_WMIAWTBN,
19507 ARM_BUILTIN_WMIAWTTN,
19508
19509 ARM_BUILTIN_WMERGE,
19510
19511 ARM_BUILTIN_NEON_BASE,
19512
19513 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19514 };
19515
19516 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19517
19518 static void
19519 arm_init_neon_builtins (void)
19520 {
19521 unsigned int i, fcode;
19522 tree decl;
19523
19524 tree neon_intQI_type_node;
19525 tree neon_intHI_type_node;
19526 tree neon_polyQI_type_node;
19527 tree neon_polyHI_type_node;
19528 tree neon_intSI_type_node;
19529 tree neon_intDI_type_node;
19530 tree neon_float_type_node;
19531
19532 tree intQI_pointer_node;
19533 tree intHI_pointer_node;
19534 tree intSI_pointer_node;
19535 tree intDI_pointer_node;
19536 tree float_pointer_node;
19537
19538 tree const_intQI_node;
19539 tree const_intHI_node;
19540 tree const_intSI_node;
19541 tree const_intDI_node;
19542 tree const_float_node;
19543
19544 tree const_intQI_pointer_node;
19545 tree const_intHI_pointer_node;
19546 tree const_intSI_pointer_node;
19547 tree const_intDI_pointer_node;
19548 tree const_float_pointer_node;
19549
19550 tree V8QI_type_node;
19551 tree V4HI_type_node;
19552 tree V2SI_type_node;
19553 tree V2SF_type_node;
19554 tree V16QI_type_node;
19555 tree V8HI_type_node;
19556 tree V4SI_type_node;
19557 tree V4SF_type_node;
19558 tree V2DI_type_node;
19559
19560 tree intUQI_type_node;
19561 tree intUHI_type_node;
19562 tree intUSI_type_node;
19563 tree intUDI_type_node;
19564
19565 tree intEI_type_node;
19566 tree intOI_type_node;
19567 tree intCI_type_node;
19568 tree intXI_type_node;
19569
19570 tree V8QI_pointer_node;
19571 tree V4HI_pointer_node;
19572 tree V2SI_pointer_node;
19573 tree V2SF_pointer_node;
19574 tree V16QI_pointer_node;
19575 tree V8HI_pointer_node;
19576 tree V4SI_pointer_node;
19577 tree V4SF_pointer_node;
19578 tree V2DI_pointer_node;
19579
19580 tree void_ftype_pv8qi_v8qi_v8qi;
19581 tree void_ftype_pv4hi_v4hi_v4hi;
19582 tree void_ftype_pv2si_v2si_v2si;
19583 tree void_ftype_pv2sf_v2sf_v2sf;
19584 tree void_ftype_pdi_di_di;
19585 tree void_ftype_pv16qi_v16qi_v16qi;
19586 tree void_ftype_pv8hi_v8hi_v8hi;
19587 tree void_ftype_pv4si_v4si_v4si;
19588 tree void_ftype_pv4sf_v4sf_v4sf;
19589 tree void_ftype_pv2di_v2di_v2di;
19590
19591 tree reinterp_ftype_dreg[5][5];
19592 tree reinterp_ftype_qreg[5][5];
19593 tree dreg_types[5], qreg_types[5];
19594
19595 /* Create distinguished type nodes for NEON vector element types,
19596 and pointers to values of such types, so we can detect them later. */
19597 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19598 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19599 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19600 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19601 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19602 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19603 neon_float_type_node = make_node (REAL_TYPE);
19604 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19605 layout_type (neon_float_type_node);
19606
19607 /* Define typedefs which exactly correspond to the modes we are basing vector
19608 types on. If you change these names you'll need to change
19609 the table used by arm_mangle_type too. */
19610 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19611 "__builtin_neon_qi");
19612 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19613 "__builtin_neon_hi");
19614 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19615 "__builtin_neon_si");
19616 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19617 "__builtin_neon_sf");
19618 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19619 "__builtin_neon_di");
19620 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19621 "__builtin_neon_poly8");
19622 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19623 "__builtin_neon_poly16");
19624
19625 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19626 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19627 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19628 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19629 float_pointer_node = build_pointer_type (neon_float_type_node);
19630
19631 /* Next create constant-qualified versions of the above types. */
19632 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19633 TYPE_QUAL_CONST);
19634 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19635 TYPE_QUAL_CONST);
19636 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19637 TYPE_QUAL_CONST);
19638 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19639 TYPE_QUAL_CONST);
19640 const_float_node = build_qualified_type (neon_float_type_node,
19641 TYPE_QUAL_CONST);
19642
19643 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19644 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19645 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19646 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19647 const_float_pointer_node = build_pointer_type (const_float_node);
19648
19649 /* Now create vector types based on our NEON element types. */
19650 /* 64-bit vectors. */
19651 V8QI_type_node =
19652 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19653 V4HI_type_node =
19654 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19655 V2SI_type_node =
19656 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19657 V2SF_type_node =
19658 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19659 /* 128-bit vectors. */
19660 V16QI_type_node =
19661 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19662 V8HI_type_node =
19663 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19664 V4SI_type_node =
19665 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19666 V4SF_type_node =
19667 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19668 V2DI_type_node =
19669 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19670
19671 /* Unsigned integer types for various mode sizes. */
19672 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19673 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19674 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19675 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19676
19677 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19678 "__builtin_neon_uqi");
19679 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19680 "__builtin_neon_uhi");
19681 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19682 "__builtin_neon_usi");
19683 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19684 "__builtin_neon_udi");
19685
19686 /* Opaque integer types for structures of vectors. */
19687 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19688 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19689 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19690 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19691
19692 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19693 "__builtin_neon_ti");
19694 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19695 "__builtin_neon_ei");
19696 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19697 "__builtin_neon_oi");
19698 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19699 "__builtin_neon_ci");
19700 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19701 "__builtin_neon_xi");
19702
19703 /* Pointers to vector types. */
19704 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19705 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19706 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19707 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19708 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19709 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19710 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19711 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19712 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19713
19714 /* Operations which return results as pairs. */
19715 void_ftype_pv8qi_v8qi_v8qi =
19716 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19717 V8QI_type_node, NULL);
19718 void_ftype_pv4hi_v4hi_v4hi =
19719 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19720 V4HI_type_node, NULL);
19721 void_ftype_pv2si_v2si_v2si =
19722 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19723 V2SI_type_node, NULL);
19724 void_ftype_pv2sf_v2sf_v2sf =
19725 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19726 V2SF_type_node, NULL);
19727 void_ftype_pdi_di_di =
19728 build_function_type_list (void_type_node, intDI_pointer_node,
19729 neon_intDI_type_node, neon_intDI_type_node, NULL);
19730 void_ftype_pv16qi_v16qi_v16qi =
19731 build_function_type_list (void_type_node, V16QI_pointer_node,
19732 V16QI_type_node, V16QI_type_node, NULL);
19733 void_ftype_pv8hi_v8hi_v8hi =
19734 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19735 V8HI_type_node, NULL);
19736 void_ftype_pv4si_v4si_v4si =
19737 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19738 V4SI_type_node, NULL);
19739 void_ftype_pv4sf_v4sf_v4sf =
19740 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19741 V4SF_type_node, NULL);
19742 void_ftype_pv2di_v2di_v2di =
19743 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19744 V2DI_type_node, NULL);
19745
19746 dreg_types[0] = V8QI_type_node;
19747 dreg_types[1] = V4HI_type_node;
19748 dreg_types[2] = V2SI_type_node;
19749 dreg_types[3] = V2SF_type_node;
19750 dreg_types[4] = neon_intDI_type_node;
19751
19752 qreg_types[0] = V16QI_type_node;
19753 qreg_types[1] = V8HI_type_node;
19754 qreg_types[2] = V4SI_type_node;
19755 qreg_types[3] = V4SF_type_node;
19756 qreg_types[4] = V2DI_type_node;
19757
19758 for (i = 0; i < 5; i++)
19759 {
19760 int j;
19761 for (j = 0; j < 5; j++)
19762 {
19763 reinterp_ftype_dreg[i][j]
19764 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19765 reinterp_ftype_qreg[i][j]
19766 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19767 }
19768 }
19769
19770 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19771 i < ARRAY_SIZE (neon_builtin_data);
19772 i++, fcode++)
19773 {
19774 neon_builtin_datum *d = &neon_builtin_data[i];
19775
19776 const char* const modenames[] = {
19777 "v8qi", "v4hi", "v2si", "v2sf", "di",
19778 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19779 "ti", "ei", "oi"
19780 };
19781 char namebuf[60];
19782 tree ftype = NULL;
19783 int is_load = 0, is_store = 0;
19784
19785 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19786
19787 d->fcode = fcode;
19788
19789 switch (d->itype)
19790 {
19791 case NEON_LOAD1:
19792 case NEON_LOAD1LANE:
19793 case NEON_LOADSTRUCT:
19794 case NEON_LOADSTRUCTLANE:
19795 is_load = 1;
19796 /* Fall through. */
19797 case NEON_STORE1:
19798 case NEON_STORE1LANE:
19799 case NEON_STORESTRUCT:
19800 case NEON_STORESTRUCTLANE:
19801 if (!is_load)
19802 is_store = 1;
19803 /* Fall through. */
19804 case NEON_UNOP:
19805 case NEON_BINOP:
19806 case NEON_LOGICBINOP:
19807 case NEON_SHIFTINSERT:
19808 case NEON_TERNOP:
19809 case NEON_GETLANE:
19810 case NEON_SETLANE:
19811 case NEON_CREATE:
19812 case NEON_DUP:
19813 case NEON_DUPLANE:
19814 case NEON_SHIFTIMM:
19815 case NEON_SHIFTACC:
19816 case NEON_COMBINE:
19817 case NEON_SPLIT:
19818 case NEON_CONVERT:
19819 case NEON_FIXCONV:
19820 case NEON_LANEMUL:
19821 case NEON_LANEMULL:
19822 case NEON_LANEMULH:
19823 case NEON_LANEMAC:
19824 case NEON_SCALARMUL:
19825 case NEON_SCALARMULL:
19826 case NEON_SCALARMULH:
19827 case NEON_SCALARMAC:
19828 case NEON_SELECT:
19829 case NEON_VTBL:
19830 case NEON_VTBX:
19831 {
19832 int k;
19833 tree return_type = void_type_node, args = void_list_node;
19834
19835 /* Build a function type directly from the insn_data for
19836 this builtin. The build_function_type() function takes
19837 care of removing duplicates for us. */
19838 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19839 {
19840 tree eltype;
19841
19842 if (is_load && k == 1)
19843 {
19844 /* Neon load patterns always have the memory
19845 operand in the operand 1 position. */
19846 gcc_assert (insn_data[d->code].operand[k].predicate
19847 == neon_struct_operand);
19848
19849 switch (d->mode)
19850 {
19851 case T_V8QI:
19852 case T_V16QI:
19853 eltype = const_intQI_pointer_node;
19854 break;
19855
19856 case T_V4HI:
19857 case T_V8HI:
19858 eltype = const_intHI_pointer_node;
19859 break;
19860
19861 case T_V2SI:
19862 case T_V4SI:
19863 eltype = const_intSI_pointer_node;
19864 break;
19865
19866 case T_V2SF:
19867 case T_V4SF:
19868 eltype = const_float_pointer_node;
19869 break;
19870
19871 case T_DI:
19872 case T_V2DI:
19873 eltype = const_intDI_pointer_node;
19874 break;
19875
19876 default: gcc_unreachable ();
19877 }
19878 }
19879 else if (is_store && k == 0)
19880 {
19881 /* Similarly, Neon store patterns use operand 0 as
19882 the memory location to store to. */
19883 gcc_assert (insn_data[d->code].operand[k].predicate
19884 == neon_struct_operand);
19885
19886 switch (d->mode)
19887 {
19888 case T_V8QI:
19889 case T_V16QI:
19890 eltype = intQI_pointer_node;
19891 break;
19892
19893 case T_V4HI:
19894 case T_V8HI:
19895 eltype = intHI_pointer_node;
19896 break;
19897
19898 case T_V2SI:
19899 case T_V4SI:
19900 eltype = intSI_pointer_node;
19901 break;
19902
19903 case T_V2SF:
19904 case T_V4SF:
19905 eltype = float_pointer_node;
19906 break;
19907
19908 case T_DI:
19909 case T_V2DI:
19910 eltype = intDI_pointer_node;
19911 break;
19912
19913 default: gcc_unreachable ();
19914 }
19915 }
19916 else
19917 {
19918 switch (insn_data[d->code].operand[k].mode)
19919 {
19920 case VOIDmode: eltype = void_type_node; break;
19921 /* Scalars. */
19922 case QImode: eltype = neon_intQI_type_node; break;
19923 case HImode: eltype = neon_intHI_type_node; break;
19924 case SImode: eltype = neon_intSI_type_node; break;
19925 case SFmode: eltype = neon_float_type_node; break;
19926 case DImode: eltype = neon_intDI_type_node; break;
19927 case TImode: eltype = intTI_type_node; break;
19928 case EImode: eltype = intEI_type_node; break;
19929 case OImode: eltype = intOI_type_node; break;
19930 case CImode: eltype = intCI_type_node; break;
19931 case XImode: eltype = intXI_type_node; break;
19932 /* 64-bit vectors. */
19933 case V8QImode: eltype = V8QI_type_node; break;
19934 case V4HImode: eltype = V4HI_type_node; break;
19935 case V2SImode: eltype = V2SI_type_node; break;
19936 case V2SFmode: eltype = V2SF_type_node; break;
19937 /* 128-bit vectors. */
19938 case V16QImode: eltype = V16QI_type_node; break;
19939 case V8HImode: eltype = V8HI_type_node; break;
19940 case V4SImode: eltype = V4SI_type_node; break;
19941 case V4SFmode: eltype = V4SF_type_node; break;
19942 case V2DImode: eltype = V2DI_type_node; break;
19943 default: gcc_unreachable ();
19944 }
19945 }
19946
19947 if (k == 0 && !is_store)
19948 return_type = eltype;
19949 else
19950 args = tree_cons (NULL_TREE, eltype, args);
19951 }
19952
19953 ftype = build_function_type (return_type, args);
19954 }
19955 break;
19956
19957 case NEON_RESULTPAIR:
19958 {
19959 switch (insn_data[d->code].operand[1].mode)
19960 {
19961 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19962 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19963 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19964 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19965 case DImode: ftype = void_ftype_pdi_di_di; break;
19966 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19967 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19968 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19969 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19970 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19971 default: gcc_unreachable ();
19972 }
19973 }
19974 break;
19975
19976 case NEON_REINTERP:
19977 {
19978 /* We iterate over 5 doubleword types, then 5 quadword
19979 types. */
19980 int rhs = d->mode % 5;
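/* For example (illustrative), T_V4SF (index 8 in neon_builtin_type_mode)
   gives rhs == 3, selecting the V4SF argument type from
   reinterp_ftype_qreg.  */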
19981 switch (insn_data[d->code].operand[0].mode)
19982 {
19983 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19984 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19985 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19986 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19987 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19988 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19989 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19990 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19991 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19992 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19993 default: gcc_unreachable ();
19994 }
19995 }
19996 break;
19997
19998 default:
19999 gcc_unreachable ();
20000 }
20001
20002 gcc_assert (ftype != NULL);
20003
20004 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
20005
20006 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
20007 NULL_TREE);
20008 arm_builtin_decls[fcode] = decl;
20009 }
20010 }
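/* To illustrate the naming scheme used above (a sketch, not an exhaustive
   list): the VAR10 (BINOP, vadd, ...) entry produces builtins named
   __builtin_neon_vaddv8qi, __builtin_neon_vaddv4hi and so on, one per
   listed mode, each assigned the function code ARM_BUILTIN_NEON_BASE plus
   its index in neon_builtin_data.  */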
20011
20012 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
20013 do \
20014 { \
20015 if ((MASK) & insn_flags) \
20016 { \
20017 tree bdecl; \
20018 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
20019 BUILT_IN_MD, NULL, NULL_TREE); \
20020 arm_builtin_decls[CODE] = bdecl; \
20021 } \
20022 } \
20023 while (0)
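/* Usage sketch (illustrative only; the real calls, with the real type
   nodes, are made in arm_init_iwmmxt_builtins further on):

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb",
		   int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);

   The builtin is registered only when the corresponding FL_* bit is set
   in insn_flags for the selected target.  */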
20024
20025 struct builtin_description
20026 {
20027 const unsigned int mask;
20028 const enum insn_code icode;
20029 const char * const name;
20030 const enum arm_builtins code;
20031 const enum rtx_code comparison;
20032 const unsigned int flag;
20033 };
20034
20035 static const struct builtin_description bdesc_2arg[] =
20036 {
20037 #define IWMMXT_BUILTIN(code, string, builtin) \
20038 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
20039 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20040
20041 #define IWMMXT2_BUILTIN(code, string, builtin) \
20042 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
20043 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20044
20045 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
20046 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
20047 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
20048 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
20049 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
20050 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
20051 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
20052 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
20053 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
20054 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
20055 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
20056 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
20057 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
20058 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
20059 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
20060 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
20061 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
20062 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
20063 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
20064 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
20065 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
20066 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
20067 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
20068 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
20069 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
20070 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
20071 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
20072 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
20073 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
20074 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
20075 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
20076 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
20077 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
20078 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
20079 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
20080 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
20081 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
20082 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
20083 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
20084 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
20085 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
20086 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
20087 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
20088 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
20089 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
20090 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
20091 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
20092 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
20093 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
20094 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
20095 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
20096 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
20097 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
20098 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
20099 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
20100 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
20101 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
20102 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
20103 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
20104 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
20105 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
20106 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
20107 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
20108 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
20109 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
20110 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
20111 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
20112 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
20113 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
20114 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
20115 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
20116 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
20117 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
20118 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
20119 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
20120 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
20121 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
20122 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
20123
20124 #define IWMMXT_BUILTIN2(code, builtin) \
20125 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20126
20127 #define IWMMXT2_BUILTIN2(code, builtin) \
20128 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
20129
20130 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
20131 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
20132 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
20133 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
20134 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
20135 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
20136 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
20137 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
20138 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
20139 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
20140 };
20141
20142 static const struct builtin_description bdesc_1arg[] =
20143 {
20144 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
20145 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
20146 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
20147 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
20148 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
20149 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
20150 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
20151 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
20152 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
20153 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
20154 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
20155 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
20156 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
20157 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
20158 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
20159 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
20160 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
20161 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
20162 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
20163 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
20164 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
20165 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
20166 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
20167 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
20168 };
20169
20170 /* Set up all the iWMMXt builtins. This is not called if
20171 TARGET_IWMMXT is zero. */
20172
20173 static void
20174 arm_init_iwmmxt_builtins (void)
20175 {
20176 const struct builtin_description * d;
20177 size_t i;
20178
20179 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
20180 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
20181 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
20182
20183 tree v8qi_ftype_v8qi_v8qi_int
20184 = build_function_type_list (V8QI_type_node,
20185 V8QI_type_node, V8QI_type_node,
20186 integer_type_node, NULL_TREE);
20187 tree v4hi_ftype_v4hi_int
20188 = build_function_type_list (V4HI_type_node,
20189 V4HI_type_node, integer_type_node, NULL_TREE);
20190 tree v2si_ftype_v2si_int
20191 = build_function_type_list (V2SI_type_node,
20192 V2SI_type_node, integer_type_node, NULL_TREE);
20193 tree v2si_ftype_di_di
20194 = build_function_type_list (V2SI_type_node,
20195 long_long_integer_type_node,
20196 long_long_integer_type_node,
20197 NULL_TREE);
20198 tree di_ftype_di_int
20199 = build_function_type_list (long_long_integer_type_node,
20200 long_long_integer_type_node,
20201 integer_type_node, NULL_TREE);
20202 tree di_ftype_di_int_int
20203 = build_function_type_list (long_long_integer_type_node,
20204 long_long_integer_type_node,
20205 integer_type_node,
20206 integer_type_node, NULL_TREE);
20207 tree int_ftype_v8qi
20208 = build_function_type_list (integer_type_node,
20209 V8QI_type_node, NULL_TREE);
20210 tree int_ftype_v4hi
20211 = build_function_type_list (integer_type_node,
20212 V4HI_type_node, NULL_TREE);
20213 tree int_ftype_v2si
20214 = build_function_type_list (integer_type_node,
20215 V2SI_type_node, NULL_TREE);
20216 tree int_ftype_v8qi_int
20217 = build_function_type_list (integer_type_node,
20218 V8QI_type_node, integer_type_node, NULL_TREE);
20219 tree int_ftype_v4hi_int
20220 = build_function_type_list (integer_type_node,
20221 V4HI_type_node, integer_type_node, NULL_TREE);
20222 tree int_ftype_v2si_int
20223 = build_function_type_list (integer_type_node,
20224 V2SI_type_node, integer_type_node, NULL_TREE);
20225 tree v8qi_ftype_v8qi_int_int
20226 = build_function_type_list (V8QI_type_node,
20227 V8QI_type_node, integer_type_node,
20228 integer_type_node, NULL_TREE);
20229 tree v4hi_ftype_v4hi_int_int
20230 = build_function_type_list (V4HI_type_node,
20231 V4HI_type_node, integer_type_node,
20232 integer_type_node, NULL_TREE);
20233 tree v2si_ftype_v2si_int_int
20234 = build_function_type_list (V2SI_type_node,
20235 V2SI_type_node, integer_type_node,
20236 integer_type_node, NULL_TREE);
20237 /* Miscellaneous. */
20238 tree v8qi_ftype_v4hi_v4hi
20239 = build_function_type_list (V8QI_type_node,
20240 V4HI_type_node, V4HI_type_node, NULL_TREE);
20241 tree v4hi_ftype_v2si_v2si
20242 = build_function_type_list (V4HI_type_node,
20243 V2SI_type_node, V2SI_type_node, NULL_TREE);
20244 tree v8qi_ftype_v4hi_v8qi
20245 = build_function_type_list (V8QI_type_node,
20246 V4HI_type_node, V8QI_type_node, NULL_TREE);
20247 tree v2si_ftype_v4hi_v4hi
20248 = build_function_type_list (V2SI_type_node,
20249 V4HI_type_node, V4HI_type_node, NULL_TREE);
20250 tree v2si_ftype_v8qi_v8qi
20251 = build_function_type_list (V2SI_type_node,
20252 V8QI_type_node, V8QI_type_node, NULL_TREE);
20253 tree v4hi_ftype_v4hi_di
20254 = build_function_type_list (V4HI_type_node,
20255 V4HI_type_node, long_long_integer_type_node,
20256 NULL_TREE);
20257 tree v2si_ftype_v2si_di
20258 = build_function_type_list (V2SI_type_node,
20259 V2SI_type_node, long_long_integer_type_node,
20260 NULL_TREE);
20261 tree di_ftype_void
20262 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
20263 tree int_ftype_void
20264 = build_function_type_list (integer_type_node, NULL_TREE);
20265 tree di_ftype_v8qi
20266 = build_function_type_list (long_long_integer_type_node,
20267 V8QI_type_node, NULL_TREE);
20268 tree di_ftype_v4hi
20269 = build_function_type_list (long_long_integer_type_node,
20270 V4HI_type_node, NULL_TREE);
20271 tree di_ftype_v2si
20272 = build_function_type_list (long_long_integer_type_node,
20273 V2SI_type_node, NULL_TREE);
20274 tree v2si_ftype_v4hi
20275 = build_function_type_list (V2SI_type_node,
20276 V4HI_type_node, NULL_TREE);
20277 tree v4hi_ftype_v8qi
20278 = build_function_type_list (V4HI_type_node,
20279 V8QI_type_node, NULL_TREE);
20280 tree v8qi_ftype_v8qi
20281 = build_function_type_list (V8QI_type_node,
20282 V8QI_type_node, NULL_TREE);
20283 tree v4hi_ftype_v4hi
20284 = build_function_type_list (V4HI_type_node,
20285 V4HI_type_node, NULL_TREE);
20286 tree v2si_ftype_v2si
20287 = build_function_type_list (V2SI_type_node,
20288 V2SI_type_node, NULL_TREE);
20289
20290 tree di_ftype_di_v4hi_v4hi
20291 = build_function_type_list (long_long_unsigned_type_node,
20292 long_long_unsigned_type_node,
20293 V4HI_type_node, V4HI_type_node,
20294 NULL_TREE);
20295
20296 tree di_ftype_v4hi_v4hi
20297 = build_function_type_list (long_long_unsigned_type_node,
20298 V4HI_type_node, V4HI_type_node,
20299 NULL_TREE);
20300
20301 tree v2si_ftype_v2si_v4hi_v4hi
20302 = build_function_type_list (V2SI_type_node,
20303 V2SI_type_node, V4HI_type_node,
20304 V4HI_type_node, NULL_TREE);
20305
20306 tree v2si_ftype_v2si_v8qi_v8qi
20307 = build_function_type_list (V2SI_type_node,
20308 V2SI_type_node, V8QI_type_node,
20309 V8QI_type_node, NULL_TREE);
20310
20311 tree di_ftype_di_v2si_v2si
20312 = build_function_type_list (long_long_unsigned_type_node,
20313 long_long_unsigned_type_node,
20314 V2SI_type_node, V2SI_type_node,
20315 NULL_TREE);
20316
20317 tree di_ftype_di_di_int
20318 = build_function_type_list (long_long_unsigned_type_node,
20319 long_long_unsigned_type_node,
20320 long_long_unsigned_type_node,
20321 integer_type_node, NULL_TREE);
20322
20323 tree void_ftype_int
20324 = build_function_type_list (void_type_node,
20325 integer_type_node, NULL_TREE);
20326
20327 tree v8qi_ftype_char
20328 = build_function_type_list (V8QI_type_node,
20329 signed_char_type_node, NULL_TREE);
20330
20331 tree v4hi_ftype_short
20332 = build_function_type_list (V4HI_type_node,
20333 short_integer_type_node, NULL_TREE);
20334
20335 tree v2si_ftype_int
20336 = build_function_type_list (V2SI_type_node,
20337 integer_type_node, NULL_TREE);
20338
20339 /* Normal vector binops. */
20340 tree v8qi_ftype_v8qi_v8qi
20341 = build_function_type_list (V8QI_type_node,
20342 V8QI_type_node, V8QI_type_node, NULL_TREE);
20343 tree v4hi_ftype_v4hi_v4hi
20344 = build_function_type_list (V4HI_type_node,
20345 V4HI_type_node, V4HI_type_node, NULL_TREE);
20346 tree v2si_ftype_v2si_v2si
20347 = build_function_type_list (V2SI_type_node,
20348 V2SI_type_node, V2SI_type_node, NULL_TREE);
20349 tree di_ftype_di_di
20350 = build_function_type_list (long_long_unsigned_type_node,
20351 long_long_unsigned_type_node,
20352 long_long_unsigned_type_node,
20353 NULL_TREE);
20354
20355 /* Add all builtins that are more or less simple operations on two
20356 operands. */
20357 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
20358 {
20359 /* Use one of the operands; the target can have a different mode for
20360 mask-generating compares. */
20361 enum machine_mode mode;
20362 tree type;
20363
20364 if (d->name == 0)
20365 continue;
20366
20367 mode = insn_data[d->icode].operand[1].mode;
20368
20369 switch (mode)
20370 {
20371 case V8QImode:
20372 type = v8qi_ftype_v8qi_v8qi;
20373 break;
20374 case V4HImode:
20375 type = v4hi_ftype_v4hi_v4hi;
20376 break;
20377 case V2SImode:
20378 type = v2si_ftype_v2si_v2si;
20379 break;
20380 case DImode:
20381 type = di_ftype_di_di;
20382 break;
20383
20384 default:
20385 gcc_unreachable ();
20386 }
20387
20388 def_mbuiltin (d->mask, d->name, type, d->code);
20389 }
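
/* Editorial worked example: for the bdesc_2arg entry produced by
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB), operand 1 of the addv8qi3
   pattern is expected to have mode V8QImode, so the loop above registers
   "__builtin_arm_waddb" with type v8qi_ftype_v8qi_v8qi under
   ARM_BUILTIN_WADDB, provided FL_IWMMXT is enabled.  */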
20390
20391 /* Add the remaining MMX insns with somewhat more complicated types. */
20392 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
20393 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
20394 ARM_BUILTIN_ ## CODE)
20395
20396 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
20397 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
20398 ARM_BUILTIN_ ## CODE)
20399
20400 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
20401 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
20402 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
20403 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
20404 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
20405 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
20406 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
20407 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
20408 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
20409
20410 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20411 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20412 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20413 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20414 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20415 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20416
20417 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20418 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20419 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20420 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20421 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20422 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20423
20424 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20425 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20426 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20427 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20428 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20429 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20430
20431 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20432 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20433 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20434 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20435 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20436 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20437
20438 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20439
20440 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
20441 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
20442 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
20443 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
20444 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
20445 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
20446 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
20447 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
20448 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20449 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20450
20451 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20452 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20453 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20454 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20455 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20456 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20457 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20458 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20459 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20460
20461 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20462 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20463 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20464
20465 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20466 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20467 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20468
20469 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
20470 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
20471
20472 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20473 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20474 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20475 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20476 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20477 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20478
20479 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20480 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20481 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20482 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20483 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20484 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20485 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20486 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20487 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20488 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20489 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20490 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20491
20492 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20493 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20494 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20495 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20496
20497 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
20498 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20499 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20500 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20501 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20502 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20503 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20504
20505 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
20506 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
20507 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
20508
20509 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
20510 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
20511 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
20512 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
20513
20514 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
20515 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
20516 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
20517 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
20518
20519 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
20520 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
20521 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
20522 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
20523
20524 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
20525 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
20526 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
20527 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
20528
20529 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
20530 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
20531 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
20532 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
20533
20534 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
20535 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
20536 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
20537 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
20538
20539 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
20540
20541 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
20542 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
20543 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
20544
20545 #undef iwmmx_mbuiltin
20546 #undef iwmmx2_mbuiltin
20547 }
20548
20549 static void
20550 arm_init_fp16_builtins (void)
20551 {
20552 tree fp16_type = make_node (REAL_TYPE);
20553 TYPE_PRECISION (fp16_type) = 16;
20554 layout_type (fp16_type);
20555 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
20556 }
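
/* Editorial note: once the type is registered, source code may name it
   directly, e.g.

     __fp16 h = 1.0f;

   provided a half-precision format is in effect (arm_fp16_format is nonzero;
   see arm_init_builtins below).  */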
20557
20558 static void
20559 arm_init_builtins (void)
20560 {
20561 if (TARGET_REALLY_IWMMXT)
20562 arm_init_iwmmxt_builtins ();
20563
20564 if (TARGET_NEON)
20565 arm_init_neon_builtins ();
20566
20567 if (arm_fp16_format)
20568 arm_init_fp16_builtins ();
20569 }
20570
20571 /* Return the ARM builtin for CODE. */
20572
20573 static tree
20574 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20575 {
20576 if (code >= ARM_BUILTIN_MAX)
20577 return error_mark_node;
20578
20579 return arm_builtin_decls[code];
20580 }
20581
20582 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20583
20584 static const char *
20585 arm_invalid_parameter_type (const_tree t)
20586 {
20587 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20588 return N_("function parameters cannot have __fp16 type");
20589 return NULL;
20590 }
20591
20592 /* Implement TARGET_INVALID_RETURN_TYPE. */
20593
20594 static const char *
20595 arm_invalid_return_type (const_tree t)
20596 {
20597 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20598 return N_("functions cannot return __fp16 type");
20599 return NULL;
20600 }
20601
20602 /* Implement TARGET_PROMOTED_TYPE. */
20603
20604 static tree
20605 arm_promoted_type (const_tree t)
20606 {
20607 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20608 return float_type_node;
20609 return NULL_TREE;
20610 }
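
/* Editorial sketch: because __fp16 promotes to float, an expression such as

     __fp16 a, b;
     float f = a + b;

   performs the addition in float; only conversions to and from the
   half-precision representation are generated for a and b.  */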
20611
20612 /* Implement TARGET_CONVERT_TO_TYPE.
20613 Specifically, this hook implements the peculiarity of the ARM
20614 half-precision floating-point C semantics that requires conversions
20615 between __fp16 and double (in either direction) to go through an intermediate conversion to float. */
20616
20617 static tree
20618 arm_convert_to_type (tree type, tree expr)
20619 {
20620 tree fromtype = TREE_TYPE (expr);
20621 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20622 return NULL_TREE;
20623 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20624 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20625 return convert (type, convert (float_type_node, expr));
20626 return NULL_TREE;
20627 }
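
/* Editorial sketch: given

     __fp16 h;
     double d = h;

   the hook above rewrites the conversion as (double) (float) h, and the
   double-to-__fp16 direction likewise goes through float.  */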
20628
20629 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20630 This simply adds HFmode as a supported mode; even though we don't
20631 implement arithmetic on this type directly, it's supported by
20632 optabs conversions, much the way the double-word arithmetic is
20633 special-cased in the default hook. */
20634
20635 static bool
20636 arm_scalar_mode_supported_p (enum machine_mode mode)
20637 {
20638 if (mode == HFmode)
20639 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
20640 else if (ALL_FIXED_POINT_MODE_P (mode))
20641 return true;
20642 else
20643 return default_scalar_mode_supported_p (mode);
20644 }
20645
20646 /* Errors in the source file can cause expand_expr to return const0_rtx
20647 where we expect a vector. To avoid crashing, use one of the vector
20648 clear instructions. */
20649
20650 static rtx
20651 safe_vector_operand (rtx x, enum machine_mode mode)
20652 {
20653 if (x != const0_rtx)
20654 return x;
20655 x = gen_reg_rtx (mode);
20656
20657 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20658 : gen_rtx_SUBREG (DImode, x, 0)));
20659 return x;
20660 }
20661
20662 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20663
20664 static rtx
20665 arm_expand_binop_builtin (enum insn_code icode,
20666 tree exp, rtx target)
20667 {
20668 rtx pat;
20669 tree arg0 = CALL_EXPR_ARG (exp, 0);
20670 tree arg1 = CALL_EXPR_ARG (exp, 1);
20671 rtx op0 = expand_normal (arg0);
20672 rtx op1 = expand_normal (arg1);
20673 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20674 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20675 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20676
20677 if (VECTOR_MODE_P (mode0))
20678 op0 = safe_vector_operand (op0, mode0);
20679 if (VECTOR_MODE_P (mode1))
20680 op1 = safe_vector_operand (op1, mode1);
20681
20682 if (! target
20683 || GET_MODE (target) != tmode
20684 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20685 target = gen_reg_rtx (tmode);
20686
20687 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
20688 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
20689
20690 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20691 op0 = copy_to_mode_reg (mode0, op0);
20692 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20693 op1 = copy_to_mode_reg (mode1, op1);
20694
20695 pat = GEN_FCN (icode) (target, op0, op1);
20696 if (! pat)
20697 return 0;
20698 emit_insn (pat);
20699 return target;
20700 }
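
/* Editorial note: the sequence above -- validate or allocate TARGET, force
   each operand through its operand predicate (copying it to a fresh register
   when necessary), then build the pattern with GEN_FCN and emit_insn it --
   is the template the remaining expanders in this file follow; the unop and
   iWMMXt cases below differ mainly in operand count and extra range
   checking.  */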
20701
20702 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20703
20704 static rtx
20705 arm_expand_unop_builtin (enum insn_code icode,
20706 tree exp, rtx target, int do_load)
20707 {
20708 rtx pat;
20709 tree arg0 = CALL_EXPR_ARG (exp, 0);
20710 rtx op0 = expand_normal (arg0);
20711 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20712 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20713
20714 if (! target
20715 || GET_MODE (target) != tmode
20716 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20717 target = gen_reg_rtx (tmode);
20718 if (do_load)
20719 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20720 else
20721 {
20722 if (VECTOR_MODE_P (mode0))
20723 op0 = safe_vector_operand (op0, mode0);
20724
20725 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20726 op0 = copy_to_mode_reg (mode0, op0);
20727 }
20728
20729 pat = GEN_FCN (icode) (target, op0);
20730 if (! pat)
20731 return 0;
20732 emit_insn (pat);
20733 return target;
20734 }
20735
20736 typedef enum {
20737 NEON_ARG_COPY_TO_REG,
20738 NEON_ARG_CONSTANT,
20739 NEON_ARG_MEMORY,
20740 NEON_ARG_STOP
20741 } builtin_arg;
20742
20743 #define NEON_MAX_BUILTIN_ARGS 5
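
/* Editorial note: these codes are passed to arm_expand_neon_args below as a
   NEON_ARG_STOP-terminated variable argument list describing each operand of
   the builtin; e.g. the NEON_BINOP case further down uses

     arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
                           NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
                           NEON_ARG_CONSTANT, NEON_ARG_STOP);

   (this exact call shape appears in arm_expand_neon_builtin).  */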
20744
20745 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20746 and return an expression for the accessed memory.
20747
20748 The intrinsic function operates on a block of registers that has
20749 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
20750 function references the memory at EXP of type TYPE and in mode
20751 MEM_MODE; this mode may be BLKmode if no more suitable mode is
20752 available. */
20753
20754 static tree
20755 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
20756 enum machine_mode reg_mode,
20757 neon_builtin_type_mode type_mode)
20758 {
20759 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20760 tree elem_type, upper_bound, array_type;
20761
20762 /* Work out the size of the register block in bytes. */
20763 reg_size = GET_MODE_SIZE (reg_mode);
20764
20765 /* Work out the size of each vector in bytes. */
20766 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20767 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20768
20769 /* Work out how many vectors there are. */
20770 gcc_assert (reg_size % vector_size == 0);
20771 nvectors = reg_size / vector_size;
20772
20773 /* Work out the type of each element. */
20774 gcc_assert (POINTER_TYPE_P (type));
20775 elem_type = TREE_TYPE (type);
20776
20777 /* Work out how many elements are being loaded or stored.
20778 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20779 and memory elements; anything else implies a lane load or store. */
20780 if (mem_mode == reg_mode)
20781 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
20782 else
20783 nelems = nvectors;
20784
20785 /* Create a type that describes the full access. */
20786 upper_bound = build_int_cst (size_type_node, nelems - 1);
20787 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20788
20789 /* Dereference EXP using that type. */
20790 return fold_build2 (MEM_REF, array_type, exp,
20791 build_int_cst (build_pointer_type (array_type), 0));
20792 }
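
/* Editorial worked example (illustrative only): for a full quadword load of
   32-bit elements -- roughly what a vld1q-style intrinsic does -- REG_MODE is
   V4SImode (16 bytes), so vector_size = 16 and nvectors = 1; the pointed-to
   element type is 4 bytes wide, and since MEM_MODE == REG_MODE we get
   nelems = 16 * 1 / 4 = 4, i.e. the access is modelled as a four-element
   array dereferenced at EXP.  For a lane load or store (MEM_MODE != REG_MODE)
   only one element per vector is accessed, so nelems = nvectors.  */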
20793
20794 /* Expand a Neon builtin. */
20795 static rtx
20796 arm_expand_neon_args (rtx target, int icode, int have_retval,
20797 neon_builtin_type_mode type_mode,
20798 tree exp, int fcode, ...)
20799 {
20800 va_list ap;
20801 rtx pat;
20802 tree arg[NEON_MAX_BUILTIN_ARGS];
20803 rtx op[NEON_MAX_BUILTIN_ARGS];
20804 tree arg_type;
20805 tree formals;
20806 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20807 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20808 enum machine_mode other_mode;
20809 int argc = 0;
20810 int opno;
20811
20812 if (have_retval
20813 && (!target
20814 || GET_MODE (target) != tmode
20815 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20816 target = gen_reg_rtx (tmode);
20817
20818 va_start (ap, fcode);
20819
20820 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
20821
20822 for (;;)
20823 {
20824 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20825
20826 if (thisarg == NEON_ARG_STOP)
20827 break;
20828 else
20829 {
20830 opno = argc + have_retval;
20831 mode[argc] = insn_data[icode].operand[opno].mode;
20832 arg[argc] = CALL_EXPR_ARG (exp, argc);
20833 arg_type = TREE_VALUE (formals);
20834 if (thisarg == NEON_ARG_MEMORY)
20835 {
20836 other_mode = insn_data[icode].operand[1 - opno].mode;
20837 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
20838 mode[argc], other_mode,
20839 type_mode);
20840 }
20841
20842 op[argc] = expand_normal (arg[argc]);
20843
20844 switch (thisarg)
20845 {
20846 case NEON_ARG_COPY_TO_REG:
20847 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20848 if (!(*insn_data[icode].operand[opno].predicate)
20849 (op[argc], mode[argc]))
20850 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20851 break;
20852
20853 case NEON_ARG_CONSTANT:
20854 /* FIXME: This error message is somewhat unhelpful. */
20855 if (!(*insn_data[icode].operand[opno].predicate)
20856 (op[argc], mode[argc]))
20857 error ("argument must be a constant");
20858 break;
20859
20860 case NEON_ARG_MEMORY:
20861 gcc_assert (MEM_P (op[argc]));
20862 PUT_MODE (op[argc], mode[argc]);
20863 /* ??? arm_neon.h uses the same built-in functions for signed
20864 and unsigned accesses, casting where necessary. This isn't
20865 alias safe. */
20866 set_mem_alias_set (op[argc], 0);
20867 if (!(*insn_data[icode].operand[opno].predicate)
20868 (op[argc], mode[argc]))
20869 op[argc] = (replace_equiv_address
20870 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20871 break;
20872
20873 case NEON_ARG_STOP:
20874 gcc_unreachable ();
20875 }
20876
20877 argc++;
20878 formals = TREE_CHAIN (formals);
20879 }
20880 }
20881
20882 va_end (ap);
20883
20884 if (have_retval)
20885 switch (argc)
20886 {
20887 case 1:
20888 pat = GEN_FCN (icode) (target, op[0]);
20889 break;
20890
20891 case 2:
20892 pat = GEN_FCN (icode) (target, op[0], op[1]);
20893 break;
20894
20895 case 3:
20896 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20897 break;
20898
20899 case 4:
20900 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20901 break;
20902
20903 case 5:
20904 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20905 break;
20906
20907 default:
20908 gcc_unreachable ();
20909 }
20910 else
20911 switch (argc)
20912 {
20913 case 1:
20914 pat = GEN_FCN (icode) (op[0]);
20915 break;
20916
20917 case 2:
20918 pat = GEN_FCN (icode) (op[0], op[1]);
20919 break;
20920
20921 case 3:
20922 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20923 break;
20924
20925 case 4:
20926 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20927 break;
20928
20929 case 5:
20930 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20931 break;
20932
20933 default:
20934 gcc_unreachable ();
20935 }
20936
20937 if (!pat)
20938 return 0;
20939
20940 emit_insn (pat);
20941
20942 return target;
20943 }
20944
20945 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20946 constants defined per-instruction or per instruction-variant. Instead, the
20947 required info is looked up in the table neon_builtin_data. */
20948 static rtx
20949 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20950 {
20951 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20952 neon_itype itype = d->itype;
20953 enum insn_code icode = d->code;
20954 neon_builtin_type_mode type_mode = d->mode;
20955
20956 switch (itype)
20957 {
20958 case NEON_UNOP:
20959 case NEON_CONVERT:
20960 case NEON_DUPLANE:
20961 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20962 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20963
20964 case NEON_BINOP:
20965 case NEON_SETLANE:
20966 case NEON_SCALARMUL:
20967 case NEON_SCALARMULL:
20968 case NEON_SCALARMULH:
20969 case NEON_SHIFTINSERT:
20970 case NEON_LOGICBINOP:
20971 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20972 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20973 NEON_ARG_STOP);
20974
20975 case NEON_TERNOP:
20976 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20977 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20978 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20979
20980 case NEON_GETLANE:
20981 case NEON_FIXCONV:
20982 case NEON_SHIFTIMM:
20983 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20984 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20985 NEON_ARG_STOP);
20986
20987 case NEON_CREATE:
20988 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20989 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20990
20991 case NEON_DUP:
20992 case NEON_SPLIT:
20993 case NEON_REINTERP:
20994 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20995 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20996
20997 case NEON_COMBINE:
20998 case NEON_VTBL:
20999 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21000 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
21001
21002 case NEON_RESULTPAIR:
21003 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
21004 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
21005 NEON_ARG_STOP);
21006
21007 case NEON_LANEMUL:
21008 case NEON_LANEMULL:
21009 case NEON_LANEMULH:
21010 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21011 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
21012 NEON_ARG_CONSTANT, NEON_ARG_STOP);
21013
21014 case NEON_LANEMAC:
21015 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21016 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
21017 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
21018
21019 case NEON_SHIFTACC:
21020 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21021 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
21022 NEON_ARG_CONSTANT, NEON_ARG_STOP);
21023
21024 case NEON_SCALARMAC:
21025 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21026 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
21027 NEON_ARG_CONSTANT, NEON_ARG_STOP);
21028
21029 case NEON_SELECT:
21030 case NEON_VTBX:
21031 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21032 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
21033 NEON_ARG_STOP);
21034
21035 case NEON_LOAD1:
21036 case NEON_LOADSTRUCT:
21037 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21038 NEON_ARG_MEMORY, NEON_ARG_STOP);
21039
21040 case NEON_LOAD1LANE:
21041 case NEON_LOADSTRUCTLANE:
21042 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
21043 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
21044 NEON_ARG_STOP);
21045
21046 case NEON_STORE1:
21047 case NEON_STORESTRUCT:
21048 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
21049 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
21050
21051 case NEON_STORE1LANE:
21052 case NEON_STORESTRUCTLANE:
21053 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
21054 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
21055 NEON_ARG_STOP);
21056 }
21057
21058 gcc_unreachable ();
21059 }
21060
21061 /* Emit code to reinterpret one Neon type as another, without altering bits. */
21062 void
21063 neon_reinterpret (rtx dest, rtx src)
21064 {
21065 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
21066 }
21067
21068 /* Emit code to place a Neon pair result in memory locations, storing the
21069 two result registers at consecutive addresses. */
21070 void
21071 neon_emit_pair_result_insn (enum machine_mode mode,
21072 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
21073 rtx op1, rtx op2)
21074 {
21075 rtx mem = gen_rtx_MEM (mode, destaddr);
21076 rtx tmp1 = gen_reg_rtx (mode);
21077 rtx tmp2 = gen_reg_rtx (mode);
21078
21079 emit_insn (intfn (tmp1, op1, op2, tmp2));
21080
21081 emit_move_insn (mem, tmp1);
21082 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
21083 emit_move_insn (mem, tmp2);
21084 }
21085
21086 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
21087 not to early-clobber SRC registers in the process.
21088
21089 We assume that the operands described by SRC and DEST represent a
21090 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
21091 number of components into which the copy has been decomposed. */
21092 void
21093 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
21094 {
21095 unsigned int i;
21096
21097 if (!reg_overlap_mentioned_p (operands[0], operands[1])
21098 || REGNO (operands[0]) < REGNO (operands[1]))
21099 {
21100 for (i = 0; i < count; i++)
21101 {
21102 operands[2 * i] = dest[i];
21103 operands[2 * i + 1] = src[i];
21104 }
21105 }
21106 else
21107 {
21108 for (i = 0; i < count; i++)
21109 {
21110 operands[2 * i] = dest[count - i - 1];
21111 operands[2 * i + 1] = src[count - i - 1];
21112 }
21113 }
21114 }
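
/* Editorial worked example: when DEST overlaps SRC and
   REGNO (operands[0]) > REGNO (operands[1]) -- a copy "upwards" -- the else
   branch above orders the component moves from the highest component down to
   the lowest, so each overlapping source register is read before it is
   overwritten.  With no overlap, or a copy "downwards", the straightforward
   low-to-high order is used.  */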
21115
21116 /* Split operands into moves from op[1] + op[2] into op[0]. */
21117
21118 void
21119 neon_split_vcombine (rtx operands[3])
21120 {
21121 unsigned int dest = REGNO (operands[0]);
21122 unsigned int src1 = REGNO (operands[1]);
21123 unsigned int src2 = REGNO (operands[2]);
21124 enum machine_mode halfmode = GET_MODE (operands[1]);
21125 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
21126 rtx destlo, desthi;
21127
21128 if (src1 == dest && src2 == dest + halfregs)
21129 {
21130 /* No-op move. Can't split to nothing; emit something. */
21131 emit_note (NOTE_INSN_DELETED);
21132 return;
21133 }
21134
21135 /* Preserve register attributes for variable tracking. */
21136 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
21137 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
21138 GET_MODE_SIZE (halfmode));
21139
21140 /* Special case of reversed high/low parts. Use VSWP. */
21141 if (src2 == dest && src1 == dest + halfregs)
21142 {
21143 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
21144 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
21145 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
21146 return;
21147 }
21148
21149 if (!reg_overlap_mentioned_p (operands[2], destlo))
21150 {
21151 /* Try to avoid unnecessary moves if part of the result
21152 is in the right place already. */
21153 if (src1 != dest)
21154 emit_move_insn (destlo, operands[1]);
21155 if (src2 != dest + halfregs)
21156 emit_move_insn (desthi, operands[2]);
21157 }
21158 else
21159 {
21160 if (src2 != dest + halfregs)
21161 emit_move_insn (desthi, operands[2]);
21162 if (src1 != dest)
21163 emit_move_insn (destlo, operands[1]);
21164 }
21165 }
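
/* Editorial example (register naming is illustrative): for a vcombine whose
   destination quad register overlaps its sources, operands[1] already in the
   low half and operands[2] already in the high half is the no-op case above;
   the two halves swapped is the VSWP case; anything else falls through to
   the two ordinary moves, ordered so that an overlapping source half is read
   before it is clobbered.  */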
21166
21167 /* Expand an expression EXP that calls a built-in function,
21168 with result going to TARGET if that's convenient
21169 (and in mode MODE if that's convenient).
21170 SUBTARGET may be used as the target for computing one of EXP's operands.
21171 IGNORE is nonzero if the value is to be ignored. */
21172
21173 static rtx
21174 arm_expand_builtin (tree exp,
21175 rtx target,
21176 rtx subtarget ATTRIBUTE_UNUSED,
21177 enum machine_mode mode ATTRIBUTE_UNUSED,
21178 int ignore ATTRIBUTE_UNUSED)
21179 {
21180 const struct builtin_description * d;
21181 enum insn_code icode;
21182 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
21183 tree arg0;
21184 tree arg1;
21185 tree arg2;
21186 rtx op0;
21187 rtx op1;
21188 rtx op2;
21189 rtx pat;
21190 int fcode = DECL_FUNCTION_CODE (fndecl);
21191 size_t i;
21192 enum machine_mode tmode;
21193 enum machine_mode mode0;
21194 enum machine_mode mode1;
21195 enum machine_mode mode2;
21196 int opint;
21197 int selector;
21198 int mask;
21199 int imm;
21200
21201 if (fcode >= ARM_BUILTIN_NEON_BASE)
21202 return arm_expand_neon_builtin (fcode, exp, target);
21203
21204 switch (fcode)
21205 {
21206 case ARM_BUILTIN_TEXTRMSB:
21207 case ARM_BUILTIN_TEXTRMUB:
21208 case ARM_BUILTIN_TEXTRMSH:
21209 case ARM_BUILTIN_TEXTRMUH:
21210 case ARM_BUILTIN_TEXTRMSW:
21211 case ARM_BUILTIN_TEXTRMUW:
21212 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
21213 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
21214 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
21215 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
21216 : CODE_FOR_iwmmxt_textrmw);
21217
21218 arg0 = CALL_EXPR_ARG (exp, 0);
21219 arg1 = CALL_EXPR_ARG (exp, 1);
21220 op0 = expand_normal (arg0);
21221 op1 = expand_normal (arg1);
21222 tmode = insn_data[icode].operand[0].mode;
21223 mode0 = insn_data[icode].operand[1].mode;
21224 mode1 = insn_data[icode].operand[2].mode;
21225
21226 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21227 op0 = copy_to_mode_reg (mode0, op0);
21228 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21229 {
21230 /* @@@ better error message */
21231 error ("selector must be an immediate");
21232 return gen_reg_rtx (tmode);
21233 }
21234
21235 opint = INTVAL (op1);
21236 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
21237 {
21238 if (opint > 7 || opint < 0)
21239 error ("the range of selector should be 0 to 7");
21240 }
21241 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
21242 {
21243 if (opint > 3 || opint < 0)
21244 error ("the range of selector should be 0 to 3");
21245 }
21246 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
21247 {
21248 if (opint > 1 || opint < 0)
21249 error ("the range of selector should be 0 to 1");
21250 }
21251
21252 if (target == 0
21253 || GET_MODE (target) != tmode
21254 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21255 target = gen_reg_rtx (tmode);
21256 pat = GEN_FCN (icode) (target, op0, op1);
21257 if (! pat)
21258 return 0;
21259 emit_insn (pat);
21260 return target;
21261
21262 case ARM_BUILTIN_WALIGNI:
21263 /* If op2 is immediate, call waligni, else call walignr. */
21264 arg0 = CALL_EXPR_ARG (exp, 0);
21265 arg1 = CALL_EXPR_ARG (exp, 1);
21266 arg2 = CALL_EXPR_ARG (exp, 2);
21267 op0 = expand_normal (arg0);
21268 op1 = expand_normal (arg1);
21269 op2 = expand_normal (arg2);
21270 if (CONST_INT_P (op2))
21271 {
21272 icode = CODE_FOR_iwmmxt_waligni;
21273 tmode = insn_data[icode].operand[0].mode;
21274 mode0 = insn_data[icode].operand[1].mode;
21275 mode1 = insn_data[icode].operand[2].mode;
21276 mode2 = insn_data[icode].operand[3].mode;
21277 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
21278 op0 = copy_to_mode_reg (mode0, op0);
21279 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
21280 op1 = copy_to_mode_reg (mode1, op1);
21281 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
21282 selector = INTVAL (op2);
21283 if (selector > 7 || selector < 0)
21284 error ("the range of selector should be 0 to 7");
21285 }
21286 else
21287 {
21288 icode = CODE_FOR_iwmmxt_walignr;
21289 tmode = insn_data[icode].operand[0].mode;
21290 mode0 = insn_data[icode].operand[1].mode;
21291 mode1 = insn_data[icode].operand[2].mode;
21292 mode2 = insn_data[icode].operand[3].mode;
21293 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
21294 op0 = copy_to_mode_reg (mode0, op0);
21295 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
21296 op1 = copy_to_mode_reg (mode1, op1);
21297 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
21298 op2 = copy_to_mode_reg (mode2, op2);
21299 }
21300 if (target == 0
21301 || GET_MODE (target) != tmode
21302 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
21303 target = gen_reg_rtx (tmode);
21304 pat = GEN_FCN (icode) (target, op0, op1, op2);
21305 if (!pat)
21306 return 0;
21307 emit_insn (pat);
21308 return target;
21309
21310 case ARM_BUILTIN_TINSRB:
21311 case ARM_BUILTIN_TINSRH:
21312 case ARM_BUILTIN_TINSRW:
21313 case ARM_BUILTIN_WMERGE:
21314 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
21315 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
21316 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
21317 : CODE_FOR_iwmmxt_tinsrw);
21318 arg0 = CALL_EXPR_ARG (exp, 0);
21319 arg1 = CALL_EXPR_ARG (exp, 1);
21320 arg2 = CALL_EXPR_ARG (exp, 2);
21321 op0 = expand_normal (arg0);
21322 op1 = expand_normal (arg1);
21323 op2 = expand_normal (arg2);
21324 tmode = insn_data[icode].operand[0].mode;
21325 mode0 = insn_data[icode].operand[1].mode;
21326 mode1 = insn_data[icode].operand[2].mode;
21327 mode2 = insn_data[icode].operand[3].mode;
21328
21329 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21330 op0 = copy_to_mode_reg (mode0, op0);
21331 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21332 op1 = copy_to_mode_reg (mode1, op1);
21333 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21334 {
21335 error ("selector must be an immediate");
21336 return const0_rtx;
21337 }
21338 if (icode == CODE_FOR_iwmmxt_wmerge)
21339 {
21340 selector = INTVAL (op2);
21341 if (selector > 7 || selector < 0)
21342 error ("the range of selector should be 0 to 7");
21343 }
21344 if ((icode == CODE_FOR_iwmmxt_tinsrb)
21345 || (icode == CODE_FOR_iwmmxt_tinsrh)
21346 || (icode == CODE_FOR_iwmmxt_tinsrw))
21347 {
21348 mask = 0x01;
21349 selector = INTVAL (op2);
21350 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
21351 error ("the range of selector should be 0 to 7");
21352 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
21353 error ("the range of selector should be 0 to 3");
21354 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
21355 error ("the range of selector should be 0 to 1");
21356 mask <<= selector;
21357 op2 = GEN_INT (mask);
21358 }
21359 if (target == 0
21360 || GET_MODE (target) != tmode
21361 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21362 target = gen_reg_rtx (tmode);
21363 pat = GEN_FCN (icode) (target, op0, op1, op2);
21364 if (! pat)
21365 return 0;
21366 emit_insn (pat);
21367 return target;
21368
21369 case ARM_BUILTIN_SETWCGR0:
21370 case ARM_BUILTIN_SETWCGR1:
21371 case ARM_BUILTIN_SETWCGR2:
21372 case ARM_BUILTIN_SETWCGR3:
21373 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
21374 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
21375 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
21376 : CODE_FOR_iwmmxt_setwcgr3);
21377 arg0 = CALL_EXPR_ARG (exp, 0);
21378 op0 = expand_normal (arg0);
21379 mode0 = insn_data[icode].operand[0].mode;
21380 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
21381 op0 = copy_to_mode_reg (mode0, op0);
21382 pat = GEN_FCN (icode) (op0);
21383 if (!pat)
21384 return 0;
21385 emit_insn (pat);
21386 return 0;
21387
21388 case ARM_BUILTIN_GETWCGR0:
21389 case ARM_BUILTIN_GETWCGR1:
21390 case ARM_BUILTIN_GETWCGR2:
21391 case ARM_BUILTIN_GETWCGR3:
21392 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
21393 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
21394 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
21395 : CODE_FOR_iwmmxt_getwcgr3);
21396 tmode = insn_data[icode].operand[0].mode;
21397 if (target == 0
21398 || GET_MODE (target) != tmode
21399 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
21400 target = gen_reg_rtx (tmode);
21401 pat = GEN_FCN (icode) (target);
21402 if (!pat)
21403 return 0;
21404 emit_insn (pat);
21405 return target;
21406
21407 case ARM_BUILTIN_WSHUFH:
21408 icode = CODE_FOR_iwmmxt_wshufh;
21409 arg0 = CALL_EXPR_ARG (exp, 0);
21410 arg1 = CALL_EXPR_ARG (exp, 1);
21411 op0 = expand_normal (arg0);
21412 op1 = expand_normal (arg1);
21413 tmode = insn_data[icode].operand[0].mode;
21414 mode1 = insn_data[icode].operand[1].mode;
21415 mode2 = insn_data[icode].operand[2].mode;
21416
21417 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21418 op0 = copy_to_mode_reg (mode1, op0);
21419 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21420 {
21421 error ("mask must be an immediate");
21422 return const0_rtx;
21423 }
21424 selector = INTVAL (op1);
21425 if (selector < 0 || selector > 255)
21426 error ("the range of mask should be 0 to 255");
21427 if (target == 0
21428 || GET_MODE (target) != tmode
21429 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21430 target = gen_reg_rtx (tmode);
21431 pat = GEN_FCN (icode) (target, op0, op1);
21432 if (! pat)
21433 return 0;
21434 emit_insn (pat);
21435 return target;
21436
21437 case ARM_BUILTIN_WMADDS:
21438 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
21439 case ARM_BUILTIN_WMADDSX:
21440 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
21441 case ARM_BUILTIN_WMADDSN:
21442 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
21443 case ARM_BUILTIN_WMADDU:
21444 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
21445 case ARM_BUILTIN_WMADDUX:
21446 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
21447 case ARM_BUILTIN_WMADDUN:
21448 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
21449 case ARM_BUILTIN_WSADBZ:
21450 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21451 case ARM_BUILTIN_WSADHZ:
21452 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21453
21454 /* Several three-argument builtins. */
21455 case ARM_BUILTIN_WMACS:
21456 case ARM_BUILTIN_WMACU:
21457 case ARM_BUILTIN_TMIA:
21458 case ARM_BUILTIN_TMIAPH:
21459 case ARM_BUILTIN_TMIATT:
21460 case ARM_BUILTIN_TMIATB:
21461 case ARM_BUILTIN_TMIABT:
21462 case ARM_BUILTIN_TMIABB:
21463 case ARM_BUILTIN_WQMIABB:
21464 case ARM_BUILTIN_WQMIABT:
21465 case ARM_BUILTIN_WQMIATB:
21466 case ARM_BUILTIN_WQMIATT:
21467 case ARM_BUILTIN_WQMIABBN:
21468 case ARM_BUILTIN_WQMIABTN:
21469 case ARM_BUILTIN_WQMIATBN:
21470 case ARM_BUILTIN_WQMIATTN:
21471 case ARM_BUILTIN_WMIABB:
21472 case ARM_BUILTIN_WMIABT:
21473 case ARM_BUILTIN_WMIATB:
21474 case ARM_BUILTIN_WMIATT:
21475 case ARM_BUILTIN_WMIABBN:
21476 case ARM_BUILTIN_WMIABTN:
21477 case ARM_BUILTIN_WMIATBN:
21478 case ARM_BUILTIN_WMIATTN:
21479 case ARM_BUILTIN_WMIAWBB:
21480 case ARM_BUILTIN_WMIAWBT:
21481 case ARM_BUILTIN_WMIAWTB:
21482 case ARM_BUILTIN_WMIAWTT:
21483 case ARM_BUILTIN_WMIAWBBN:
21484 case ARM_BUILTIN_WMIAWBTN:
21485 case ARM_BUILTIN_WMIAWTBN:
21486 case ARM_BUILTIN_WMIAWTTN:
21487 case ARM_BUILTIN_WSADB:
21488 case ARM_BUILTIN_WSADH:
21489 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21490 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21491 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21492 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21493 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21494 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21495 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21496 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21497 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
21498 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
21499 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
21500 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
21501 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
21502 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
21503 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
21504 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
21505 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
21506 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
21507 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
21508 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
21509 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
21510 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
21511 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
21512 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
21513 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
21514 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
21515 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
21516 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
21517 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
21518 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
21519 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
21520 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
21521 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
21522 : CODE_FOR_iwmmxt_wsadh);
21523 arg0 = CALL_EXPR_ARG (exp, 0);
21524 arg1 = CALL_EXPR_ARG (exp, 1);
21525 arg2 = CALL_EXPR_ARG (exp, 2);
21526 op0 = expand_normal (arg0);
21527 op1 = expand_normal (arg1);
21528 op2 = expand_normal (arg2);
21529 tmode = insn_data[icode].operand[0].mode;
21530 mode0 = insn_data[icode].operand[1].mode;
21531 mode1 = insn_data[icode].operand[2].mode;
21532 mode2 = insn_data[icode].operand[3].mode;
21533
21534 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21535 op0 = copy_to_mode_reg (mode0, op0);
21536 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21537 op1 = copy_to_mode_reg (mode1, op1);
21538 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21539 op2 = copy_to_mode_reg (mode2, op2);
21540 if (target == 0
21541 || GET_MODE (target) != tmode
21542 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21543 target = gen_reg_rtx (tmode);
21544 pat = GEN_FCN (icode) (target, op0, op1, op2);
21545 if (! pat)
21546 return 0;
21547 emit_insn (pat);
21548 return target;
21549
21550 case ARM_BUILTIN_WZERO:
21551 target = gen_reg_rtx (DImode);
21552 emit_insn (gen_iwmmxt_clrdi (target));
21553 return target;
21554
21555 case ARM_BUILTIN_WSRLHI:
21556 case ARM_BUILTIN_WSRLWI:
21557 case ARM_BUILTIN_WSRLDI:
21558 case ARM_BUILTIN_WSLLHI:
21559 case ARM_BUILTIN_WSLLWI:
21560 case ARM_BUILTIN_WSLLDI:
21561 case ARM_BUILTIN_WSRAHI:
21562 case ARM_BUILTIN_WSRAWI:
21563 case ARM_BUILTIN_WSRADI:
21564 case ARM_BUILTIN_WRORHI:
21565 case ARM_BUILTIN_WRORWI:
21566 case ARM_BUILTIN_WRORDI:
21567 case ARM_BUILTIN_WSRLH:
21568 case ARM_BUILTIN_WSRLW:
21569 case ARM_BUILTIN_WSRLD:
21570 case ARM_BUILTIN_WSLLH:
21571 case ARM_BUILTIN_WSLLW:
21572 case ARM_BUILTIN_WSLLD:
21573 case ARM_BUILTIN_WSRAH:
21574 case ARM_BUILTIN_WSRAW:
21575 case ARM_BUILTIN_WSRAD:
21576 case ARM_BUILTIN_WRORH:
21577 case ARM_BUILTIN_WRORW:
21578 case ARM_BUILTIN_WRORD:
21579 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
21580 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
21581 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
21582 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
21583 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
21584 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
21585 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
21586 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
21587 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
21588 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
21589 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
21590 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
21591 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
21592 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
21593 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
21594 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
21595 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
21596 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
21597 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
21598 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
21599 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
21600 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
21601 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
21602 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
21603 : CODE_FOR_nothing);
21604 arg1 = CALL_EXPR_ARG (exp, 1);
21605 op1 = expand_normal (arg1);
21606 if (GET_MODE (op1) == VOIDmode)
21607 {
21608 imm = INTVAL (op1);
21609 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
21610 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
21611 && (imm < 0 || imm > 32))
21612 {
21613 if (fcode == ARM_BUILTIN_WRORHI)
21614 		    error ("the count should be in the range 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
21615 		  else if (fcode == ARM_BUILTIN_WRORWI)
21616 		    error ("the count should be in the range 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
21617 		  else if (fcode == ARM_BUILTIN_WRORH)
21618 		    error ("the count should be in the range 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
21619 		  else
21620 		    error ("the count should be in the range 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
21621 }
21622 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
21623 && (imm < 0 || imm > 64))
21624 {
21625 if (fcode == ARM_BUILTIN_WRORDI)
21626 		    error ("the count should be in the range 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
21627 		  else
21628 		    error ("the count should be in the range 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
21629 }
21630 else if (imm < 0)
21631 {
21632 if (fcode == ARM_BUILTIN_WSRLHI)
21633 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
21634 else if (fcode == ARM_BUILTIN_WSRLWI)
21635 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
21636 else if (fcode == ARM_BUILTIN_WSRLDI)
21637 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
21638 else if (fcode == ARM_BUILTIN_WSLLHI)
21639 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
21640 else if (fcode == ARM_BUILTIN_WSLLWI)
21641 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
21642 else if (fcode == ARM_BUILTIN_WSLLDI)
21643 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
21644 else if (fcode == ARM_BUILTIN_WSRAHI)
21645 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
21646 else if (fcode == ARM_BUILTIN_WSRAWI)
21647 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
21648 else if (fcode == ARM_BUILTIN_WSRADI)
21649 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
21650 else if (fcode == ARM_BUILTIN_WSRLH)
21651 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
21652 else if (fcode == ARM_BUILTIN_WSRLW)
21653 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
21654 else if (fcode == ARM_BUILTIN_WSRLD)
21655 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
21656 else if (fcode == ARM_BUILTIN_WSLLH)
21657 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
21658 else if (fcode == ARM_BUILTIN_WSLLW)
21659 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
21660 else if (fcode == ARM_BUILTIN_WSLLD)
21661 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
21662 else if (fcode == ARM_BUILTIN_WSRAH)
21663 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
21664 else if (fcode == ARM_BUILTIN_WSRAW)
21665 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
21666 else
21667 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
21668 }
21669 }
21670 return arm_expand_binop_builtin (icode, exp, target);
21671
21672 default:
21673 break;
21674 }
21675
21676 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21677 if (d->code == (const enum arm_builtins) fcode)
21678 return arm_expand_binop_builtin (d->icode, exp, target);
21679
21680 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21681 if (d->code == (const enum arm_builtins) fcode)
21682 return arm_expand_unop_builtin (d->icode, exp, target, 0);
21683
21684 /* @@@ Should really do something sensible here. */
21685 return NULL_RTX;
21686 }
21687 \f
21688 /* Return the number (counting from 0) of
21689 the least significant set bit in MASK. */
21690
21691 inline static int
21692 number_of_first_bit_set (unsigned mask)
21693 {
21694 return ctz_hwi (mask);
21695 }
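/* Illustrative note (added commentary, not in the original source): for a
   register mask such as 0x28 (binary 101000, i.e. r3 and r5), the least
   significant set bit is bit 3, so number_of_first_bit_set returns 3.  The
   callers below rely on this to find the lowest-numbered register that was
   just popped.  */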
21696
21697 /* Like emit_multi_reg_push, but allowing for a different set of
21698 registers to be described as saved. MASK is the set of registers
21699 to be saved; REAL_REGS is the set of registers to be described as
21700 saved. If REAL_REGS is 0, only describe the stack adjustment. */
21701
21702 static rtx
21703 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21704 {
21705 unsigned long regno;
21706 rtx par[10], tmp, reg, insn;
21707 int i, j;
21708
21709 /* Build the parallel of the registers actually being stored. */
21710 for (i = 0; mask; ++i, mask &= mask - 1)
21711 {
21712 regno = ctz_hwi (mask);
21713 reg = gen_rtx_REG (SImode, regno);
21714
21715 if (i == 0)
21716 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21717 else
21718 tmp = gen_rtx_USE (VOIDmode, reg);
21719
21720 par[i] = tmp;
21721 }
21722
21723 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21724 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21725 tmp = gen_frame_mem (BLKmode, tmp);
21726 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21727 par[0] = tmp;
21728
21729 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21730 insn = emit_insn (tmp);
21731
21732 /* Always build the stack adjustment note for unwind info. */
21733 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21734 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21735 par[0] = tmp;
21736
21737 /* Build the parallel of the registers recorded as saved for unwind. */
21738 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21739 {
21740 regno = ctz_hwi (real_regs);
21741 reg = gen_rtx_REG (SImode, regno);
21742
21743 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
21744 tmp = gen_frame_mem (SImode, tmp);
21745 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21746 RTX_FRAME_RELATED_P (tmp) = 1;
21747 par[j + 1] = tmp;
21748 }
21749
21750 if (j == 0)
21751 tmp = par[0];
21752 else
21753 {
21754 RTX_FRAME_RELATED_P (par[0]) = 1;
21755 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21756 }
21757
21758 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
21759
21760 return insn;
21761 }
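/* Illustrative example (added for exposition, not part of the original
   code): thumb1_expand_prologue below calls
   thumb1_emit_multi_reg_push (mask, 0) when pushing the anonymous argument
   registers, so the emitted insn stores, say, r0-r3 while the
   REG_FRAME_RELATED_EXPR note records only the stack-pointer adjustment.
   When pushing callee-saved registers it passes the same mask twice, so
   each stored register is also described to the unwinder.  */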
21762
21763 /* Emit code to pop registers from the stack.  F is the
21764 assembly file.  MASK is the registers to pop.  */
21765 static void
21766 thumb_pop (FILE *f, unsigned long mask)
21767 {
21768 int regno;
21769 int lo_mask = mask & 0xFF;
21770 int pushed_words = 0;
21771
21772 gcc_assert (mask);
21773
21774 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
21775 {
21776 /* Special case.  Do not generate a POP PC statement here; do it in
21777 thumb_exit ().  */
21778 thumb_exit (f, -1);
21779 return;
21780 }
21781
21782 fprintf (f, "\tpop\t{");
21783
21784 /* Look at the low registers first. */
21785 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21786 {
21787 if (lo_mask & 1)
21788 {
21789 asm_fprintf (f, "%r", regno);
21790
21791 if ((lo_mask & ~1) != 0)
21792 fprintf (f, ", ");
21793
21794 pushed_words++;
21795 }
21796 }
21797
21798 if (mask & (1 << PC_REGNUM))
21799 {
21800 /* Catch popping the PC. */
21801 if (TARGET_INTERWORK || TARGET_BACKTRACE
21802 || crtl->calls_eh_return)
21803 {
21804 /* The PC is never popped directly; instead
21805 it is popped into r3 and then BX is used.  */
21806 fprintf (f, "}\n");
21807
21808 thumb_exit (f, -1);
21809
21810 return;
21811 }
21812 else
21813 {
21814 if (mask & 0xFF)
21815 fprintf (f, ", ");
21816
21817 asm_fprintf (f, "%r", PC_REGNUM);
21818 }
21819 }
21820
21821 fprintf (f, "}\n");
21822 }
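/* Worked example (added for clarity, not in the original source): for
   MASK = (1 << 4) | (1 << 5) | (1 << 7) this emits "pop {r4, r5, r7}".
   If the PC bit is also set and neither interworking, backtraces nor
   __builtin_eh_return are in use, PC is simply appended to the list;
   otherwise the pop is closed and thumb_exit handles the return through a
   low register and BX.  */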
21823
21824 /* Generate code to return from a thumb function.
21825 If 'reg_containing_return_addr' is -1, then the return address is
21826 actually on the stack, at the stack pointer. */
21827 static void
21828 thumb_exit (FILE *f, int reg_containing_return_addr)
21829 {
21830 unsigned regs_available_for_popping;
21831 unsigned regs_to_pop;
21832 int pops_needed;
21833 unsigned available;
21834 unsigned required;
21835 int mode;
21836 int size;
21837 int restore_a4 = FALSE;
21838
21839 /* Compute the registers we need to pop. */
21840 regs_to_pop = 0;
21841 pops_needed = 0;
21842
21843 if (reg_containing_return_addr == -1)
21844 {
21845 regs_to_pop |= 1 << LR_REGNUM;
21846 ++pops_needed;
21847 }
21848
21849 if (TARGET_BACKTRACE)
21850 {
21851 /* Restore the (ARM) frame pointer and stack pointer. */
21852 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21853 pops_needed += 2;
21854 }
21855
21856 /* If there is nothing to pop then just emit the BX instruction and
21857 return. */
21858 if (pops_needed == 0)
21859 {
21860 if (crtl->calls_eh_return)
21861 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21862
21863 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21864 return;
21865 }
21866 /* Otherwise if we are not supporting interworking and we have not created
21867 a backtrace structure and the function was not entered in ARM mode then
21868 just pop the return address straight into the PC. */
21869 else if (!TARGET_INTERWORK
21870 && !TARGET_BACKTRACE
21871 && !is_called_in_ARM_mode (current_function_decl)
21872 && !crtl->calls_eh_return)
21873 {
21874 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21875 return;
21876 }
21877
21878 /* Find out how many of the (return) argument registers we can corrupt. */
21879 regs_available_for_popping = 0;
21880
21881 /* If returning via __builtin_eh_return, the bottom three registers
21882 all contain information needed for the return. */
21883 if (crtl->calls_eh_return)
21884 size = 12;
21885 else
21886 {
21887 /* We can deduce the registers used from the function's
21888 return value.  This is more reliable than examining
21889 df_regs_ever_live_p () because that will be set if the register is
21890 ever used in the function, not just if the register is used
21891 to hold a return value. */
21892
21893 if (crtl->return_rtx != 0)
21894 mode = GET_MODE (crtl->return_rtx);
21895 else
21896 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21897
21898 size = GET_MODE_SIZE (mode);
21899
21900 if (size == 0)
21901 {
21902 /* In a void function we can use any argument register.
21903 In a function that returns a structure on the stack
21904 we can use the second and third argument registers. */
21905 if (mode == VOIDmode)
21906 regs_available_for_popping =
21907 (1 << ARG_REGISTER (1))
21908 | (1 << ARG_REGISTER (2))
21909 | (1 << ARG_REGISTER (3));
21910 else
21911 regs_available_for_popping =
21912 (1 << ARG_REGISTER (2))
21913 | (1 << ARG_REGISTER (3));
21914 }
21915 else if (size <= 4)
21916 regs_available_for_popping =
21917 (1 << ARG_REGISTER (2))
21918 | (1 << ARG_REGISTER (3));
21919 else if (size <= 8)
21920 regs_available_for_popping =
21921 (1 << ARG_REGISTER (3));
21922 }
21923
21924 /* Match registers to be popped with registers into which we pop them. */
21925 for (available = regs_available_for_popping,
21926 required = regs_to_pop;
21927 required != 0 && available != 0;
21928 available &= ~(available & - available),
21929 required &= ~(required & - required))
21930 -- pops_needed;
21931
21932 /* If we have any popping registers left over, remove them. */
21933 if (available > 0)
21934 regs_available_for_popping &= ~available;
21935
21936 /* Otherwise if we need another popping register we can use
21937 the fourth argument register. */
21938 else if (pops_needed)
21939 {
21940 /* If we have not found any free argument registers and
21941 reg a4 contains the return address, we must move it. */
21942 if (regs_available_for_popping == 0
21943 && reg_containing_return_addr == LAST_ARG_REGNUM)
21944 {
21945 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21946 reg_containing_return_addr = LR_REGNUM;
21947 }
21948 else if (size > 12)
21949 {
21950 /* Register a4 is being used to hold part of the return value,
21951 but we have dire need of a free, low register. */
21952 restore_a4 = TRUE;
21953
21954 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
21955 }
21956
21957 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21958 {
21959 /* The fourth argument register is available. */
21960 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21961
21962 --pops_needed;
21963 }
21964 }
21965
21966 /* Pop as many registers as we can. */
21967 thumb_pop (f, regs_available_for_popping);
21968
21969 /* Process the registers we popped. */
21970 if (reg_containing_return_addr == -1)
21971 {
21972 /* The return address was popped into the lowest numbered register. */
21973 regs_to_pop &= ~(1 << LR_REGNUM);
21974
21975 reg_containing_return_addr =
21976 number_of_first_bit_set (regs_available_for_popping);
21977
21978 /* Remove this register from the mask of available registers, so that
21979 the return address will not be corrupted by further pops. */
21980 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21981 }
21982
21983 /* If we popped other registers then handle them here. */
21984 if (regs_available_for_popping)
21985 {
21986 int frame_pointer;
21987
21988 /* Work out which register currently contains the frame pointer. */
21989 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21990
21991 /* Move it into the correct place. */
21992 asm_fprintf (f, "\tmov\t%r, %r\n",
21993 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21994
21995 /* (Temporarily) remove it from the mask of popped registers. */
21996 regs_available_for_popping &= ~(1 << frame_pointer);
21997 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21998
21999 if (regs_available_for_popping)
22000 {
22001 int stack_pointer;
22002
22003 /* We popped the stack pointer as well,
22004 find the register that contains it. */
22005 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
22006
22007 /* Move it into the stack register. */
22008 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
22009
22010 /* At this point we have popped all necessary registers, so
22011 do not worry about restoring regs_available_for_popping
22012 to its correct value:
22013
22014 assert (pops_needed == 0)
22015 assert (regs_available_for_popping == (1 << frame_pointer))
22016 assert (regs_to_pop == (1 << STACK_POINTER)) */
22017 }
22018 else
22019 {
22020 /* Since we have just moved the popped value into the frame
22021 pointer, the popping register is available for reuse, and
22022 we know that we still have the stack pointer left to pop. */
22023 regs_available_for_popping |= (1 << frame_pointer);
22024 }
22025 }
22026
22027 /* If we still have registers left on the stack, but we no longer have
22028 any registers into which we can pop them, then we must move the return
22029 address into the link register and make available the register that
22030 contained it. */
22031 if (regs_available_for_popping == 0 && pops_needed > 0)
22032 {
22033 regs_available_for_popping |= 1 << reg_containing_return_addr;
22034
22035 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
22036 reg_containing_return_addr);
22037
22038 reg_containing_return_addr = LR_REGNUM;
22039 }
22040
22041 /* If we have registers left on the stack then pop some more.
22042 We know that at most we will want to pop FP and SP. */
22043 if (pops_needed > 0)
22044 {
22045 int popped_into;
22046 int move_to;
22047
22048 thumb_pop (f, regs_available_for_popping);
22049
22050 /* We have popped either FP or SP.
22051 Move whichever one it is into the correct register. */
22052 popped_into = number_of_first_bit_set (regs_available_for_popping);
22053 move_to = number_of_first_bit_set (regs_to_pop);
22054
22055 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
22056
22057 regs_to_pop &= ~(1 << move_to);
22058
22059 --pops_needed;
22060 }
22061
22062 /* If we still have not popped everything then we must have only
22063 had one register available to us and we are now popping the SP. */
22064 if (pops_needed > 0)
22065 {
22066 int popped_into;
22067
22068 thumb_pop (f, regs_available_for_popping);
22069
22070 popped_into = number_of_first_bit_set (regs_available_for_popping);
22071
22072 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
22073 /*
22074 assert (regs_to_pop == (1 << STACK_POINTER))
22075 assert (pops_needed == 1)
22076 */
22077 }
22078
22079 /* If necessary restore the a4 register. */
22080 if (restore_a4)
22081 {
22082 if (reg_containing_return_addr != LR_REGNUM)
22083 {
22084 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
22085 reg_containing_return_addr = LR_REGNUM;
22086 }
22087
22088 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
22089 }
22090
22091 if (crtl->calls_eh_return)
22092 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
22093
22094 /* Return to caller. */
22095 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
22096 }
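/* Rough sketch of a common case (added commentary, not from the original
   source): in a void function with the return address still on the stack,
   r0-r2 may be corrupted, so the code above pops the return address into
   r0 and finishes with

	pop	{r0}
	bx	r0

   When TARGET_BACKTRACE is set, FP and SP also have to be popped and moved
   into place with additional "mov" instructions before the final BX.  */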
22097 \f
22098 /* Scan INSN just before assembler is output for it.
22099 For Thumb-1, we track the status of the condition codes; this
22100 information is used in the cbranchsi4_insn pattern. */
22101 void
22102 thumb1_final_prescan_insn (rtx insn)
22103 {
22104 if (flag_print_asm_name)
22105 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
22106 INSN_ADDRESSES (INSN_UID (insn)));
22107 /* Don't overwrite the previous setter when we get to a cbranch. */
22108 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
22109 {
22110 enum attr_conds conds;
22111
22112 if (cfun->machine->thumb1_cc_insn)
22113 {
22114 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
22115 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
22116 CC_STATUS_INIT;
22117 }
22118 conds = get_attr_conds (insn);
22119 if (conds == CONDS_SET)
22120 {
22121 rtx set = single_set (insn);
22122 cfun->machine->thumb1_cc_insn = insn;
22123 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
22124 cfun->machine->thumb1_cc_op1 = const0_rtx;
22125 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
22126 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
22127 {
22128 rtx src1 = XEXP (SET_SRC (set), 1);
22129 if (src1 == const0_rtx)
22130 cfun->machine->thumb1_cc_mode = CCmode;
22131 }
22132 }
22133 else if (conds != CONDS_NOCOND)
22134 cfun->machine->thumb1_cc_insn = NULL_RTX;
22135 }
22136 }
22137
22138 int
22139 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
22140 {
22141 unsigned HOST_WIDE_INT mask = 0xff;
22142 int i;
22143
22144 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
22145 if (val == 0) /* XXX */
22146 return 0;
22147
22148 for (i = 0; i < 25; i++)
22149 if ((val & (mask << i)) == val)
22150 return 1;
22151
22152 return 0;
22153 }
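/* Worked example (added note, not in the original source): the loop accepts
   any value that, truncated to 32 bits, is an 8-bit constant shifted left
   by 0 to 24 bits.  So 0x00ff0000 (0xff << 16) returns 1, while 0x00000101
   returns 0 because its set bits span more than eight contiguous
   positions.  Such constants can be materialised in Thumb-1 with a MOV
   immediate followed by a left shift.  */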
22154
22155 /* Returns nonzero if the current function contains,
22156 or might contain a far jump. */
22157 static int
22158 thumb_far_jump_used_p (void)
22159 {
22160 rtx insn;
22161
22162 /* This test is only important for leaf functions. */
22163 /* assert (!leaf_function_p ()); */
22164
22165 /* If we have already decided that far jumps may be used,
22166 do not bother checking again, and always return true even if
22167 it turns out that they are not being used. Once we have made
22168 the decision that far jumps are present (and that hence the link
22169 register will be pushed onto the stack) we cannot go back on it. */
22170 if (cfun->machine->far_jump_used)
22171 return 1;
22172
22173 /* If this function is not being called from the prologue/epilogue
22174 generation code then it must be being called from the
22175 INITIAL_ELIMINATION_OFFSET macro. */
22176 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
22177 {
22178 /* In this case we know that we are being asked about the elimination
22179 of the arg pointer register. If that register is not being used,
22180 then there are no arguments on the stack, and we do not have to
22181 worry that a far jump might force the prologue to push the link
22182 register, changing the stack offsets. In this case we can just
22183 return false, since the presence of far jumps in the function will
22184 not affect stack offsets.
22185
22186 If the arg pointer is live (or if it was live, but has now been
22187 eliminated and so set to dead) then we do have to test to see if
22188 the function might contain a far jump. This test can lead to some
22189 false negatives, since before reload is completed, the length of
22190 branch instructions is not known, so gcc defaults to returning their
22191 longest length, which in turn sets the far jump attribute to true.
22192
22193 A false negative will not result in bad code being generated, but it
22194 will result in a needless push and pop of the link register. We
22195 hope that this does not occur too often.
22196
22197 If we need doubleword stack alignment this could affect the other
22198 elimination offsets so we can't risk getting it wrong. */
22199 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
22200 cfun->machine->arg_pointer_live = 1;
22201 else if (!cfun->machine->arg_pointer_live)
22202 return 0;
22203 }
22204
22205 /* Check to see if the function contains a branch
22206 insn with the far jump attribute set. */
22207 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22208 {
22209 if (JUMP_P (insn)
22210 /* Ignore tablejump patterns. */
22211 && GET_CODE (PATTERN (insn)) != ADDR_VEC
22212 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
22213 && get_attr_far_jump (insn) == FAR_JUMP_YES
22214 )
22215 {
22216 /* Record the fact that we have decided that
22217 the function does use far jumps. */
22218 cfun->machine->far_jump_used = 1;
22219 return 1;
22220 }
22221 }
22222
22223 return 0;
22224 }
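/* Background note (added commentary; figures are approximate): a "far" jump
   here is a branch whose target is out of range of the Thumb-1 B
   instruction (roughly +/-2KB for the unconditional form, much less for
   conditional branches), so it has to be synthesised with BL, which
   clobbers LR.  That is why deciding that far jumps exist forces LR to be
   saved in the prologue, and why the decision is sticky once made.  */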
22225
22226 /* Return nonzero if FUNC must be entered in ARM mode. */
22227 int
22228 is_called_in_ARM_mode (tree func)
22229 {
22230 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
22231
22232 /* Ignore the problem about functions whose address is taken. */
22233 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
22234 return TRUE;
22235
22236 #ifdef ARM_PE
22237 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
22238 #else
22239 return FALSE;
22240 #endif
22241 }
22242
22243 /* Given the stack offsets and register mask in OFFSETS, decide how
22244 many additional registers to push instead of subtracting a constant
22245 from SP. For epilogues the principle is the same except we use pop.
22246 FOR_PROLOGUE indicates which we're generating. */
22247 static int
22248 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
22249 {
22250 HOST_WIDE_INT amount;
22251 unsigned long live_regs_mask = offsets->saved_regs_mask;
22252 /* Extract a mask of the ones we can give to the Thumb's push/pop
22253 instruction. */
22254 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
22255 /* Then count how many other high registers will need to be pushed. */
22256 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22257 int n_free, reg_base, size;
22258
22259 if (!for_prologue && frame_pointer_needed)
22260 amount = offsets->locals_base - offsets->saved_regs;
22261 else
22262 amount = offsets->outgoing_args - offsets->saved_regs;
22263
22264 /* If the stack frame size is 512 exactly, we can save one load
22265 instruction, which should make this a win even when optimizing
22266 for speed. */
22267 if (!optimize_size && amount != 512)
22268 return 0;
22269
22270 /* Can't do this if there are high registers to push. */
22271 if (high_regs_pushed != 0)
22272 return 0;
22273
22274 /* Shouldn't do it in the prologue if no registers would normally
22275 be pushed at all. In the epilogue, also allow it if we'll have
22276 a pop insn for the PC. */
22277 if (l_mask == 0
22278 && (for_prologue
22279 || TARGET_BACKTRACE
22280 || (live_regs_mask & 1 << LR_REGNUM) == 0
22281 || TARGET_INTERWORK
22282 || crtl->args.pretend_args_size != 0))
22283 return 0;
22284
22285 /* Don't do this if thumb_expand_prologue wants to emit instructions
22286 between the push and the stack frame allocation. */
22287 if (for_prologue
22288 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
22289 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
22290 return 0;
22291
22292 reg_base = 0;
22293 n_free = 0;
22294 if (!for_prologue)
22295 {
22296 size = arm_size_return_regs ();
22297 reg_base = ARM_NUM_INTS (size);
22298 live_regs_mask >>= reg_base;
22299 }
22300
22301 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
22302 && (for_prologue || call_used_regs[reg_base + n_free]))
22303 {
22304 live_regs_mask >>= 1;
22305 n_free++;
22306 }
22307
22308 if (n_free == 0)
22309 return 0;
22310 gcc_assert (amount / 4 * 4 == amount);
22311
22312 if (amount >= 512 && (amount - n_free * 4) < 512)
22313 return (amount - 508) / 4;
22314 if (amount <= n_free * 4)
22315 return amount / 4;
22316 return 0;
22317 }
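/* Worked example (added for clarity, not in the original source): the
   largest immediate a Thumb-1 "sub sp, #imm" can take is 508, so a frame of
   exactly 512 bytes would otherwise need an extra instruction.  With, say,
   two free low registers (n_free == 2) the test
   "amount >= 512 && amount - n_free * 4 < 512" holds and the function
   returns (512 - 508) / 4 == 1: one extra register is pushed, shrinking the
   remaining adjustment to 508.  */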
22318
22319 /* The bits which aren't usefully expanded as rtl. */
22320 const char *
22321 thumb1_unexpanded_epilogue (void)
22322 {
22323 arm_stack_offsets *offsets;
22324 int regno;
22325 unsigned long live_regs_mask = 0;
22326 int high_regs_pushed = 0;
22327 int extra_pop;
22328 int had_to_push_lr;
22329 int size;
22330
22331 if (cfun->machine->return_used_this_function != 0)
22332 return "";
22333
22334 if (IS_NAKED (arm_current_func_type ()))
22335 return "";
22336
22337 offsets = arm_get_frame_offsets ();
22338 live_regs_mask = offsets->saved_regs_mask;
22339 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22340
22341 /* We can deduce the registers used from the function's return value.
22342 This is more reliable than examining df_regs_ever_live_p () because that
22343 will be set if the register is ever used in the function, not just if
22344 the register is used to hold a return value. */
22345 size = arm_size_return_regs ();
22346
22347 extra_pop = thumb1_extra_regs_pushed (offsets, false);
22348 if (extra_pop > 0)
22349 {
22350 unsigned long extra_mask = (1 << extra_pop) - 1;
22351 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
22352 }
22353
22354 /* The prolog may have pushed some high registers to use as
22355 work registers. e.g. the testsuite file:
22356 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
22357 compiles to produce:
22358 push {r4, r5, r6, r7, lr}
22359 mov r7, r9
22360 mov r6, r8
22361 push {r6, r7}
22362 as part of the prolog. We have to undo that pushing here. */
22363
22364 if (high_regs_pushed)
22365 {
22366 unsigned long mask = live_regs_mask & 0xff;
22367 int next_hi_reg;
22368
22369 /* The available low registers depend on the size of the value we are
22370 returning. */
22371 if (size <= 12)
22372 mask |= 1 << 3;
22373 if (size <= 8)
22374 mask |= 1 << 2;
22375
22376 if (mask == 0)
22377 /* Oh dear! We have no low registers into which we can pop
22378 high registers! */
22379 internal_error
22380 ("no low registers available for popping high registers");
22381
22382 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
22383 if (live_regs_mask & (1 << next_hi_reg))
22384 break;
22385
22386 while (high_regs_pushed)
22387 {
22388 /* Find lo register(s) into which the high register(s) can
22389 be popped. */
22390 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22391 {
22392 if (mask & (1 << regno))
22393 high_regs_pushed--;
22394 if (high_regs_pushed == 0)
22395 break;
22396 }
22397
22398 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
22399
22400 /* Pop the values into the low register(s). */
22401 thumb_pop (asm_out_file, mask);
22402
22403 /* Move the value(s) into the high registers. */
22404 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22405 {
22406 if (mask & (1 << regno))
22407 {
22408 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
22409 regno);
22410
22411 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
22412 if (live_regs_mask & (1 << next_hi_reg))
22413 break;
22414 }
22415 }
22416 }
22417 live_regs_mask &= ~0x0f00;
22418 }
22419
22420 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
22421 live_regs_mask &= 0xff;
22422
22423 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
22424 {
22425 /* Pop the return address into the PC. */
22426 if (had_to_push_lr)
22427 live_regs_mask |= 1 << PC_REGNUM;
22428
22429 /* Either no argument registers were pushed or a backtrace
22430 structure was created which includes an adjusted stack
22431 pointer, so just pop everything. */
22432 if (live_regs_mask)
22433 thumb_pop (asm_out_file, live_regs_mask);
22434
22435 /* We have either just popped the return address into the
22436 PC or it was kept in LR for the entire function.
22437 Note that thumb_pop has already called thumb_exit if the
22438 PC was in the list. */
22439 if (!had_to_push_lr)
22440 thumb_exit (asm_out_file, LR_REGNUM);
22441 }
22442 else
22443 {
22444 /* Pop everything but the return address. */
22445 if (live_regs_mask)
22446 thumb_pop (asm_out_file, live_regs_mask);
22447
22448 if (had_to_push_lr)
22449 {
22450 if (size > 12)
22451 {
22452 /* We have no free low regs, so save one. */
22453 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
22454 LAST_ARG_REGNUM);
22455 }
22456
22457 /* Get the return address into a temporary register. */
22458 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
22459
22460 if (size > 12)
22461 {
22462 /* Move the return address to lr. */
22463 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
22464 LAST_ARG_REGNUM);
22465 /* Restore the low register. */
22466 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
22467 IP_REGNUM);
22468 regno = LR_REGNUM;
22469 }
22470 else
22471 regno = LAST_ARG_REGNUM;
22472 }
22473 else
22474 regno = LR_REGNUM;
22475
22476 /* Remove the argument registers that were pushed onto the stack. */
22477 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
22478 SP_REGNUM, SP_REGNUM,
22479 crtl->args.pretend_args_size);
22480
22481 thumb_exit (asm_out_file, regno);
22482 }
22483
22484 return "";
22485 }
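/* Illustrative sequence (one possibility only; the low registers chosen
   depend on the live mask and the return-value size): undoing the prologue
   quoted in the comment above might produce

	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3
	pop	{r4, r5, r6, r7, pc}

   i.e. the high registers are recovered through low scratch registers
   before the final pop restores the remaining low registers and the return
   address.  */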
22486
22487 /* Functions to save and restore machine-specific function data. */
22488 static struct machine_function *
22489 arm_init_machine_status (void)
22490 {
22491 struct machine_function *machine;
22492 machine = ggc_alloc_cleared_machine_function ();
22493
22494 #if ARM_FT_UNKNOWN != 0
22495 machine->func_type = ARM_FT_UNKNOWN;
22496 #endif
22497 return machine;
22498 }
22499
22500 /* Return an RTX indicating where the return address to the
22501 calling function can be found. */
22502 rtx
22503 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22504 {
22505 if (count != 0)
22506 return NULL_RTX;
22507
22508 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22509 }
22510
22511 /* Do anything needed before RTL is emitted for each function. */
22512 void
22513 arm_init_expanders (void)
22514 {
22515 /* Arrange to initialize and mark the machine per-function status. */
22516 init_machine_status = arm_init_machine_status;
22517
22518 /* This is to stop the combine pass optimizing away the alignment
22519 adjustment of va_arg. */
22520 /* ??? It is claimed that this should not be necessary. */
22521 if (cfun)
22522 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22523 }
22524
22525
22526 /* Like arm_compute_initial_elimination_offset.  Simpler because there
22527 isn't an ABI-specified frame pointer for Thumb.  Instead, we set it
22528 to point at the base of the local variables after static stack
22529 space for a function has been allocated. */
22530
22531 HOST_WIDE_INT
22532 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22533 {
22534 arm_stack_offsets *offsets;
22535
22536 offsets = arm_get_frame_offsets ();
22537
22538 switch (from)
22539 {
22540 case ARG_POINTER_REGNUM:
22541 switch (to)
22542 {
22543 case STACK_POINTER_REGNUM:
22544 return offsets->outgoing_args - offsets->saved_args;
22545
22546 case FRAME_POINTER_REGNUM:
22547 return offsets->soft_frame - offsets->saved_args;
22548
22549 case ARM_HARD_FRAME_POINTER_REGNUM:
22550 return offsets->saved_regs - offsets->saved_args;
22551
22552 case THUMB_HARD_FRAME_POINTER_REGNUM:
22553 return offsets->locals_base - offsets->saved_args;
22554
22555 default:
22556 gcc_unreachable ();
22557 }
22558 break;
22559
22560 case FRAME_POINTER_REGNUM:
22561 switch (to)
22562 {
22563 case STACK_POINTER_REGNUM:
22564 return offsets->outgoing_args - offsets->soft_frame;
22565
22566 case ARM_HARD_FRAME_POINTER_REGNUM:
22567 return offsets->saved_regs - offsets->soft_frame;
22568
22569 case THUMB_HARD_FRAME_POINTER_REGNUM:
22570 return offsets->locals_base - offsets->soft_frame;
22571
22572 default:
22573 gcc_unreachable ();
22574 }
22575 break;
22576
22577 default:
22578 gcc_unreachable ();
22579 }
22580 }
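/* Quick example (added note, not in the original source): eliminating
   ARG_POINTER_REGNUM into STACK_POINTER_REGNUM yields
   offsets->outgoing_args - offsets->saved_args, i.e. the whole frame:
   saved registers, locals and outgoing argument space together, which is
   how far the incoming arguments sit above the final stack pointer.  */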
22581
22582 /* Generate the function's prologue. */
22583
22584 void
22585 thumb1_expand_prologue (void)
22586 {
22587 rtx insn;
22588
22589 HOST_WIDE_INT amount;
22590 arm_stack_offsets *offsets;
22591 unsigned long func_type;
22592 int regno;
22593 unsigned long live_regs_mask;
22594 unsigned long l_mask;
22595 unsigned high_regs_pushed = 0;
22596
22597 func_type = arm_current_func_type ();
22598
22599 /* Naked functions don't have prologues. */
22600 if (IS_NAKED (func_type))
22601 return;
22602
22603 if (IS_INTERRUPT (func_type))
22604 {
22605 error ("interrupt service routines cannot be coded in Thumb mode");
22606 return;
22607 }
22608
22609 if (is_called_in_ARM_mode (current_function_decl))
22610 emit_insn (gen_prologue_thumb1_interwork ());
22611
22612 offsets = arm_get_frame_offsets ();
22613 live_regs_mask = offsets->saved_regs_mask;
22614
22615 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22616 l_mask = live_regs_mask & 0x40ff;
22617 /* Then count how many other high registers will need to be pushed. */
22618 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22619
22620 if (crtl->args.pretend_args_size)
22621 {
22622 rtx x = GEN_INT (-crtl->args.pretend_args_size);
22623
22624 if (cfun->machine->uses_anonymous_args)
22625 {
22626 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22627 unsigned long mask;
22628
22629 mask = 1ul << (LAST_ARG_REGNUM + 1);
22630 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
22631
22632 insn = thumb1_emit_multi_reg_push (mask, 0);
22633 }
22634 else
22635 {
22636 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22637 stack_pointer_rtx, x));
22638 }
22639 RTX_FRAME_RELATED_P (insn) = 1;
22640 }
22641
22642 if (TARGET_BACKTRACE)
22643 {
22644 HOST_WIDE_INT offset = 0;
22645 unsigned work_register;
22646 rtx work_reg, x, arm_hfp_rtx;
22647
22648 /* We have been asked to create a stack backtrace structure.
22649 The code looks like this:
22650
22651 0 .align 2
22652 0 func:
22653 0 sub SP, #16 Reserve space for 4 registers.
22654 2 push {R7} Push low registers.
22655 4 add R7, SP, #20 Get the stack pointer before the push.
22656 6 str R7, [SP, #8] Store the stack pointer
22657 (before reserving the space).
22658 8 mov R7, PC Get hold of the start of this code + 12.
22659 10 str R7, [SP, #16] Store it.
22660 12 mov R7, FP Get hold of the current frame pointer.
22661 14 str R7, [SP, #4] Store it.
22662 16 mov R7, LR Get hold of the current return address.
22663 18 str R7, [SP, #12] Store it.
22664 20 add R7, SP, #16 Point at the start of the
22665 backtrace structure.
22666 22 mov FP, R7 Put this value into the frame pointer. */
22667
22668 work_register = thumb_find_work_register (live_regs_mask);
22669 work_reg = gen_rtx_REG (SImode, work_register);
22670 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22671
22672 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22673 stack_pointer_rtx, GEN_INT (-16)));
22674 RTX_FRAME_RELATED_P (insn) = 1;
22675
22676 if (l_mask)
22677 {
22678 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22679 RTX_FRAME_RELATED_P (insn) = 1;
22680
22681 offset = bit_count (l_mask) * UNITS_PER_WORD;
22682 }
22683
22684 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22685 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22686
22687 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
22688 x = gen_frame_mem (SImode, x);
22689 emit_move_insn (x, work_reg);
22690
22691 /* Make sure that the instruction fetching the PC is in the right place
22692 to calculate "start of backtrace creation code + 12". */
22693 /* ??? The stores using the common WORK_REG ought to be enough to
22694 prevent the scheduler from doing anything weird. Failing that
22695 we could always move all of the following into an UNSPEC_VOLATILE. */
22696 if (l_mask)
22697 {
22698 x = gen_rtx_REG (SImode, PC_REGNUM);
22699 emit_move_insn (work_reg, x);
22700
22701 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22702 x = gen_frame_mem (SImode, x);
22703 emit_move_insn (x, work_reg);
22704
22705 emit_move_insn (work_reg, arm_hfp_rtx);
22706
22707 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22708 x = gen_frame_mem (SImode, x);
22709 emit_move_insn (x, work_reg);
22710 }
22711 else
22712 {
22713 emit_move_insn (work_reg, arm_hfp_rtx);
22714
22715 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22716 x = gen_frame_mem (SImode, x);
22717 emit_move_insn (x, work_reg);
22718
22719 x = gen_rtx_REG (SImode, PC_REGNUM);
22720 emit_move_insn (work_reg, x);
22721
22722 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22723 x = gen_frame_mem (SImode, x);
22724 emit_move_insn (x, work_reg);
22725 }
22726
22727 x = gen_rtx_REG (SImode, LR_REGNUM);
22728 emit_move_insn (work_reg, x);
22729
22730 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
22731 x = gen_frame_mem (SImode, x);
22732 emit_move_insn (x, work_reg);
22733
22734 x = GEN_INT (offset + 12);
22735 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22736
22737 emit_move_insn (arm_hfp_rtx, work_reg);
22738 }
22739 /* Optimization: If we are not pushing any low registers but we are going
22740 to push some high registers then delay our first push. This will just
22741 be a push of LR and we can combine it with the push of the first high
22742 register. */
22743 else if ((l_mask & 0xff) != 0
22744 || (high_regs_pushed == 0 && l_mask))
22745 {
22746 unsigned long mask = l_mask;
22747 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22748 insn = thumb1_emit_multi_reg_push (mask, mask);
22749 RTX_FRAME_RELATED_P (insn) = 1;
22750 }
22751
22752 if (high_regs_pushed)
22753 {
22754 unsigned pushable_regs;
22755 unsigned next_hi_reg;
22756 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
22757 : crtl->args.info.nregs;
22758 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
22759
22760 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22761 if (live_regs_mask & (1 << next_hi_reg))
22762 break;
22763
22764 /* Here we need to mask out registers used for passing arguments,
22765 even if they could otherwise be pushed.  This avoids using them to stash
22766 the high registers, since such a stash could clobber arguments still live in them.  */
22767 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
22768
22769 if (pushable_regs == 0)
22770 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22771
22772 while (high_regs_pushed > 0)
22773 {
22774 unsigned long real_regs_mask = 0;
22775
22776 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22777 {
22778 if (pushable_regs & (1 << regno))
22779 {
22780 emit_move_insn (gen_rtx_REG (SImode, regno),
22781 gen_rtx_REG (SImode, next_hi_reg));
22782
22783 high_regs_pushed --;
22784 real_regs_mask |= (1 << next_hi_reg);
22785
22786 if (high_regs_pushed)
22787 {
22788 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22789 next_hi_reg --)
22790 if (live_regs_mask & (1 << next_hi_reg))
22791 break;
22792 }
22793 else
22794 {
22795 pushable_regs &= ~((1 << regno) - 1);
22796 break;
22797 }
22798 }
22799 }
22800
22801 /* If we had to find a work register and we have not yet
22802 saved the LR then add it to the list of regs to push. */
22803 if (l_mask == (1 << LR_REGNUM))
22804 {
22805 pushable_regs |= l_mask;
22806 real_regs_mask |= l_mask;
22807 l_mask = 0;
22808 }
22809
22810 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22811 RTX_FRAME_RELATED_P (insn) = 1;
22812 }
22813 }
22814
22815 /* Load the pic register before setting the frame pointer,
22816 so we can use r7 as a temporary work register. */
22817 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22818 arm_load_pic_register (live_regs_mask);
22819
22820 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22821 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22822 stack_pointer_rtx);
22823
22824 if (flag_stack_usage_info)
22825 current_function_static_stack_size
22826 = offsets->outgoing_args - offsets->saved_args;
22827
22828 amount = offsets->outgoing_args - offsets->saved_regs;
22829 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22830 if (amount)
22831 {
22832 if (amount < 512)
22833 {
22834 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22835 GEN_INT (- amount)));
22836 RTX_FRAME_RELATED_P (insn) = 1;
22837 }
22838 else
22839 {
22840 rtx reg, dwarf;
22841
22842 /* The stack decrement is too big for an immediate value in a single
22843 insn. In theory we could issue multiple subtracts, but after
22844 three of them it becomes more space efficient to place the full
22845 value in the constant pool and load into a register. (Also the
22846 ARM debugger really likes to see only one stack decrement per
22847 function). So instead we look for a scratch register into which
22848 we can load the decrement, and then we subtract this from the
22849 stack pointer. Unfortunately on the thumb the only available
22850 scratch registers are the argument registers, and we cannot use
22851 these as they may hold arguments to the function. Instead we
22852 attempt to locate a call preserved register which is used by this
22853 function. If we can find one, then we know that it will have
22854 been pushed at the start of the prologue and so we can corrupt
22855 it now. */
22856 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22857 if (live_regs_mask & (1 << regno))
22858 break;
22859
22860 gcc_assert(regno <= LAST_LO_REGNUM);
22861
22862 reg = gen_rtx_REG (SImode, regno);
22863
22864 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22865
22866 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22867 stack_pointer_rtx, reg));
22868
22869 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22870 plus_constant (Pmode, stack_pointer_rtx,
22871 -amount));
22872 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22873 RTX_FRAME_RELATED_P (insn) = 1;
22874 }
22875 }
22876
22877 if (frame_pointer_needed)
22878 thumb_set_frame_pointer (offsets);
22879
22880 /* If we are profiling, make sure no instructions are scheduled before
22881 the call to mcount. Similarly if the user has requested no
22882 scheduling in the prolog. Similarly if we want non-call exceptions
22883 using the EABI unwinder, to prevent faulting instructions from being
22884 swapped with a stack adjustment. */
22885 if (crtl->profile || !TARGET_SCHED_PROLOG
22886 || (arm_except_unwind_info (&global_options) == UI_TARGET
22887 && cfun->can_throw_non_call_exceptions))
22888 emit_insn (gen_blockage ());
22889
22890 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22891 if (live_regs_mask & 0xff)
22892 cfun->machine->lr_save_eliminated = 0;
22893 }
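/* Concrete example of the large-frame path above (added commentary,
   registers chosen for illustration only): with 1024 bytes of locals and r4
   among the saved registers, the prologue loads -1024 into r4 (typically
   via a constant-pool load), adds it to SP, and attaches a
   REG_FRAME_RELATED_EXPR note equivalent to SP = SP - 1024 so the unwind
   information still describes a single adjustment.  */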
22894
22895 /* Generate the pattern *pop_multiple_with_stack_update_and_return when a
22896 single POP instruction can be generated; LR is replaced by PC.  All the
22897 required checks have already been done by USE_RETURN_INSN ().  Hence, all
22898 we really need to decide here is whether a single register or multiple
22899 registers are being popped.  */
22900 void
22901 thumb2_expand_return (void)
22902 {
22903 int i, num_regs;
22904 unsigned long saved_regs_mask;
22905 arm_stack_offsets *offsets;
22906
22907 offsets = arm_get_frame_offsets ();
22908 saved_regs_mask = offsets->saved_regs_mask;
22909
22910 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
22911 if (saved_regs_mask & (1 << i))
22912 num_regs++;
22913
22914 if (saved_regs_mask)
22915 {
22916 if (num_regs == 1)
22917 {
22918 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22919 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
22920 rtx addr = gen_rtx_MEM (SImode,
22921 gen_rtx_POST_INC (SImode,
22922 stack_pointer_rtx));
22923 set_mem_alias_set (addr, get_frame_alias_set ());
22924 XVECEXP (par, 0, 0) = ret_rtx;
22925 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
22926 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
22927 emit_jump_insn (par);
22928 }
22929 else
22930 {
22931 saved_regs_mask &= ~ (1 << LR_REGNUM);
22932 saved_regs_mask |= (1 << PC_REGNUM);
22933 arm_emit_multi_reg_pop (saved_regs_mask);
22934 }
22935 }
22936 else
22937 {
22938 emit_jump_insn (simple_return_rtx);
22939 }
22940 }
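/* Illustrative note (not in the original source): when LR is the only
   register in saved_regs_mask, the PARALLEL built above pairs a return with
   a single post-incremented load into PC, which assembles to something like
   "ldr pc, [sp], #4".  With more saved registers, LR is simply replaced by
   PC in the mask and arm_emit_multi_reg_pop emits a "pop {..., pc}".  */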
22941
22942 void
22943 thumb1_expand_epilogue (void)
22944 {
22945 HOST_WIDE_INT amount;
22946 arm_stack_offsets *offsets;
22947 int regno;
22948
22949 /* Naked functions don't have epilogues.  */
22950 if (IS_NAKED (arm_current_func_type ()))
22951 return;
22952
22953 offsets = arm_get_frame_offsets ();
22954 amount = offsets->outgoing_args - offsets->saved_regs;
22955
22956 if (frame_pointer_needed)
22957 {
22958 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22959 amount = offsets->locals_base - offsets->saved_regs;
22960 }
22961 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22962
22963 gcc_assert (amount >= 0);
22964 if (amount)
22965 {
22966 emit_insn (gen_blockage ());
22967
22968 if (amount < 512)
22969 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22970 GEN_INT (amount)));
22971 else
22972 {
22973 /* r3 is always free in the epilogue. */
22974 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22975
22976 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22977 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22978 }
22979 }
22980
22981 /* Emit a USE (stack_pointer_rtx), so that
22982 the stack adjustment will not be deleted. */
22983 emit_insn (gen_force_register_use (stack_pointer_rtx));
22984
22985 if (crtl->profile || !TARGET_SCHED_PROLOG)
22986 emit_insn (gen_blockage ());
22987
22988 /* Emit a clobber for each insn that will be restored in the epilogue,
22989 so that flow2 will get register lifetimes correct. */
22990 for (regno = 0; regno < 13; regno++)
22991 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22992 emit_clobber (gen_rtx_REG (SImode, regno));
22993
22994 if (! df_regs_ever_live_p (LR_REGNUM))
22995 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22996 }
22997
22998 /* Epilogue code for APCS frame. */
22999 static void
23000 arm_expand_epilogue_apcs_frame (bool really_return)
23001 {
23002 unsigned long func_type;
23003 unsigned long saved_regs_mask;
23004 int num_regs = 0;
23005 int i;
23006 int floats_from_frame = 0;
23007 arm_stack_offsets *offsets;
23008
23009 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
23010 func_type = arm_current_func_type ();
23011
23012 /* Get frame offsets for ARM. */
23013 offsets = arm_get_frame_offsets ();
23014 saved_regs_mask = offsets->saved_regs_mask;
23015
23016 /* Find the offset of the floating-point save area in the frame. */
23017 floats_from_frame = offsets->saved_args - offsets->frame;
23018
23019 /* Compute how many core registers are saved and how far away the floats are.  */
23020 for (i = 0; i <= LAST_ARM_REGNUM; i++)
23021 if (saved_regs_mask & (1 << i))
23022 {
23023 num_regs++;
23024 floats_from_frame += 4;
23025 }
23026
23027 if (TARGET_HARD_FLOAT && TARGET_VFP)
23028 {
23029 int start_reg;
23030
23031 /* The offset is from IP_REGNUM. */
23032 int saved_size = arm_get_vfp_saved_size ();
23033 if (saved_size > 0)
23034 {
23035 floats_from_frame += saved_size;
23036 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
23037 hard_frame_pointer_rtx,
23038 GEN_INT (-floats_from_frame)));
23039 }
23040
23041 /* Generate VFP register multi-pop. */
23042 start_reg = FIRST_VFP_REGNUM;
23043
23044 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
23045 /* Look for a case where a reg does not need restoring. */
23046 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
23047 && (!df_regs_ever_live_p (i + 1)
23048 || call_used_regs[i + 1]))
23049 {
23050 if (start_reg != i)
23051 arm_emit_vfp_multi_reg_pop (start_reg,
23052 (i - start_reg) / 2,
23053 gen_rtx_REG (SImode,
23054 IP_REGNUM));
23055 start_reg = i + 2;
23056 }
23057
23058 /* Restore the remaining regs that we have discovered (or possibly
23059 even all of them, if the conditional in the for loop never
23060 fired). */
23061 if (start_reg != i)
23062 arm_emit_vfp_multi_reg_pop (start_reg,
23063 (i - start_reg) / 2,
23064 gen_rtx_REG (SImode, IP_REGNUM));
23065 }
23066
23067 if (TARGET_IWMMXT)
23068 {
23069 /* The frame pointer is guaranteed to be non-double-word aligned, as
23070 it is set to double-word-aligned old_stack_pointer - 4. */
23071 rtx insn;
23072 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
23073
23074 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
23075 if (df_regs_ever_live_p (i) && !call_used_regs[i])
23076 {
23077 rtx addr = gen_frame_mem (V2SImode,
23078 plus_constant (Pmode, hard_frame_pointer_rtx,
23079 - lrm_count * 4));
23080 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
23081 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23082 gen_rtx_REG (V2SImode, i),
23083 NULL_RTX);
23084 lrm_count += 2;
23085 }
23086 }
23087
23088 /* saved_regs_mask should contain IP, which holds the old stack pointer
23089 saved when the activation record was created.  Since SP and IP are adjacent registers,
23090 we can restore the value directly into SP.  */
23091 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
23092 saved_regs_mask &= ~(1 << IP_REGNUM);
23093 saved_regs_mask |= (1 << SP_REGNUM);
23094
23095 /* There are two registers left in saved_regs_mask - LR and PC. We
23096 only need to restore LR (the return address), but to
23097 save time we can load it directly into PC, unless we need a
23098 special function exit sequence, or we are not really returning. */
23099 if (really_return
23100 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
23101 && !crtl->calls_eh_return)
23102 /* Delete LR from the register mask, so that LR on
23103 the stack is loaded into the PC in the register mask. */
23104 saved_regs_mask &= ~(1 << LR_REGNUM);
23105 else
23106 saved_regs_mask &= ~(1 << PC_REGNUM);
23107
23108 num_regs = bit_count (saved_regs_mask);
23109 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
23110 {
23111 /* Unwind the stack to just below the saved registers. */
23112 emit_insn (gen_addsi3 (stack_pointer_rtx,
23113 hard_frame_pointer_rtx,
23114 GEN_INT (- 4 * num_regs)));
23115 }
23116
23117 arm_emit_multi_reg_pop (saved_regs_mask);
23118
23119 if (IS_INTERRUPT (func_type))
23120 {
23121 /* Interrupt handlers will have pushed the
23122 IP onto the stack, so restore it now. */
23123 rtx insn;
23124 rtx addr = gen_rtx_MEM (SImode,
23125 gen_rtx_POST_INC (SImode,
23126 stack_pointer_rtx));
23127 set_mem_alias_set (addr, get_frame_alias_set ());
23128 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
23129 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23130 gen_rtx_REG (SImode, IP_REGNUM),
23131 NULL_RTX);
23132 }
23133
23134 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
23135 return;
23136
23137 if (crtl->calls_eh_return)
23138 emit_insn (gen_addsi3 (stack_pointer_rtx,
23139 stack_pointer_rtx,
23140 GEN_INT (ARM_EH_STACKADJ_REGNUM)));
23141
23142 if (IS_STACKALIGN (func_type))
23143 /* Restore the original stack pointer. Before prologue, the stack was
23144 realigned and the original stack pointer saved in r0. For details,
23145 see comment in arm_expand_prologue. */
23146 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
23147
23148 emit_jump_insn (simple_return_rtx);
23149 }
23150
23151 /* Generate RTL to represent the ARM epilogue.  REALLY_RETURN is true if the
23152 function is not a sibcall.  */
23153 void
23154 arm_expand_epilogue (bool really_return)
23155 {
23156 unsigned long func_type;
23157 unsigned long saved_regs_mask;
23158 int num_regs = 0;
23159 int i;
23160 int amount;
23161 arm_stack_offsets *offsets;
23162
23163 func_type = arm_current_func_type ();
23164
23165 /* Naked functions don't have epilogues.  Hence, generate a return pattern and
23166 let output_return_instruction take care of any instruction emission.  */
23167 if (IS_NAKED (func_type)
23168 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
23169 {
23170 emit_jump_insn (simple_return_rtx);
23171 return;
23172 }
23173
23174 /* If we are throwing an exception, then we really must be doing a
23175 return, so we can't tail-call. */
23176 gcc_assert (!crtl->calls_eh_return || really_return);
23177
23178 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
23179 {
23180 arm_expand_epilogue_apcs_frame (really_return);
23181 return;
23182 }
23183
23184 /* Get frame offsets for ARM. */
23185 offsets = arm_get_frame_offsets ();
23186 saved_regs_mask = offsets->saved_regs_mask;
23187 num_regs = bit_count (saved_regs_mask);
23188
23189 if (frame_pointer_needed)
23190 {
23191 /* Restore stack pointer if necessary. */
23192 if (TARGET_ARM)
23193 {
23194 /* In ARM mode, frame pointer points to first saved register.
23195 Restore stack pointer to last saved register. */
23196 amount = offsets->frame - offsets->saved_regs;
23197
23198 /* Force out any pending memory operations that reference stacked data
23199 before stack de-allocation occurs. */
23200 emit_insn (gen_blockage ());
23201 emit_insn (gen_addsi3 (stack_pointer_rtx,
23202 hard_frame_pointer_rtx,
23203 GEN_INT (amount)));
23204
23205 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
23206 deleted. */
23207 emit_insn (gen_force_register_use (stack_pointer_rtx));
23208 }
23209 else
23210 {
23211 /* In Thumb-2 mode, the frame pointer points to the last saved
23212 register. */
23213 amount = offsets->locals_base - offsets->saved_regs;
23214 if (amount)
23215 emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
23216 hard_frame_pointer_rtx,
23217 GEN_INT (amount)));
23218
23219 /* Force out any pending memory operations that reference stacked data
23220 before stack de-allocation occurs. */
23221 emit_insn (gen_blockage ());
23222 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
23223 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
23224 deleted. */
23225 emit_insn (gen_force_register_use (stack_pointer_rtx));
23226 }
23227 }
23228 else
23229 {
23230 /* Pop off outgoing args and local frame to adjust stack pointer to
23231 last saved register. */
23232 amount = offsets->outgoing_args - offsets->saved_regs;
23233 if (amount)
23234 {
23235 /* Force out any pending memory operations that reference stacked data
23236 before stack de-allocation occurs. */
23237 emit_insn (gen_blockage ());
23238 emit_insn (gen_addsi3 (stack_pointer_rtx,
23239 stack_pointer_rtx,
23240 GEN_INT (amount)));
23241 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
23242 not deleted. */
23243 emit_insn (gen_force_register_use (stack_pointer_rtx));
23244 }
23245 }
23246
23247 if (TARGET_HARD_FLOAT && TARGET_VFP)
23248 {
23249 /* Generate VFP register multi-pop. */
23250 int end_reg = LAST_VFP_REGNUM + 1;
23251
23252 /* Scan the registers in reverse order. We need to match
23253 any groupings made in the prologue and generate matching
23254 vldm operations. Groups must be matched because, unlike
23255 pop, vldm can only restore consecutive registers. */
23256 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
23257 /* Look for a case where a reg does not need restoring. */
23258 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
23259 && (!df_regs_ever_live_p (i + 1)
23260 || call_used_regs[i + 1]))
23261 {
23262 /* Restore the regs discovered so far (from reg+2 to
23263 end_reg). */
23264 if (end_reg > i + 2)
23265 arm_emit_vfp_multi_reg_pop (i + 2,
23266 (end_reg - (i + 2)) / 2,
23267 stack_pointer_rtx);
23268 end_reg = i;
23269 }
23270
23271 /* Restore the remaining regs that we have discovered (or possibly
23272 even all of them, if the conditional in the for loop never
23273 fired). */
23274 if (end_reg > i + 2)
23275 arm_emit_vfp_multi_reg_pop (i + 2,
23276 (end_reg - (i + 2)) / 2,
23277 stack_pointer_rtx);
23278 }
23279
23280 if (TARGET_IWMMXT)
23281 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
23282 if (df_regs_ever_live_p (i) && !call_used_regs[i])
23283 {
23284 rtx insn;
23285 rtx addr = gen_rtx_MEM (V2SImode,
23286 gen_rtx_POST_INC (SImode,
23287 stack_pointer_rtx));
23288 set_mem_alias_set (addr, get_frame_alias_set ());
23289 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
23290 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23291 gen_rtx_REG (V2SImode, i),
23292 NULL_RTX);
23293 }
23294
23295 if (saved_regs_mask)
23296 {
23297 rtx insn;
23298 bool return_in_pc = false;
23299
23300 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
23301 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
23302 && !IS_STACKALIGN (func_type)
23303 && really_return
23304 && crtl->args.pretend_args_size == 0
23305 && saved_regs_mask & (1 << LR_REGNUM)
23306 && !crtl->calls_eh_return)
23307 {
23308 saved_regs_mask &= ~(1 << LR_REGNUM);
23309 saved_regs_mask |= (1 << PC_REGNUM);
23310 return_in_pc = true;
23311 }
23312
23313 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
23314 {
23315 for (i = 0; i <= LAST_ARM_REGNUM; i++)
23316 if (saved_regs_mask & (1 << i))
23317 {
23318 rtx addr = gen_rtx_MEM (SImode,
23319 gen_rtx_POST_INC (SImode,
23320 stack_pointer_rtx));
23321 set_mem_alias_set (addr, get_frame_alias_set ());
23322
23323 if (i == PC_REGNUM)
23324 {
23325 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
23326 XVECEXP (insn, 0, 0) = ret_rtx;
23327 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
23328 gen_rtx_REG (SImode, i),
23329 addr);
23330 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
23331 insn = emit_jump_insn (insn);
23332 }
23333 else
23334 {
23335 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
23336 addr));
23337 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
23338 gen_rtx_REG (SImode, i),
23339 NULL_RTX);
23340 }
23341 }
23342 }
23343 else
23344 {
23345 if (current_tune->prefer_ldrd_strd
23346 && !optimize_function_for_size_p (cfun))
23347 {
23348 if (TARGET_THUMB2)
23349 thumb2_emit_ldrd_pop (saved_regs_mask);
23350 else
23351 arm_emit_multi_reg_pop (saved_regs_mask);
23352 }
23353 else
23354 arm_emit_multi_reg_pop (saved_regs_mask);
23355 }
23356
23357 if (return_in_pc)
23358 return;
23359 }
23360
23361 if (crtl->args.pretend_args_size)
23362 emit_insn (gen_addsi3 (stack_pointer_rtx,
23363 stack_pointer_rtx,
23364 GEN_INT (crtl->args.pretend_args_size)));
23365
23366 if (!really_return)
23367 return;
23368
23369 if (crtl->calls_eh_return)
23370 emit_insn (gen_addsi3 (stack_pointer_rtx,
23371 stack_pointer_rtx,
23372 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
23373
23374 if (IS_STACKALIGN (func_type))
23375 /* Restore the original stack pointer. In the prologue, the stack was
23376 realigned and the original stack pointer was saved in r0. For details,
23377 see the comment in arm_expand_prologue. */
23378 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
23379
23380 emit_jump_insn (simple_return_rtx);
23381 }
23382
23383 /* Implementation of insn prologue_thumb1_interwork. This is the first
23384 "instruction" of a function called in ARM mode. Swap to thumb mode. */
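/* The emitted sequence is roughly:
       orr  ip, pc, #1
       bx   ip
       .code 16
       .thumb_func
   .real_start_of<name>:
   so a caller entering in ARM state immediately switches to Thumb.  */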
23385
23386 const char *
23387 thumb1_output_interwork (void)
23388 {
23389 const char * name;
23390 FILE *f = asm_out_file;
23391
23392 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
23393 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
23394 == SYMBOL_REF);
23395 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
23396
23397 /* Generate code sequence to switch us into Thumb mode. */
23398 /* The .code 32 directive has already been emitted by
23399 ASM_DECLARE_FUNCTION_NAME. */
23400 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
23401 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
23402
23403 /* Generate a label, so that the debugger will notice the
23404 change in instruction sets. This label is also used by
23405 the assembler to bypass the ARM code when this function
23406 is called from a Thumb encoded function elsewhere in the
23407 same file. Hence the definition of STUB_NAME here must
23408 agree with the definition in gas/config/tc-arm.c. */
23409
23410 #define STUB_NAME ".real_start_of"
23411
23412 fprintf (f, "\t.code\t16\n");
23413 #ifdef ARM_PE
23414 if (arm_dllexport_name_p (name))
23415 name = arm_strip_name_encoding (name);
23416 #endif
23417 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
23418 fprintf (f, "\t.thumb_func\n");
23419 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
23420
23421 return "";
23422 }
23423
23424 /* Handle the case of a double word load into a low register from
23425 a computed memory address. The computed address may involve a
23426 register which is overwritten by the load. */
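/* For instance, for a load from [rB, rO] into r0/r1 the address is first
   formed in the high half of the destination (r1 = rB + rO) and both words
   are then loaded through r1, so neither base register is clobbered before
   it has been used.  */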
23427 const char *
23428 thumb_load_double_from_address (rtx *operands)
23429 {
23430 rtx addr;
23431 rtx base;
23432 rtx offset;
23433 rtx arg1;
23434 rtx arg2;
23435
23436 gcc_assert (REG_P (operands[0]));
23437 gcc_assert (MEM_P (operands[1]));
23438
23439 /* Get the memory address. */
23440 addr = XEXP (operands[1], 0);
23441
23442 /* Work out how the memory address is computed. */
23443 switch (GET_CODE (addr))
23444 {
23445 case REG:
23446 operands[2] = adjust_address (operands[1], SImode, 4);
23447
23448 if (REGNO (operands[0]) == REGNO (addr))
23449 {
23450 output_asm_insn ("ldr\t%H0, %2", operands);
23451 output_asm_insn ("ldr\t%0, %1", operands);
23452 }
23453 else
23454 {
23455 output_asm_insn ("ldr\t%0, %1", operands);
23456 output_asm_insn ("ldr\t%H0, %2", operands);
23457 }
23458 break;
23459
23460 case CONST:
23461 /* Compute <address> + 4 for the high order load. */
23462 operands[2] = adjust_address (operands[1], SImode, 4);
23463
23464 output_asm_insn ("ldr\t%0, %1", operands);
23465 output_asm_insn ("ldr\t%H0, %2", operands);
23466 break;
23467
23468 case PLUS:
23469 arg1 = XEXP (addr, 0);
23470 arg2 = XEXP (addr, 1);
23471
23472 if (CONSTANT_P (arg1))
23473 base = arg2, offset = arg1;
23474 else
23475 base = arg1, offset = arg2;
23476
23477 gcc_assert (REG_P (base));
23478
23479 /* Catch the case of <address> = <reg> + <reg> */
23480 if (REG_P (offset))
23481 {
23482 int reg_offset = REGNO (offset);
23483 int reg_base = REGNO (base);
23484 int reg_dest = REGNO (operands[0]);
23485
23486 /* Add the base and offset registers together into the
23487 higher destination register. */
23488 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
23489 reg_dest + 1, reg_base, reg_offset);
23490
23491 /* Load the lower destination register from the address in
23492 the higher destination register. */
23493 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
23494 reg_dest, reg_dest + 1);
23495
23496 /* Load the higher destination register from its own address
23497 plus 4. */
23498 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
23499 reg_dest + 1, reg_dest + 1);
23500 }
23501 else
23502 {
23503 /* Compute <address> + 4 for the high order load. */
23504 operands[2] = adjust_address (operands[1], SImode, 4);
23505
23506 /* If the computed address is held in the low order register
23507 then load the high order register first, otherwise always
23508 load the low order register first. */
23509 if (REGNO (operands[0]) == REGNO (base))
23510 {
23511 output_asm_insn ("ldr\t%H0, %2", operands);
23512 output_asm_insn ("ldr\t%0, %1", operands);
23513 }
23514 else
23515 {
23516 output_asm_insn ("ldr\t%0, %1", operands);
23517 output_asm_insn ("ldr\t%H0, %2", operands);
23518 }
23519 }
23520 break;
23521
23522 case LABEL_REF:
23523 /* With no registers to worry about we can just load the value
23524 directly. */
23525 operands[2] = adjust_address (operands[1], SImode, 4);
23526
23527 output_asm_insn ("ldr\t%H0, %2", operands);
23528 output_asm_insn ("ldr\t%0, %1", operands);
23529 break;
23530
23531 default:
23532 gcc_unreachable ();
23533 }
23534
23535 return "";
23536 }
23537
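/* Output a copy of N (2 or 3) words with a single ldmia/stmia pair.
   Operands 4 onwards are scratch registers; they are sorted into
   ascending order first, since ldmia/stmia register lists must be in
   increasing register number.  */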
23538 const char *
23539 thumb_output_move_mem_multiple (int n, rtx *operands)
23540 {
23541 rtx tmp;
23542
23543 switch (n)
23544 {
23545 case 2:
23546 if (REGNO (operands[4]) > REGNO (operands[5]))
23547 {
23548 tmp = operands[4];
23549 operands[4] = operands[5];
23550 operands[5] = tmp;
23551 }
23552 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
23553 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
23554 break;
23555
23556 case 3:
23557 if (REGNO (operands[4]) > REGNO (operands[5]))
23558 {
23559 tmp = operands[4];
23560 operands[4] = operands[5];
23561 operands[5] = tmp;
23562 }
23563 if (REGNO (operands[5]) > REGNO (operands[6]))
23564 {
23565 tmp = operands[5];
23566 operands[5] = operands[6];
23567 operands[6] = tmp;
23568 }
23569 if (REGNO (operands[4]) > REGNO (operands[5]))
23570 {
23571 tmp = operands[4];
23572 operands[4] = operands[5];
23573 operands[5] = tmp;
23574 }
23575
23576 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
23577 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
23578 break;
23579
23580 default:
23581 gcc_unreachable ();
23582 }
23583
23584 return "";
23585 }
23586
23587 /* Output a call-via instruction for thumb state. */
23588 const char *
23589 thumb_call_via_reg (rtx reg)
23590 {
23591 int regno = REGNO (reg);
23592 rtx *labelp;
23593
23594 gcc_assert (regno < LR_REGNUM);
23595
23596 /* If we are in the normal text section we can use a single instance
23597 per compilation unit. If we are doing function sections, then we need
23598 an entry per section, since we can't rely on reachability. */
23599 if (in_section == text_section)
23600 {
23601 thumb_call_reg_needed = 1;
23602
23603 if (thumb_call_via_label[regno] == NULL)
23604 thumb_call_via_label[regno] = gen_label_rtx ();
23605 labelp = thumb_call_via_label + regno;
23606 }
23607 else
23608 {
23609 if (cfun->machine->call_via[regno] == NULL)
23610 cfun->machine->call_via[regno] = gen_label_rtx ();
23611 labelp = cfun->machine->call_via + regno;
23612 }
23613
23614 output_asm_insn ("bl\t%a0", labelp);
23615 return "";
23616 }
23617
23618 /* Routines for generating rtl. */
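/* Expand a constant-length block copy: operands[0] and operands[1] are the
   destination and source MEMs, operands[2] the byte count.  12- and 8-byte
   chunks go through the movmem12b/movmem8b patterns, the remainder through
   word, halfword and byte moves.  */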
23619 void
23620 thumb_expand_movmemqi (rtx *operands)
23621 {
23622 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
23623 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
23624 HOST_WIDE_INT len = INTVAL (operands[2]);
23625 HOST_WIDE_INT offset = 0;
23626
23627 while (len >= 12)
23628 {
23629 emit_insn (gen_movmem12b (out, in, out, in));
23630 len -= 12;
23631 }
23632
23633 if (len >= 8)
23634 {
23635 emit_insn (gen_movmem8b (out, in, out, in));
23636 len -= 8;
23637 }
23638
23639 if (len >= 4)
23640 {
23641 rtx reg = gen_reg_rtx (SImode);
23642 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
23643 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
23644 len -= 4;
23645 offset += 4;
23646 }
23647
23648 if (len >= 2)
23649 {
23650 rtx reg = gen_reg_rtx (HImode);
23651 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
23652 plus_constant (Pmode, in,
23653 offset))));
23654 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
23655 offset)),
23656 reg));
23657 len -= 2;
23658 offset += 2;
23659 }
23660
23661 if (len)
23662 {
23663 rtx reg = gen_reg_rtx (QImode);
23664 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
23665 plus_constant (Pmode, in,
23666 offset))));
23667 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
23668 offset)),
23669 reg));
23670 }
23671 }
23672
23673 void
23674 thumb_reload_out_hi (rtx *operands)
23675 {
23676 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
23677 }
23678
23679 /* Handle reading a half-word from memory during reload. */
23680 void
23681 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
23682 {
23683 gcc_unreachable ();
23684 }
23685
23686 /* Return the length of a function name prefix
23687 that starts with the character 'c'. */
23688 static int
23689 arm_get_strip_length (int c)
23690 {
23691 switch (c)
23692 {
23693 ARM_NAME_ENCODING_LENGTHS
23694 default: return 0;
23695 }
23696 }
23697
23698 /* Return a pointer to a function's name with any
23699 and all prefix encodings stripped from it. */
23700 const char *
23701 arm_strip_name_encoding (const char *name)
23702 {
23703 int skip;
23704
23705 while ((skip = arm_get_strip_length (* name)))
23706 name += skip;
23707
23708 return name;
23709 }
23710
23711 /* If there is a '*' anywhere in the name's prefix, then
23712 emit the stripped name verbatim, otherwise prepend an
23713 underscore if leading underscores are being used. */
23714 void
23715 arm_asm_output_labelref (FILE *stream, const char *name)
23716 {
23717 int skip;
23718 int verbatim = 0;
23719
23720 while ((skip = arm_get_strip_length (* name)))
23721 {
23722 verbatim |= (*name == '*');
23723 name += skip;
23724 }
23725
23726 if (verbatim)
23727 fputs (name, stream);
23728 else
23729 asm_fprintf (stream, "%U%s", name);
23730 }
23731
23732 /* This function is used to emit an EABI tag and its associated value.
23733 We emit the numerical value of the tag in case the assembler does not
23734 support textual tags (e.g. gas prior to 2.20). If requested we include
23735 the tag name in a comment so that anyone reading the assembler output
23736 will know which tag is being set.
23737
23738 This function is not static because arm-c.c needs it too. */
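/* For example, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 1) emits
   ".eabi_attribute 26, 1", followed by the tag name as an assembler
   comment when -fverbose-asm or -dA is in effect.  */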
23739
23740 void
23741 arm_emit_eabi_attribute (const char *name, int num, int val)
23742 {
23743 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
23744 if (flag_verbose_asm || flag_debug_asm)
23745 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
23746 asm_fprintf (asm_out_file, "\n");
23747 }
23748
23749 static void
23750 arm_file_start (void)
23751 {
23752 int val;
23753
23754 if (TARGET_UNIFIED_ASM)
23755 asm_fprintf (asm_out_file, "\t.syntax unified\n");
23756
23757 if (TARGET_BPABI)
23758 {
23759 const char *fpu_name;
23760 if (arm_selected_arch)
23761 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
23762 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
23763 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
23764 else
23765 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
23766
23767 if (TARGET_SOFT_FLOAT)
23768 {
23769 fpu_name = "softvfp";
23770 }
23771 else
23772 {
23773 fpu_name = arm_fpu_desc->name;
23774 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
23775 {
23776 if (TARGET_HARD_FLOAT)
23777 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
23778 if (TARGET_HARD_FLOAT_ABI)
23779 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
23780 }
23781 }
23782 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
23783
23784 /* Some of these attributes only apply when the corresponding features
23785 are used. However we don't have any easy way of figuring this out.
23786 Conservatively record the setting that would have been used. */
23787
23788 if (flag_rounding_math)
23789 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
23790
23791 if (!flag_unsafe_math_optimizations)
23792 {
23793 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
23794 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
23795 }
23796 if (flag_signaling_nans)
23797 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
23798
23799 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
23800 flag_finite_math_only ? 1 : 3);
23801
23802 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
23803 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
23804 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
23805 flag_short_enums ? 1 : 2);
23806
23807 /* Tag_ABI_optimization_goals. */
23808 if (optimize_size)
23809 val = 4;
23810 else if (optimize >= 2)
23811 val = 2;
23812 else if (optimize)
23813 val = 1;
23814 else
23815 val = 6;
23816 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
23817
23818 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
23819 unaligned_access);
23820
23821 if (arm_fp16_format)
23822 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
23823 (int) arm_fp16_format);
23824
23825 if (arm_lang_output_object_attributes_hook)
23826 arm_lang_output_object_attributes_hook();
23827 }
23828
23829 default_file_start ();
23830 }
23831
23832 static void
23833 arm_file_end (void)
23834 {
23835 int regno;
23836
23837 if (NEED_INDICATE_EXEC_STACK)
23838 /* Add .note.GNU-stack. */
23839 file_end_indicate_exec_stack ();
23840
23841 if (! thumb_call_reg_needed)
23842 return;
23843
23844 switch_to_section (text_section);
23845 asm_fprintf (asm_out_file, "\t.code 16\n");
23846 ASM_OUTPUT_ALIGN (asm_out_file, 1);
23847
23848 for (regno = 0; regno < LR_REGNUM; regno++)
23849 {
23850 rtx label = thumb_call_via_label[regno];
23851
23852 if (label != 0)
23853 {
23854 targetm.asm_out.internal_label (asm_out_file, "L",
23855 CODE_LABEL_NUMBER (label));
23856 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
23857 }
23858 }
23859 }
23860
23861 #ifndef ARM_PE
23862 /* Symbols in the text segment can be accessed without indirecting via the
23863 constant pool; it may take an extra binary operation, but this is still
23864 faster than indirecting via memory. Don't do this when not optimizing,
23865 since we won't be calculating all of the offsets necessary to do this
23866 simplification. */
23867
23868 static void
23869 arm_encode_section_info (tree decl, rtx rtl, int first)
23870 {
23871 if (optimize > 0 && TREE_CONSTANT (decl))
23872 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
23873
23874 default_encode_section_info (decl, rtl, first);
23875 }
23876 #endif /* !ARM_PE */
23877
23878 static void
23879 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
23880 {
23881 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
23882 && !strcmp (prefix, "L"))
23883 {
23884 arm_ccfsm_state = 0;
23885 arm_target_insn = NULL;
23886 }
23887 default_internal_label (stream, prefix, labelno);
23888 }
23889
23890 /* Output code to add DELTA to the first argument, and then jump
23891 to FUNCTION. Used for C++ multiple inheritance. */
23892 static void
23893 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
23894 HOST_WIDE_INT delta,
23895 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
23896 tree function)
23897 {
23898 static int thunk_label = 0;
23899 char label[256];
23900 char labelpc[256];
23901 int mi_delta = delta;
23902 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
23903 int shift = 0;
23904 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
23905 ? 1 : 0);
23906 if (mi_delta < 0)
23907 mi_delta = - mi_delta;
23908
23909 if (TARGET_THUMB1)
23910 {
23911 int labelno = thunk_label++;
23912 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
23913 /* Thunks are entered in ARM mode when available. */
23914 if (TARGET_THUMB1_ONLY)
23915 {
23916 /* push r3 so we can use it as a temporary. */
23917 /* TODO: Omit this save if r3 is not used. */
23918 fputs ("\tpush {r3}\n", file);
23919 fputs ("\tldr\tr3, ", file);
23920 }
23921 else
23922 {
23923 fputs ("\tldr\tr12, ", file);
23924 }
23925 assemble_name (file, label);
23926 fputc ('\n', file);
23927 if (flag_pic)
23928 {
23929 /* If we are generating PIC, the ldr instruction below loads
23930 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
23931 the address of the add + 8, so we have:
23932
23933 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
23934 = target + 1.
23935
23936 Note that we have "+ 1" because some versions of GNU ld
23937 don't set the low bit of the result for R_ARM_REL32
23938 relocations against thumb function symbols.
23939 On ARMv6M this is +4, not +8. */
23940 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
23941 assemble_name (file, labelpc);
23942 fputs (":\n", file);
23943 if (TARGET_THUMB1_ONLY)
23944 {
23945 /* This is 2 insns after the start of the thunk, so we know it
23946 is 4-byte aligned. */
23947 fputs ("\tadd\tr3, pc, r3\n", file);
23948 fputs ("\tmov r12, r3\n", file);
23949 }
23950 else
23951 fputs ("\tadd\tr12, pc, r12\n", file);
23952 }
23953 else if (TARGET_THUMB1_ONLY)
23954 fputs ("\tmov r12, r3\n", file);
23955 }
23956 if (TARGET_THUMB1_ONLY)
23957 {
23958 if (mi_delta > 255)
23959 {
23960 fputs ("\tldr\tr3, ", file);
23961 assemble_name (file, label);
23962 fputs ("+4\n", file);
23963 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
23964 mi_op, this_regno, this_regno);
23965 }
23966 else if (mi_delta != 0)
23967 {
23968 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23969 mi_op, this_regno, this_regno,
23970 mi_delta);
23971 }
23972 }
23973 else
23974 {
23975 /* TODO: Use movw/movt for large constants when available. */
23976 while (mi_delta != 0)
23977 {
23978 if ((mi_delta & (3 << shift)) == 0)
23979 shift += 2;
23980 else
23981 {
23982 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23983 mi_op, this_regno, this_regno,
23984 mi_delta & (0xff << shift));
23985 mi_delta &= ~(0xff << shift);
23986 shift += 8;
23987 }
23988 }
23989 }
23990 if (TARGET_THUMB1)
23991 {
23992 if (TARGET_THUMB1_ONLY)
23993 fputs ("\tpop\t{r3}\n", file);
23994
23995 fprintf (file, "\tbx\tr12\n");
23996 ASM_OUTPUT_ALIGN (file, 2);
23997 assemble_name (file, label);
23998 fputs (":\n", file);
23999 if (flag_pic)
24000 {
24001 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
24002 rtx tem = XEXP (DECL_RTL (function), 0);
24003 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
24004 tem = gen_rtx_MINUS (GET_MODE (tem),
24005 tem,
24006 gen_rtx_SYMBOL_REF (Pmode,
24007 ggc_strdup (labelpc)));
24008 assemble_integer (tem, 4, BITS_PER_WORD, 1);
24009 }
24010 else
24011 /* Output ".word .LTHUNKn". */
24012 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
24013
24014 if (TARGET_THUMB1_ONLY && mi_delta > 255)
24015 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
24016 }
24017 else
24018 {
24019 fputs ("\tb\t", file);
24020 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
24021 if (NEED_PLT_RELOC)
24022 fputs ("(PLT)", file);
24023 fputc ('\n', file);
24024 }
24025 }
24026
24027 int
24028 arm_emit_vector_const (FILE *file, rtx x)
24029 {
24030 int i;
24031 const char * pattern;
24032
24033 gcc_assert (GET_CODE (x) == CONST_VECTOR);
24034
24035 switch (GET_MODE (x))
24036 {
24037 case V2SImode: pattern = "%08x"; break;
24038 case V4HImode: pattern = "%04x"; break;
24039 case V8QImode: pattern = "%02x"; break;
24040 default: gcc_unreachable ();
24041 }
24042
24043 fprintf (file, "0x");
24044 for (i = CONST_VECTOR_NUNITS (x); i--;)
24045 {
24046 rtx element;
24047
24048 element = CONST_VECTOR_ELT (x, i);
24049 fprintf (file, pattern, INTVAL (element));
24050 }
24051
24052 return 1;
24053 }
24054
24055 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
24056 HFmode constant pool entries are actually loaded with ldr. */
24057 void
24058 arm_emit_fp16_const (rtx c)
24059 {
24060 REAL_VALUE_TYPE r;
24061 long bits;
24062
24063 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
24064 bits = real_to_target (NULL, &r, HFmode);
24065 if (WORDS_BIG_ENDIAN)
24066 assemble_zeros (2);
24067 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
24068 if (!WORDS_BIG_ENDIAN)
24069 assemble_zeros (2);
24070 }
24071
24072 const char *
24073 arm_output_load_gr (rtx *operands)
24074 {
24075 rtx reg;
24076 rtx offset;
24077 rtx wcgr;
24078 rtx sum;
24079
24080 if (!MEM_P (operands [1])
24081 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
24082 || !REG_P (reg = XEXP (sum, 0))
24083 || !CONST_INT_P (offset = XEXP (sum, 1))
24084 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
24085 return "wldrw%?\t%0, %1";
24086
24087 /* Fix up an out-of-range load of a GR register. */
24088 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
24089 wcgr = operands[0];
24090 operands[0] = reg;
24091 output_asm_insn ("ldr%?\t%0, %1", operands);
24092
24093 operands[0] = wcgr;
24094 operands[1] = reg;
24095 output_asm_insn ("tmcr%?\t%0, %1", operands);
24096 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
24097
24098 return "";
24099 }
24100
24101 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
24102
24103 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
24104 named arg and all anonymous args onto the stack.
24105 XXX I know the prologue shouldn't be pushing registers, but it is faster
24106 that way. */
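/* For example, for a variadic function whose named arguments occupy only
   r0 under the AAPCS, the remaining three argument registers are flushed,
   so *pretend_size ends up as 3 * UNITS_PER_WORD = 12 bytes.  */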
24107
24108 static void
24109 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
24110 enum machine_mode mode,
24111 tree type,
24112 int *pretend_size,
24113 int second_time ATTRIBUTE_UNUSED)
24114 {
24115 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
24116 int nregs;
24117
24118 cfun->machine->uses_anonymous_args = 1;
24119 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
24120 {
24121 nregs = pcum->aapcs_ncrn;
24122 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
24123 nregs++;
24124 }
24125 else
24126 nregs = pcum->nregs;
24127
24128 if (nregs < NUM_ARG_REGS)
24129 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
24130 }
24131
24132 /* Return nonzero if the CONSUMER instruction (a store) does not need
24133 PRODUCER's value to calculate the address. */
24134
24135 int
24136 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
24137 {
24138 rtx value = PATTERN (producer);
24139 rtx addr = PATTERN (consumer);
24140
24141 if (GET_CODE (value) == COND_EXEC)
24142 value = COND_EXEC_CODE (value);
24143 if (GET_CODE (value) == PARALLEL)
24144 value = XVECEXP (value, 0, 0);
24145 value = XEXP (value, 0);
24146 if (GET_CODE (addr) == COND_EXEC)
24147 addr = COND_EXEC_CODE (addr);
24148 if (GET_CODE (addr) == PARALLEL)
24149 addr = XVECEXP (addr, 0, 0);
24150 addr = XEXP (addr, 0);
24151
24152 return !reg_overlap_mentioned_p (value, addr);
24153 }
24154
24155 /* Return nonzero if the CONSUMER instruction (a store) does need
24156 PRODUCER's value to calculate the address. */
24157
24158 int
24159 arm_early_store_addr_dep (rtx producer, rtx consumer)
24160 {
24161 return !arm_no_early_store_addr_dep (producer, consumer);
24162 }
24163
24164 /* Return nonzero if the CONSUMER instruction (a load) does need
24165 PRODUCER's value to calculate the address. */
24166
24167 int
24168 arm_early_load_addr_dep (rtx producer, rtx consumer)
24169 {
24170 rtx value = PATTERN (producer);
24171 rtx addr = PATTERN (consumer);
24172
24173 if (GET_CODE (value) == COND_EXEC)
24174 value = COND_EXEC_CODE (value);
24175 if (GET_CODE (value) == PARALLEL)
24176 value = XVECEXP (value, 0, 0);
24177 value = XEXP (value, 0);
24178 if (GET_CODE (addr) == COND_EXEC)
24179 addr = COND_EXEC_CODE (addr);
24180 if (GET_CODE (addr) == PARALLEL)
24181 {
24182 if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
24183 addr = XVECEXP (addr, 0, 1);
24184 else
24185 addr = XVECEXP (addr, 0, 0);
24186 }
24187 addr = XEXP (addr, 1);
24188
24189 return reg_overlap_mentioned_p (value, addr);
24190 }
24191
24192 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
24193 have an early register shift value or amount dependency on the
24194 result of PRODUCER. */
24195
24196 int
24197 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
24198 {
24199 rtx value = PATTERN (producer);
24200 rtx op = PATTERN (consumer);
24201 rtx early_op;
24202
24203 if (GET_CODE (value) == COND_EXEC)
24204 value = COND_EXEC_CODE (value);
24205 if (GET_CODE (value) == PARALLEL)
24206 value = XVECEXP (value, 0, 0);
24207 value = XEXP (value, 0);
24208 if (GET_CODE (op) == COND_EXEC)
24209 op = COND_EXEC_CODE (op);
24210 if (GET_CODE (op) == PARALLEL)
24211 op = XVECEXP (op, 0, 0);
24212 op = XEXP (op, 1);
24213
24214 early_op = XEXP (op, 0);
24215 /* This is either an actual independent shift, or a shift applied to
24216 the first operand of another operation. We want the whole shift
24217 operation. */
24218 if (REG_P (early_op))
24219 early_op = op;
24220
24221 return !reg_overlap_mentioned_p (value, early_op);
24222 }
24223
24224 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
24225 have an early register shift value dependency on the result of
24226 PRODUCER. */
24227
24228 int
24229 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
24230 {
24231 rtx value = PATTERN (producer);
24232 rtx op = PATTERN (consumer);
24233 rtx early_op;
24234
24235 if (GET_CODE (value) == COND_EXEC)
24236 value = COND_EXEC_CODE (value);
24237 if (GET_CODE (value) == PARALLEL)
24238 value = XVECEXP (value, 0, 0);
24239 value = XEXP (value, 0);
24240 if (GET_CODE (op) == COND_EXEC)
24241 op = COND_EXEC_CODE (op);
24242 if (GET_CODE (op) == PARALLEL)
24243 op = XVECEXP (op, 0, 0);
24244 op = XEXP (op, 1);
24245
24246 early_op = XEXP (op, 0);
24247
24248 /* This is either an actual independent shift, or a shift applied to
24249 the first operand of another operation. We want the value being
24250 shifted, in either case. */
24251 if (!REG_P (early_op))
24252 early_op = XEXP (early_op, 0);
24253
24254 return !reg_overlap_mentioned_p (value, early_op);
24255 }
24256
24257 /* Return nonzero if the CONSUMER (a mul or mac op) does not
24258 have an early register mult dependency on the result of
24259 PRODUCER. */
24260
24261 int
24262 arm_no_early_mul_dep (rtx producer, rtx consumer)
24263 {
24264 rtx value = PATTERN (producer);
24265 rtx op = PATTERN (consumer);
24266
24267 if (GET_CODE (value) == COND_EXEC)
24268 value = COND_EXEC_CODE (value);
24269 if (GET_CODE (value) == PARALLEL)
24270 value = XVECEXP (value, 0, 0);
24271 value = XEXP (value, 0);
24272 if (GET_CODE (op) == COND_EXEC)
24273 op = COND_EXEC_CODE (op);
24274 if (GET_CODE (op) == PARALLEL)
24275 op = XVECEXP (op, 0, 0);
24276 op = XEXP (op, 1);
24277
24278 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
24279 {
24280 if (GET_CODE (XEXP (op, 0)) == MULT)
24281 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
24282 else
24283 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
24284 }
24285
24286 return 0;
24287 }
24288
24289 /* We can't rely on the caller doing the proper promotion when
24290 using APCS or ATPCS. */
24291
24292 static bool
24293 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
24294 {
24295 return !TARGET_AAPCS_BASED;
24296 }
24297
24298 static enum machine_mode
24299 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
24300 enum machine_mode mode,
24301 int *punsignedp ATTRIBUTE_UNUSED,
24302 const_tree fntype ATTRIBUTE_UNUSED,
24303 int for_return ATTRIBUTE_UNUSED)
24304 {
24305 if (GET_MODE_CLASS (mode) == MODE_INT
24306 && GET_MODE_SIZE (mode) < 4)
24307 return SImode;
24308
24309 return mode;
24310 }
24311
24312 /* AAPCS based ABIs use short enums by default. */
24313
24314 static bool
24315 arm_default_short_enums (void)
24316 {
24317 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
24318 }
24319
24320
24321 /* AAPCS requires that anonymous bitfields affect structure alignment. */
24322
24323 static bool
24324 arm_align_anon_bitfield (void)
24325 {
24326 return TARGET_AAPCS_BASED;
24327 }
24328
24329
24330 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
24331
24332 static tree
24333 arm_cxx_guard_type (void)
24334 {
24335 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
24336 }
24337
24338 /* Return non-zero if the consumer (a multiply-accumulate instruction)
24339 has an accumulator dependency on the result of the producer (a
24340 multiplication instruction) and no other dependency on that result. */
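/* For example, "mul r0, r1, r2" followed by "mla r3, r4, r5, r0" satisfies
   this test: r0 feeds only the accumulator operand of the mla.  */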
24341 int
24342 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
24343 {
24344 rtx mul = PATTERN (producer);
24345 rtx mac = PATTERN (consumer);
24346 rtx mul_result;
24347 rtx mac_op0, mac_op1, mac_acc;
24348
24349 if (GET_CODE (mul) == COND_EXEC)
24350 mul = COND_EXEC_CODE (mul);
24351 if (GET_CODE (mac) == COND_EXEC)
24352 mac = COND_EXEC_CODE (mac);
24353
24354 /* Check that mul is of the form (set (...) (mult ...))
24355 and mla is of the form (set (...) (plus (mult ...) (...))). */
24356 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
24357 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
24358 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
24359 return 0;
24360
24361 mul_result = XEXP (mul, 0);
24362 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
24363 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
24364 mac_acc = XEXP (XEXP (mac, 1), 1);
24365
24366 return (reg_overlap_mentioned_p (mul_result, mac_acc)
24367 && !reg_overlap_mentioned_p (mul_result, mac_op0)
24368 && !reg_overlap_mentioned_p (mul_result, mac_op1));
24369 }
24370
24371
24372 /* The EABI says test the least significant bit of a guard variable. */
24373
24374 static bool
24375 arm_cxx_guard_mask_bit (void)
24376 {
24377 return TARGET_AAPCS_BASED;
24378 }
24379
24380
24381 /* The EABI specifies that all array cookies are 8 bytes long. */
24382
24383 static tree
24384 arm_get_cookie_size (tree type)
24385 {
24386 tree size;
24387
24388 if (!TARGET_AAPCS_BASED)
24389 return default_cxx_get_cookie_size (type);
24390
24391 size = build_int_cst (sizetype, 8);
24392 return size;
24393 }
24394
24395
24396 /* The EABI says that array cookies should also contain the element size. */
24397
24398 static bool
24399 arm_cookie_has_size (void)
24400 {
24401 return TARGET_AAPCS_BASED;
24402 }
24403
24404
24405 /* The EABI says constructors and destructors should return a pointer to
24406 the object constructed/destroyed. */
24407
24408 static bool
24409 arm_cxx_cdtor_returns_this (void)
24410 {
24411 return TARGET_AAPCS_BASED;
24412 }
24413
24414 /* The EABI says that an inline function may never be the key
24415 method. */
24416
24417 static bool
24418 arm_cxx_key_method_may_be_inline (void)
24419 {
24420 return !TARGET_AAPCS_BASED;
24421 }
24422
24423 static void
24424 arm_cxx_determine_class_data_visibility (tree decl)
24425 {
24426 if (!TARGET_AAPCS_BASED
24427 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
24428 return;
24429
24430 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
24431 is exported. However, on systems without dynamic vague linkage,
24432 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
24433 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
24434 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
24435 else
24436 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
24437 DECL_VISIBILITY_SPECIFIED (decl) = 1;
24438 }
24439
24440 static bool
24441 arm_cxx_class_data_always_comdat (void)
24442 {
24443 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
24444 vague linkage if the class has no key function. */
24445 return !TARGET_AAPCS_BASED;
24446 }
24447
24448
24449 /* The EABI says __aeabi_atexit should be used to register static
24450 destructors. */
24451
24452 static bool
24453 arm_cxx_use_aeabi_atexit (void)
24454 {
24455 return TARGET_AAPCS_BASED;
24456 }
24457
24458
24459 void
24460 arm_set_return_address (rtx source, rtx scratch)
24461 {
24462 arm_stack_offsets *offsets;
24463 HOST_WIDE_INT delta;
24464 rtx addr;
24465 unsigned long saved_regs;
24466
24467 offsets = arm_get_frame_offsets ();
24468 saved_regs = offsets->saved_regs_mask;
24469
24470 if ((saved_regs & (1 << LR_REGNUM)) == 0)
24471 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24472 else
24473 {
24474 if (frame_pointer_needed)
24475 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
24476 else
24477 {
24478 /* LR will be the first saved register. */
24479 delta = offsets->outgoing_args - (offsets->frame + 4);
24480
24481
24482 if (delta >= 4096)
24483 {
24484 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
24485 GEN_INT (delta & ~4095)));
24486 addr = scratch;
24487 delta &= 4095;
24488 }
24489 else
24490 addr = stack_pointer_rtx;
24491
24492 addr = plus_constant (Pmode, addr, delta);
24493 }
24494 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24495 }
24496 }
24497
24498
24499 void
24500 thumb_set_return_address (rtx source, rtx scratch)
24501 {
24502 arm_stack_offsets *offsets;
24503 HOST_WIDE_INT delta;
24504 HOST_WIDE_INT limit;
24505 int reg;
24506 rtx addr;
24507 unsigned long mask;
24508
24509 emit_use (source);
24510
24511 offsets = arm_get_frame_offsets ();
24512 mask = offsets->saved_regs_mask;
24513 if (mask & (1 << LR_REGNUM))
24514 {
24515 limit = 1024;
24516 /* Find the saved regs. */
24517 if (frame_pointer_needed)
24518 {
24519 delta = offsets->soft_frame - offsets->saved_args;
24520 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
24521 if (TARGET_THUMB1)
24522 limit = 128;
24523 }
24524 else
24525 {
24526 delta = offsets->outgoing_args - offsets->saved_args;
24527 reg = SP_REGNUM;
24528 }
24529 /* Allow for the stack frame. */
24530 if (TARGET_THUMB1 && TARGET_BACKTRACE)
24531 delta -= 16;
24532 /* The link register is always the first saved register. */
24533 delta -= 4;
24534
24535 /* Construct the address. */
24536 addr = gen_rtx_REG (SImode, reg);
24537 if (delta > limit)
24538 {
24539 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
24540 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
24541 addr = scratch;
24542 }
24543 else
24544 addr = plus_constant (Pmode, addr, delta);
24545
24546 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24547 }
24548 else
24549 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24550 }
24551
24552 /* Implements target hook vector_mode_supported_p. */
24553 bool
24554 arm_vector_mode_supported_p (enum machine_mode mode)
24555 {
24556 /* Neon also supports V2SImode, etc. listed in the clause below. */
24557 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
24558 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
24559 return true;
24560
24561 if ((TARGET_NEON || TARGET_IWMMXT)
24562 && ((mode == V2SImode)
24563 || (mode == V4HImode)
24564 || (mode == V8QImode)))
24565 return true;
24566
24567 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
24568 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
24569 || mode == V2HAmode))
24570 return true;
24571
24572 return false;
24573 }
24574
24575 /* Implements target hook array_mode_supported_p. */
24576
24577 static bool
24578 arm_array_mode_supported_p (enum machine_mode mode,
24579 unsigned HOST_WIDE_INT nelems)
24580 {
24581 if (TARGET_NEON
24582 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
24583 && (nelems >= 2 && nelems <= 4))
24584 return true;
24585
24586 return false;
24587 }
24588
24589 /* Use the option -mvectorize-with-neon-double to override the use of quadword
24590 registers when autovectorizing for Neon, at least until multiple vector
24591 widths are supported properly by the middle-end. */
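/* For example, SImode data is vectorized as V4SImode by default and as
   V2SImode under -mvectorize-with-neon-double.  */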
24592
24593 static enum machine_mode
24594 arm_preferred_simd_mode (enum machine_mode mode)
24595 {
24596 if (TARGET_NEON)
24597 switch (mode)
24598 {
24599 case SFmode:
24600 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
24601 case SImode:
24602 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
24603 case HImode:
24604 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
24605 case QImode:
24606 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
24607 case DImode:
24608 if (!TARGET_NEON_VECTORIZE_DOUBLE)
24609 return V2DImode;
24610 break;
24611
24612 default:;
24613 }
24614
24615 if (TARGET_REALLY_IWMMXT)
24616 switch (mode)
24617 {
24618 case SImode:
24619 return V2SImode;
24620 case HImode:
24621 return V4HImode;
24622 case QImode:
24623 return V8QImode;
24624
24625 default:;
24626 }
24627
24628 return word_mode;
24629 }
24630
24631 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
24632
24633 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
24634 using r0-r4 for function arguments and r7 for the stack frame, and then not
24635 have enough registers left over to do doubleword arithmetic. For Thumb-2 all the
24636 potentially problematic instructions accept high registers so this is not
24637 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
24638 that require many low registers. */
24639 static bool
24640 arm_class_likely_spilled_p (reg_class_t rclass)
24641 {
24642 if ((TARGET_THUMB1 && rclass == LO_REGS)
24643 || rclass == CC_REG)
24644 return true;
24645
24646 return false;
24647 }
24648
24649 /* Implements target hook small_register_classes_for_mode_p. */
24650 bool
24651 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
24652 {
24653 return TARGET_THUMB1;
24654 }
24655
24656 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
24657 ARM insns and therefore guarantee that the shift count is modulo 256.
24658 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
24659 guarantee no particular behavior for out-of-range counts. */
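/* For example, an SImode shift by 257 behaves like a shift by 1, so
   returning 255 lets the middle-end omit an explicit masking of the
   count.  */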
24660
24661 static unsigned HOST_WIDE_INT
24662 arm_shift_truncation_mask (enum machine_mode mode)
24663 {
24664 return mode == SImode ? 255 : 0;
24665 }
24666
24667
24668 /* Map internal gcc register numbers to DWARF2 register numbers. */
24669
24670 unsigned int
24671 arm_dbx_register_number (unsigned int regno)
24672 {
24673 if (regno < 16)
24674 return regno;
24675
24676 if (IS_VFP_REGNUM (regno))
24677 {
24678 /* See comment in arm_dwarf_register_span. */
24679 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24680 return 64 + regno - FIRST_VFP_REGNUM;
24681 else
24682 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
24683 }
24684
24685 if (IS_IWMMXT_GR_REGNUM (regno))
24686 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
24687
24688 if (IS_IWMMXT_REGNUM (regno))
24689 return 112 + regno - FIRST_IWMMXT_REGNUM;
24690
24691 gcc_unreachable ();
24692 }
24693
24694 /* DWARF models VFPv3 registers as 32 64-bit registers.
24695 GCC models them as 64 32-bit registers, so we need to describe this to
24696 the DWARF generation code. Other registers can use the default. */
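/* For instance, a two-doubleword value held in d16/d17 (registers that
   cannot be viewed as single-precision) is described as a PARALLEL of the
   DWARF DImode registers 272 and 273.  */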
24697 static rtx
24698 arm_dwarf_register_span (rtx rtl)
24699 {
24700 unsigned regno;
24701 int nregs;
24702 int i;
24703 rtx p;
24704
24705 regno = REGNO (rtl);
24706 if (!IS_VFP_REGNUM (regno))
24707 return NULL_RTX;
24708
24709 /* XXX FIXME: The EABI defines two VFP register ranges:
24710 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
24711 256-287: D0-D31
24712 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
24713 corresponding D register. Until GDB supports this, we shall use the
24714 legacy encodings. We also use these encodings for D0-D15 for
24715 compatibility with older debuggers. */
24716 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24717 return NULL_RTX;
24718
24719 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
24720 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
24721 regno = (regno - FIRST_VFP_REGNUM) / 2;
24722 for (i = 0; i < nregs; i++)
24723 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
24724
24725 return p;
24726 }
24727
24728 #if ARM_UNWIND_INFO
24729 /* Emit unwind directives for a store-multiple instruction or stack pointer
24730 push during alignment.
24731 These should only ever be generated by the function prologue code, so
24732 expect them to have a particular form. */
24733
24734 static void
24735 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
24736 {
24737 int i;
24738 HOST_WIDE_INT offset;
24739 HOST_WIDE_INT nregs;
24740 int reg_size;
24741 unsigned reg;
24742 unsigned lastreg;
24743 rtx e;
24744
24745 e = XVECEXP (p, 0, 0);
24746 if (GET_CODE (e) != SET)
24747 abort ();
24748
24749 /* First insn will adjust the stack pointer. */
24750 if (GET_CODE (e) != SET
24751 || !REG_P (XEXP (e, 0))
24752 || REGNO (XEXP (e, 0)) != SP_REGNUM
24753 || GET_CODE (XEXP (e, 1)) != PLUS)
24754 abort ();
24755
24756 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
24757 nregs = XVECLEN (p, 0) - 1;
24758
24759 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
24760 if (reg < 16)
24761 {
24762 /* The function prologue may also push pc, but does not annotate it, since
24763 it is never restored. We turn this into a stack pointer adjustment. */
24764 if (nregs * 4 == offset - 4)
24765 {
24766 fprintf (asm_out_file, "\t.pad #4\n");
24767 offset -= 4;
24768 }
24769 reg_size = 4;
24770 fprintf (asm_out_file, "\t.save {");
24771 }
24772 else if (IS_VFP_REGNUM (reg))
24773 {
24774 reg_size = 8;
24775 fprintf (asm_out_file, "\t.vsave {");
24776 }
24777 else
24778 /* Unknown register type. */
24779 abort ();
24780
24781 /* If the stack increment doesn't match the size of the saved registers,
24782 something has gone horribly wrong. */
24783 if (offset != nregs * reg_size)
24784 abort ();
24785
24786 offset = 0;
24787 lastreg = 0;
24788 /* The remaining insns will describe the stores. */
24789 for (i = 1; i <= nregs; i++)
24790 {
24791 /* Expect (set (mem <addr>) (reg)).
24792 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
24793 e = XVECEXP (p, 0, i);
24794 if (GET_CODE (e) != SET
24795 || !MEM_P (XEXP (e, 0))
24796 || !REG_P (XEXP (e, 1)))
24797 abort ();
24798
24799 reg = REGNO (XEXP (e, 1));
24800 if (reg < lastreg)
24801 abort ();
24802
24803 if (i != 1)
24804 fprintf (asm_out_file, ", ");
24805 /* We can't use %r for vfp because we need to use the
24806 double precision register names. */
24807 if (IS_VFP_REGNUM (reg))
24808 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
24809 else
24810 asm_fprintf (asm_out_file, "%r", reg);
24811
24812 #ifdef ENABLE_CHECKING
24813 /* Check that the addresses are consecutive. */
24814 e = XEXP (XEXP (e, 0), 0);
24815 if (GET_CODE (e) == PLUS)
24816 {
24817 offset += reg_size;
24818 if (!REG_P (XEXP (e, 0))
24819 || REGNO (XEXP (e, 0)) != SP_REGNUM
24820 || !CONST_INT_P (XEXP (e, 1))
24821 || offset != INTVAL (XEXP (e, 1)))
24822 abort ();
24823 }
24824 else if (i != 1
24825 || !REG_P (e)
24826 || REGNO (e) != SP_REGNUM)
24827 abort ();
24828 #endif
24829 }
24830 fprintf (asm_out_file, "}\n");
24831 }
24832
24833 /* Emit unwind directives for a SET. */
24834
24835 static void
24836 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
24837 {
24838 rtx e0;
24839 rtx e1;
24840 unsigned reg;
24841
24842 e0 = XEXP (p, 0);
24843 e1 = XEXP (p, 1);
24844 switch (GET_CODE (e0))
24845 {
24846 case MEM:
24847 /* Pushing a single register. */
24848 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
24849 || !REG_P (XEXP (XEXP (e0, 0), 0))
24850 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
24851 abort ();
24852
24853 asm_fprintf (asm_out_file, "\t.save ");
24854 if (IS_VFP_REGNUM (REGNO (e1)))
24855 asm_fprintf(asm_out_file, "{d%d}\n",
24856 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
24857 else
24858 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
24859 break;
24860
24861 case REG:
24862 if (REGNO (e0) == SP_REGNUM)
24863 {
24864 /* A stack increment. */
24865 if (GET_CODE (e1) != PLUS
24866 || !REG_P (XEXP (e1, 0))
24867 || REGNO (XEXP (e1, 0)) != SP_REGNUM
24868 || !CONST_INT_P (XEXP (e1, 1)))
24869 abort ();
24870
24871 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
24872 -INTVAL (XEXP (e1, 1)));
24873 }
24874 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
24875 {
24876 HOST_WIDE_INT offset;
24877
24878 if (GET_CODE (e1) == PLUS)
24879 {
24880 if (!REG_P (XEXP (e1, 0))
24881 || !CONST_INT_P (XEXP (e1, 1)))
24882 abort ();
24883 reg = REGNO (XEXP (e1, 0));
24884 offset = INTVAL (XEXP (e1, 1));
24885 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
24886 HARD_FRAME_POINTER_REGNUM, reg,
24887 offset);
24888 }
24889 else if (REG_P (e1))
24890 {
24891 reg = REGNO (e1);
24892 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
24893 HARD_FRAME_POINTER_REGNUM, reg);
24894 }
24895 else
24896 abort ();
24897 }
24898 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
24899 {
24900 /* Move from sp to reg. */
24901 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
24902 }
24903 else if (GET_CODE (e1) == PLUS
24904 && REG_P (XEXP (e1, 0))
24905 && REGNO (XEXP (e1, 0)) == SP_REGNUM
24906 && CONST_INT_P (XEXP (e1, 1)))
24907 {
24908 /* Set reg to offset from sp. */
24909 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
24910 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
24911 }
24912 else
24913 abort ();
24914 break;
24915
24916 default:
24917 abort ();
24918 }
24919 }
24920
24921
24922 /* Emit unwind directives for the given insn. */
24923
24924 static void
24925 arm_unwind_emit (FILE * asm_out_file, rtx insn)
24926 {
24927 rtx note, pat;
24928 bool handled_one = false;
24929
24930 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24931 return;
24932
24933 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24934 && (TREE_NOTHROW (current_function_decl)
24935 || crtl->all_throwers_are_sibcalls))
24936 return;
24937
24938 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
24939 return;
24940
24941 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
24942 {
24943 pat = XEXP (note, 0);
24944 switch (REG_NOTE_KIND (note))
24945 {
24946 case REG_FRAME_RELATED_EXPR:
24947 goto found;
24948
24949 case REG_CFA_REGISTER:
24950 if (pat == NULL)
24951 {
24952 pat = PATTERN (insn);
24953 if (GET_CODE (pat) == PARALLEL)
24954 pat = XVECEXP (pat, 0, 0);
24955 }
24956
24957 /* Only emitted for IS_STACKALIGN re-alignment. */
24958 {
24959 rtx dest, src;
24960 unsigned reg;
24961
24962 src = SET_SRC (pat);
24963 dest = SET_DEST (pat);
24964
24965 gcc_assert (src == stack_pointer_rtx);
24966 reg = REGNO (dest);
24967 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24968 reg + 0x90, reg);
24969 }
24970 handled_one = true;
24971 break;
24972
24973 case REG_CFA_DEF_CFA:
24974 case REG_CFA_EXPRESSION:
24975 case REG_CFA_ADJUST_CFA:
24976 case REG_CFA_OFFSET:
24977 /* ??? Only handling here what we actually emit. */
24978 gcc_unreachable ();
24979
24980 default:
24981 break;
24982 }
24983 }
24984 if (handled_one)
24985 return;
24986 pat = PATTERN (insn);
24987 found:
24988
24989 switch (GET_CODE (pat))
24990 {
24991 case SET:
24992 arm_unwind_emit_set (asm_out_file, pat);
24993 break;
24994
24995 case SEQUENCE:
24996 /* Store multiple. */
24997 arm_unwind_emit_sequence (asm_out_file, pat);
24998 break;
24999
25000 default:
25001 abort();
25002 }
25003 }
25004
25005
25006 /* Output a reference from a function exception table to the type_info
25007 object X. The EABI specifies that the symbol should be relocated by
25008 an R_ARM_TARGET2 relocation. */
25009
25010 static bool
25011 arm_output_ttype (rtx x)
25012 {
25013 fputs ("\t.word\t", asm_out_file);
25014 output_addr_const (asm_out_file, x);
25015 /* Use special relocations for symbol references. */
25016 if (!CONST_INT_P (x))
25017 fputs ("(TARGET2)", asm_out_file);
25018 fputc ('\n', asm_out_file);
25019
25020 return TRUE;
25021 }
25022
25023 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
25024
25025 static void
25026 arm_asm_emit_except_personality (rtx personality)
25027 {
25028 fputs ("\t.personality\t", asm_out_file);
25029 output_addr_const (asm_out_file, personality);
25030 fputc ('\n', asm_out_file);
25031 }
25032
25033 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
25034
25035 static void
25036 arm_asm_init_sections (void)
25037 {
25038 exception_section = get_unnamed_section (0, output_section_asm_op,
25039 "\t.handlerdata");
25040 }
25041 #endif /* ARM_UNWIND_INFO */
25042
25043 /* Output unwind directives for the start/end of a function. */
25044
25045 void
25046 arm_output_fn_unwind (FILE * f, bool prologue)
25047 {
25048 if (arm_except_unwind_info (&global_options) != UI_TARGET)
25049 return;
25050
25051 if (prologue)
25052 fputs ("\t.fnstart\n", f);
25053 else
25054 {
25055 /* If this function will never be unwound, then mark it as such.
25056 The same condition is used in arm_unwind_emit to suppress
25057 the frame annotations. */
25058 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
25059 && (TREE_NOTHROW (current_function_decl)
25060 || crtl->all_throwers_are_sibcalls))
25061 fputs("\t.cantunwind\n", f);
25062
25063 fputs ("\t.fnend\n", f);
25064 }
25065 }
25066
25067 static bool
25068 arm_emit_tls_decoration (FILE *fp, rtx x)
25069 {
25070 enum tls_reloc reloc;
25071 rtx val;
25072
25073 val = XVECEXP (x, 0, 0);
25074 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
25075
25076 output_addr_const (fp, val);
25077
25078 switch (reloc)
25079 {
25080 case TLS_GD32:
25081 fputs ("(tlsgd)", fp);
25082 break;
25083 case TLS_LDM32:
25084 fputs ("(tlsldm)", fp);
25085 break;
25086 case TLS_LDO32:
25087 fputs ("(tlsldo)", fp);
25088 break;
25089 case TLS_IE32:
25090 fputs ("(gottpoff)", fp);
25091 break;
25092 case TLS_LE32:
25093 fputs ("(tpoff)", fp);
25094 break;
25095 case TLS_DESCSEQ:
25096 fputs ("(tlsdesc)", fp);
25097 break;
25098 default:
25099 gcc_unreachable ();
25100 }
25101
25102 switch (reloc)
25103 {
25104 case TLS_GD32:
25105 case TLS_LDM32:
25106 case TLS_IE32:
25107 case TLS_DESCSEQ:
25108 fputs (" + (. - ", fp);
25109 output_addr_const (fp, XVECEXP (x, 0, 2));
25110 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
25111 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
25112 output_addr_const (fp, XVECEXP (x, 0, 3));
25113 fputc (')', fp);
25114 break;
25115 default:
25116 break;
25117 }
25118
25119 return TRUE;
25120 }
25121
25122 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
25123
25124 static void
25125 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
25126 {
25127 gcc_assert (size == 4);
25128 fputs ("\t.word\t", file);
25129 output_addr_const (file, x);
25130 fputs ("(tlsldo)", file);
25131 }
25132
25133 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
25134
25135 static bool
25136 arm_output_addr_const_extra (FILE *fp, rtx x)
25137 {
25138 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
25139 return arm_emit_tls_decoration (fp, x);
25140 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
25141 {
25142 char label[256];
25143 int labelno = INTVAL (XVECEXP (x, 0, 0));
25144
25145 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
25146 assemble_name_raw (fp, label);
25147
25148 return TRUE;
25149 }
25150 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
25151 {
25152 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
25153 if (GOT_PCREL)
25154 fputs ("+.", fp);
25155 fputs ("-(", fp);
25156 output_addr_const (fp, XVECEXP (x, 0, 0));
25157 fputc (')', fp);
25158 return TRUE;
25159 }
25160 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
25161 {
25162 output_addr_const (fp, XVECEXP (x, 0, 0));
25163 if (GOT_PCREL)
25164 fputs ("+.", fp);
25165 fputs ("-(", fp);
25166 output_addr_const (fp, XVECEXP (x, 0, 1));
25167 fputc (')', fp);
25168 return TRUE;
25169 }
25170 else if (GET_CODE (x) == CONST_VECTOR)
25171 return arm_emit_vector_const (fp, x);
25172
25173 return FALSE;
25174 }
25175
25176 /* Output assembly for a shift instruction.
25177 SET_FLAGS determines how the instruction modifies the condition codes.
25178 0 - Do not set condition codes.
25179 1 - Set condition codes.
25180 2 - Use smallest instruction. */
25181 const char *
25182 arm_output_shift(rtx * operands, int set_flags)
25183 {
25184 char pattern[100];
25185 static const char flag_chars[3] = {'?', '.', '!'};
25186 const char *shift;
25187 HOST_WIDE_INT val;
25188 char c;
25189
25190 c = flag_chars[set_flags];
25191 if (TARGET_UNIFIED_ASM)
25192 {
25193 shift = shift_op(operands[3], &val);
25194 if (shift)
25195 {
25196 if (val != -1)
25197 operands[2] = GEN_INT(val);
25198 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
25199 }
25200 else
25201 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
25202 }
25203 else
25204 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
25205 output_asm_insn (pattern, operands);
25206 return "";
25207 }
25208
25209 /* Output assembly for a WMMX immediate shift instruction. */
25210 const char *
25211 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
25212 {
25213 int shift = INTVAL (operands[2]);
25214 char templ[50];
25215 enum machine_mode opmode = GET_MODE (operands[0]);
25216
25217 gcc_assert (shift >= 0);
25218
25219 /* Handle shift values larger than the register versions allow:
25220 > 63 (for D qualifier), 31 (for W qualifier) or 15 (for H qualifier). */
25221 if (((opmode == V4HImode) && (shift > 15))
25222 || ((opmode == V2SImode) && (shift > 31))
25223 || ((opmode == DImode) && (shift > 63)))
25224 {
25225 if (wror_or_wsra)
25226 {
25227 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
25228 output_asm_insn (templ, operands);
25229 if (opmode == DImode)
25230 {
25231 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
25232 output_asm_insn (templ, operands);
25233 }
25234 }
25235 else
25236 {
25237 /* The destination register will contain all zeros. */
25238 sprintf (templ, "wzero\t%%0");
25239 output_asm_insn (templ, operands);
25240 }
25241 return "";
25242 }
25243
25244 if ((opmode == DImode) && (shift > 32))
25245 {
25246 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
25247 output_asm_insn (templ, operands);
25248 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
25249 output_asm_insn (templ, operands);
25250 }
25251 else
25252 {
25253 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
25254 output_asm_insn (templ, operands);
25255 }
25256 return "";
25257 }
25258
25259 /* Output assembly for a WMMX tinsr instruction. */
25260 const char *
25261 arm_output_iwmmxt_tinsr (rtx *operands)
25262 {
25263 int mask = INTVAL (operands[3]);
25264 int i;
25265 char templ[50];
25266 int units = mode_nunits[GET_MODE (operands[0])];
25267 gcc_assert ((mask & (mask - 1)) == 0);
25268 for (i = 0; i < units; ++i)
25269 {
25270 if ((mask & 0x01) == 1)
25271 {
25272 break;
25273 }
25274 mask >>= 1;
25275 }
25276 gcc_assert (i < units);
25277 {
25278 switch (GET_MODE (operands[0]))
25279 {
25280 case V8QImode:
25281 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
25282 break;
25283 case V4HImode:
25284 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
25285 break;
25286 case V2SImode:
25287 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
25288 break;
25289 default:
25290 gcc_unreachable ();
25291 break;
25292 }
25293 output_asm_insn (templ, operands);
25294 }
25295 return "";
25296 }
25297
25298 /* Output a Thumb-1 casesi dispatch sequence. */
25299 const char *
25300 thumb1_output_casesi (rtx *operands)
25301 {
25302 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
25303
25304 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
25305
25306 switch (GET_MODE(diff_vec))
25307 {
25308 case QImode:
25309 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
25310 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
25311 case HImode:
25312 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
25313 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
25314 case SImode:
25315 return "bl\t%___gnu_thumb1_case_si";
25316 default:
25317 gcc_unreachable ();
25318 }
25319 }
25320
25321 /* Output a Thumb-2 casesi instruction. */
25322 const char *
25323 thumb2_output_casesi (rtx *operands)
25324 {
25325 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
25326
25327 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
25328
25329 output_asm_insn ("cmp\t%0, %1", operands);
25330 output_asm_insn ("bhi\t%l3", operands);
25331 switch (GET_MODE(diff_vec))
25332 {
25333 case QImode:
25334 return "tbb\t[%|pc, %0]";
25335 case HImode:
25336 return "tbh\t[%|pc, %0, lsl #1]";
25337 case SImode:
25338 if (flag_pic)
25339 {
25340 output_asm_insn ("adr\t%4, %l2", operands);
25341 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
25342 output_asm_insn ("add\t%4, %4, %5", operands);
25343 return "bx\t%4";
25344 }
25345 else
25346 {
25347 output_asm_insn ("adr\t%4, %l2", operands);
25348 return "ldr\t%|pc, [%4, %0, lsl #2]";
25349 }
25350 default:
25351 gcc_unreachable ();
25352 }
25353 }
25354
25355 /* Most ARM cores are single issue, but some newer ones can dual issue.
25356 The scheduler descriptions rely on this being correct. */
25357 static int
25358 arm_issue_rate (void)
25359 {
25360 switch (arm_tune)
25361 {
25362 case cortexa15:
25363 return 3;
25364
25365 case cortexr4:
25366 case cortexr4f:
25367 case cortexr5:
25368 case genericv7a:
25369 case cortexa5:
25370 case cortexa8:
25371 case cortexa9:
25372 case fa726te:
25373 return 2;
25374
25375 default:
25376 return 1;
25377 }
25378 }
25379
25380 /* A table and a function to perform ARM-specific name mangling for
25381 NEON vector types in order to conform to the AAPCS (see "Procedure
25382 Call Standard for the ARM Architecture", Appendix A). To qualify
25383 for emission with the mangled names defined in that document, a
25384 vector type must not only be of the correct mode but also be
25385 composed of NEON vector element types (e.g. __builtin_neon_qi). */
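/* For example, a V8QImode vector whose elements are __builtin_neon_qi is
   emitted with the AAPCS mangled name "15__simd64_int8_t".  */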
25386 typedef struct
25387 {
25388 enum machine_mode mode;
25389 const char *element_type_name;
25390 const char *aapcs_name;
25391 } arm_mangle_map_entry;
25392
25393 static arm_mangle_map_entry arm_mangle_map[] = {
25394 /* 64-bit containerized types. */
25395 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
25396 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
25397 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
25398 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
25399 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
25400 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
25401 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
25402 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
25403 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
25404 /* 128-bit containerized types. */
25405 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
25406 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
25407 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
25408 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
25409 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
25410 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
25411 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
25412 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
25413 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
25414 { VOIDmode, NULL, NULL }
25415 };
25416
25417 const char *
25418 arm_mangle_type (const_tree type)
25419 {
25420 arm_mangle_map_entry *pos = arm_mangle_map;
25421
25422 /* The ARM ABI documents (10th October 2008) say that "__va_list"
25423 has to be mangled as if it is in the "std" namespace. */
25424 if (TARGET_AAPCS_BASED
25425 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
25426 return "St9__va_list";
25427
25428 /* Half-precision float. */
25429 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
25430 return "Dh";
25431
25432 if (TREE_CODE (type) != VECTOR_TYPE)
25433 return NULL;
25434
25435 /* Check the mode of the vector type, and the name of the vector
25436 element type, against the table. */
25437 while (pos->mode != VOIDmode)
25438 {
25439 tree elt_type = TREE_TYPE (type);
25440
25441 if (pos->mode == TYPE_MODE (type)
25442 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
25443 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
25444 pos->element_type_name))
25445 return pos->aapcs_name;
25446
25447 pos++;
25448 }
25449
25450 /* Use the default mangling for unrecognized (possibly user-defined)
25451 vector types. */
25452 return NULL;
25453 }
25454
25455 /* Order of allocation of core registers for Thumb: this allocation is
25456 written over the corresponding initial entries of the array
25457 initialized with REG_ALLOC_ORDER. We allocate all low registers
25458 first. Saving and restoring a low register is usually cheaper than
25459 using a call-clobbered high register. */
25460
25461 static const int thumb_core_reg_alloc_order[] =
25462 {
25463 3, 2, 1, 0, 4, 5, 6, 7,
25464 14, 12, 8, 9, 10, 11
25465 };
25466
25467 /* Adjust register allocation order when compiling for Thumb. */
25468
25469 void
25470 arm_order_regs_for_local_alloc (void)
25471 {
25472 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
25473 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
25474 if (TARGET_THUMB)
25475 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
25476 sizeof (thumb_core_reg_alloc_order));
25477 }
25478
25479 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
25480
25481 bool
25482 arm_frame_pointer_required (void)
25483 {
25484 return (cfun->has_nonlocal_label
25485 || SUBTARGET_FRAME_POINTER_REQUIRED
25486 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
25487 }
25488
25489 /* Thumb-1 is the only target that lacks conditional execution, so return
25490 true unless we are generating Thumb-1 code. */
25491 static bool
25492 arm_have_conditional_execution (void)
25493 {
25494 return !TARGET_THUMB1;
25495 }
25496
25497 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
25498 static HOST_WIDE_INT
25499 arm_vector_alignment (const_tree type)
25500 {
25501 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
25502
25503 if (TARGET_AAPCS_BASED)
25504 align = MIN (align, 64);
25505
25506 return align;
25507 }
25508
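/* Return a bitmask of the vector sizes (in bytes) that the auto-vectorizer
   should try: both 16 and 8, unless only double-word NEON vectorization
   was requested.  */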
25509 static unsigned int
25510 arm_autovectorize_vector_sizes (void)
25511 {
25512 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
25513 }
25514
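/* Return true if alignment for a vector of TYPE is reachable, e.g. by
   peeling a loop; IS_PACKED is true for an access within a packed
   structure.  */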
25515 static bool
25516 arm_vector_alignment_reachable (const_tree type, bool is_packed)
25517 {
25518 /* Vectors which aren't in packed structures will not be less aligned than
25519 the natural alignment of their element type, so this is safe. */
25520 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25521 return !is_packed;
25522
25523 return default_builtin_vector_alignment_reachable (type, is_packed);
25524 }
25525
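/* Return true if a misaligned vector access of the given MODE and TYPE
   can be supported; a MISALIGNMENT of -1 means the misalignment is not
   known at compile time.  */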
25526 static bool
25527 arm_builtin_support_vector_misalignment (enum machine_mode mode,
25528 const_tree type, int misalignment,
25529 bool is_packed)
25530 {
25531 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25532 {
25533 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
25534
25535 if (is_packed)
25536 return align == 1;
25537
25538 /* If the misalignment is unknown, we should be able to handle the access
25539 so long as it is not to a member of a packed data structure. */
25540 if (misalignment == -1)
25541 return true;
25542
25543 /* Return true if the misalignment is a multiple of the natural alignment
25544 of the vector's element type. This is probably always going to be
25545 true in practice, since we've already established that this isn't a
25546 packed access. */
25547 return ((misalignment % align) == 0);
25548 }
25549
25550 return default_builtin_support_vector_misalignment (mode, type, misalignment,
25551 is_packed);
25552 }
25553
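/* Adjust the sets of fixed and call-used registers to match the selected
   architecture, ABI and command-line options.  */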
25554 static void
25555 arm_conditional_register_usage (void)
25556 {
25557 int regno;
25558
25559 if (TARGET_THUMB1 && optimize_size)
25560 {
25561 /* When optimizing for size on Thumb-1, it's better not
25562 to use the HI regs, because of the overhead of
25563 stacking them. */
25564 for (regno = FIRST_HI_REGNUM;
25565 regno <= LAST_HI_REGNUM; ++regno)
25566 fixed_regs[regno] = call_used_regs[regno] = 1;
25567 }
25568
25569 /* The link register can be clobbered by any branch insn,
25570 but we have no way to track that at present, so mark
25571 it as unavailable. */
25572 if (TARGET_THUMB1)
25573 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
25574
25575 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
25576 {
25577 /* VFPv3 registers are disabled when earlier VFP
25578 versions are selected due to the definition of
25579 LAST_VFP_REGNUM. */
25580 for (regno = FIRST_VFP_REGNUM;
25581 regno <= LAST_VFP_REGNUM; ++ regno)
25582 {
25583 fixed_regs[regno] = 0;
25584 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
25585 || regno >= FIRST_VFP_REGNUM + 32;
25586 }
25587 }
25588
25589 if (TARGET_REALLY_IWMMXT)
25590 {
25591 regno = FIRST_IWMMXT_GR_REGNUM;
25592 /* The 2002/10/09 revision of the XScale ABI has wCG0
25593 and wCG1 as call-preserved registers. The 2002/11/21
25594 revision changed this so that all wCG registers are
25595 scratch registers. */
25596 for (regno = FIRST_IWMMXT_GR_REGNUM;
25597 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
25598 fixed_regs[regno] = 0;
25599 /* The XScale ABI has wR0 - wR9 as scratch registers,
25600 the rest as call-preserved registers. */
25601 for (regno = FIRST_IWMMXT_REGNUM;
25602 regno <= LAST_IWMMXT_REGNUM; ++ regno)
25603 {
25604 fixed_regs[regno] = 0;
25605 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
25606 }
25607 }
25608
25609 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
25610 {
25611 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25612 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25613 }
25614 else if (TARGET_APCS_STACK)
25615 {
25616 fixed_regs[10] = 1;
25617 call_used_regs[10] = 1;
25618 }
25619 /* -mcaller-super-interworking reserves r11 for calls to
25620 _interwork_r11_call_via_rN(). Making the register global
25621 is an easy way of ensuring that it remains valid for all
25622 calls. */
25623 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
25624 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
25625 {
25626 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25627 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25628 if (TARGET_CALLER_INTERWORKING)
25629 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25630 }
25631 SUBTARGET_CONDITIONAL_REGISTER_USAGE
25632 }
25633
25634 static reg_class_t
25635 arm_preferred_rename_class (reg_class_t rclass)
25636 {
25637 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
25638 using GENERAL_REGS. During the register rename pass we therefore prefer
25639 LO_REGS, which can reduce code size. */
25640 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
25641 return LO_REGS;
25642 else
25643 return NO_REGS;
25644 }
25645
25646 /* Compute the attribute "length" of insn "*push_multi".
25647 So this function MUST be kept in sync with that insn pattern. */
25648 int
25649 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
25650 {
25651 int i, regno, hi_reg;
25652 int num_saves = XVECLEN (parallel_op, 0);
25653
25654 /* ARM mode. */
25655 if (TARGET_ARM)
25656 return 4;
25657 /* Thumb1 mode. */
25658 if (TARGET_THUMB1)
25659 return 2;
25660
25661 /* Thumb2 mode. */
25662 regno = REGNO (first_op);
25663 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25664 for (i = 1; i < num_saves && !hi_reg; i++)
25665 {
25666 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
25667 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25668 }
25669
25670 if (!hi_reg)
25671 return 2;
25672 return 4;
25673 }
25674
25675 /* Compute the number of instructions emitted by output_move_double. */
25676 int
25677 arm_count_output_move_double_insns (rtx *operands)
25678 {
25679 int count;
25680 rtx ops[2];
25681 /* output_move_double may modify the operands array, so call it
25682 here on a copy of the array. */
25683 ops[0] = operands[0];
25684 ops[1] = operands[1];
25685 output_move_double (ops, false, &count);
25686 return count;
25687 }
25688
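/* If OPERAND is a CONST_DOUBLE whose exact reciprocal is a 32-bit power of
   two, return the base-2 logarithm of that reciprocal (the number of
   fraction bits); otherwise return 0.  */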
25689 int
25690 vfp3_const_double_for_fract_bits (rtx operand)
25691 {
25692 REAL_VALUE_TYPE r0;
25693
25694 if (!CONST_DOUBLE_P (operand))
25695 return 0;
25696
25697 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
25698 if (exact_real_inverse (DFmode, &r0))
25699 {
25700 if (exact_real_truncate (DFmode, &r0))
25701 {
25702 HOST_WIDE_INT value = real_to_integer (&r0);
25703 value = value & 0xffffffff;
25704 if ((value != 0) && ( (value & (value - 1)) == 0))
25705 return int_log2 (value);
25706 }
25707 }
25708 return 0;
25709 }
25710 \f
25711 /* Emit a memory barrier around an atomic sequence according to MODEL. */
25712
25713 static void
25714 arm_pre_atomic_barrier (enum memmodel model)
25715 {
25716 if (need_atomic_barrier_p (model, true))
25717 emit_insn (gen_memory_barrier ());
25718 }
25719
25720 static void
25721 arm_post_atomic_barrier (enum memmodel model)
25722 {
25723 if (need_atomic_barrier_p (model, false))
25724 emit_insn (gen_memory_barrier ());
25725 }
25726
25727 /* Emit the load-exclusive and store-exclusive instructions. */
25728
25729 static void
25730 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
25731 {
25732 rtx (*gen) (rtx, rtx);
25733
25734 switch (mode)
25735 {
25736 case QImode: gen = gen_arm_load_exclusiveqi; break;
25737 case HImode: gen = gen_arm_load_exclusivehi; break;
25738 case SImode: gen = gen_arm_load_exclusivesi; break;
25739 case DImode: gen = gen_arm_load_exclusivedi; break;
25740 default:
25741 gcc_unreachable ();
25742 }
25743
25744 emit_insn (gen (rval, mem));
25745 }
25746
25747 static void
25748 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
25749 {
25750 rtx (*gen) (rtx, rtx, rtx);
25751
25752 switch (mode)
25753 {
25754 case QImode: gen = gen_arm_store_exclusiveqi; break;
25755 case HImode: gen = gen_arm_store_exclusivehi; break;
25756 case SImode: gen = gen_arm_store_exclusivesi; break;
25757 case DImode: gen = gen_arm_store_exclusivedi; break;
25758 default:
25759 gcc_unreachable ();
25760 }
25761
25762 emit_insn (gen (bval, rval, mem));
25763 }
25764
25765 /* Mark the previous jump instruction as unlikely. */
25766
25767 static void
25768 emit_unlikely_jump (rtx insn)
25769 {
25770 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
25771
25772 insn = emit_jump_insn (insn);
25773 add_reg_note (insn, REG_BR_PROB, very_unlikely);
25774 }
25775
25776 /* Expand a compare and swap pattern. */
25777
25778 void
25779 arm_expand_compare_and_swap (rtx operands[])
25780 {
25781 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
25782 enum machine_mode mode;
25783 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
25784
25785 bval = operands[0];
25786 rval = operands[1];
25787 mem = operands[2];
25788 oldval = operands[3];
25789 newval = operands[4];
25790 is_weak = operands[5];
25791 mod_s = operands[6];
25792 mod_f = operands[7];
25793 mode = GET_MODE (mem);
25794
25795 switch (mode)
25796 {
25797 case QImode:
25798 case HImode:
25799 /* For narrow modes, we're going to perform the comparison in SImode,
25800 so do the zero-extension now. */
25801 rval = gen_reg_rtx (SImode);
25802 oldval = convert_modes (SImode, mode, oldval, true);
25803 /* FALLTHRU */
25804
25805 case SImode:
25806 /* Force the value into a register if needed. We waited until after
25807 the zero-extension above to do this properly. */
25808 if (!arm_add_operand (oldval, SImode))
25809 oldval = force_reg (SImode, oldval);
25810 break;
25811
25812 case DImode:
25813 if (!cmpdi_operand (oldval, mode))
25814 oldval = force_reg (mode, oldval);
25815 break;
25816
25817 default:
25818 gcc_unreachable ();
25819 }
25820
25821 switch (mode)
25822 {
25823 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
25824 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
25825 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
25826 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
25827 default:
25828 gcc_unreachable ();
25829 }
25830
25831 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
25832
25833 if (mode == QImode || mode == HImode)
25834 emit_move_insn (operands[1], gen_lowpart (mode, rval));
25835
25836 /* In all cases, we arrange for success to be signaled by Z set.
25837 This arrangement allows for the boolean result to be used directly
25838 in a subsequent branch, post optimization. */
25839 x = gen_rtx_REG (CCmode, CC_REGNUM);
25840 x = gen_rtx_EQ (SImode, x, const0_rtx);
25841 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
25842 }
25843
25844 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
25845 another memory store between the load-exclusive and store-exclusive can
25846 reset the monitor from Exclusive to Open state. This means we must wait
25847 until after reload to split the pattern, lest we get a register spill in
25848 the middle of the atomic sequence. */
25849
25850 void
25851 arm_split_compare_and_swap (rtx operands[])
25852 {
25853 rtx rval, mem, oldval, newval, scratch;
25854 enum machine_mode mode;
25855 enum memmodel mod_s, mod_f;
25856 bool is_weak;
25857 rtx label1, label2, x, cond;
25858
25859 rval = operands[0];
25860 mem = operands[1];
25861 oldval = operands[2];
25862 newval = operands[3];
25863 is_weak = (operands[4] != const0_rtx);
25864 mod_s = (enum memmodel) INTVAL (operands[5]);
25865 mod_f = (enum memmodel) INTVAL (operands[6]);
25866 scratch = operands[7];
25867 mode = GET_MODE (mem);
25868
25869 arm_pre_atomic_barrier (mod_s);
25870
25871 label1 = NULL_RTX;
25872 if (!is_weak)
25873 {
25874 label1 = gen_label_rtx ();
25875 emit_label (label1);
25876 }
25877 label2 = gen_label_rtx ();
25878
25879 arm_emit_load_exclusive (mode, rval, mem);
25880
25881 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
25882 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25883 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25884 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
25885 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25886
25887 arm_emit_store_exclusive (mode, scratch, mem, newval);
25888
25889 /* Weak or strong, we want EQ to be true for success, so that we
25890 match the flags that we got from the compare above. */
25891 cond = gen_rtx_REG (CCmode, CC_REGNUM);
25892 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
25893 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
25894
25895 if (!is_weak)
25896 {
25897 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25898 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25899 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
25900 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25901 }
25902
25903 if (mod_f != MEMMODEL_RELAXED)
25904 emit_label (label2);
25905
25906 arm_post_atomic_barrier (mod_s);
25907
25908 if (mod_f == MEMMODEL_RELAXED)
25909 emit_label (label2);
25910 }
25911
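/* Split an atomic read-modify-write operation into a load-exclusive /
   operate / store-exclusive retry loop.  CODE is the operation applied to
   MEM and VALUE; OLD_OUT and NEW_OUT, when non-null, receive the value of
   MEM before and after the operation; COND is a scratch register that
   holds the store-exclusive result; MODEL_RTX gives the memory model used
   for the surrounding barriers.  */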
25912 void
25913 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
25914 rtx value, rtx model_rtx, rtx cond)
25915 {
25916 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
25917 enum machine_mode mode = GET_MODE (mem);
25918 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
25919 rtx label, x;
25920
25921 arm_pre_atomic_barrier (model);
25922
25923 label = gen_label_rtx ();
25924 emit_label (label);
25925
25926 if (new_out)
25927 new_out = gen_lowpart (wmode, new_out);
25928 if (old_out)
25929 old_out = gen_lowpart (wmode, old_out);
25930 else
25931 old_out = new_out;
25932 value = simplify_gen_subreg (wmode, value, mode, 0);
25933
25934 arm_emit_load_exclusive (mode, old_out, mem);
25935
25936 switch (code)
25937 {
25938 case SET:
25939 new_out = value;
25940 break;
25941
25942 case NOT:
25943 x = gen_rtx_AND (wmode, old_out, value);
25944 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25945 x = gen_rtx_NOT (wmode, new_out);
25946 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25947 break;
25948
25949 case MINUS:
25950 if (CONST_INT_P (value))
25951 {
25952 value = GEN_INT (-INTVAL (value));
25953 code = PLUS;
25954 }
25955 /* FALLTHRU */
25956
25957 case PLUS:
25958 if (mode == DImode)
25959 {
25960 /* DImode plus/minus need to clobber flags. */
25961 /* The adddi3 and subdi3 patterns are incorrectly written so that
25962 they require matching operands, even when we could easily support
25963 three operands. Thankfully, this can be fixed up post-splitting,
25964 as the individual add+adc patterns do accept three operands and
25965 post-reload cprop can make these moves go away. */
25966 emit_move_insn (new_out, old_out);
25967 if (code == PLUS)
25968 x = gen_adddi3 (new_out, new_out, value);
25969 else
25970 x = gen_subdi3 (new_out, new_out, value);
25971 emit_insn (x);
25972 break;
25973 }
25974 /* FALLTHRU */
25975
25976 default:
25977 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
25978 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25979 break;
25980 }
25981
25982 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
25983
25984 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25985 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
25986
25987 arm_post_atomic_barrier (model);
25988 }
25989 \f
25990 #define MAX_VECT_LEN 16
25991
25992 struct expand_vec_perm_d
25993 {
25994 rtx target, op0, op1;
25995 unsigned char perm[MAX_VECT_LEN];
25996 enum machine_mode vmode;
25997 unsigned char nelt;
25998 bool one_vector_p;
25999 bool testing_p;
26000 };
26001
26002 /* Generate a variable permutation. */
26003
26004 static void
26005 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
26006 {
26007 enum machine_mode vmode = GET_MODE (target);
26008 bool one_vector_p = rtx_equal_p (op0, op1);
26009
26010 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
26011 gcc_checking_assert (GET_MODE (op0) == vmode);
26012 gcc_checking_assert (GET_MODE (op1) == vmode);
26013 gcc_checking_assert (GET_MODE (sel) == vmode);
26014 gcc_checking_assert (TARGET_NEON);
26015
26016 if (one_vector_p)
26017 {
26018 if (vmode == V8QImode)
26019 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
26020 else
26021 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
26022 }
26023 else
26024 {
26025 rtx pair;
26026
26027 if (vmode == V8QImode)
26028 {
26029 pair = gen_reg_rtx (V16QImode);
26030 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
26031 pair = gen_lowpart (TImode, pair);
26032 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
26033 }
26034 else
26035 {
26036 pair = gen_reg_rtx (OImode);
26037 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
26038 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
26039 }
26040 }
26041 }
26042
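/* Expand a vec_perm with a variable selector: mask SEL so that out-of-range
   indices wrap (VTBL does not reduce its index modulo the table size) and
   then emit the table-lookup sequence.  */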
26043 void
26044 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
26045 {
26046 enum machine_mode vmode = GET_MODE (target);
26047 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
26048 bool one_vector_p = rtx_equal_p (op0, op1);
26049 rtx rmask[MAX_VECT_LEN], mask;
26050
26051 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
26052 numbering of elements for big-endian, we must reverse the order. */
26053 gcc_checking_assert (!BYTES_BIG_ENDIAN);
26054
26055 /* The VTBL instruction does not use a modulo index, so we must take care
26056 of that ourselves. */
26057 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
26058 for (i = 0; i < nelt; ++i)
26059 rmask[i] = mask;
26060 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
26061 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
26062
26063 arm_expand_vec_perm_1 (target, op0, op1, sel);
26064 }
26065
26066 /* Generate or test for an insn that supports a constant permutation. */
26067
26068 /* Recognize patterns for the VUZP insns. */
26069
26070 static bool
26071 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
26072 {
26073 unsigned int i, odd, mask, nelt = d->nelt;
26074 rtx out0, out1, in0, in1, x;
26075 rtx (*gen)(rtx, rtx, rtx, rtx);
26076
26077 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
26078 return false;
26079
26080 /* Note that these are little-endian tests. Adjust for big-endian later. */
26081 if (d->perm[0] == 0)
26082 odd = 0;
26083 else if (d->perm[0] == 1)
26084 odd = 1;
26085 else
26086 return false;
26087 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
26088
26089 for (i = 0; i < nelt; i++)
26090 {
26091 unsigned elt = (i * 2 + odd) & mask;
26092 if (d->perm[i] != elt)
26093 return false;
26094 }
26095
26096 /* Success! */
26097 if (d->testing_p)
26098 return true;
26099
26100 switch (d->vmode)
26101 {
26102 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
26103 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
26104 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
26105 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
26106 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
26107 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
26108 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
26109 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
26110 default:
26111 gcc_unreachable ();
26112 }
26113
26114 in0 = d->op0;
26115 in1 = d->op1;
26116 if (BYTES_BIG_ENDIAN)
26117 {
26118 x = in0, in0 = in1, in1 = x;
26119 odd = !odd;
26120 }
26121
26122 out0 = d->target;
26123 out1 = gen_reg_rtx (d->vmode);
26124 if (odd)
26125 x = out0, out0 = out1, out1 = x;
26126
26127 emit_insn (gen (out0, in0, in1, out1));
26128 return true;
26129 }
26130
26131 /* Recognize patterns for the VZIP insns. */
26132
26133 static bool
26134 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
26135 {
26136 unsigned int i, high, mask, nelt = d->nelt;
26137 rtx out0, out1, in0, in1, x;
26138 rtx (*gen)(rtx, rtx, rtx, rtx);
26139
26140 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
26141 return false;
26142
26143 /* Note that these are little-endian tests. Adjust for big-endian later. */
26144 high = nelt / 2;
26145 if (d->perm[0] == high)
26146 ;
26147 else if (d->perm[0] == 0)
26148 high = 0;
26149 else
26150 return false;
26151 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
26152
26153 for (i = 0; i < nelt / 2; i++)
26154 {
26155 unsigned elt = (i + high) & mask;
26156 if (d->perm[i * 2] != elt)
26157 return false;
26158 elt = (elt + nelt) & mask;
26159 if (d->perm[i * 2 + 1] != elt)
26160 return false;
26161 }
26162
26163 /* Success! */
26164 if (d->testing_p)
26165 return true;
26166
26167 switch (d->vmode)
26168 {
26169 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
26170 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
26171 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
26172 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
26173 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
26174 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
26175 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
26176 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
26177 default:
26178 gcc_unreachable ();
26179 }
26180
26181 in0 = d->op0;
26182 in1 = d->op1;
26183 if (BYTES_BIG_ENDIAN)
26184 {
26185 x = in0, in0 = in1, in1 = x;
26186 high = !high;
26187 }
26188
26189 out0 = d->target;
26190 out1 = gen_reg_rtx (d->vmode);
26191 if (high)
26192 x = out0, out0 = out1, out1 = x;
26193
26194 emit_insn (gen (out0, in0, in1, out1));
26195 return true;
26196 }
26197
26198 /* Recognize patterns for the VREV insns. */
26199
26200 static bool
26201 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
26202 {
26203 unsigned int i, j, diff, nelt = d->nelt;
26204 rtx (*gen)(rtx, rtx, rtx);
26205
26206 if (!d->one_vector_p)
26207 return false;
26208
26209 diff = d->perm[0];
26210 switch (diff)
26211 {
26212 case 7:
26213 switch (d->vmode)
26214 {
26215 case V16QImode: gen = gen_neon_vrev64v16qi; break;
26216 case V8QImode: gen = gen_neon_vrev64v8qi; break;
26217 default:
26218 return false;
26219 }
26220 break;
26221 case 3:
26222 switch (d->vmode)
26223 {
26224 case V16QImode: gen = gen_neon_vrev32v16qi; break;
26225 case V8QImode: gen = gen_neon_vrev32v8qi; break;
26226 case V8HImode: gen = gen_neon_vrev64v8hi; break;
26227 case V4HImode: gen = gen_neon_vrev64v4hi; break;
26228 default:
26229 return false;
26230 }
26231 break;
26232 case 1:
26233 switch (d->vmode)
26234 {
26235 case V16QImode: gen = gen_neon_vrev16v16qi; break;
26236 case V8QImode: gen = gen_neon_vrev16v8qi; break;
26237 case V8HImode: gen = gen_neon_vrev32v8hi; break;
26238 case V4HImode: gen = gen_neon_vrev32v4hi; break;
26239 case V4SImode: gen = gen_neon_vrev64v4si; break;
26240 case V2SImode: gen = gen_neon_vrev64v2si; break;
26241 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
26242 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
26243 default:
26244 return false;
26245 }
26246 break;
26247 default:
26248 return false;
26249 }
26250
26251 for (i = 0; i < nelt ; i += diff + 1)
26252 for (j = 0; j <= diff; j += 1)
26253 {
26254 /* This is guaranteed to be true as the value of diff
26255 is 7, 3 or 1 and we should have enough elements in the
26256 queue to generate this. Getting a vector mask with any
26257 other value of diff implies that something is wrong by
26258 the time we get here. */
26259 gcc_assert (i + j < nelt);
26260 if (d->perm[i + j] != i + diff - j)
26261 return false;
26262 }
26263
26264 /* Success! */
26265 if (d->testing_p)
26266 return true;
26267
26268 /* ??? The third operand is an artifact of the builtin infrastructure
26269 and is ignored by the actual instruction. */
26270 emit_insn (gen (d->target, d->op0, const0_rtx));
26271 return true;
26272 }
26273
26274 /* Recognize patterns for the VTRN insns. */
26275
26276 static bool
26277 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
26278 {
26279 unsigned int i, odd, mask, nelt = d->nelt;
26280 rtx out0, out1, in0, in1, x;
26281 rtx (*gen)(rtx, rtx, rtx, rtx);
26282
26283 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
26284 return false;
26285
26286 /* Note that these are little-endian tests. Adjust for big-endian later. */
26287 if (d->perm[0] == 0)
26288 odd = 0;
26289 else if (d->perm[0] == 1)
26290 odd = 1;
26291 else
26292 return false;
26293 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
26294
26295 for (i = 0; i < nelt; i += 2)
26296 {
26297 if (d->perm[i] != i + odd)
26298 return false;
26299 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
26300 return false;
26301 }
26302
26303 /* Success! */
26304 if (d->testing_p)
26305 return true;
26306
26307 switch (d->vmode)
26308 {
26309 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
26310 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
26311 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
26312 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
26313 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
26314 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
26315 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
26316 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
26317 default:
26318 gcc_unreachable ();
26319 }
26320
26321 in0 = d->op0;
26322 in1 = d->op1;
26323 if (BYTES_BIG_ENDIAN)
26324 {
26325 x = in0, in0 = in1, in1 = x;
26326 odd = !odd;
26327 }
26328
26329 out0 = d->target;
26330 out1 = gen_reg_rtx (d->vmode);
26331 if (odd)
26332 x = out0, out0 = out1, out1 = x;
26333
26334 emit_insn (gen (out0, in0, in1, out1));
26335 return true;
26336 }
26337
26338 /* Recognize patterns for the VEXT insns. */
26339
26340 static bool
26341 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
26342 {
26343 unsigned int i, nelt = d->nelt;
26344 rtx (*gen) (rtx, rtx, rtx, rtx);
26345 rtx offset;
26346
26347 unsigned int location;
26348
26349 unsigned int next = d->perm[0] + 1;
26350
26351 /* TODO: Handle GCC's numbering of elements for big-endian. */
26352 if (BYTES_BIG_ENDIAN)
26353 return false;
26354
26355 /* Check if the extracted indexes are increasing by one. */
26356 for (i = 1; i < nelt; next++, i++)
26357 {
26358 /* If we hit the most significant element of the 2nd vector in
26359 the previous iteration, no need to test further. */
26360 if (next == 2 * nelt)
26361 return false;
26362
26363 /* If we are operating on only one vector: it could be a
26364 rotation. If there are only two elements of size < 64, let
26365 arm_evpc_neon_vrev catch it. */
26366 if (d->one_vector_p && (next == nelt))
26367 {
26368 if ((nelt == 2) && (d->vmode != V2DImode))
26369 return false;
26370 else
26371 next = 0;
26372 }
26373
26374 if (d->perm[i] != next)
26375 return false;
26376 }
26377
26378 location = d->perm[0];
26379
26380 switch (d->vmode)
26381 {
26382 case V16QImode: gen = gen_neon_vextv16qi; break;
26383 case V8QImode: gen = gen_neon_vextv8qi; break;
26384 case V4HImode: gen = gen_neon_vextv4hi; break;
26385 case V8HImode: gen = gen_neon_vextv8hi; break;
26386 case V2SImode: gen = gen_neon_vextv2si; break;
26387 case V4SImode: gen = gen_neon_vextv4si; break;
26388 case V2SFmode: gen = gen_neon_vextv2sf; break;
26389 case V4SFmode: gen = gen_neon_vextv4sf; break;
26390 case V2DImode: gen = gen_neon_vextv2di; break;
26391 default:
26392 return false;
26393 }
26394
26395 /* Success! */
26396 if (d->testing_p)
26397 return true;
26398
26399 offset = GEN_INT (location);
26400 emit_insn (gen (d->target, d->op0, d->op1, offset));
26401 return true;
26402 }
26403
26404 /* The NEON VTBL instruction is a fully variable permutation that's even
26405 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
26406 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
26407 can do slightly better by expanding this as a constant where we don't
26408 have to apply a mask. */
26409
26410 static bool
26411 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
26412 {
26413 rtx rperm[MAX_VECT_LEN], sel;
26414 enum machine_mode vmode = d->vmode;
26415 unsigned int i, nelt = d->nelt;
26416
26417 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
26418 numbering of elements for big-endian, we must reverse the order. */
26419 if (BYTES_BIG_ENDIAN)
26420 return false;
26421
26422 if (d->testing_p)
26423 return true;
26424
26425 /* Generic code will try constant permutation twice. Once with the
26426 original mode and again with the elements lowered to QImode.
26427 So wait and don't do the selector expansion ourselves. */
26428 if (vmode != V8QImode && vmode != V16QImode)
26429 return false;
26430
26431 for (i = 0; i < nelt; ++i)
26432 rperm[i] = GEN_INT (d->perm[i]);
26433 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
26434 sel = force_reg (vmode, sel);
26435
26436 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
26437 return true;
26438 }
26439
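/* Try to implement the permutation described by D with one of the NEON
   permute instructions, canonicalizing the operand order when the
   permutation begins with an element of the second input vector.  */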
26440 static bool
26441 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
26442 {
26443 /* Check if the input mask matches vext before reordering the
26444 operands. */
26445 if (TARGET_NEON)
26446 if (arm_evpc_neon_vext (d))
26447 return true;
26448
26449 /* The pattern matching functions above are written to look for a small
26450 number to begin the sequence (0, 1, N/2). If we begin with an index
26451 from the second operand, we can swap the operands. */
26452 if (d->perm[0] >= d->nelt)
26453 {
26454 unsigned i, nelt = d->nelt;
26455 rtx x;
26456
26457 for (i = 0; i < nelt; ++i)
26458 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
26459
26460 x = d->op0;
26461 d->op0 = d->op1;
26462 d->op1 = x;
26463 }
26464
26465 if (TARGET_NEON)
26466 {
26467 if (arm_evpc_neon_vuzp (d))
26468 return true;
26469 if (arm_evpc_neon_vzip (d))
26470 return true;
26471 if (arm_evpc_neon_vrev (d))
26472 return true;
26473 if (arm_evpc_neon_vtrn (d))
26474 return true;
26475 return arm_evpc_neon_vtbl (d);
26476 }
26477 return false;
26478 }
26479
26480 /* Expand a vec_perm_const pattern. */
26481
26482 bool
26483 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
26484 {
26485 struct expand_vec_perm_d d;
26486 int i, nelt, which;
26487
26488 d.target = target;
26489 d.op0 = op0;
26490 d.op1 = op1;
26491
26492 d.vmode = GET_MODE (target);
26493 gcc_assert (VECTOR_MODE_P (d.vmode));
26494 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
26495 d.testing_p = false;
26496
26497 for (i = which = 0; i < nelt; ++i)
26498 {
26499 rtx e = XVECEXP (sel, 0, i);
26500 int ei = INTVAL (e) & (2 * nelt - 1);
26501 which |= (ei < nelt ? 1 : 2);
26502 d.perm[i] = ei;
26503 }
26504
26505 switch (which)
26506 {
26507 default:
26508 gcc_unreachable();
26509
26510 case 3:
26511 d.one_vector_p = false;
26512 if (!rtx_equal_p (op0, op1))
26513 break;
26514
26515 /* The elements of PERM do not suggest that only the first operand
26516 is used, but both operands are identical. Allow easier matching
26517 of the permutation by folding the permutation into the single
26518 input vector. */
26519 /* FALLTHRU */
26520 case 2:
26521 for (i = 0; i < nelt; ++i)
26522 d.perm[i] &= nelt - 1;
26523 d.op0 = op1;
26524 d.one_vector_p = true;
26525 break;
26526
26527 case 1:
26528 d.op1 = op0;
26529 d.one_vector_p = true;
26530 break;
26531 }
26532
26533 return arm_expand_vec_perm_const_1 (&d);
26534 }
26535
26536 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
26537
26538 static bool
26539 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
26540 const unsigned char *sel)
26541 {
26542 struct expand_vec_perm_d d;
26543 unsigned int i, nelt, which;
26544 bool ret;
26545
26546 d.vmode = vmode;
26547 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
26548 d.testing_p = true;
26549 memcpy (d.perm, sel, nelt);
26550
26551 /* Categorize the set of elements in the selector. */
26552 for (i = which = 0; i < nelt; ++i)
26553 {
26554 unsigned char e = d.perm[i];
26555 gcc_assert (e < 2 * nelt);
26556 which |= (e < nelt ? 1 : 2);
26557 }
26558
26559 /* For all elements from the second vector, fold the elements to the first. */
26560 if (which == 2)
26561 for (i = 0; i < nelt; ++i)
26562 d.perm[i] -= nelt;
26563
26564 /* Check whether the mask can be applied to the vector type. */
26565 d.one_vector_p = (which != 3);
26566
26567 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
26568 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
26569 if (!d.one_vector_p)
26570 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
26571
26572 start_sequence ();
26573 ret = arm_expand_vec_perm_const_1 (&d);
26574 end_sequence ();
26575
26576 return ret;
26577 }
26578
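/* Return true if the auto-increment addressing form CODE can be used
   with mode MODE.  */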
26579 bool
26580 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
26581 {
26582 /* If we are soft float, all auto increment forms are ok provided the
26583 mode fits in a word or we have ldrd for doubleword accesses. */
26584 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
26585 return true;
26586
26587 switch (code)
26588 {
26589 /* Post increment is supported for all instruction forms; pre decrement
26590 is supported for all forms except vectors. */
26591 case ARM_POST_INC:
26592 case ARM_PRE_DEC:
26593 if (VECTOR_MODE_P (mode))
26594 {
26595 if (code != ARM_PRE_DEC)
26596 return true;
26597 else
26598 return false;
26599 }
26600
26601 return true;
26602
26603 case ARM_POST_DEC:
26604 case ARM_PRE_INC:
26605 /* Without LDRD and mode size greater than
26606 word size, there is no point in auto-incrementing
26607 because ldm and stm will not have these forms. */
26608 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
26609 return false;
26610
26611 /* Vector and floating point modes do not support
26612 these auto increment forms. */
26613 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
26614 return false;
26615
26616 return true;
26617
26618 default:
26619 return false;
26620
26621 }
26622
26623 return false;
26624 }
26625
26626 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
26627 on ARM, since we know that shifts by negative amounts are no-ops.
26628 Additionally, the default expansion code is not available or suitable
26629 for post-reload insn splits (this can occur when the register allocator
26630 chooses not to do a shift in NEON).
26631
26632 This function is used in both initial expand and post-reload splits, and
26633 handles all kinds of 64-bit shifts.
26634
26635 Input requirements:
26636 - It is safe for the input and output to be the same register, but
26637 early-clobber rules apply for the shift amount and scratch registers.
26638 - Shift by register requires both scratch registers. In all other cases
26639 the scratch registers may be NULL.
26640 - Ashiftrt by a register also clobbers the CC register. */
26641 void
26642 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
26643 rtx amount, rtx scratch1, rtx scratch2)
26644 {
26645 rtx out_high = gen_highpart (SImode, out);
26646 rtx out_low = gen_lowpart (SImode, out);
26647 rtx in_high = gen_highpart (SImode, in);
26648 rtx in_low = gen_lowpart (SImode, in);
26649
26650 /* Terminology:
26651 in = the register pair containing the input value.
26652 out = the destination register pair.
26653 up = the high- or low-part of each pair.
26654 down = the opposite part to "up".
26655 In a shift, we can consider bits to shift from "up"-stream to
26656 "down"-stream, so in a left-shift "up" is the low-part and "down"
26657 is the high-part of each register pair. */
26658
26659 rtx out_up = code == ASHIFT ? out_low : out_high;
26660 rtx out_down = code == ASHIFT ? out_high : out_low;
26661 rtx in_up = code == ASHIFT ? in_low : in_high;
26662 rtx in_down = code == ASHIFT ? in_high : in_low;
26663
26664 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
26665 gcc_assert (out
26666 && (REG_P (out) || GET_CODE (out) == SUBREG)
26667 && GET_MODE (out) == DImode);
26668 gcc_assert (in
26669 && (REG_P (in) || GET_CODE (in) == SUBREG)
26670 && GET_MODE (in) == DImode);
26671 gcc_assert (amount
26672 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
26673 && GET_MODE (amount) == SImode)
26674 || CONST_INT_P (amount)));
26675 gcc_assert (scratch1 == NULL
26676 || (GET_CODE (scratch1) == SCRATCH)
26677 || (GET_MODE (scratch1) == SImode
26678 && REG_P (scratch1)));
26679 gcc_assert (scratch2 == NULL
26680 || (GET_CODE (scratch2) == SCRATCH)
26681 || (GET_MODE (scratch2) == SImode
26682 && REG_P (scratch2)));
26683 gcc_assert (!REG_P (out) || !REG_P (amount)
26684 || !HARD_REGISTER_P (out)
26685 || (REGNO (out) != REGNO (amount)
26686 && REGNO (out) + 1 != REGNO (amount)));
26687
26688 /* Macros to make following code more readable. */
26689 #define SUB_32(DEST,SRC) \
26690 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
26691 #define RSB_32(DEST,SRC) \
26692 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
26693 #define SUB_S_32(DEST,SRC) \
26694 gen_addsi3_compare0 ((DEST), (SRC), \
26695 GEN_INT (-32))
26696 #define SET(DEST,SRC) \
26697 gen_rtx_SET (SImode, (DEST), (SRC))
26698 #define SHIFT(CODE,SRC,AMOUNT) \
26699 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
26700 #define LSHIFT(CODE,SRC,AMOUNT) \
26701 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
26702 SImode, (SRC), (AMOUNT))
26703 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
26704 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
26705 SImode, (SRC), (AMOUNT))
26706 #define ORR(A,B) \
26707 gen_rtx_IOR (SImode, (A), (B))
26708 #define BRANCH(COND,LABEL) \
26709 gen_arm_cond_branch ((LABEL), \
26710 gen_rtx_ ## COND (CCmode, cc_reg, \
26711 const0_rtx), \
26712 cc_reg)
26713
26714 /* Shifts by register and shifts by constant are handled separately. */
26715 if (CONST_INT_P (amount))
26716 {
26717 /* We have a shift-by-constant. */
26718
26719 /* First, handle out-of-range shift amounts.
26720 In both cases we try to match the result that an ARM instruction in a
26721 shift-by-register would give. This helps reduce execution
26722 differences between optimization levels, but it won't stop other
26723 parts of the compiler doing different things. This is "undefined
26724 behaviour", in any case. */
26725 if (INTVAL (amount) <= 0)
26726 emit_insn (gen_movdi (out, in));
26727 else if (INTVAL (amount) >= 64)
26728 {
26729 if (code == ASHIFTRT)
26730 {
26731 rtx const31_rtx = GEN_INT (31);
26732 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
26733 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
26734 }
26735 else
26736 emit_insn (gen_movdi (out, const0_rtx));
26737 }
26738
26739 /* Now handle valid shifts. */
26740 else if (INTVAL (amount) < 32)
26741 {
26742 /* Shifts by a constant less than 32. */
26743 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
26744
26745 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26746 emit_insn (SET (out_down,
26747 ORR (REV_LSHIFT (code, in_up, reverse_amount),
26748 out_down)));
26749 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26750 }
26751 else
26752 {
26753 /* Shifts by a constant greater than 31. */
26754 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
26755
26756 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
26757 if (code == ASHIFTRT)
26758 emit_insn (gen_ashrsi3 (out_up, in_up,
26759 GEN_INT (31)));
26760 else
26761 emit_insn (SET (out_up, const0_rtx));
26762 }
26763 }
26764 else
26765 {
26766 /* We have a shift-by-register. */
26767 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
26768
26769 /* This alternative requires the scratch registers. */
26770 gcc_assert (scratch1 && REG_P (scratch1));
26771 gcc_assert (scratch2 && REG_P (scratch2));
26772
26773 /* We will need the values "amount-32" and "32-amount" later.
26774 Swapping them around now allows the later code to be more general. */
26775 switch (code)
26776 {
26777 case ASHIFT:
26778 emit_insn (SUB_32 (scratch1, amount));
26779 emit_insn (RSB_32 (scratch2, amount));
26780 break;
26781 case ASHIFTRT:
26782 emit_insn (RSB_32 (scratch1, amount));
26783 /* Also set CC = amount > 32. */
26784 emit_insn (SUB_S_32 (scratch2, amount));
26785 break;
26786 case LSHIFTRT:
26787 emit_insn (RSB_32 (scratch1, amount));
26788 emit_insn (SUB_32 (scratch2, amount));
26789 break;
26790 default:
26791 gcc_unreachable ();
26792 }
26793
26794 /* Emit code like this:
26795
26796 arithmetic-left:
26797 out_down = in_down << amount;
26798 out_down = (in_up << (amount - 32)) | out_down;
26799 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
26800 out_up = in_up << amount;
26801
26802 arithmetic-right:
26803 out_down = in_down >> amount;
26804 out_down = (in_up << (32 - amount)) | out_down;
26805 if (amount < 32)
26806 out_down = ((signed)in_up >> (amount - 32)) | out_down;
26807 out_up = in_up << amount;
26808
26809 logical-right:
26810 out_down = in_down >> amount;
26811 out_down = (in_up << (32 - amount)) | out_down;
26812 if (amount < 32)
26813 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
26814 out_up = in_up << amount;
26815
26816 The ARM and Thumb2 variants are the same but implemented slightly
26817 differently. If this were only called during expand we could just
26818 use the Thumb2 case and let combine do the right thing, but this
26819 can also be called from post-reload splitters. */
26820
26821 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26822
26823 if (!TARGET_THUMB2)
26824 {
26825 /* Emit code for ARM mode. */
26826 emit_insn (SET (out_down,
26827 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
26828 if (code == ASHIFTRT)
26829 {
26830 rtx done_label = gen_label_rtx ();
26831 emit_jump_insn (BRANCH (LT, done_label));
26832 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
26833 out_down)));
26834 emit_label (done_label);
26835 }
26836 else
26837 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
26838 out_down)));
26839 }
26840 else
26841 {
26842 /* Emit code for Thumb2 mode.
26843 Thumb2 can't do shift and or in one insn. */
26844 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
26845 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
26846
26847 if (code == ASHIFTRT)
26848 {
26849 rtx done_label = gen_label_rtx ();
26850 emit_jump_insn (BRANCH (LT, done_label));
26851 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
26852 emit_insn (SET (out_down, ORR (out_down, scratch2)));
26853 emit_label (done_label);
26854 }
26855 else
26856 {
26857 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
26858 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
26859 }
26860 }
26861
26862 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26863 }
26864
26865 #undef SUB_32
26866 #undef RSB_32
26867 #undef SUB_S_32
26868 #undef SET
26869 #undef SHIFT
26870 #undef LSHIFT
26871 #undef REV_LSHIFT
26872 #undef ORR
26873 #undef BRANCH
26874 }
26875
26876
26877 /* Return true if *COMPARISON is a valid comparison operation, and force
26878 the operands into a form that the comparison patterns accept. */
26879 bool
26880 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
26881 {
26882 enum rtx_code code = GET_CODE (*comparison);
26883 enum rtx_code canonical_code;
26884 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
26885 ? GET_MODE (*op2) : GET_MODE (*op1);
26886
26887 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
26888
26889 if (code == UNEQ || code == LTGT)
26890 return false;
26891
26892 canonical_code = arm_canonicalize_comparison (code, op1, op2);
26893 PUT_CODE (*comparison, canonical_code);
26894
26895 switch (mode)
26896 {
26897 case SImode:
26898 if (!arm_add_operand (*op1, mode))
26899 *op1 = force_reg (mode, *op1);
26900 if (!arm_add_operand (*op2, mode))
26901 *op2 = force_reg (mode, *op2);
26902 return true;
26903
26904 case DImode:
26905 if (!cmpdi_operand (*op1, mode))
26906 *op1 = force_reg (mode, *op1);
26907 if (!cmpdi_operand (*op2, mode))
26908 *op2 = force_reg (mode, *op2);
26909 return true;
26910
26911 case SFmode:
26912 case DFmode:
26913 if (!arm_float_compare_operand (*op1, mode))
26914 *op1 = force_reg (mode, *op1);
26915 if (!arm_float_compare_operand (*op2, mode))
26916 *op2 = force_reg (mode, *op2);
26917 return true;
26918 default:
26919 break;
26920 }
26921
26922 return false;
26923
26924 }
26925
26926 #include "gt-arm.h"