1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8
9 This file is part of GCC.
10
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
15
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "diagnostic-core.h"
44 #include "recog.h"
45 #include "cgraph.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "tm_p.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "debug.h"
52 #include "langhooks.h"
53 #include "df.h"
54 #include "intl.h"
55 #include "libfuncs.h"
56 #include "params.h"
57 #include "opts.h"
58 #include "dumpfile.h"
59
60 /* Forward definitions of types. */
61 typedef struct minipool_node Mnode;
62 typedef struct minipool_fixup Mfix;
63
64 void (*arm_lang_output_object_attributes_hook)(void);
65
66 struct four_ints
67 {
68 int i[4];
69 };
70
71 /* Forward function declarations. */
72 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
73 static int arm_compute_static_chain_stack_bytes (void);
74 static arm_stack_offsets *arm_get_frame_offsets (void);
75 static void arm_add_gc_roots (void);
76 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
77 HOST_WIDE_INT, rtx, rtx, int, int);
78 static unsigned bit_count (unsigned long);
79 static int arm_address_register_rtx_p (rtx, int);
80 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
81 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
82 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
83 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
84 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
85 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
86 inline static int thumb1_index_register_rtx_p (rtx, int);
87 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
88 static int thumb_far_jump_used_p (void);
89 static bool thumb_force_lr_save (void);
90 static unsigned arm_size_return_regs (void);
91 static bool arm_assemble_integer (rtx, unsigned int, int);
92 static void arm_print_operand (FILE *, rtx, int);
93 static void arm_print_operand_address (FILE *, rtx);
94 static bool arm_print_operand_punct_valid_p (unsigned char code);
95 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
96 static arm_cc get_arm_condition_code (rtx);
97 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
98 static rtx is_jump_table (rtx);
99 static const char *output_multi_immediate (rtx *, const char *, const char *,
100 int, HOST_WIDE_INT);
101 static const char *shift_op (rtx, HOST_WIDE_INT *);
102 static struct machine_function *arm_init_machine_status (void);
103 static void thumb_exit (FILE *, int);
104 static rtx is_jump_table (rtx);
105 static HOST_WIDE_INT get_jump_table_size (rtx);
106 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
107 static Mnode *add_minipool_forward_ref (Mfix *);
108 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_backward_ref (Mfix *);
110 static void assign_minipool_offsets (Mfix *);
111 static void arm_print_value (FILE *, rtx);
112 static void dump_minipool (rtx);
113 static int arm_barrier_cost (rtx);
114 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
115 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
116 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
117 rtx);
118 static void arm_reorg (void);
119 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
120 static unsigned long arm_compute_save_reg0_reg12_mask (void);
121 static unsigned long arm_compute_save_reg_mask (void);
122 static unsigned long arm_isr_value (tree);
123 static unsigned long arm_compute_func_type (void);
124 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
125 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
126 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
127 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
128 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
129 #endif
130 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
131 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
132 static int arm_comp_type_attributes (const_tree, const_tree);
133 static void arm_set_default_type_attributes (tree);
134 static int arm_adjust_cost (rtx, rtx, rtx, int);
135 static int optimal_immediate_sequence (enum rtx_code code,
136 unsigned HOST_WIDE_INT val,
137 struct four_ints *return_sequence);
138 static int optimal_immediate_sequence_1 (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence,
141 int i);
142 static int arm_get_strip_length (int);
143 static bool arm_function_ok_for_sibcall (tree, tree);
144 static enum machine_mode arm_promote_function_mode (const_tree,
145 enum machine_mode, int *,
146 const_tree, int);
147 static bool arm_return_in_memory (const_tree, const_tree);
148 static rtx arm_function_value (const_tree, const_tree, bool);
149 static rtx arm_libcall_value_1 (enum machine_mode);
150 static rtx arm_libcall_value (enum machine_mode, const_rtx);
151 static bool arm_function_value_regno_p (const unsigned int);
152 static void arm_internal_label (FILE *, const char *, unsigned long);
153 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
154 tree);
155 static bool arm_have_conditional_execution (void);
156 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
157 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
158 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
159 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
160 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
161 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
162 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
163 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
165 static int arm_address_cost (rtx, bool);
166 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
167 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
168 static void arm_init_builtins (void);
169 static void arm_init_iwmmxt_builtins (void);
170 static rtx safe_vector_operand (rtx, enum machine_mode);
171 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
172 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
173 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
174 static tree arm_builtin_decl (unsigned, bool);
175 static void emit_constant_insn (rtx cond, rtx pattern);
176 static rtx emit_set_insn (rtx, rtx);
177 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
178 tree, bool);
179 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
180 const_tree, bool);
181 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
182 const_tree, bool);
183 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
184 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
185 const_tree);
186 static rtx aapcs_libcall_value (enum machine_mode);
187 static int aapcs_select_return_coproc (const_tree, const_tree);
188
189 #ifdef OBJECT_FORMAT_ELF
190 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
191 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
192 #endif
193 #ifndef ARM_PE
194 static void arm_encode_section_info (tree, rtx, int);
195 #endif
196
197 static void arm_file_end (void);
198 static void arm_file_start (void);
199
200 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
201 tree, int *, int);
202 static bool arm_pass_by_reference (cumulative_args_t,
203 enum machine_mode, const_tree, bool);
204 static bool arm_promote_prototypes (const_tree);
205 static bool arm_default_short_enums (void);
206 static bool arm_align_anon_bitfield (void);
207 static bool arm_return_in_msb (const_tree);
208 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
209 static bool arm_return_in_memory (const_tree, const_tree);
210 #if ARM_UNWIND_INFO
211 static void arm_unwind_emit (FILE *, rtx);
212 static bool arm_output_ttype (rtx);
213 static void arm_asm_emit_except_personality (rtx);
214 static void arm_asm_init_sections (void);
215 #endif
216 static rtx arm_dwarf_register_span (rtx);
217
218 static tree arm_cxx_guard_type (void);
219 static bool arm_cxx_guard_mask_bit (void);
220 static tree arm_get_cookie_size (tree);
221 static bool arm_cookie_has_size (void);
222 static bool arm_cxx_cdtor_returns_this (void);
223 static bool arm_cxx_key_method_may_be_inline (void);
224 static void arm_cxx_determine_class_data_visibility (tree);
225 static bool arm_cxx_class_data_always_comdat (void);
226 static bool arm_cxx_use_aeabi_atexit (void);
227 static void arm_init_libfuncs (void);
228 static tree arm_build_builtin_va_list (void);
229 static void arm_expand_builtin_va_start (tree, rtx);
230 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
231 static void arm_option_override (void);
232 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
233 static bool arm_cannot_copy_insn_p (rtx);
234 static bool arm_tls_symbol_p (rtx x);
235 static int arm_issue_rate (void);
236 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
237 static bool arm_output_addr_const_extra (FILE *, rtx);
238 static bool arm_allocate_stack_slots_for_args (void);
239 static bool arm_warn_func_return (tree);
240 static const char *arm_invalid_parameter_type (const_tree t);
241 static const char *arm_invalid_return_type (const_tree t);
242 static tree arm_promoted_type (const_tree t);
243 static tree arm_convert_to_type (tree type, tree expr);
244 static bool arm_scalar_mode_supported_p (enum machine_mode);
245 static bool arm_frame_pointer_required (void);
246 static bool arm_can_eliminate (const int, const int);
247 static void arm_asm_trampoline_template (FILE *);
248 static void arm_trampoline_init (rtx, tree, rtx);
249 static rtx arm_trampoline_adjust_address (rtx);
250 static rtx arm_pic_static_addr (rtx orig, rtx reg);
251 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
252 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
253 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
254 static bool arm_array_mode_supported_p (enum machine_mode,
255 unsigned HOST_WIDE_INT);
256 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
257 static bool arm_class_likely_spilled_p (reg_class_t);
258 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
259 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
260 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
261 const_tree type,
262 int misalignment,
263 bool is_packed);
264 static void arm_conditional_register_usage (void);
265 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
266 static unsigned int arm_autovectorize_vector_sizes (void);
267 static int arm_default_branch_cost (bool, bool);
268 static int arm_cortex_a5_branch_cost (bool, bool);
269
270 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
271 const unsigned char *sel);
272
273 \f
274 /* Table of machine attributes. */
275 static const struct attribute_spec arm_attribute_table[] =
276 {
277 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
278 affects_type_identity } */
279 /* Function calls made to this symbol must be done indirectly, because
280 it may lie outside of the 26 bit addressing range of a normal function
281 call. */
282 { "long_call", 0, 0, false, true, true, NULL, false },
283 /* Whereas these functions are always known to reside within the 26 bit
284 addressing range. */
285 { "short_call", 0, 0, false, true, true, NULL, false },
286 /* Specify the procedure call conventions for a function. */
287 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
288 false },
289 /* Interrupt Service Routines have special prologue and epilogue requirements. */
290 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
291 false },
292 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
293 false },
294 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
295 false },
296 #ifdef ARM_PE
297 /* ARM/PE has three new attributes:
298 interfacearm - ?
299 dllexport - for exporting a function/variable that will live in a dll
300 dllimport - for importing a function/variable from a dll
301
302 Microsoft allows multiple declspecs in one __declspec, separating
303 them with spaces. We do NOT support this. Instead, use __declspec
304 multiple times.
305 */
306 { "dllimport", 0, 0, true, false, false, NULL, false },
307 { "dllexport", 0, 0, true, false, false, NULL, false },
308 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
309 false },
310 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
311 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
312 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
313 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
314 false },
315 #endif
316 { NULL, 0, 0, false, false, false, NULL, false }
317 };
318 \f
319 /* Initialize the GCC target structure. */
320 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
321 #undef TARGET_MERGE_DECL_ATTRIBUTES
322 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
323 #endif
324
325 #undef TARGET_LEGITIMIZE_ADDRESS
326 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
327
328 #undef TARGET_ATTRIBUTE_TABLE
329 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
330
331 #undef TARGET_ASM_FILE_START
332 #define TARGET_ASM_FILE_START arm_file_start
333 #undef TARGET_ASM_FILE_END
334 #define TARGET_ASM_FILE_END arm_file_end
335
336 #undef TARGET_ASM_ALIGNED_SI_OP
337 #define TARGET_ASM_ALIGNED_SI_OP NULL
338 #undef TARGET_ASM_INTEGER
339 #define TARGET_ASM_INTEGER arm_assemble_integer
340
341 #undef TARGET_PRINT_OPERAND
342 #define TARGET_PRINT_OPERAND arm_print_operand
343 #undef TARGET_PRINT_OPERAND_ADDRESS
344 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
345 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
346 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
347
348 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
349 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
350
351 #undef TARGET_ASM_FUNCTION_PROLOGUE
352 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
353
354 #undef TARGET_ASM_FUNCTION_EPILOGUE
355 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
356
357 #undef TARGET_OPTION_OVERRIDE
358 #define TARGET_OPTION_OVERRIDE arm_option_override
359
360 #undef TARGET_COMP_TYPE_ATTRIBUTES
361 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
362
363 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
364 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
365
366 #undef TARGET_SCHED_ADJUST_COST
367 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
368
369 #undef TARGET_REGISTER_MOVE_COST
370 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
371
372 #undef TARGET_MEMORY_MOVE_COST
373 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
374
375 #undef TARGET_ENCODE_SECTION_INFO
376 #ifdef ARM_PE
377 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
378 #else
379 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
380 #endif
381
382 #undef TARGET_STRIP_NAME_ENCODING
383 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
384
385 #undef TARGET_ASM_INTERNAL_LABEL
386 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
387
388 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
389 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
390
391 #undef TARGET_FUNCTION_VALUE
392 #define TARGET_FUNCTION_VALUE arm_function_value
393
394 #undef TARGET_LIBCALL_VALUE
395 #define TARGET_LIBCALL_VALUE arm_libcall_value
396
397 #undef TARGET_FUNCTION_VALUE_REGNO_P
398 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
399
400 #undef TARGET_ASM_OUTPUT_MI_THUNK
401 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
402 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
403 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
404
405 #undef TARGET_RTX_COSTS
406 #define TARGET_RTX_COSTS arm_rtx_costs
407 #undef TARGET_ADDRESS_COST
408 #define TARGET_ADDRESS_COST arm_address_cost
409
410 #undef TARGET_SHIFT_TRUNCATION_MASK
411 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
412 #undef TARGET_VECTOR_MODE_SUPPORTED_P
413 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
414 #undef TARGET_ARRAY_MODE_SUPPORTED_P
415 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
416 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
417 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
418 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
419 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
420 arm_autovectorize_vector_sizes
421
422 #undef TARGET_MACHINE_DEPENDENT_REORG
423 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
424
425 #undef TARGET_INIT_BUILTINS
426 #define TARGET_INIT_BUILTINS arm_init_builtins
427 #undef TARGET_EXPAND_BUILTIN
428 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
429 #undef TARGET_BUILTIN_DECL
430 #define TARGET_BUILTIN_DECL arm_builtin_decl
431
432 #undef TARGET_INIT_LIBFUNCS
433 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
434
435 #undef TARGET_PROMOTE_FUNCTION_MODE
436 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
437 #undef TARGET_PROMOTE_PROTOTYPES
438 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
439 #undef TARGET_PASS_BY_REFERENCE
440 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
441 #undef TARGET_ARG_PARTIAL_BYTES
442 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
443 #undef TARGET_FUNCTION_ARG
444 #define TARGET_FUNCTION_ARG arm_function_arg
445 #undef TARGET_FUNCTION_ARG_ADVANCE
446 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
447 #undef TARGET_FUNCTION_ARG_BOUNDARY
448 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
449
450 #undef TARGET_SETUP_INCOMING_VARARGS
451 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
452
453 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
454 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
455
456 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
457 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
458 #undef TARGET_TRAMPOLINE_INIT
459 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
460 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
461 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
462
463 #undef TARGET_WARN_FUNC_RETURN
464 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
465
466 #undef TARGET_DEFAULT_SHORT_ENUMS
467 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
468
469 #undef TARGET_ALIGN_ANON_BITFIELD
470 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
471
472 #undef TARGET_NARROW_VOLATILE_BITFIELD
473 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
474
475 #undef TARGET_CXX_GUARD_TYPE
476 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
477
478 #undef TARGET_CXX_GUARD_MASK_BIT
479 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
480
481 #undef TARGET_CXX_GET_COOKIE_SIZE
482 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
483
484 #undef TARGET_CXX_COOKIE_HAS_SIZE
485 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
486
487 #undef TARGET_CXX_CDTOR_RETURNS_THIS
488 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
489
490 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
491 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
492
493 #undef TARGET_CXX_USE_AEABI_ATEXIT
494 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
495
496 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
497 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
498 arm_cxx_determine_class_data_visibility
499
500 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
501 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
502
503 #undef TARGET_RETURN_IN_MSB
504 #define TARGET_RETURN_IN_MSB arm_return_in_msb
505
506 #undef TARGET_RETURN_IN_MEMORY
507 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
508
509 #undef TARGET_MUST_PASS_IN_STACK
510 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
511
512 #if ARM_UNWIND_INFO
513 #undef TARGET_ASM_UNWIND_EMIT
514 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
515
516 /* EABI unwinding tables use a different format for the typeinfo tables. */
517 #undef TARGET_ASM_TTYPE
518 #define TARGET_ASM_TTYPE arm_output_ttype
519
520 #undef TARGET_ARM_EABI_UNWINDER
521 #define TARGET_ARM_EABI_UNWINDER true
522
523 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
524 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
525
526 #undef TARGET_ASM_INIT_SECTIONS
527 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
528 #endif /* ARM_UNWIND_INFO */
529
530 #undef TARGET_DWARF_REGISTER_SPAN
531 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
532
533 #undef TARGET_CANNOT_COPY_INSN_P
534 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
535
536 #ifdef HAVE_AS_TLS
537 #undef TARGET_HAVE_TLS
538 #define TARGET_HAVE_TLS true
539 #endif
540
541 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
542 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
543
544 #undef TARGET_LEGITIMATE_CONSTANT_P
545 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
546
547 #undef TARGET_CANNOT_FORCE_CONST_MEM
548 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
549
550 #undef TARGET_MAX_ANCHOR_OFFSET
551 #define TARGET_MAX_ANCHOR_OFFSET 4095
552
553 /* The minimum is set such that the total size of the block
554 for a particular anchor is -4088 + 1 + 4095 bytes, which is
555 divisible by eight, ensuring natural spacing of anchors. */
556 #undef TARGET_MIN_ANCHOR_OFFSET
557 #define TARGET_MIN_ANCHOR_OFFSET -4088
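/* As a worked example of the comment above: the block spans offsets
   -4088 .. 4095, i.e. 4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023,
   which is the sense in which the block size is divisible by eight.  */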
558
559 #undef TARGET_SCHED_ISSUE_RATE
560 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
561
562 #undef TARGET_MANGLE_TYPE
563 #define TARGET_MANGLE_TYPE arm_mangle_type
564
565 #undef TARGET_BUILD_BUILTIN_VA_LIST
566 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
567 #undef TARGET_EXPAND_BUILTIN_VA_START
568 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
569 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
570 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
571
572 #ifdef HAVE_AS_TLS
573 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
574 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
575 #endif
576
577 #undef TARGET_LEGITIMATE_ADDRESS_P
578 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
579
580 #undef TARGET_PREFERRED_RELOAD_CLASS
581 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
582
583 #undef TARGET_INVALID_PARAMETER_TYPE
584 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
585
586 #undef TARGET_INVALID_RETURN_TYPE
587 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
588
589 #undef TARGET_PROMOTED_TYPE
590 #define TARGET_PROMOTED_TYPE arm_promoted_type
591
592 #undef TARGET_CONVERT_TO_TYPE
593 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
594
595 #undef TARGET_SCALAR_MODE_SUPPORTED_P
596 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
597
598 #undef TARGET_FRAME_POINTER_REQUIRED
599 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
600
601 #undef TARGET_CAN_ELIMINATE
602 #define TARGET_CAN_ELIMINATE arm_can_eliminate
603
604 #undef TARGET_CONDITIONAL_REGISTER_USAGE
605 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
606
607 #undef TARGET_CLASS_LIKELY_SPILLED_P
608 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
609
610 #undef TARGET_VECTOR_ALIGNMENT
611 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
612
613 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
614 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
615 arm_vector_alignment_reachable
616
617 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
618 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
619 arm_builtin_support_vector_misalignment
620
621 #undef TARGET_PREFERRED_RENAME_CLASS
622 #define TARGET_PREFERRED_RENAME_CLASS \
623 arm_preferred_rename_class
624
625 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
626 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
627 arm_vectorize_vec_perm_const_ok
628
629 struct gcc_target targetm = TARGET_INITIALIZER;
630 \f
631 /* Obstack for minipool constant handling. */
632 static struct obstack minipool_obstack;
633 static char * minipool_startobj;
634
635 /* The maximum number of insns skipped which
636 will be conditionalised if possible. */
637 static int max_insns_skipped = 5;
638
639 extern FILE * asm_out_file;
640
641 /* True if we are currently building a constant table. */
642 int making_const_table;
643
644 /* The processor for which instructions should be scheduled. */
645 enum processor_type arm_tune = arm_none;
646
647 /* The current tuning set. */
648 const struct tune_params *current_tune;
649
650 /* Which floating point hardware to schedule for. */
651 int arm_fpu_attr;
652
653 /* Which floating point hardware to use.  */
654 const struct arm_fpu_desc *arm_fpu_desc;
655
656 /* Used for Thumb call_via trampolines. */
657 rtx thumb_call_via_label[14];
658 static int thumb_call_reg_needed;
659
660 /* Bit values used to identify processor capabilities. */
661 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
662 #define FL_ARCH3M (1 << 1) /* Extended multiply */
663 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
664 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
665 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
666 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
667 #define FL_THUMB (1 << 6) /* Thumb aware */
668 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
669 #define FL_STRONG (1 << 8) /* StrongARM */
670 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
671 #define FL_XSCALE (1 << 10) /* XScale */
672 /* spare (1 << 11) */
673 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
674 media instructions. */
675 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
676 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
677 Note: ARM6 & 7 derivatives only. */
678 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
679 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
680 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
681 profile. */
682 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
683 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
684 #define FL_NEON (1 << 20) /* Neon instructions. */
685 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
686 architecture. */
687 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
688 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
689
690 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
691 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
692
693 /* Flags that only affect tuning, not available instructions.  */
694 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
695 | FL_CO_PROC)
696
697 #define FL_FOR_ARCH2 FL_NOTM
698 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
699 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
700 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
701 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
702 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
703 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
704 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
705 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
706 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
707 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
708 #define FL_FOR_ARCH6J FL_FOR_ARCH6
709 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
710 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
711 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
712 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
713 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
714 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
715 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
716 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
717 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
718 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
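/* For example, FL_FOR_ARCH6M above is FL_FOR_ARCH6 with FL_NOTM masked
   out, reflecting that ARMv6-M provides only the M-profile subset of the
   instruction set.  */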
719
720 /* The bits in this mask specify which
721 instructions we are allowed to generate. */
722 static unsigned long insn_flags = 0;
723
724 /* The bits in this mask specify which instruction scheduling options should
725 be used. */
726 static unsigned long tune_flags = 0;
727
728 /* The highest ARM architecture version supported by the
729 target. */
730 enum base_architecture arm_base_arch = BASE_ARCH_0;
731
732 /* The following are used in the arm.md file as equivalents to bits
733 in the above two flag variables. */
734
735 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
736 int arm_arch3m = 0;
737
738 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
739 int arm_arch4 = 0;
740
741 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
742 int arm_arch4t = 0;
743
744 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
745 int arm_arch5 = 0;
746
747 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
748 int arm_arch5e = 0;
749
750 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
751 int arm_arch6 = 0;
752
753 /* Nonzero if this chip supports the ARM 6K extensions. */
754 int arm_arch6k = 0;
755
756 /* Nonzero if this chip supports the ARM 7 extensions. */
757 int arm_arch7 = 0;
758
759 /* Nonzero if instructions not present in the 'M' profile can be used. */
760 int arm_arch_notm = 0;
761
762 /* Nonzero if instructions present in ARMv7E-M can be used. */
763 int arm_arch7em = 0;
764
765 /* Nonzero if this chip can benefit from load scheduling. */
766 int arm_ld_sched = 0;
767
768 /* Nonzero if this chip is a StrongARM. */
769 int arm_tune_strongarm = 0;
770
771 /* Nonzero if this chip supports Intel Wireless MMX technology. */
772 int arm_arch_iwmmxt = 0;
773
774 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
775 int arm_arch_iwmmxt2 = 0;
776
777 /* Nonzero if this chip is an XScale. */
778 int arm_arch_xscale = 0;
779
780 /* Nonzero if tuning for XScale */
781 int arm_tune_xscale = 0;
782
783 /* Nonzero if we want to tune for stores that access the write-buffer.
784 This typically means an ARM6 or ARM7 with MMU or MPU. */
785 int arm_tune_wbuf = 0;
786
787 /* Nonzero if tuning for Cortex-A9. */
788 int arm_tune_cortex_a9 = 0;
789
790 /* Nonzero if generating Thumb instructions. */
791 int thumb_code = 0;
792
793 /* Nonzero if generating Thumb-1 instructions. */
794 int thumb1_code = 0;
795
796 /* Nonzero if we should define __THUMB_INTERWORK__ in the
797 preprocessor.
798    XXX This is a bit of a hack; it's intended to help work around
799    problems in GLD, which doesn't understand that armv5t code is
800 interworking clean. */
801 int arm_cpp_interwork = 0;
802
803 /* Nonzero if chip supports Thumb 2. */
804 int arm_arch_thumb2;
805
806 /* Nonzero if chip supports integer division instruction. */
807 int arm_arch_arm_hwdiv;
808 int arm_arch_thumb_hwdiv;
809
810 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
811 we must report the mode of the memory reference from
812 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
813 enum machine_mode output_memory_reference_mode;
814
815 /* The register number to be used for the PIC offset register. */
816 unsigned arm_pic_register = INVALID_REGNUM;
817
818 /* Set to 1 after arm_reorg has started.  Reset at the start of
819 the next function. */
820 static int after_arm_reorg = 0;
821
822 enum arm_pcs arm_pcs_default;
823
824 /* For an explanation of these variables, see final_prescan_insn below. */
825 int arm_ccfsm_state;
826 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
827 enum arm_cond_code arm_current_cc;
828
829 rtx arm_target_insn;
830 int arm_target_label;
831 /* The number of conditionally executed insns, including the current insn. */
832 int arm_condexec_count = 0;
833 /* A bitmask specifying the patterns for the IT block.
834 Zero means do not output an IT block before this insn. */
835 int arm_condexec_mask = 0;
836 /* The number of bits used in arm_condexec_mask. */
837 int arm_condexec_masklen = 0;
838
839 /* The condition codes of the ARM, and the inverse function. */
840 static const char * const arm_condition_codes[] =
841 {
842 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
843 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
844 };
845
846 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
847 int arm_regs_in_sequence[] =
848 {
849 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
850 };
851
852 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
853 #define streq(string1, string2) (strcmp (string1, string2) == 0)
854
855 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
856 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
857 | (1 << PIC_OFFSET_TABLE_REGNUM)))
858 \f
859 /* Initialization code. */
860
861 struct processors
862 {
863 const char *const name;
864 enum processor_type core;
865 const char *arch;
866 enum base_architecture base_arch;
867 const unsigned long flags;
868 const struct tune_params *const tune;
869 };
870
871
872 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
873 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
874 prefetch_slots, \
875 l1_size, \
876 l1_line_size
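/* These macros supply the prefetch-related fields of the tune_params
   structures below; e.g. ARM_PREFETCH_BENEFICIAL (4, 32, 32) in the
   Cortex-A9 tuning expands to the field values 4, 32 and 32, while
   ARM_PREFETCH_NOT_BENEFICIAL supplies 0, -1 and -1.  */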
877
878 const struct tune_params arm_slowmul_tune =
879 {
880 arm_slowmul_rtx_costs,
881 NULL,
882 3, /* Constant limit. */
883 5, /* Max cond insns. */
884 ARM_PREFETCH_NOT_BENEFICIAL,
885 true, /* Prefer constant pool. */
886 arm_default_branch_cost,
887 false /* Prefer LDRD/STRD. */
888 };
889
890 const struct tune_params arm_fastmul_tune =
891 {
892 arm_fastmul_rtx_costs,
893 NULL,
894 1, /* Constant limit. */
895 5, /* Max cond insns. */
896 ARM_PREFETCH_NOT_BENEFICIAL,
897 true, /* Prefer constant pool. */
898 arm_default_branch_cost,
899 false /* Prefer LDRD/STRD. */
900 };
901
902 /* StrongARM has early execution of branches, so a sequence that is worth
903 skipping is shorter. Set max_insns_skipped to a lower value. */
904
905 const struct tune_params arm_strongarm_tune =
906 {
907 arm_fastmul_rtx_costs,
908 NULL,
909 1, /* Constant limit. */
910 3, /* Max cond insns. */
911 ARM_PREFETCH_NOT_BENEFICIAL,
912 true, /* Prefer constant pool. */
913 arm_default_branch_cost,
914 false /* Prefer LDRD/STRD. */
915 };
916
917 const struct tune_params arm_xscale_tune =
918 {
919 arm_xscale_rtx_costs,
920 xscale_sched_adjust_cost,
921 2, /* Constant limit. */
922 3, /* Max cond insns. */
923 ARM_PREFETCH_NOT_BENEFICIAL,
924 true, /* Prefer constant pool. */
925 arm_default_branch_cost,
926 false /* Prefer LDRD/STRD. */
927 };
928
929 const struct tune_params arm_9e_tune =
930 {
931 arm_9e_rtx_costs,
932 NULL,
933 1, /* Constant limit. */
934 5, /* Max cond insns. */
935 ARM_PREFETCH_NOT_BENEFICIAL,
936 true, /* Prefer constant pool. */
937 arm_default_branch_cost,
938 false /* Prefer LDRD/STRD. */
939 };
940
941 const struct tune_params arm_v6t2_tune =
942 {
943 arm_9e_rtx_costs,
944 NULL,
945 1, /* Constant limit. */
946 5, /* Max cond insns. */
947 ARM_PREFETCH_NOT_BENEFICIAL,
948 false, /* Prefer constant pool. */
949 arm_default_branch_cost,
950 false /* Prefer LDRD/STRD. */
951 };
952
953 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
954 const struct tune_params arm_cortex_tune =
955 {
956 arm_9e_rtx_costs,
957 NULL,
958 1, /* Constant limit. */
959 5, /* Max cond insns. */
960 ARM_PREFETCH_NOT_BENEFICIAL,
961 false, /* Prefer constant pool. */
962 arm_default_branch_cost,
963 false /* Prefer LDRD/STRD. */
964 };
965
966 const struct tune_params arm_cortex_a15_tune =
967 {
968 arm_9e_rtx_costs,
969 NULL,
970 1, /* Constant limit. */
971 5, /* Max cond insns. */
972 ARM_PREFETCH_NOT_BENEFICIAL,
973 false, /* Prefer constant pool. */
974 arm_default_branch_cost,
975 true /* Prefer LDRD/STRD. */
976 };
977
978 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
979 less appealing. Set max_insns_skipped to a low value. */
980
981 const struct tune_params arm_cortex_a5_tune =
982 {
983 arm_9e_rtx_costs,
984 NULL,
985 1, /* Constant limit. */
986 1, /* Max cond insns. */
987 ARM_PREFETCH_NOT_BENEFICIAL,
988 false, /* Prefer constant pool. */
989 arm_cortex_a5_branch_cost,
990 false /* Prefer LDRD/STRD. */
991 };
992
993 const struct tune_params arm_cortex_a9_tune =
994 {
995 arm_9e_rtx_costs,
996 cortex_a9_sched_adjust_cost,
997 1, /* Constant limit. */
998 5, /* Max cond insns. */
999 ARM_PREFETCH_BENEFICIAL(4,32,32),
1000 false, /* Prefer constant pool. */
1001 arm_default_branch_cost,
1002 false /* Prefer LDRD/STRD. */
1003 };
1004
1005 const struct tune_params arm_fa726te_tune =
1006 {
1007 arm_9e_rtx_costs,
1008 fa726te_sched_adjust_cost,
1009 1, /* Constant limit. */
1010 5, /* Max cond insns. */
1011 ARM_PREFETCH_NOT_BENEFICIAL,
1012 true, /* Prefer constant pool. */
1013 arm_default_branch_cost,
1014 false /* Prefer LDRD/STRD. */
1015 };
1016
1017
1018 /* Not all of these give usefully different compilation alternatives,
1019 but there is no simple way of generalizing them. */
1020 static const struct processors all_cores[] =
1021 {
1022 /* ARM Cores */
1023 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
1024 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1025 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1026 #include "arm-cores.def"
1027 #undef ARM_CORE
1028 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1029 };
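/* To illustrate the ARM_CORE expansion above with a hypothetical entry
   (not one taken from arm-cores.def): ARM_CORE ("foo", foo, 7A,
   FL_LDSCHED, cortex) would expand to
   {"foo", foo, "7A", BASE_ARCH_7A, FL_LDSCHED | FL_FOR_ARCH7A,
    &arm_cortex_tune}.  */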
1030
1031 static const struct processors all_architectures[] =
1032 {
1033 /* ARM Architectures */
1034 /* We don't specify tuning costs here as they will be figured out
1035 from the core. */
1036
1037 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1038 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1039 #include "arm-arches.def"
1040 #undef ARM_ARCH
1041 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1042 };
1043
1044
1045 /* These are populated as commandline arguments are processed, or NULL
1046 if not specified. */
1047 static const struct processors *arm_selected_arch;
1048 static const struct processors *arm_selected_cpu;
1049 static const struct processors *arm_selected_tune;
1050
1051 /* The name of the preprocessor macro to define for this architecture. */
1052
1053 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1054
1055 /* Available values for -mfpu=. */
1056
1057 static const struct arm_fpu_desc all_fpus[] =
1058 {
1059 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
1060 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
1061 #include "arm-fpus.def"
1062 #undef ARM_FPU
1063 };
1064
1065
1066 /* Supported TLS relocations. */
1067
1068 enum tls_reloc {
1069 TLS_GD32,
1070 TLS_LDM32,
1071 TLS_LDO32,
1072 TLS_IE32,
1073 TLS_LE32,
1074 TLS_DESCSEQ /* GNU scheme */
1075 };
1076
1077 /* The maximum number of insns to be used when loading a constant. */
1078 inline static int
1079 arm_constant_limit (bool size_p)
1080 {
1081 return size_p ? 1 : current_tune->constant_limit;
1082 }
1083
1084 /* Emit an insn that's a simple single-set. Both the operands must be known
1085 to be valid. */
1086 inline static rtx
1087 emit_set_insn (rtx x, rtx y)
1088 {
1089 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1090 }
1091
1092 /* Return the number of bits set in VALUE. */
1093 static unsigned
1094 bit_count (unsigned long value)
1095 {
1096 unsigned long count = 0;
1097
1098 while (value)
1099 {
1100 count++;
1101 value &= value - 1; /* Clear the least-significant set bit. */
1102 }
1103
1104 return count;
1105 }
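/* The "value &= value - 1" idiom above clears exactly one set bit per
   iteration, so e.g. bit_count (0x28) loops twice and returns 2.  */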
1106
1107 typedef struct
1108 {
1109 enum machine_mode mode;
1110 const char *name;
1111 } arm_fixed_mode_set;
1112
1113 /* A small helper for setting fixed-point library libfuncs. */
1114
1115 static void
1116 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1117 const char *funcname, const char *modename,
1118 int num_suffix)
1119 {
1120 char buffer[50];
1121
1122 if (num_suffix == 0)
1123 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1124 else
1125 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1126
1127 set_optab_libfunc (optable, mode, buffer);
1128 }
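/* For example, the loops in arm_init_libfuncs below effectively call
   arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3),
   which registers the name "__gnu_addqq3" for QQmode addition.  */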
1129
1130 static void
1131 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1132 enum machine_mode from, const char *funcname,
1133 const char *toname, const char *fromname)
1134 {
1135 char buffer[50];
1136 const char *maybe_suffix_2 = "";
1137
1138 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1139 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1140 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1141 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1142 maybe_suffix_2 = "2";
1143
1144 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1145 maybe_suffix_2);
1146
1147 set_conv_libfunc (optable, to, from, buffer);
1148 }
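/* For example, a fract conversion between two signed fractional modes,
   say from QQmode to HQmode, meets all three conditions above, so the
   registered name gains the "2" suffix and becomes "__gnu_fractqqhq2";
   conversions involving a non-fixed-point mode such as SFmode get no
   suffix.  */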
1149
1150 /* Set up library functions unique to ARM. */
1151
1152 static void
1153 arm_init_libfuncs (void)
1154 {
1155 /* For Linux, we have access to kernel support for atomic operations. */
1156 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1157 init_sync_libfuncs (2 * UNITS_PER_WORD);
1158
1159 /* There are no special library functions unless we are using the
1160 ARM BPABI. */
1161 if (!TARGET_BPABI)
1162 return;
1163
1164 /* The functions below are described in Section 4 of the "Run-Time
1165 ABI for the ARM architecture", Version 1.0. */
1166
1167 /* Double-precision floating-point arithmetic. Table 2. */
1168 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1169 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1170 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1171 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1172 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1173
1174 /* Double-precision comparisons. Table 3. */
1175 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1176 set_optab_libfunc (ne_optab, DFmode, NULL);
1177 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1178 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1179 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1180 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1181 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1182
1183 /* Single-precision floating-point arithmetic. Table 4. */
1184 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1185 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1186 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1187 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1188 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1189
1190 /* Single-precision comparisons. Table 5. */
1191 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1192 set_optab_libfunc (ne_optab, SFmode, NULL);
1193 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1194 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1195 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1196 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1197 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1198
1199 /* Floating-point to integer conversions. Table 6. */
1200 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1201 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1202 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1203 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1204 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1205 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1206 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1207 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1208
1209 /* Conversions between floating types. Table 7. */
1210 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1211 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1212
1213 /* Integer to floating-point conversions. Table 8. */
1214 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1215 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1216 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1217 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1218 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1219 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1220 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1221 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1222
1223 /* Long long. Table 9. */
1224 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1225 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1226 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1227 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1228 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1229 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1230 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1231 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1232
1233 /* Integer (32/32->32) division. \S 4.3.1. */
1234 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1235 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1236
1237 /* The divmod functions are designed so that they can be used for
1238 plain division, even though they return both the quotient and the
1239 remainder. The quotient is returned in the usual location (i.e.,
1240 r0 for SImode, {r0, r1} for DImode), just as would be expected
1241 for an ordinary division routine. Because the AAPCS calling
1242 conventions specify that all of { r0, r1, r2, r3 } are
1243      call-clobbered registers, there is no need to tell the compiler
1244 explicitly that those registers are clobbered by these
1245 routines. */
1246 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1247 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
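/* Consequently a plain DImode division simply calls "__aeabi_ldivmod" or
   "__aeabi_uldivmod" and ignores the remainder part of the result.  */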
1248
1249 /* For SImode division the ABI provides div-without-mod routines,
1250 which are faster. */
1251 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1252 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1253
1254 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1255 divmod libcalls instead. */
1256 set_optab_libfunc (smod_optab, DImode, NULL);
1257 set_optab_libfunc (umod_optab, DImode, NULL);
1258 set_optab_libfunc (smod_optab, SImode, NULL);
1259 set_optab_libfunc (umod_optab, SImode, NULL);
1260
1261 /* Half-precision float operations. The compiler handles all operations
1262      with NULL libfuncs by converting to SFmode.  */
1263 switch (arm_fp16_format)
1264 {
1265 case ARM_FP16_FORMAT_IEEE:
1266 case ARM_FP16_FORMAT_ALTERNATIVE:
1267
1268 /* Conversions. */
1269 set_conv_libfunc (trunc_optab, HFmode, SFmode,
1270 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1271 ? "__gnu_f2h_ieee"
1272 : "__gnu_f2h_alternative"));
1273 set_conv_libfunc (sext_optab, SFmode, HFmode,
1274 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
1275 ? "__gnu_h2f_ieee"
1276 : "__gnu_h2f_alternative"));
1277
1278 /* Arithmetic. */
1279 set_optab_libfunc (add_optab, HFmode, NULL);
1280 set_optab_libfunc (sdiv_optab, HFmode, NULL);
1281 set_optab_libfunc (smul_optab, HFmode, NULL);
1282 set_optab_libfunc (neg_optab, HFmode, NULL);
1283 set_optab_libfunc (sub_optab, HFmode, NULL);
1284
1285 /* Comparisons. */
1286 set_optab_libfunc (eq_optab, HFmode, NULL);
1287 set_optab_libfunc (ne_optab, HFmode, NULL);
1288 set_optab_libfunc (lt_optab, HFmode, NULL);
1289 set_optab_libfunc (le_optab, HFmode, NULL);
1290 set_optab_libfunc (ge_optab, HFmode, NULL);
1291 set_optab_libfunc (gt_optab, HFmode, NULL);
1292 set_optab_libfunc (unord_optab, HFmode, NULL);
1293 break;
1294
1295 default:
1296 break;
1297 }
1298
1299 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1300 {
1301 const arm_fixed_mode_set fixed_arith_modes[] =
1302 {
1303 { QQmode, "qq" },
1304 { UQQmode, "uqq" },
1305 { HQmode, "hq" },
1306 { UHQmode, "uhq" },
1307 { SQmode, "sq" },
1308 { USQmode, "usq" },
1309 { DQmode, "dq" },
1310 { UDQmode, "udq" },
1311 { TQmode, "tq" },
1312 { UTQmode, "utq" },
1313 { HAmode, "ha" },
1314 { UHAmode, "uha" },
1315 { SAmode, "sa" },
1316 { USAmode, "usa" },
1317 { DAmode, "da" },
1318 { UDAmode, "uda" },
1319 { TAmode, "ta" },
1320 { UTAmode, "uta" }
1321 };
1322 const arm_fixed_mode_set fixed_conv_modes[] =
1323 {
1324 { QQmode, "qq" },
1325 { UQQmode, "uqq" },
1326 { HQmode, "hq" },
1327 { UHQmode, "uhq" },
1328 { SQmode, "sq" },
1329 { USQmode, "usq" },
1330 { DQmode, "dq" },
1331 { UDQmode, "udq" },
1332 { TQmode, "tq" },
1333 { UTQmode, "utq" },
1334 { HAmode, "ha" },
1335 { UHAmode, "uha" },
1336 { SAmode, "sa" },
1337 { USAmode, "usa" },
1338 { DAmode, "da" },
1339 { UDAmode, "uda" },
1340 { TAmode, "ta" },
1341 { UTAmode, "uta" },
1342 { QImode, "qi" },
1343 { HImode, "hi" },
1344 { SImode, "si" },
1345 { DImode, "di" },
1346 { TImode, "ti" },
1347 { SFmode, "sf" },
1348 { DFmode, "df" }
1349 };
1350 unsigned int i, j;
1351
1352 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
1353 {
1354 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
1355 "add", fixed_arith_modes[i].name, 3);
1356 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
1357 "ssadd", fixed_arith_modes[i].name, 3);
1358 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
1359 "usadd", fixed_arith_modes[i].name, 3);
1360 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
1361 "sub", fixed_arith_modes[i].name, 3);
1362 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
1363 "sssub", fixed_arith_modes[i].name, 3);
1364 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
1365 "ussub", fixed_arith_modes[i].name, 3);
1366 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
1367 "mul", fixed_arith_modes[i].name, 3);
1368 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
1369 "ssmul", fixed_arith_modes[i].name, 3);
1370 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
1371 "usmul", fixed_arith_modes[i].name, 3);
1372 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
1373 "div", fixed_arith_modes[i].name, 3);
1374 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
1375 "udiv", fixed_arith_modes[i].name, 3);
1376 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
1377 "ssdiv", fixed_arith_modes[i].name, 3);
1378 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
1379 "usdiv", fixed_arith_modes[i].name, 3);
1380 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
1381 "neg", fixed_arith_modes[i].name, 2);
1382 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
1383 "ssneg", fixed_arith_modes[i].name, 2);
1384 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
1385 "usneg", fixed_arith_modes[i].name, 2);
1386 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
1387 "ashl", fixed_arith_modes[i].name, 3);
1388 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
1389 "ashr", fixed_arith_modes[i].name, 3);
1390 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
1391 "lshr", fixed_arith_modes[i].name, 3);
1392 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
1393 "ssashl", fixed_arith_modes[i].name, 3);
1394 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
1395 "usashl", fixed_arith_modes[i].name, 3);
1396 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
1397 "cmp", fixed_arith_modes[i].name, 2);
1398 }
1399
1400 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
1401 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
1402 {
1403 if (i == j
1404 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
1405 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
1406 continue;
1407
1408 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
1409 fixed_conv_modes[j].mode, "fract",
1410 fixed_conv_modes[i].name,
1411 fixed_conv_modes[j].name);
1412 arm_set_fixed_conv_libfunc (satfract_optab,
1413 fixed_conv_modes[i].mode,
1414 fixed_conv_modes[j].mode, "satfract",
1415 fixed_conv_modes[i].name,
1416 fixed_conv_modes[j].name);
1417 arm_set_fixed_conv_libfunc (fractuns_optab,
1418 fixed_conv_modes[i].mode,
1419 fixed_conv_modes[j].mode, "fractuns",
1420 fixed_conv_modes[i].name,
1421 fixed_conv_modes[j].name);
1422 arm_set_fixed_conv_libfunc (satfractuns_optab,
1423 fixed_conv_modes[i].mode,
1424 fixed_conv_modes[j].mode, "satfractuns",
1425 fixed_conv_modes[i].name,
1426 fixed_conv_modes[j].name);
1427 }
1428 }
1429
1430 if (TARGET_AAPCS_BASED)
1431 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
1432 }
1433
1434 /* On AAPCS systems, this is the "struct __va_list". */
1435 static GTY(()) tree va_list_type;
1436
1437 /* Return the type to use as __builtin_va_list. */
1438 static tree
1439 arm_build_builtin_va_list (void)
1440 {
1441 tree va_list_name;
1442 tree ap_field;
1443
1444 if (!TARGET_AAPCS_BASED)
1445 return std_build_builtin_va_list ();
1446
1447 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1448 defined as:
1449
1450 struct __va_list
1451 {
1452 void *__ap;
1453 };
1454
1455 The C Library ABI further reinforces this definition in \S
1456 4.1.
1457
1458 We must follow this definition exactly. The structure tag
1459 name is visible in C++ mangled names, and thus forms a part
1460 of the ABI. The field name may be used by people who
1461 #include <stdarg.h>. */
1462 /* Create the type. */
1463 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
1464 /* Give it the required name. */
1465 va_list_name = build_decl (BUILTINS_LOCATION,
1466 TYPE_DECL,
1467 get_identifier ("__va_list"),
1468 va_list_type);
1469 DECL_ARTIFICIAL (va_list_name) = 1;
1470 TYPE_NAME (va_list_type) = va_list_name;
1471 TYPE_STUB_DECL (va_list_type) = va_list_name;
1472 /* Create the __ap field. */
1473 ap_field = build_decl (BUILTINS_LOCATION,
1474 FIELD_DECL,
1475 get_identifier ("__ap"),
1476 ptr_type_node);
1477 DECL_ARTIFICIAL (ap_field) = 1;
1478 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
1479 TYPE_FIELDS (va_list_type) = ap_field;
1480 /* Compute its layout. */
1481 layout_type (va_list_type);
1482
1483 return va_list_type;
1484 }
1485
1486 /* Return an expression of type "void *" pointing to the next
1487 available argument in a variable-argument list. VALIST is the
1488 user-level va_list object, of type __builtin_va_list. */
1489 static tree
1490 arm_extract_valist_ptr (tree valist)
1491 {
1492 if (TREE_TYPE (valist) == error_mark_node)
1493 return error_mark_node;
1494
1495 /* On an AAPCS target, the pointer is stored within "struct
1496 va_list". */
1497 if (TARGET_AAPCS_BASED)
1498 {
1499 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
1500 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
1501 valist, ap_field, NULL_TREE);
1502 }
1503
1504 return valist;
1505 }
1506
1507 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1508 static void
1509 arm_expand_builtin_va_start (tree valist, rtx nextarg)
1510 {
1511 valist = arm_extract_valist_ptr (valist);
1512 std_expand_builtin_va_start (valist, nextarg);
1513 }
1514
1515 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1516 static tree
1517 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
1518 gimple_seq *post_p)
1519 {
1520 valist = arm_extract_valist_ptr (valist);
1521 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1522 }
1523
1524 /* Fix up any incompatible options that the user has specified. */
1525 static void
1526 arm_option_override (void)
1527 {
1528 if (global_options_set.x_arm_arch_option)
1529 arm_selected_arch = &all_architectures[arm_arch_option];
1530
1531 if (global_options_set.x_arm_cpu_option)
1532 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
1533
1534 if (global_options_set.x_arm_tune_option)
1535 arm_selected_tune = &all_cores[(int) arm_tune_option];
1536
1537 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1538 SUBTARGET_OVERRIDE_OPTIONS;
1539 #endif
1540
1541 if (arm_selected_arch)
1542 {
1543 if (arm_selected_cpu)
1544 {
1545 /* Check for conflict between mcpu and march. */
1546 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1547 {
1548 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1549 arm_selected_cpu->name, arm_selected_arch->name);
1550 /* -march wins for code generation.
1551 -mcpu wins for default tuning. */
1552 if (!arm_selected_tune)
1553 arm_selected_tune = arm_selected_cpu;
1554
1555 arm_selected_cpu = arm_selected_arch;
1556 }
1557 else
1558 /* -mcpu wins. */
1559 arm_selected_arch = NULL;
1560 }
1561 else
1562 /* Pick a CPU based on the architecture. */
1563 arm_selected_cpu = arm_selected_arch;
1564 }
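/* As an illustration of the precedence above (a hypothetical example,
   not an exhaustive rule): a combination such as
   "-mcpu=arm7tdmi -march=armv6" would draw the conflict warning,
   generate code for armv6, and keep arm7tdmi as the default tuning
   target.  */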
1565
1566 /* If the user did not specify a processor, choose one for them. */
1567 if (!arm_selected_cpu)
1568 {
1569 const struct processors * sel;
1570 unsigned int sought;
1571
1572 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1573 if (!arm_selected_cpu->name)
1574 {
1575 #ifdef SUBTARGET_CPU_DEFAULT
1576 /* Use the subtarget default CPU if none was specified by
1577 configure. */
1578 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1579 #endif
1580 /* Default to ARM6. */
1581 if (!arm_selected_cpu->name)
1582 arm_selected_cpu = &all_cores[arm6];
1583 }
1584
1585 sel = arm_selected_cpu;
1586 insn_flags = sel->flags;
1587
1588 /* Now check to see if the user has specified some command line
1589 switches that require certain abilities from the CPU.
1590 sought = 0;
1591
1592 if (TARGET_INTERWORK || TARGET_THUMB)
1593 {
1594 sought |= (FL_THUMB | FL_MODE32);
1595
1596 /* There are no ARM processors that support both APCS-26 and
1597 interworking. Therefore we force FL_MODE26 to be removed
1598 from insn_flags here (if it was set), so that the search
1599 below will always be able to find a compatible processor. */
1600 insn_flags &= ~FL_MODE26;
1601 }
1602
1603 if (sought != 0 && ((sought & insn_flags) != sought))
1604 {
1605 /* Try to locate a CPU type that supports all of the abilities
1606 of the default CPU, plus the extra abilities requested by
1607 the user. */
1608 for (sel = all_cores; sel->name != NULL; sel++)
1609 if ((sel->flags & sought) == (sought | insn_flags))
1610 break;
1611
1612 if (sel->name == NULL)
1613 {
1614 unsigned current_bit_count = 0;
1615 const struct processors * best_fit = NULL;
1616
1617 /* Ideally we would issue an error message here, saying that it
1618 was not possible to find a CPU compatible with the default
1619 CPU, but which also supports the command line options
1620 specified by the programmer, and that they ought to use
1621 the -mcpu=<name> command line option to override the
1622 default CPU type.
1623
1624 Since we cannot find a CPU that has both the
1625 characteristics of the default CPU and the given command
1626 line options, we scan the array again looking for a best
1627 match.
1628 for (sel = all_cores; sel->name != NULL; sel++)
1629 if ((sel->flags & sought) == sought)
1630 {
1631 unsigned count;
1632
1633 count = bit_count (sel->flags & insn_flags);
1634
1635 if (count >= current_bit_count)
1636 {
1637 best_fit = sel;
1638 current_bit_count = count;
1639 }
1640 }
1641
1642 gcc_assert (best_fit);
1643 sel = best_fit;
1644 }
1645
1646 arm_selected_cpu = sel;
1647 }
1648 }
1649
1650 gcc_assert (arm_selected_cpu);
1651 /* The selected CPU may be an architecture, so look up tuning by core ID. */
1652 if (!arm_selected_tune)
1653 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1654
1655 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1656 insn_flags = arm_selected_cpu->flags;
1657 arm_base_arch = arm_selected_cpu->base_arch;
1658
1659 arm_tune = arm_selected_tune->core;
1660 tune_flags = arm_selected_tune->flags;
1661 current_tune = arm_selected_tune->tune;
1662
1663 /* Make sure that the processor choice does not conflict with any of the
1664 other command line choices. */
1665 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1666 error ("target CPU does not support ARM mode");
1667
1668 /* BPABI targets use linker tricks to allow interworking on cores
1669 without thumb support. */
1670 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
1671 {
1672 warning (0, "target CPU does not support interworking");
1673 target_flags &= ~MASK_INTERWORK;
1674 }
1675
1676 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1677 {
1678 warning (0, "target CPU does not support THUMB instructions");
1679 target_flags &= ~MASK_THUMB;
1680 }
1681
1682 if (TARGET_APCS_FRAME && TARGET_THUMB)
1683 {
1684 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1685 target_flags &= ~MASK_APCS_FRAME;
1686 }
1687
1688 /* Callee super interworking implies thumb interworking. Adding
1689 this to the flags here simplifies the logic elsewhere. */
1690 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1691 target_flags |= MASK_INTERWORK;
1692
1693 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1694 from here where no function is being compiled currently. */
1695 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1696 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1697
1698 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1699 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1700
1701 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1702 {
1703 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1704 target_flags |= MASK_APCS_FRAME;
1705 }
1706
1707 if (TARGET_POKE_FUNCTION_NAME)
1708 target_flags |= MASK_APCS_FRAME;
1709
1710 if (TARGET_APCS_REENT && flag_pic)
1711 error ("-fpic and -mapcs-reent are incompatible");
1712
1713 if (TARGET_APCS_REENT)
1714 warning (0, "APCS reentrant code not supported. Ignored");
1715
1716 /* If this target is normally configured to use APCS frames, warn if they
1717 are turned off and debugging is turned on. */
1718 if (TARGET_ARM
1719 && write_symbols != NO_DEBUG
1720 && !TARGET_APCS_FRAME
1721 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1722 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1723
1724 if (TARGET_APCS_FLOAT)
1725 warning (0, "passing floating point arguments in fp regs not yet supported");
1726
1727 if (TARGET_LITTLE_WORDS)
1728 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
1729 "will be removed in a future release");
1730
1731 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1732 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1733 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1734 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1735 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1736 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1737 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1738 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1739 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1740 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1741 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1742 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1743 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1744
1745 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1746 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1747 thumb_code = TARGET_ARM == 0;
1748 thumb1_code = TARGET_THUMB1 != 0;
1749 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1750 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1751 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1752 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
1753 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
1754 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
1755 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1756
1757 /* If we are not using the default (ARM mode) section anchor offset
1758 ranges, then set the correct ranges now. */
1759 if (TARGET_THUMB1)
1760 {
1761 /* Thumb-1 LDR instructions cannot have negative offsets.
1762 Permissible positive offset ranges are 5-bit (for byte loads),
1763 6-bit (for halfword loads), or 7-bit (for word loads).
1764 Empirical results suggest a 7-bit anchor range gives the best
1765 overall code size. */
1766 targetm.min_anchor_offset = 0;
1767 targetm.max_anchor_offset = 127;
1768 }
1769 else if (TARGET_THUMB2)
1770 {
1771 /* The minimum is set such that the total size of the block
1772 for a particular anchor is 248 + 1 + 4095 bytes, which is
1773 divisible by eight, ensuring natural spacing of anchors. */
1774 targetm.min_anchor_offset = -248;
1775 targetm.max_anchor_offset = 4095;
1776 }
1777
1778 /* V5 code we generate is completely interworking capable, so we turn off
1779 TARGET_INTERWORK here to avoid many tests later on. */
1780
1781 /* XXX However, we must pass the right pre-processor defines to CPP
1782 or GLD can get confused. This is a hack. */
1783 if (TARGET_INTERWORK)
1784 arm_cpp_interwork = 1;
1785
1786 if (arm_arch5)
1787 target_flags &= ~MASK_INTERWORK;
1788
1789 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1790 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1791
1792 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1793 error ("iwmmxt abi requires an iwmmxt capable cpu");
1794
1795 if (!global_options_set.x_arm_fpu_index)
1796 {
1797 const char *target_fpu_name;
1798 bool ok;
1799
1800 #ifdef FPUTYPE_DEFAULT
1801 target_fpu_name = FPUTYPE_DEFAULT;
1802 #else
1803 target_fpu_name = "vfp";
1804 #endif
1805
1806 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
1807 CL_TARGET);
1808 gcc_assert (ok);
1809 }
1810
1811 arm_fpu_desc = &all_fpus[arm_fpu_index];
1812
1813 switch (arm_fpu_desc->model)
1814 {
1815 case ARM_FP_MODEL_VFP:
1816 arm_fpu_attr = FPU_VFP;
1817 break;
1818
1819 default:
1820 gcc_unreachable();
1821 }
1822
1823 if (TARGET_AAPCS_BASED)
1824 {
1825 if (TARGET_CALLER_INTERWORKING)
1826 error ("AAPCS does not support -mcaller-super-interworking");
1827 else
1828 if (TARGET_CALLEE_INTERWORKING)
1829 error ("AAPCS does not support -mcallee-super-interworking");
1830 }
1831
1832 /* iWMMXt and NEON are incompatible. */
1833 if (TARGET_IWMMXT && TARGET_NEON)
1834 error ("iWMMXt and NEON are incompatible");
1835
1836 /* iWMMXt unsupported under Thumb mode. */
1837 if (TARGET_THUMB && TARGET_IWMMXT)
1838 error ("iWMMXt unsupported under Thumb mode");
1839
1840 /* __fp16 support currently assumes the core has ldrh. */
1841 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
1842 sorry ("__fp16 and no ldrh");
1843
1844 /* If soft-float is specified then don't use FPU. */
1845 if (TARGET_SOFT_FLOAT)
1846 arm_fpu_attr = FPU_NONE;
1847
1848 if (TARGET_AAPCS_BASED)
1849 {
1850 if (arm_abi == ARM_ABI_IWMMXT)
1851 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
1852 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
1853 && TARGET_HARD_FLOAT
1854 && TARGET_VFP)
1855 arm_pcs_default = ARM_PCS_AAPCS_VFP;
1856 else
1857 arm_pcs_default = ARM_PCS_AAPCS;
1858 }
1859 else
1860 {
1861 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1862 sorry ("-mfloat-abi=hard and VFP");
1863
1864 if (arm_abi == ARM_ABI_APCS)
1865 arm_pcs_default = ARM_PCS_APCS;
1866 else
1867 arm_pcs_default = ARM_PCS_ATPCS;
1868 }
1869
1870 /* For arm2/3 there is no need to do any scheduling if we are doing
1871 software floating-point. */
1872 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
1873 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1874
1875 /* Use the cp15 method if it is available. */
1876 if (target_thread_pointer == TP_AUTO)
1877 {
1878 if (arm_arch6k && !TARGET_THUMB1)
1879 target_thread_pointer = TP_CP15;
1880 else
1881 target_thread_pointer = TP_SOFT;
1882 }
1883
1884 if (TARGET_HARD_TP && TARGET_THUMB1)
1885 error ("can not use -mtp=cp15 with 16-bit Thumb");
1886
1887 /* Override the default structure alignment for AAPCS ABI. */
1888 if (!global_options_set.x_arm_structure_size_boundary)
1889 {
1890 if (TARGET_AAPCS_BASED)
1891 arm_structure_size_boundary = 8;
1892 }
1893 else
1894 {
1895 if (arm_structure_size_boundary != 8
1896 && arm_structure_size_boundary != 32
1897 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
1898 {
1899 if (ARM_DOUBLEWORD_ALIGN)
1900 warning (0,
1901 "structure size boundary can only be set to 8, 32 or 64");
1902 else
1903 warning (0, "structure size boundary can only be set to 8 or 32");
1904 arm_structure_size_boundary
1905 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
1906 }
1907 }
1908
1909 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1910 {
1911 error ("RTP PIC is incompatible with Thumb");
1912 flag_pic = 0;
1913 }
1914
1915 /* If stack checking is disabled, we can use r10 as the PIC register,
1916 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1917 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1918 {
1919 if (TARGET_VXWORKS_RTP)
1920 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1921 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1922 }
1923
1924 if (flag_pic && TARGET_VXWORKS_RTP)
1925 arm_pic_register = 9;
1926
1927 if (arm_pic_register_string != NULL)
1928 {
1929 int pic_register = decode_reg_name (arm_pic_register_string);
1930
1931 if (!flag_pic)
1932 warning (0, "-mpic-register= is useless without -fpic");
1933
1934 /* Prevent the user from choosing an obviously stupid PIC register. */
1935 else if (pic_register < 0 || call_used_regs[pic_register]
1936 || pic_register == HARD_FRAME_POINTER_REGNUM
1937 || pic_register == STACK_POINTER_REGNUM
1938 || pic_register >= PC_REGNUM
1939 || (TARGET_VXWORKS_RTP
1940 && (unsigned int) pic_register != arm_pic_register))
1941 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1942 else
1943 arm_pic_register = pic_register;
1944 }
1945
1946 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1947 if (fix_cm3_ldrd == 2)
1948 {
1949 if (arm_selected_cpu->core == cortexm3)
1950 fix_cm3_ldrd = 1;
1951 else
1952 fix_cm3_ldrd = 0;
1953 }
1954
1955 /* Enable -munaligned-access by default for
1956 - all ARMv6 architecture-based processors
1957 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
1958
1959 Disable -munaligned-access by default for
1960 - all pre-ARMv6 architecture-based processors
1961 - ARMv6-M architecture-based processors. */
1962
1963 if (unaligned_access == 2)
1964 {
1965 if (arm_arch6 && (arm_arch_notm || arm_arch7))
1966 unaligned_access = 1;
1967 else
1968 unaligned_access = 0;
1969 }
1970 else if (unaligned_access == 1
1971 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
1972 {
1973 warning (0, "target CPU does not support unaligned accesses");
1974 unaligned_access = 0;
1975 }
1976
1977 if (TARGET_THUMB1 && flag_schedule_insns)
1978 {
1979 /* Don't warn since it's on by default in -O2. */
1980 flag_schedule_insns = 0;
1981 }
1982
1983 if (optimize_size)
1984 {
1985 /* If optimizing for size, bump the number of instructions that we
1986 are prepared to conditionally execute (even on a StrongARM). */
1987 max_insns_skipped = 6;
1988 }
1989 else
1990 max_insns_skipped = current_tune->max_insns_skipped;
1991
1992 /* Hot/Cold partitioning is not currently supported, since we can't
1993 handle literal pool placement in that case. */
1994 if (flag_reorder_blocks_and_partition)
1995 {
1996 inform (input_location,
1997 "-freorder-blocks-and-partition not supported on this architecture");
1998 flag_reorder_blocks_and_partition = 0;
1999 flag_reorder_blocks = 1;
2000 }
2001
2002 if (flag_pic)
2003 /* Hoisting PIC address calculations more aggressively provides a small,
2004 but measurable, size reduction for PIC code. Therefore, we decrease
2005 the bar for unrestricted expression hoisting to the cost of PIC address
2006 calculation, which is 2 instructions. */
2007 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2008 global_options.x_param_values,
2009 global_options_set.x_param_values);
2010
2011 /* ARM EABI defaults to strict volatile bitfields. */
2012 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2013 && abi_version_at_least(2))
2014 flag_strict_volatile_bitfields = 1;
2015
2016 /* Enable software prefetching at -O3 for CPUs that have prefetch, and where we
2017 have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2018 if (flag_prefetch_loop_arrays < 0
2019 && HAVE_prefetch
2020 && optimize >= 3
2021 && current_tune->num_prefetch_slots > 0)
2022 flag_prefetch_loop_arrays = 1;
2023
2024 /* Set up parameters to be used in the prefetching algorithm. Do not override the
2025 defaults unless we are tuning for a core we have researched values for. */
2026 if (current_tune->num_prefetch_slots > 0)
2027 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2028 current_tune->num_prefetch_slots,
2029 global_options.x_param_values,
2030 global_options_set.x_param_values);
2031 if (current_tune->l1_cache_line_size >= 0)
2032 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2033 current_tune->l1_cache_line_size,
2034 global_options.x_param_values,
2035 global_options_set.x_param_values);
2036 if (current_tune->l1_cache_size >= 0)
2037 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2038 current_tune->l1_cache_size,
2039 global_options.x_param_values,
2040 global_options_set.x_param_values);
2041
2042 /* Use the alternative scheduling-pressure algorithm by default. */
2043 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2044 global_options.x_param_values,
2045 global_options_set.x_param_values);
2046
2047 /* Register global variables with the garbage collector. */
2048 arm_add_gc_roots ();
2049 }
2050
2051 static void
2052 arm_add_gc_roots (void)
2053 {
2054 gcc_obstack_init(&minipool_obstack);
2055 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2056 }
2057 \f
2058 /* A table of known ARM exception types.
2059 For use with the interrupt function attribute. */
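/* A minimal usage sketch (an assumed example, not code from this file):
   a handler declared as
       void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
   has its "FIQ" argument looked up in this table by arm_isr_value below,
   yielding ARM_FT_FIQ.  */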
2060
2061 typedef struct
2062 {
2063 const char *const arg;
2064 const unsigned long return_value;
2065 }
2066 isr_attribute_arg;
2067
2068 static const isr_attribute_arg isr_attribute_args [] =
2069 {
2070 { "IRQ", ARM_FT_ISR },
2071 { "irq", ARM_FT_ISR },
2072 { "FIQ", ARM_FT_FIQ },
2073 { "fiq", ARM_FT_FIQ },
2074 { "ABORT", ARM_FT_ISR },
2075 { "abort", ARM_FT_ISR },
2076 { "ABORT", ARM_FT_ISR },
2077 { "abort", ARM_FT_ISR },
2078 { "UNDEF", ARM_FT_EXCEPTION },
2079 { "undef", ARM_FT_EXCEPTION },
2080 { "SWI", ARM_FT_EXCEPTION },
2081 { "swi", ARM_FT_EXCEPTION },
2082 { NULL, ARM_FT_NORMAL }
2083 };
2084
2085 /* Returns the (interrupt) function type of the current
2086 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2087
2088 static unsigned long
2089 arm_isr_value (tree argument)
2090 {
2091 const isr_attribute_arg * ptr;
2092 const char * arg;
2093
2094 if (!arm_arch_notm)
2095 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2096
2097 /* No argument - default to IRQ. */
2098 if (argument == NULL_TREE)
2099 return ARM_FT_ISR;
2100
2101 /* Get the value of the argument. */
2102 if (TREE_VALUE (argument) == NULL_TREE
2103 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2104 return ARM_FT_UNKNOWN;
2105
2106 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2107
2108 /* Check it against the list of known arguments. */
2109 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2110 if (streq (arg, ptr->arg))
2111 return ptr->return_value;
2112
2113 /* An unrecognized interrupt type. */
2114 return ARM_FT_UNKNOWN;
2115 }
2116
2117 /* Computes the type of the current function. */
2118
2119 static unsigned long
2120 arm_compute_func_type (void)
2121 {
2122 unsigned long type = ARM_FT_UNKNOWN;
2123 tree a;
2124 tree attr;
2125
2126 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2127
2128 /* Decide if the current function is volatile. Such functions
2129 never return, and many memory cycles can be saved by not storing
2130 register values that will never be needed again. This optimization
2131 was added to speed up context switching in a kernel application. */
2132 if (optimize > 0
2133 && (TREE_NOTHROW (current_function_decl)
2134 || !(flag_unwind_tables
2135 || (flag_exceptions
2136 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2137 && TREE_THIS_VOLATILE (current_function_decl))
2138 type |= ARM_FT_VOLATILE;
2139
2140 if (cfun->static_chain_decl != NULL)
2141 type |= ARM_FT_NESTED;
2142
2143 attr = DECL_ATTRIBUTES (current_function_decl);
2144
2145 a = lookup_attribute ("naked", attr);
2146 if (a != NULL_TREE)
2147 type |= ARM_FT_NAKED;
2148
2149 a = lookup_attribute ("isr", attr);
2150 if (a == NULL_TREE)
2151 a = lookup_attribute ("interrupt", attr);
2152
2153 if (a == NULL_TREE)
2154 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2155 else
2156 type |= arm_isr_value (TREE_VALUE (a));
2157
2158 return type;
2159 }
2160
2161 /* Returns the type of the current function. */
2162
2163 unsigned long
2164 arm_current_func_type (void)
2165 {
2166 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2167 cfun->machine->func_type = arm_compute_func_type ();
2168
2169 return cfun->machine->func_type;
2170 }
2171
2172 bool
2173 arm_allocate_stack_slots_for_args (void)
2174 {
2175 /* Naked functions should not allocate stack slots for arguments. */
2176 return !IS_NAKED (arm_current_func_type ());
2177 }
2178
2179 static bool
2180 arm_warn_func_return (tree decl)
2181 {
2182 /* Naked functions are implemented entirely in assembly, including the
2183 return sequence, so suppress warnings about this. */
2184 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2185 }
2186
2187 \f
2188 /* Output assembler code for a block containing the constant parts
2189 of a trampoline, leaving space for the variable parts.
2190
2191 On the ARM (if r8 is the static chain regnum, and remembering that
2192 referencing pc adds an offset of 8), the trampoline looks like:
2193 ldr r8, [pc, #0]
2194 ldr pc, [pc]
2195 .word static chain value
2196 .word function's address
2197 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
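/* For reference, a sketch of the resulting ARM-mode layout (offsets
   deduced from arm_trampoline_init below, assuming r8 is the static
   chain register as noted above):
       offset  0:  ldr  r8, [pc, #0]    @ loads the word at offset 8
       offset  4:  ldr  pc, [pc, #0]    @ loads the word at offset 12
       offset  8:  .word <static chain value>
       offset 12:  .word <function address>  */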
2198
2199 static void
2200 arm_asm_trampoline_template (FILE *f)
2201 {
2202 if (TARGET_ARM)
2203 {
2204 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2205 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2206 }
2207 else if (TARGET_THUMB2)
2208 {
2209 /* The Thumb-2 trampoline is similar to the arm implementation.
2210 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2211 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2212 STATIC_CHAIN_REGNUM, PC_REGNUM);
2213 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2214 }
2215 else
2216 {
2217 ASM_OUTPUT_ALIGN (f, 2);
2218 fprintf (f, "\t.code\t16\n");
2219 fprintf (f, ".Ltrampoline_start:\n");
2220 asm_fprintf (f, "\tpush\t{r0, r1}\n");
2221 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2222 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
2223 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
2224 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
2225 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
2226 }
2227 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2228 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
2229 }
2230
2231 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2232
2233 static void
2234 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
2235 {
2236 rtx fnaddr, mem, a_tramp;
2237
2238 emit_block_move (m_tramp, assemble_trampoline_template (),
2239 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
2240
2241 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
2242 emit_move_insn (mem, chain_value);
2243
2244 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
2245 fnaddr = XEXP (DECL_RTL (fndecl), 0);
2246 emit_move_insn (mem, fnaddr);
2247
2248 a_tramp = XEXP (m_tramp, 0);
2249 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
2250 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
2251 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
2252 }
2253
2254 /* Thumb trampolines should be entered in thumb mode, so set
2255 the bottom bit of the address. */
2256
2257 static rtx
2258 arm_trampoline_adjust_address (rtx addr)
2259 {
2260 if (TARGET_THUMB)
2261 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
2262 NULL, 0, OPTAB_LIB_WIDEN);
2263 return addr;
2264 }
2265 \f
2266 /* Return 1 if it is possible to return using a single instruction.
2267 If SIBLING is non-null, this is a test for a return before a sibling
2268 call. SIBLING is the call insn, so we can examine its register usage. */
2269
2270 int
2271 use_return_insn (int iscond, rtx sibling)
2272 {
2273 int regno;
2274 unsigned int func_type;
2275 unsigned long saved_int_regs;
2276 unsigned HOST_WIDE_INT stack_adjust;
2277 arm_stack_offsets *offsets;
2278
2279 /* Never use a return instruction before reload has run. */
2280 if (!reload_completed)
2281 return 0;
2282
2283 func_type = arm_current_func_type ();
2284
2285 /* Naked, volatile and stack alignment functions need special
2286 consideration. */
2287 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
2288 return 0;
2289
2290 /* So do interrupt functions that use the frame pointer and Thumb
2291 interrupt functions. */
2292 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
2293 return 0;
2294
2295 offsets = arm_get_frame_offsets ();
2296 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
2297
2298 /* As do variadic functions. */
2299 if (crtl->args.pretend_args_size
2300 || cfun->machine->uses_anonymous_args
2301 /* Or if the function calls __builtin_eh_return () */
2302 || crtl->calls_eh_return
2303 /* Or if the function calls alloca */
2304 || cfun->calls_alloca
2305 /* Or if there is a stack adjustment. However, if the stack pointer
2306 is saved on the stack, we can use a pre-incrementing stack load. */
2307 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
2308 && stack_adjust == 4)))
2309 return 0;
2310
2311 saved_int_regs = offsets->saved_regs_mask;
2312
2313 /* Unfortunately, the insn
2314
2315 ldmib sp, {..., sp, ...}
2316
2317 triggers a bug on most SA-110 based devices, such that the stack
2318 pointer won't be correctly restored if the instruction takes a
2319 page fault. We work around this problem by popping r3 along with
2320 the other registers, since that is never slower than executing
2321 another instruction.
2322
2323 We test for !arm_arch5 here, because code for any architecture
2324 less than this could potentially be run on one of the buggy
2325 chips. */
2326 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
2327 {
2328 /* Validate that r3 is a call-clobbered register (always true in
2329 the default abi) ... */
2330 if (!call_used_regs[3])
2331 return 0;
2332
2333 /* ... that it isn't being used for a return value ... */
2334 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
2335 return 0;
2336
2337 /* ... or for a tail-call argument ... */
2338 if (sibling)
2339 {
2340 gcc_assert (GET_CODE (sibling) == CALL_INSN);
2341
2342 if (find_regno_fusage (sibling, USE, 3))
2343 return 0;
2344 }
2345
2346 /* ... and that there are no call-saved registers in r0-r2
2347 (always true in the default ABI). */
2348 if (saved_int_regs & 0x7)
2349 return 0;
2350 }
2351
2352 /* Can't be done if interworking with Thumb, and any registers have been
2353 stacked. */
2354 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
2355 return 0;
2356
2357 /* On StrongARM, conditional returns are expensive if they aren't
2358 taken and multiple registers have been stacked. */
2359 if (iscond && arm_tune_strongarm)
2360 {
2361 /* Conditional return when just the LR is stored is a simple
2362 conditional-load instruction; that's not expensive.
2363 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
2364 return 0;
2365
2366 if (flag_pic
2367 && arm_pic_register != INVALID_REGNUM
2368 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
2369 return 0;
2370 }
2371
2372 /* If there are saved registers but the LR isn't saved, then we need
2373 two instructions for the return. */
2374 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
2375 return 0;
2376
2377 /* Can't be done if any of the VFP regs are pushed,
2378 since this also requires an insn. */
2379 if (TARGET_HARD_FLOAT && TARGET_VFP)
2380 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
2381 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
2382 return 0;
2383
2384 if (TARGET_REALLY_IWMMXT)
2385 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
2386 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
2387 return 0;
2388
2389 return 1;
2390 }
2391
2392 /* Return TRUE if int I is a valid immediate ARM constant. */
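/* For reference (a summary of the checks below, not an additional rule):
   a valid ARM-mode immediate is an 8-bit value rotated right by an even
   amount, so e.g. 0x000003FC (0xFF << 2) and 0xF000000F (0xFF rotated
   right by 4, wrapping around bit 0) are accepted, while 0x00000101 is
   not.  The Thumb-2 path below additionally accepts the replicated
   patterns 0x00XY00XY, 0xXY00XY00 and 0xXYXYXYXY.  */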
2393
2394 int
2395 const_ok_for_arm (HOST_WIDE_INT i)
2396 {
2397 int lowbit;
2398
2399 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2400 be all zero, or all one. */
2401 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
2402 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
2403 != ((~(unsigned HOST_WIDE_INT) 0)
2404 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
2405 return FALSE;
2406
2407 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
2408
2409 /* Fast return for 0 and small values. We must do this for zero, since
2410 the code below can't handle that one case. */
2411 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
2412 return TRUE;
2413
2414 /* Get the number of trailing zeros. */
2415 lowbit = ffs((int) i) - 1;
2416
2417 /* Only even shifts are allowed in ARM mode so round down to the
2418 nearest even number. */
2419 if (TARGET_ARM)
2420 lowbit &= ~1;
2421
2422 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
2423 return TRUE;
2424
2425 if (TARGET_ARM)
2426 {
2427 /* Allow rotated constants in ARM mode. */
2428 if (lowbit <= 4
2429 && ((i & ~0xc000003f) == 0
2430 || (i & ~0xf000000f) == 0
2431 || (i & ~0xfc000003) == 0))
2432 return TRUE;
2433 }
2434 else
2435 {
2436 HOST_WIDE_INT v;
2437
2438 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2439 v = i & 0xff;
2440 v |= v << 16;
2441 if (i == v || i == (v | (v << 8)))
2442 return TRUE;
2443
2444 /* Allow repeated pattern 0xXY00XY00. */
2445 v = i & 0xff00;
2446 v |= v << 16;
2447 if (i == v)
2448 return TRUE;
2449 }
2450
2451 return FALSE;
2452 }
2453
2454 /* Return true if I is a valid constant for the operation CODE. */
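/* For example (an illustration of the cases below, not new behaviour):
   an AND with 0xFFFFFF00 is not itself a valid immediate, but its
   complement 0xFF is, so the AND can be done with a single BIC;
   similarly an addition of -256 can be done as a subtraction of 256.  */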
2455 int
2456 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2457 {
2458 if (const_ok_for_arm (i))
2459 return 1;
2460
2461 switch (code)
2462 {
2463 case SET:
2464 /* See if we can use movw. */
2465 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2466 return 1;
2467 else
2468 /* Otherwise, try mvn. */
2469 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2470
2471 case PLUS:
2472 /* See if we can use addw or subw. */
2473 if (TARGET_THUMB2
2474 && ((i & 0xfffff000) == 0
2475 || ((-i) & 0xfffff000) == 0))
2476 return 1;
2477 /* else fall through. */
2478
2479 case COMPARE:
2480 case EQ:
2481 case NE:
2482 case GT:
2483 case LE:
2484 case LT:
2485 case GE:
2486 case GEU:
2487 case LTU:
2488 case GTU:
2489 case LEU:
2490 case UNORDERED:
2491 case ORDERED:
2492 case UNEQ:
2493 case UNGE:
2494 case UNLT:
2495 case UNGT:
2496 case UNLE:
2497 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
2498
2499 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
2500 case XOR:
2501 return 0;
2502
2503 case IOR:
2504 if (TARGET_THUMB2)
2505 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2506 return 0;
2507
2508 case AND:
2509 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
2510
2511 default:
2512 gcc_unreachable ();
2513 }
2514 }
2515
2516 /* Return true if I is a valid di mode constant for the operation CODE. */
2517 int
2518 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
2519 {
2520 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
2521 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
2522 rtx hi = GEN_INT (hi_val);
2523 rtx lo = GEN_INT (lo_val);
2524
2525 if (TARGET_THUMB1)
2526 return 0;
2527
2528 switch (code)
2529 {
2530 case PLUS:
2531 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
2532
2533 default:
2534 return 0;
2535 }
2536 }
2537
2538 /* Emit a sequence of insns to handle a large constant.
2539 CODE is the code of the operation required; it can be any of SET, PLUS,
2540 IOR, AND, XOR, MINUS;
2541 MODE is the mode in which the operation is being performed;
2542 VAL is the integer to operate on;
2543 SOURCE is the other operand (a register, or a null-pointer for SET);
2544 SUBTARGETS means it is safe to create scratch registers if that will
2545 either produce a simpler sequence or allow the values to be CSEd.
2546 Return value is the number of insns emitted. */
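/* As a rough illustration (assuming the cost checks in the body are
   exceeded): a SET of a constant such as 0x12345678, which no single
   rotated 8-bit immediate can express, is emitted with arm_emit_movpair
   when MOVW/MOVT is available; cheaper constants fall through to
   arm_gen_constant below, which builds them from rotated immediates.  */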
2547
2548 /* ??? Tweak this for thumb2. */
2549 int
2550 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
2551 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
2552 {
2553 rtx cond;
2554
2555 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
2556 cond = COND_EXEC_TEST (PATTERN (insn));
2557 else
2558 cond = NULL_RTX;
2559
2560 if (subtargets || code == SET
2561 || (GET_CODE (target) == REG && GET_CODE (source) == REG
2562 && REGNO (target) != REGNO (source)))
2563 {
2564 /* After arm_reorg has been called, we can't fix up expensive
2565 constants by pushing them into memory so we must synthesize
2566 them in-line, regardless of the cost. This is only likely to
2567 be more costly on chips that have load delay slots and we are
2568 compiling without running the scheduler (so no splitting
2569 occurred before the final instruction emission).
2570
2571 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2572 */
2573 if (!after_arm_reorg
2574 && !cond
2575 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2576 1, 0)
2577 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2578 + (code != SET))))
2579 {
2580 if (code == SET)
2581 {
2582 /* Currently SET is the only monadic value for CODE; all
2583 the rest are dyadic. */
2584 if (TARGET_USE_MOVT)
2585 arm_emit_movpair (target, GEN_INT (val));
2586 else
2587 emit_set_insn (target, GEN_INT (val));
2588
2589 return 1;
2590 }
2591 else
2592 {
2593 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
2594
2595 if (TARGET_USE_MOVT)
2596 arm_emit_movpair (temp, GEN_INT (val));
2597 else
2598 emit_set_insn (temp, GEN_INT (val));
2599
2600 /* For MINUS, the constant is the minuend (target = val - source),
2601 since we never have subtraction of a constant. */
2602 if (code == MINUS)
2603 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
2604 else
2605 emit_set_insn (target,
2606 gen_rtx_fmt_ee (code, mode, source, temp));
2607 return 2;
2608 }
2609 }
2610 }
2611
2612 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
2613 1);
2614 }
2615
2616 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
2617 ARM/THUMB2 immediates, and add up to VAL.
2618 The function's return value gives the number of insns required.
2619 static int
2620 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2621 struct four_ints *return_sequence)
2622 {
2623 int best_consecutive_zeros = 0;
2624 int i;
2625 int best_start = 0;
2626 int insns1, insns2;
2627 struct four_ints tmp_sequence;
2628
2629 /* If we aren't targeting ARM, the best place to start is always at
2630 the bottom; otherwise look more closely. */
2631 if (TARGET_ARM)
2632 {
2633 for (i = 0; i < 32; i += 2)
2634 {
2635 int consecutive_zeros = 0;
2636
2637 if (!(val & (3 << i)))
2638 {
2639 while ((i < 32) && !(val & (3 << i)))
2640 {
2641 consecutive_zeros += 2;
2642 i += 2;
2643 }
2644 if (consecutive_zeros > best_consecutive_zeros)
2645 {
2646 best_consecutive_zeros = consecutive_zeros;
2647 best_start = i - consecutive_zeros;
2648 }
2649 i -= 2;
2650 }
2651 }
2652 }
2653
2654 /* So long as it won't require any more insns to do so, it's
2655 desirable to emit a small constant (in bits 0...9) in the last
2656 insn. This way there is more chance that it can be combined with
2657 a later addressing insn to form a pre-indexed load or store
2658 operation. Consider:
2659
2660 *((volatile int *)0xe0000100) = 1;
2661 *((volatile int *)0xe0000110) = 2;
2662
2663 We want this to wind up as:
2664
2665 mov rA, #0xe0000000
2666 mov rB, #1
2667 str rB, [rA, #0x100]
2668 mov rB, #2
2669 str rB, [rA, #0x110]
2670
2671 rather than having to synthesize both large constants from scratch.
2672
2673 Therefore, we calculate how many insns would be required to emit
2674 the constant starting from `best_start', and also starting from
2675 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2676 yield a shorter sequence, we may as well use zero. */
2677 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2678 if (best_start != 0
2679 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2680 {
2681 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2682 if (insns2 <= insns1)
2683 {
2684 *return_sequence = tmp_sequence;
2685 insns1 = insns2;
2686 }
2687 }
2688
2689 return insns1;
2690 }
2691
2692 /* As for optimal_immediate_sequence, but starting at bit-position I. */
2693 static int
2694 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2695 struct four_ints *return_sequence, int i)
2696 {
2697 int remainder = val & 0xffffffff;
2698 int insns = 0;
2699
2700 /* Try to find a way of doing the job in either two or three
2701 instructions.
2702
2703 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2704 location. We start at position I. This may be the MSB, or
2705 optimal_immediate_sequence may have positioned it at the largest block
2706 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2707 wrapping around to the top of the word when we drop off the bottom.
2708 In the worst case this code should produce no more than four insns.
2709
2710 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2711 constants, shifted to any arbitrary location. We should always start
2712 at the MSB. */
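/* A small worked example (assumed value, for illustration only):
   in ARM mode VAL = 0x0F0000FE is handled by two iterations of the
   loop below, yielding the rotated 8-bit immediates 0x0F000000 and
   0x000000FE.  */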
2713 do
2714 {
2715 int end;
2716 unsigned int b1, b2, b3, b4;
2717 unsigned HOST_WIDE_INT result;
2718 int loc;
2719
2720 gcc_assert (insns < 4);
2721
2722 if (i <= 0)
2723 i += 32;
2724
2725 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2726 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2727 {
2728 loc = i;
2729 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2730 /* We can use addw/subw for the last 12 bits. */
2731 result = remainder;
2732 else
2733 {
2734 /* Use an 8-bit shifted/rotated immediate. */
2735 end = i - 8;
2736 if (end < 0)
2737 end += 32;
2738 result = remainder & ((0x0ff << end)
2739 | ((i < end) ? (0xff >> (32 - end))
2740 : 0));
2741 i -= 8;
2742 }
2743 }
2744 else
2745 {
2746 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2747 arbitrary shifts. */
2748 i -= TARGET_ARM ? 2 : 1;
2749 continue;
2750 }
2751
2752 /* Next, see if we can do a better job with a thumb2 replicated
2753 constant.
2754
2755 We do it this way around to catch the cases like 0x01F001E0 where
2756 two 8-bit immediates would work, but a replicated constant would
2757 make it worse.
2758
2759 TODO: 16-bit constants that don't clear all the bits, but still win.
2760 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2761 if (TARGET_THUMB2)
2762 {
2763 b1 = (remainder & 0xff000000) >> 24;
2764 b2 = (remainder & 0x00ff0000) >> 16;
2765 b3 = (remainder & 0x0000ff00) >> 8;
2766 b4 = remainder & 0xff;
2767
2768 if (loc > 24)
2769 {
2770 /* The 8-bit immediate already found clears b1 (and maybe b2),
2771 but must leave b3 and b4 alone. */
2772
2773 /* First try to find a 32-bit replicated constant that clears
2774 almost everything. We can assume that we can't do it in one,
2775 or else we wouldn't be here. */
2776 unsigned int tmp = b1 & b2 & b3 & b4;
2777 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2778 + (tmp << 24);
2779 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2780 + (tmp == b3) + (tmp == b4);
2781 if (tmp
2782 && (matching_bytes >= 3
2783 || (matching_bytes == 2
2784 && const_ok_for_op (remainder & ~tmp2, code))))
2785 {
2786 /* At least 3 of the bytes match, and the fourth has at
2787 least as many bits set, or two of the bytes match
2788 and it will only require one more insn to finish. */
2789 result = tmp2;
2790 i = tmp != b1 ? 32
2791 : tmp != b2 ? 24
2792 : tmp != b3 ? 16
2793 : 8;
2794 }
2795
2796 /* Second, try to find a 16-bit replicated constant that can
2797 leave three of the bytes clear. If b2 or b4 is already
2798 zero, then we can. If the 8-bit from above would not
2799 clear b2 anyway, then we still win. */
2800 else if (b1 == b3 && (!b2 || !b4
2801 || (remainder & 0x00ff0000 & ~result)))
2802 {
2803 result = remainder & 0xff00ff00;
2804 i = 24;
2805 }
2806 }
2807 else if (loc > 16)
2808 {
2809 /* The 8-bit immediate already found clears b2 (and maybe b3)
2810 and we don't get here unless b1 is already clear, but it will
2811 leave b4 unchanged. */
2812
2813 /* If we can clear b2 and b4 at once, then we win, since the
2814 8-bits couldn't possibly reach that far. */
2815 if (b2 == b4)
2816 {
2817 result = remainder & 0x00ff00ff;
2818 i = 16;
2819 }
2820 }
2821 }
2822
2823 return_sequence->i[insns++] = result;
2824 remainder &= ~result;
2825
2826 if (code == SET || code == MINUS)
2827 code = PLUS;
2828 }
2829 while (remainder);
2830
2831 return insns;
2832 }
2833
2834 /* Emit an instruction with the indicated PATTERN. If COND is
2835 non-NULL, conditionalize the execution of the instruction on COND
2836 being true. */
2837
2838 static void
2839 emit_constant_insn (rtx cond, rtx pattern)
2840 {
2841 if (cond)
2842 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
2843 emit_insn (pattern);
2844 }
2845
2846 /* As above, but extra parameter GENERATE which, if clear, suppresses
2847 RTL generation. */
2848
2849 static int
2850 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
2851 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
2852 int generate)
2853 {
2854 int can_invert = 0;
2855 int can_negate = 0;
2856 int final_invert = 0;
2857 int i;
2858 int set_sign_bit_copies = 0;
2859 int clear_sign_bit_copies = 0;
2860 int clear_zero_bit_copies = 0;
2861 int set_zero_bit_copies = 0;
2862 int insns = 0, neg_insns, inv_insns;
2863 unsigned HOST_WIDE_INT temp1, temp2;
2864 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2865 struct four_ints *immediates;
2866 struct four_ints pos_immediates, neg_immediates, inv_immediates;
2867
2868 /* Find out which operations are safe for a given CODE. Also do a quick
2869 check for degenerate cases; these can occur when DImode operations
2870 are split. */
2871 switch (code)
2872 {
2873 case SET:
2874 can_invert = 1;
2875 break;
2876
2877 case PLUS:
2878 can_negate = 1;
2879 break;
2880
2881 case IOR:
2882 if (remainder == 0xffffffff)
2883 {
2884 if (generate)
2885 emit_constant_insn (cond,
2886 gen_rtx_SET (VOIDmode, target,
2887 GEN_INT (ARM_SIGN_EXTEND (val))));
2888 return 1;
2889 }
2890
2891 if (remainder == 0)
2892 {
2893 if (reload_completed && rtx_equal_p (target, source))
2894 return 0;
2895
2896 if (generate)
2897 emit_constant_insn (cond,
2898 gen_rtx_SET (VOIDmode, target, source));
2899 return 1;
2900 }
2901 break;
2902
2903 case AND:
2904 if (remainder == 0)
2905 {
2906 if (generate)
2907 emit_constant_insn (cond,
2908 gen_rtx_SET (VOIDmode, target, const0_rtx));
2909 return 1;
2910 }
2911 if (remainder == 0xffffffff)
2912 {
2913 if (reload_completed && rtx_equal_p (target, source))
2914 return 0;
2915 if (generate)
2916 emit_constant_insn (cond,
2917 gen_rtx_SET (VOIDmode, target, source));
2918 return 1;
2919 }
2920 can_invert = 1;
2921 break;
2922
2923 case XOR:
2924 if (remainder == 0)
2925 {
2926 if (reload_completed && rtx_equal_p (target, source))
2927 return 0;
2928 if (generate)
2929 emit_constant_insn (cond,
2930 gen_rtx_SET (VOIDmode, target, source));
2931 return 1;
2932 }
2933
2934 if (remainder == 0xffffffff)
2935 {
2936 if (generate)
2937 emit_constant_insn (cond,
2938 gen_rtx_SET (VOIDmode, target,
2939 gen_rtx_NOT (mode, source)));
2940 return 1;
2941 }
2942 final_invert = 1;
2943 break;
2944
2945 case MINUS:
2946 /* We treat MINUS as (val - source), since (source - val) is always
2947 passed as (source + (-val)). */
2948 if (remainder == 0)
2949 {
2950 if (generate)
2951 emit_constant_insn (cond,
2952 gen_rtx_SET (VOIDmode, target,
2953 gen_rtx_NEG (mode, source)));
2954 return 1;
2955 }
2956 if (const_ok_for_arm (val))
2957 {
2958 if (generate)
2959 emit_constant_insn (cond,
2960 gen_rtx_SET (VOIDmode, target,
2961 gen_rtx_MINUS (mode, GEN_INT (val),
2962 source)));
2963 return 1;
2964 }
2965
2966 break;
2967
2968 default:
2969 gcc_unreachable ();
2970 }
2971
2972 /* If we can do it in one insn get out quickly. */
2973 if (const_ok_for_op (val, code))
2974 {
2975 if (generate)
2976 emit_constant_insn (cond,
2977 gen_rtx_SET (VOIDmode, target,
2978 (source
2979 ? gen_rtx_fmt_ee (code, mode, source,
2980 GEN_INT (val))
2981 : GEN_INT (val))));
2982 return 1;
2983 }
2984
2985 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
2986 insn. */
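/* For instance (illustrative only): AND with 0xFFFF becomes a single
   uxth on ARMv6 and later, and AND with 0x1FFFFF, i.e. (1 << 21) - 1,
   becomes a single ubfx on Thumb-2 capable cores via the extzv_t2
   expansion below.  */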
2987 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
2988 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
2989 {
2990 if (generate)
2991 {
2992 if (mode == SImode && i == 16)
2993 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
2994 smaller insn. */
2995 emit_constant_insn (cond,
2996 gen_zero_extendhisi2
2997 (target, gen_lowpart (HImode, source)));
2998 else
2999 /* The extzv pattern only supports SImode, but we can coerce the operands
3000 into that mode. */
3001 emit_constant_insn (cond,
3002 gen_extzv_t2 (gen_lowpart (SImode, target),
3003 gen_lowpart (SImode, source),
3004 GEN_INT (i), const0_rtx));
3005 }
3006
3007 return 1;
3008 }
3009
3010 /* Calculate a few attributes that may be useful for specific
3011 optimizations. */
3012 /* Count number of leading zeros. */
3013 for (i = 31; i >= 0; i--)
3014 {
3015 if ((remainder & (1 << i)) == 0)
3016 clear_sign_bit_copies++;
3017 else
3018 break;
3019 }
3020
3021 /* Count number of leading 1's. */
3022 for (i = 31; i >= 0; i--)
3023 {
3024 if ((remainder & (1 << i)) != 0)
3025 set_sign_bit_copies++;
3026 else
3027 break;
3028 }
3029
3030 /* Count number of trailing zeros. */
3031 for (i = 0; i <= 31; i++)
3032 {
3033 if ((remainder & (1 << i)) == 0)
3034 clear_zero_bit_copies++;
3035 else
3036 break;
3037 }
3038
3039 /* Count number of trailing 1's. */
3040 for (i = 0; i <= 31; i++)
3041 {
3042 if ((remainder & (1 << i)) != 0)
3043 set_zero_bit_copies++;
3044 else
3045 break;
3046 }
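/* For example (illustrative values): remainder = 0x0FFFFE00 gives
   clear_sign_bit_copies = 4, set_sign_bit_copies = 0,
   clear_zero_bit_copies = 9 and set_zero_bit_copies = 0.  */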
3047
3048 switch (code)
3049 {
3050 case SET:
3051 /* See if we can do this by sign_extending a constant that is known
3052 to be negative. This is a good way of doing it, since the shift
3053 may well merge into a subsequent insn. */
3054 if (set_sign_bit_copies > 1)
3055 {
3056 if (const_ok_for_arm
3057 (temp1 = ARM_SIGN_EXTEND (remainder
3058 << (set_sign_bit_copies - 1))))
3059 {
3060 if (generate)
3061 {
3062 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3063 emit_constant_insn (cond,
3064 gen_rtx_SET (VOIDmode, new_src,
3065 GEN_INT (temp1)));
3066 emit_constant_insn (cond,
3067 gen_ashrsi3 (target, new_src,
3068 GEN_INT (set_sign_bit_copies - 1)));
3069 }
3070 return 2;
3071 }
3072 /* For an inverted constant, we will need to set the low bits;
3073 these will be shifted out of harm's way. */
3074 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3075 if (const_ok_for_arm (~temp1))
3076 {
3077 if (generate)
3078 {
3079 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3080 emit_constant_insn (cond,
3081 gen_rtx_SET (VOIDmode, new_src,
3082 GEN_INT (temp1)));
3083 emit_constant_insn (cond,
3084 gen_ashrsi3 (target, new_src,
3085 GEN_INT (set_sign_bit_copies - 1)));
3086 }
3087 return 2;
3088 }
3089 }
3090
3091 /* See if we can calculate the value as the difference between two
3092 valid immediates. */
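/* A worked example (assumed value): remainder = 0x0001FFFE gives
   topshift = 14, temp1 = 0x00020000 and temp2 = 2, so the value is
   built by loading #0x20000 into a scratch register and then
   subtracting 2 from it.  */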
3093 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3094 {
3095 int topshift = clear_sign_bit_copies & ~1;
3096
3097 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3098 & (0xff000000 >> topshift));
3099
3100 /* If temp1 is zero, then that means the 9 most significant
3101 bits of remainder were 1 and we've caused it to overflow.
3102 When topshift is 0 we don't need to do anything since we
3103 can borrow from 'bit 32'. */
3104 if (temp1 == 0 && topshift != 0)
3105 temp1 = 0x80000000 >> (topshift - 1);
3106
3107 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3108
3109 if (const_ok_for_arm (temp2))
3110 {
3111 if (generate)
3112 {
3113 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3114 emit_constant_insn (cond,
3115 gen_rtx_SET (VOIDmode, new_src,
3116 GEN_INT (temp1)));
3117 emit_constant_insn (cond,
3118 gen_addsi3 (target, new_src,
3119 GEN_INT (-temp2)));
3120 }
3121
3122 return 2;
3123 }
3124 }
3125
3126 /* See if we can generate this by setting the bottom (or the top)
3127 16 bits, and then shifting these into the other half of the
3128 word. We only look for the simplest cases; to do more would cost
3129 too much. Be careful, however, not to generate this when the
3130 alternative would take fewer insns. */
3131 if (val & 0xffff0000)
3132 {
3133 temp1 = remainder & 0xffff0000;
3134 temp2 = remainder & 0x0000ffff;
3135
3136 /* Overlaps outside this range are best done using other methods. */
3137 for (i = 9; i < 24; i++)
3138 {
3139 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3140 && !const_ok_for_arm (temp2))
3141 {
3142 rtx new_src = (subtargets
3143 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3144 : target);
3145 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3146 source, subtargets, generate);
3147 source = new_src;
3148 if (generate)
3149 emit_constant_insn
3150 (cond,
3151 gen_rtx_SET
3152 (VOIDmode, target,
3153 gen_rtx_IOR (mode,
3154 gen_rtx_ASHIFT (mode, source,
3155 GEN_INT (i)),
3156 source)));
3157 return insns + 1;
3158 }
3159 }
3160
3161 /* Don't duplicate cases already considered. */
3162 for (i = 17; i < 24; i++)
3163 {
3164 if (((temp1 | (temp1 >> i)) == remainder)
3165 && !const_ok_for_arm (temp1))
3166 {
3167 rtx new_src = (subtargets
3168 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3169 : target);
3170 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3171 source, subtargets, generate);
3172 source = new_src;
3173 if (generate)
3174 emit_constant_insn
3175 (cond,
3176 gen_rtx_SET (VOIDmode, target,
3177 gen_rtx_IOR
3178 (mode,
3179 gen_rtx_LSHIFTRT (mode, source,
3180 GEN_INT (i)),
3181 source)));
3182 return insns + 1;
3183 }
3184 }
3185 }
3186 break;
3187
3188 case IOR:
3189 case XOR:
3190 /* If we have IOR or XOR, and the constant can be loaded in a
3191 single instruction, and we can find a temporary to put it in,
3192 then this can be done in two instructions instead of 3-4. */
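/* E.g. (ARM mode, illustrative only): for x |= 0xFFFFFF00 the
   complement 0xFF is a valid immediate, so the constant can be loaded
   into a scratch register with a single MVN and then ORRed into the
   result, two instructions in total.  */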
3193 if (subtargets
3194 /* TARGET can't be NULL if SUBTARGETS is 0 */
3195 || (reload_completed && !reg_mentioned_p (target, source)))
3196 {
3197 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
3198 {
3199 if (generate)
3200 {
3201 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3202
3203 emit_constant_insn (cond,
3204 gen_rtx_SET (VOIDmode, sub,
3205 GEN_INT (val)));
3206 emit_constant_insn (cond,
3207 gen_rtx_SET (VOIDmode, target,
3208 gen_rtx_fmt_ee (code, mode,
3209 source, sub)));
3210 }
3211 return 2;
3212 }
3213 }
3214
3215 if (code == XOR)
3216 break;
3217
3218 /* Convert
3219 x = y | constant (which is composed of set_sign_bit_copies leading 1s
3220 followed by 0s, e.g. 0xfff00000) into
3221 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
3222
3223 This can be done in 2 instructions by using shifts with mov or mvn.
3224 E.g. for
3225 x = x | 0xfff00000;
3226 we generate:
3227 mvn r0, r0, asl #12
3228 mvn r0, r0, lsr #12 */
3229 if (set_sign_bit_copies > 8
3230 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
3231 {
3232 if (generate)
3233 {
3234 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3235 rtx shift = GEN_INT (set_sign_bit_copies);
3236
3237 emit_constant_insn
3238 (cond,
3239 gen_rtx_SET (VOIDmode, sub,
3240 gen_rtx_NOT (mode,
3241 gen_rtx_ASHIFT (mode,
3242 source,
3243 shift))));
3244 emit_constant_insn
3245 (cond,
3246 gen_rtx_SET (VOIDmode, target,
3247 gen_rtx_NOT (mode,
3248 gen_rtx_LSHIFTRT (mode, sub,
3249 shift))));
3250 }
3251 return 2;
3252 }
3253
3254 /* Convert
3255 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3256 to
3257 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3258
3259 E.g. for r0 = r0 | 0xfff
3260 mvn r0, r0, lsr #12
3261 mvn r0, r0, asl #12
3262
3263 */
3264 if (set_zero_bit_copies > 8
3265 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
3266 {
3267 if (generate)
3268 {
3269 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3270 rtx shift = GEN_INT (set_zero_bit_copies);
3271
3272 emit_constant_insn
3273 (cond,
3274 gen_rtx_SET (VOIDmode, sub,
3275 gen_rtx_NOT (mode,
3276 gen_rtx_LSHIFTRT (mode,
3277 source,
3278 shift))));
3279 emit_constant_insn
3280 (cond,
3281 gen_rtx_SET (VOIDmode, target,
3282 gen_rtx_NOT (mode,
3283 gen_rtx_ASHIFT (mode, sub,
3284 shift))));
3285 }
3286 return 2;
3287 }
3288
3289 /* This will never be reached for Thumb2 because orn is a valid
3290 instruction. This is for Thumb1 and the ARM 32 bit cases.
3291
3292 x = y | constant (such that ~constant is a valid constant)
3293 Transform this to
3294 x = ~(~y & ~constant).
3295 */
3296 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
3297 {
3298 if (generate)
3299 {
3300 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
3301 emit_constant_insn (cond,
3302 gen_rtx_SET (VOIDmode, sub,
3303 gen_rtx_NOT (mode, source)));
3304 source = sub;
3305 if (subtargets)
3306 sub = gen_reg_rtx (mode);
3307 emit_constant_insn (cond,
3308 gen_rtx_SET (VOIDmode, sub,
3309 gen_rtx_AND (mode, source,
3310 GEN_INT (temp1))));
3311 emit_constant_insn (cond,
3312 gen_rtx_SET (VOIDmode, target,
3313 gen_rtx_NOT (mode, sub)));
3314 }
3315 return 3;
3316 }
3317 break;
3318
3319 case AND:
3320 /* See if two shifts will do 2 or more insns' worth of work. */
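/* E.g. (illustrative, on cores that cannot use the UBFX shortcut
   above): AND with 0x000003FF has clear_sign_bit_copies == 22 and is
   done as a left shift by 22 followed by a logical right shift by 22,
   i.e. two insns.  */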
3321 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
3322 {
3323 HOST_WIDE_INT shift_mask = ((0xffffffff
3324 << (32 - clear_sign_bit_copies))
3325 & 0xffffffff);
3326
3327 if ((remainder | shift_mask) != 0xffffffff)
3328 {
3329 if (generate)
3330 {
3331 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3332 insns = arm_gen_constant (AND, mode, cond,
3333 remainder | shift_mask,
3334 new_src, source, subtargets, 1);
3335 source = new_src;
3336 }
3337 else
3338 {
3339 rtx targ = subtargets ? NULL_RTX : target;
3340 insns = arm_gen_constant (AND, mode, cond,
3341 remainder | shift_mask,
3342 targ, source, subtargets, 0);
3343 }
3344 }
3345
3346 if (generate)
3347 {
3348 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3349 rtx shift = GEN_INT (clear_sign_bit_copies);
3350
3351 emit_insn (gen_ashlsi3 (new_src, source, shift));
3352 emit_insn (gen_lshrsi3 (target, new_src, shift));
3353 }
3354
3355 return insns + 2;
3356 }
3357
3358 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
3359 {
3360 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
3361
3362 if ((remainder | shift_mask) != 0xffffffff)
3363 {
3364 if (generate)
3365 {
3366 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3367
3368 insns = arm_gen_constant (AND, mode, cond,
3369 remainder | shift_mask,
3370 new_src, source, subtargets, 1);
3371 source = new_src;
3372 }
3373 else
3374 {
3375 rtx targ = subtargets ? NULL_RTX : target;
3376
3377 insns = arm_gen_constant (AND, mode, cond,
3378 remainder | shift_mask,
3379 targ, source, subtargets, 0);
3380 }
3381 }
3382
3383 if (generate)
3384 {
3385 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3386 rtx shift = GEN_INT (clear_zero_bit_copies);
3387
3388 emit_insn (gen_lshrsi3 (new_src, source, shift));
3389 emit_insn (gen_ashlsi3 (target, new_src, shift));
3390 }
3391
3392 return insns + 2;
3393 }
3394
3395 break;
3396
3397 default:
3398 break;
3399 }
3400
3401 /* Calculate what the instruction sequence would be if we generated it
3402 normally, negated, or inverted. */
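/* Illustrative example (values chosen for illustration only): for a SET of
   the constant 0xffffff05, the plain value is not a valid rotated immediate,
   but its bitwise inverse 0xfa is, so a single MVN wins and the inverted
   sequence is chosen below.  */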
3403 if (code == AND)
3404 /* AND cannot be split into multiple insns, so invert and use BIC. */
3405 insns = 99;
3406 else
3407 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3408
3409 if (can_negate)
3410 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3411 &neg_immediates);
3412 else
3413 neg_insns = 99;
3414
3415 if (can_invert || final_invert)
3416 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3417 &inv_immediates);
3418 else
3419 inv_insns = 99;
3420
3421 immediates = &pos_immediates;
3422
3423 /* Is the negated immediate sequence more efficient? */
3424 if (neg_insns < insns && neg_insns <= inv_insns)
3425 {
3426 insns = neg_insns;
3427 immediates = &neg_immediates;
3428 }
3429 else
3430 can_negate = 0;
3431
3432 /* Is the inverted immediate sequence more efficient?
3433 We must allow for an extra NOT instruction for XOR operations, although
3434 there is some chance that the final 'mvn' will get optimized later. */
3435 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
3436 {
3437 insns = inv_insns;
3438 immediates = &inv_immediates;
3439 }
3440 else
3441 {
3442 can_invert = 0;
3443 final_invert = 0;
3444 }
3445
3446 /* Now output the chosen sequence as instructions. */
3447 if (generate)
3448 {
3449 for (i = 0; i < insns; i++)
3450 {
3451 rtx new_src, temp1_rtx;
3452
3453 temp1 = immediates->i[i];
3454
3455 if (code == SET || code == MINUS)
3456 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3457 else if ((final_invert || i < (insns - 1)) && subtargets)
3458 new_src = gen_reg_rtx (mode);
3459 else
3460 new_src = target;
3461
3462 if (can_invert)
3463 temp1 = ~temp1;
3464 else if (can_negate)
3465 temp1 = -temp1;
3466
3467 temp1 = trunc_int_for_mode (temp1, mode);
3468 temp1_rtx = GEN_INT (temp1);
3469
3470 if (code == SET)
3471 ;
3472 else if (code == MINUS)
3473 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3474 else
3475 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3476
3477 emit_constant_insn (cond,
3478 gen_rtx_SET (VOIDmode, new_src,
3479 temp1_rtx));
3480 source = new_src;
3481
3482 if (code == SET)
3483 {
3484 can_negate = can_invert;
3485 can_invert = 0;
3486 code = PLUS;
3487 }
3488 else if (code == MINUS)
3489 code = PLUS;
3490 }
3491 }
3492
3493 if (final_invert)
3494 {
3495 if (generate)
3496 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
3497 gen_rtx_NOT (mode, source)));
3498 insns++;
3499 }
3500
3501 return insns;
3502 }
3503
3504 /* Canonicalize a comparison so that we are more likely to recognize it.
3505 This can be done for a few constant compares, where we can make the
3506 immediate value easier to load. */
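/* For instance (values purely illustrative): in "x > -257" neither -257 nor
   257 is a valid ARM immediate, but 256 is, so the test is rewritten as
   "x >= -256", which the compare patterns can handle (typically via CMN).  */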
3507
3508 enum rtx_code
3509 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3510 {
3511 enum machine_mode mode;
3512 unsigned HOST_WIDE_INT i, maxval;
3513
3514 mode = GET_MODE (*op0);
3515 if (mode == VOIDmode)
3516 mode = GET_MODE (*op1);
3517
3518 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3519
3520 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3521 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3522 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3523 for GTU/LEU in Thumb mode. */
3524 if (mode == DImode)
3525 {
3526 rtx tem;
3527
3528 if (code == GT || code == LE
3529 || (!TARGET_ARM && (code == GTU || code == LEU)))
3530 {
3531 /* Missing comparison. First try to use an available
3532 comparison. */
3533 if (GET_CODE (*op1) == CONST_INT)
3534 {
3535 i = INTVAL (*op1);
3536 switch (code)
3537 {
3538 case GT:
3539 case LE:
3540 if (i != maxval
3541 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3542 {
3543 *op1 = GEN_INT (i + 1);
3544 return code == GT ? GE : LT;
3545 }
3546 break;
3547 case GTU:
3548 case LEU:
3549 if (i != ~((unsigned HOST_WIDE_INT) 0)
3550 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3551 {
3552 *op1 = GEN_INT (i + 1);
3553 return code == GTU ? GEU : LTU;
3554 }
3555 break;
3556 default:
3557 gcc_unreachable ();
3558 }
3559 }
3560
3561 /* If that did not work, swap the operands and the comparison code. */
3562 tem = *op0;
3563 *op0 = *op1;
3564 *op1 = tem;
3565 return swap_condition (code);
3566 }
3567
3568 return code;
3569 }
3570
3571 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3572 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3573 to facilitate possible combining with a cmp into 'ands'. */
3574 if (mode == SImode
3575 && GET_CODE (*op0) == ZERO_EXTEND
3576 && GET_CODE (XEXP (*op0, 0)) == SUBREG
3577 && GET_MODE (XEXP (*op0, 0)) == QImode
3578 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
3579 && subreg_lowpart_p (XEXP (*op0, 0))
3580 && *op1 == const0_rtx)
3581 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
3582 GEN_INT (255));
3583
3584 /* Comparisons smaller than DImode. Only adjust comparisons against
3585 an out-of-range constant. */
3586 if (GET_CODE (*op1) != CONST_INT
3587 || const_ok_for_arm (INTVAL (*op1))
3588 || const_ok_for_arm (- INTVAL (*op1)))
3589 return code;
3590
3591 i = INTVAL (*op1);
3592
3593 switch (code)
3594 {
3595 case EQ:
3596 case NE:
3597 return code;
3598
3599 case GT:
3600 case LE:
3601 if (i != maxval
3602 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3603 {
3604 *op1 = GEN_INT (i + 1);
3605 return code == GT ? GE : LT;
3606 }
3607 break;
3608
3609 case GE:
3610 case LT:
3611 if (i != ~maxval
3612 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3613 {
3614 *op1 = GEN_INT (i - 1);
3615 return code == GE ? GT : LE;
3616 }
3617 break;
3618
3619 case GTU:
3620 case LEU:
3621 if (i != ~((unsigned HOST_WIDE_INT) 0)
3622 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
3623 {
3624 *op1 = GEN_INT (i + 1);
3625 return code == GTU ? GEU : LTU;
3626 }
3627 break;
3628
3629 case GEU:
3630 case LTU:
3631 if (i != 0
3632 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
3633 {
3634 *op1 = GEN_INT (i - 1);
3635 return code == GEU ? GTU : LEU;
3636 }
3637 break;
3638
3639 default:
3640 gcc_unreachable ();
3641 }
3642
3643 return code;
3644 }
3645
3646
3647 /* Define how to find the value returned by a function. */
3648
3649 static rtx
3650 arm_function_value(const_tree type, const_tree func,
3651 bool outgoing ATTRIBUTE_UNUSED)
3652 {
3653 enum machine_mode mode;
3654 int unsignedp ATTRIBUTE_UNUSED;
3655 rtx r ATTRIBUTE_UNUSED;
3656
3657 mode = TYPE_MODE (type);
3658
3659 if (TARGET_AAPCS_BASED)
3660 return aapcs_allocate_return_reg (mode, type, func);
3661
3662 /* Promote integer types. */
3663 if (INTEGRAL_TYPE_P (type))
3664 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
3665
3666 /* Promote small structs returned in a register to full-word size
3667 for big-endian AAPCS. */
3668 if (arm_return_in_msb (type))
3669 {
3670 HOST_WIDE_INT size = int_size_in_bytes (type);
3671 if (size % UNITS_PER_WORD != 0)
3672 {
3673 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
3674 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
3675 }
3676 }
3677
3678 return arm_libcall_value_1 (mode);
3679 }
3680
3681 static int
3682 libcall_eq (const void *p1, const void *p2)
3683 {
3684 return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
3685 }
3686
3687 static hashval_t
3688 libcall_hash (const void *p1)
3689 {
3690 return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
3691 }
3692
3693 static void
3694 add_libcall (htab_t htab, rtx libcall)
3695 {
3696 *htab_find_slot (htab, libcall, INSERT) = libcall;
3697 }
3698
3699 static bool
3700 arm_libcall_uses_aapcs_base (const_rtx libcall)
3701 {
3702 static bool init_done = false;
3703 static htab_t libcall_htab;
3704
3705 if (!init_done)
3706 {
3707 init_done = true;
3708
3709 libcall_htab = htab_create (31, libcall_hash, libcall_eq,
3710 NULL);
3711 add_libcall (libcall_htab,
3712 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
3713 add_libcall (libcall_htab,
3714 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
3715 add_libcall (libcall_htab,
3716 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
3717 add_libcall (libcall_htab,
3718 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
3719
3720 add_libcall (libcall_htab,
3721 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
3722 add_libcall (libcall_htab,
3723 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
3724 add_libcall (libcall_htab,
3725 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
3726 add_libcall (libcall_htab,
3727 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
3728
3729 add_libcall (libcall_htab,
3730 convert_optab_libfunc (sext_optab, SFmode, HFmode));
3731 add_libcall (libcall_htab,
3732 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
3733 add_libcall (libcall_htab,
3734 convert_optab_libfunc (sfix_optab, SImode, DFmode));
3735 add_libcall (libcall_htab,
3736 convert_optab_libfunc (ufix_optab, SImode, DFmode));
3737 add_libcall (libcall_htab,
3738 convert_optab_libfunc (sfix_optab, DImode, DFmode));
3739 add_libcall (libcall_htab,
3740 convert_optab_libfunc (ufix_optab, DImode, DFmode));
3741 add_libcall (libcall_htab,
3742 convert_optab_libfunc (sfix_optab, DImode, SFmode));
3743 add_libcall (libcall_htab,
3744 convert_optab_libfunc (ufix_optab, DImode, SFmode));
3745
3746 /* Values from double-precision helper functions are returned in core
3747 registers if the selected core only supports single-precision
3748 arithmetic, even if we are using the hard-float ABI. The same is
3749 true for single-precision helpers, but we will never be using the
3750 hard-float ABI on a CPU which doesn't support single-precision
3751 operations in hardware. */
3752 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
3753 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
3754 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
3755 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
3756 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
3757 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
3758 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
3759 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
3760 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
3761 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
3762 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
3763 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
3764 SFmode));
3765 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
3766 DFmode));
3767 }
3768
3769 return libcall && htab_find (libcall_htab, libcall) != NULL;
3770 }
3771
3772 static rtx
3773 arm_libcall_value_1 (enum machine_mode mode)
3774 {
3775 if (TARGET_AAPCS_BASED)
3776 return aapcs_libcall_value (mode);
3777 else if (TARGET_IWMMXT_ABI
3778 && arm_vector_mode_supported_p (mode))
3779 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
3780 else
3781 return gen_rtx_REG (mode, ARG_REGISTER (1));
3782 }
3783
3784 /* Define how to find the value returned by a library function
3785 assuming the value has mode MODE. */
3786
3787 static rtx
3788 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
3789 {
3790 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
3791 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3792 {
3793 /* The following libcalls return their result in integer registers,
3794 even though they return a floating point value. */
3795 if (arm_libcall_uses_aapcs_base (libcall))
3796 return gen_rtx_REG (mode, ARG_REGISTER(1));
3797
3798 }
3799
3800 return arm_libcall_value_1 (mode);
3801 }
3802
3803 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
3804
3805 static bool
3806 arm_function_value_regno_p (const unsigned int regno)
3807 {
3808 if (regno == ARG_REGISTER (1)
3809 || (TARGET_32BIT
3810 && TARGET_AAPCS_BASED
3811 && TARGET_VFP
3812 && TARGET_HARD_FLOAT
3813 && regno == FIRST_VFP_REGNUM)
3814 || (TARGET_IWMMXT_ABI
3815 && regno == FIRST_IWMMXT_REGNUM))
3816 return true;
3817
3818 return false;
3819 }
3820
3821 /* Determine the amount of memory needed to store the possible return
3822 registers of an untyped call. */
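/* Roughly (illustrative only): the base 16 bytes cover r0-r3; the extra 32
   bytes cover d0-d3 for VFP hard-float returns; the extra 8 bytes cover one
   iWMMXt register.  */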
3823 int
3824 arm_apply_result_size (void)
3825 {
3826 int size = 16;
3827
3828 if (TARGET_32BIT)
3829 {
3830 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
3831 size += 32;
3832 if (TARGET_IWMMXT_ABI)
3833 size += 8;
3834 }
3835
3836 return size;
3837 }
3838
3839 /* Decide whether TYPE should be returned in memory (true)
3840 or in a register (false). FNTYPE is the type of the function making
3841 the call. */
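/* For example (illustrative only): under AAPCS a plain "int", or any
   non-aggregate no larger than a word, comes back in r0, whereas
   "struct { char c[8]; }" (8 bytes, not a co-processor candidate) is
   returned in memory.  */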
3842 static bool
3843 arm_return_in_memory (const_tree type, const_tree fntype)
3844 {
3845 HOST_WIDE_INT size;
3846
3847 size = int_size_in_bytes (type); /* Negative if not fixed size. */
3848
3849 if (TARGET_AAPCS_BASED)
3850 {
3851 /* Simple, non-aggregate types (i.e. not including vectors and
3852 complex) are always returned in a register (or registers).
3853 We don't care about which register here, so we can short-cut
3854 some of the detail. */
3855 if (!AGGREGATE_TYPE_P (type)
3856 && TREE_CODE (type) != VECTOR_TYPE
3857 && TREE_CODE (type) != COMPLEX_TYPE)
3858 return false;
3859
3860 /* Any return value that is no larger than one word can be
3861 returned in r0. */
3862 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
3863 return false;
3864
3865 /* Check any available co-processors to see if they accept the
3866 type as a register candidate (VFP, for example, can return
3867 some aggregates in consecutive registers). These aren't
3868 available if the call is variadic. */
3869 if (aapcs_select_return_coproc (type, fntype) >= 0)
3870 return false;
3871
3872 /* Vector values should be returned using ARM registers, not
3873 memory (unless they're over 16 bytes, which will break since
3874 we only have four call-clobbered registers to play with). */
3875 if (TREE_CODE (type) == VECTOR_TYPE)
3876 return (size < 0 || size > (4 * UNITS_PER_WORD));
3877
3878 /* The rest go in memory. */
3879 return true;
3880 }
3881
3882 if (TREE_CODE (type) == VECTOR_TYPE)
3883 return (size < 0 || size > (4 * UNITS_PER_WORD));
3884
3885 if (!AGGREGATE_TYPE_P (type)
3886 && (TREE_CODE (type) != VECTOR_TYPE))
3887 /* All simple types are returned in registers. */
3888 return false;
3889
3890 if (arm_abi != ARM_ABI_APCS)
3891 {
3892 /* ATPCS and later return aggregate types in memory only if they are
3893 larger than a word (or are variable size). */
3894 return (size < 0 || size > UNITS_PER_WORD);
3895 }
3896
3897 /* For the arm-wince targets we choose to be compatible with Microsoft's
3898 ARM and Thumb compilers, which always return aggregates in memory. */
3899 #ifndef ARM_WINCE
3900 /* All structures/unions bigger than one word are returned in memory.
3901 Also catch the case where int_size_in_bytes returns -1. In this case
3902 the aggregate is either huge or of variable size, and in either case
3903 we will want to return it via memory and not in a register. */
3904 if (size < 0 || size > UNITS_PER_WORD)
3905 return true;
3906
3907 if (TREE_CODE (type) == RECORD_TYPE)
3908 {
3909 tree field;
3910
3911 /* For a struct the APCS says that we only return in a register
3912 if the type is 'integer like' and every addressable element
3913 has an offset of zero. For practical purposes this means
3914 that the structure can have at most one non bit-field element
3915 and that this element must be the first one in the structure. */
3916
3917 /* Find the first field, ignoring non FIELD_DECL things which will
3918 have been created by C++. */
3919 for (field = TYPE_FIELDS (type);
3920 field && TREE_CODE (field) != FIELD_DECL;
3921 field = DECL_CHAIN (field))
3922 continue;
3923
3924 if (field == NULL)
3925 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3926
3927 /* Check that the first field is valid for returning in a register. */
3928
3929 /* ... Floats are not allowed */
3930 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3931 return true;
3932
3933 /* ... Aggregates that are not themselves valid for returning in
3934 a register are not allowed. */
3935 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3936 return true;
3937
3938 /* Now check the remaining fields, if any. Only bitfields are allowed,
3939 since they are not addressable. */
3940 for (field = DECL_CHAIN (field);
3941 field;
3942 field = DECL_CHAIN (field))
3943 {
3944 if (TREE_CODE (field) != FIELD_DECL)
3945 continue;
3946
3947 if (!DECL_BIT_FIELD_TYPE (field))
3948 return true;
3949 }
3950
3951 return false;
3952 }
3953
3954 if (TREE_CODE (type) == UNION_TYPE)
3955 {
3956 tree field;
3957
3958 /* Unions can be returned in registers if every element is
3959 integral, or can be returned in an integer register. */
3960 for (field = TYPE_FIELDS (type);
3961 field;
3962 field = DECL_CHAIN (field))
3963 {
3964 if (TREE_CODE (field) != FIELD_DECL)
3965 continue;
3966
3967 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3968 return true;
3969
3970 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3971 return true;
3972 }
3973
3974 return false;
3975 }
3976 #endif /* not ARM_WINCE */
3977
3978 /* Return all other types in memory. */
3979 return true;
3980 }
3981
3982 const struct pcs_attribute_arg
3983 {
3984 const char *arg;
3985 enum arm_pcs value;
3986 } pcs_attribute_args[] =
3987 {
3988 {"aapcs", ARM_PCS_AAPCS},
3989 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
3990 #if 0
3991 /* We could recognize these, but changes would be needed elsewhere
3992 * to implement them. */
3993 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
3994 {"atpcs", ARM_PCS_ATPCS},
3995 {"apcs", ARM_PCS_APCS},
3996 #endif
3997 {NULL, ARM_PCS_UNKNOWN}
3998 };
3999
4000 static enum arm_pcs
4001 arm_pcs_from_attribute (tree attr)
4002 {
4003 const struct pcs_attribute_arg *ptr;
4004 const char *arg;
4005
4006 /* Get the value of the argument. */
4007 if (TREE_VALUE (attr) == NULL_TREE
4008 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4009 return ARM_PCS_UNKNOWN;
4010
4011 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4012
4013 /* Check it against the list of known arguments. */
4014 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4015 if (streq (arg, ptr->arg))
4016 return ptr->value;
4017
4018 /* An unrecognized PCS variant. */
4019 return ARM_PCS_UNKNOWN;
4020 }
4021
4022 /* Get the PCS variant to use for this call. TYPE is the function's type
4023 specification, DECL is the specific declaration. DECL may be null if
4024 the call could be indirect or if this is a library call. */
4025 static enum arm_pcs
4026 arm_get_pcs_model (const_tree type, const_tree decl)
4027 {
4028 bool user_convention = false;
4029 enum arm_pcs user_pcs = arm_pcs_default;
4030 tree attr;
4031
4032 gcc_assert (type);
4033
4034 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4035 if (attr)
4036 {
4037 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4038 user_convention = true;
4039 }
4040
4041 if (TARGET_AAPCS_BASED)
4042 {
4043 /* Detect varargs functions. These always use the base rules
4044 (no argument is ever a candidate for a co-processor
4045 register). */
4046 bool base_rules = stdarg_p (type);
4047
4048 if (user_convention)
4049 {
4050 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4051 sorry ("non-AAPCS derived PCS variant");
4052 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4053 error ("variadic functions must use the base AAPCS variant");
4054 }
4055
4056 if (base_rules)
4057 return ARM_PCS_AAPCS;
4058 else if (user_convention)
4059 return user_pcs;
4060 else if (decl && flag_unit_at_a_time)
4061 {
4062 /* Local functions never leak outside this compilation unit,
4063 so we are free to use whatever conventions are
4064 appropriate. */
4065 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4066 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4067 if (i && i->local)
4068 return ARM_PCS_AAPCS_LOCAL;
4069 }
4070 }
4071 else if (user_convention && user_pcs != arm_pcs_default)
4072 sorry ("PCS variant");
4073
4074 /* For everything else we use the target's default. */
4075 return arm_pcs_default;
4076 }
4077
4078
4079 static void
4080 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4081 const_tree fntype ATTRIBUTE_UNUSED,
4082 rtx libcall ATTRIBUTE_UNUSED,
4083 const_tree fndecl ATTRIBUTE_UNUSED)
4084 {
4085 /* Record the unallocated VFP registers. */
4086 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4087 pcum->aapcs_vfp_reg_alloc = 0;
4088 }
4089
4090 /* Walk down the type tree of TYPE counting consecutive base elements.
4091 If *MODEP is VOIDmode, then set it to the first valid floating point
4092 type. If a non-floating point type is found, or if a floating point
4093 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4094 otherwise return the count in the sub-tree. */
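/* Illustrative example: for "struct { double x; double y; }" the walk sets
   *MODEP to DFmode and returns 2 (a homogeneous-aggregate candidate); for
   "struct { float f; double d; }" the mismatched element modes give -1.  */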
4095 static int
4096 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4097 {
4098 enum machine_mode mode;
4099 HOST_WIDE_INT size;
4100
4101 switch (TREE_CODE (type))
4102 {
4103 case REAL_TYPE:
4104 mode = TYPE_MODE (type);
4105 if (mode != DFmode && mode != SFmode)
4106 return -1;
4107
4108 if (*modep == VOIDmode)
4109 *modep = mode;
4110
4111 if (*modep == mode)
4112 return 1;
4113
4114 break;
4115
4116 case COMPLEX_TYPE:
4117 mode = TYPE_MODE (TREE_TYPE (type));
4118 if (mode != DFmode && mode != SFmode)
4119 return -1;
4120
4121 if (*modep == VOIDmode)
4122 *modep = mode;
4123
4124 if (*modep == mode)
4125 return 2;
4126
4127 break;
4128
4129 case VECTOR_TYPE:
4130 /* Use V2SImode and V4SImode as representatives of all 64-bit
4131 and 128-bit vector types, whether or not those modes are
4132 supported with the present options. */
4133 size = int_size_in_bytes (type);
4134 switch (size)
4135 {
4136 case 8:
4137 mode = V2SImode;
4138 break;
4139 case 16:
4140 mode = V4SImode;
4141 break;
4142 default:
4143 return -1;
4144 }
4145
4146 if (*modep == VOIDmode)
4147 *modep = mode;
4148
4149 /* Vector modes are considered to be opaque: two vectors are
4150 equivalent for the purposes of being homogeneous aggregates
4151 if they are the same size. */
4152 if (*modep == mode)
4153 return 1;
4154
4155 break;
4156
4157 case ARRAY_TYPE:
4158 {
4159 int count;
4160 tree index = TYPE_DOMAIN (type);
4161
4162 /* Can't handle incomplete types. */
4163 if (!COMPLETE_TYPE_P(type))
4164 return -1;
4165
4166 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4167 if (count == -1
4168 || !index
4169 || !TYPE_MAX_VALUE (index)
4170 || !host_integerp (TYPE_MAX_VALUE (index), 1)
4171 || !TYPE_MIN_VALUE (index)
4172 || !host_integerp (TYPE_MIN_VALUE (index), 1)
4173 || count < 0)
4174 return -1;
4175
4176 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
4177 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
4178
4179 /* There must be no padding. */
4180 if (!host_integerp (TYPE_SIZE (type), 1)
4181 || (tree_low_cst (TYPE_SIZE (type), 1)
4182 != count * GET_MODE_BITSIZE (*modep)))
4183 return -1;
4184
4185 return count;
4186 }
4187
4188 case RECORD_TYPE:
4189 {
4190 int count = 0;
4191 int sub_count;
4192 tree field;
4193
4194 /* Can't handle incomplete types. */
4195 if (!COMPLETE_TYPE_P(type))
4196 return -1;
4197
4198 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4199 {
4200 if (TREE_CODE (field) != FIELD_DECL)
4201 continue;
4202
4203 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4204 if (sub_count < 0)
4205 return -1;
4206 count += sub_count;
4207 }
4208
4209 /* There must be no padding. */
4210 if (!host_integerp (TYPE_SIZE (type), 1)
4211 || (tree_low_cst (TYPE_SIZE (type), 1)
4212 != count * GET_MODE_BITSIZE (*modep)))
4213 return -1;
4214
4215 return count;
4216 }
4217
4218 case UNION_TYPE:
4219 case QUAL_UNION_TYPE:
4220 {
4221 /* These aren't very interesting except in a degenerate case. */
4222 int count = 0;
4223 int sub_count;
4224 tree field;
4225
4226 /* Can't handle incomplete types. */
4227 if (!COMPLETE_TYPE_P(type))
4228 return -1;
4229
4230 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
4231 {
4232 if (TREE_CODE (field) != FIELD_DECL)
4233 continue;
4234
4235 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
4236 if (sub_count < 0)
4237 return -1;
4238 count = count > sub_count ? count : sub_count;
4239 }
4240
4241 /* There must be no padding. */
4242 if (!host_integerp (TYPE_SIZE (type), 1)
4243 || (tree_low_cst (TYPE_SIZE (type), 1)
4244 != count * GET_MODE_BITSIZE (*modep)))
4245 return -1;
4246
4247 return count;
4248 }
4249
4250 default:
4251 break;
4252 }
4253
4254 return -1;
4255 }
4256
4257 /* Return true if PCS_VARIANT should use VFP registers. */
4258 static bool
4259 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
4260 {
4261 if (pcs_variant == ARM_PCS_AAPCS_VFP)
4262 {
4263 static bool seen_thumb1_vfp = false;
4264
4265 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4266 {
4267 sorry ("Thumb-1 hard-float VFP ABI");
4268 /* sorry() is not immediately fatal, so only display this once. */
4269 seen_thumb1_vfp = true;
4270 }
4271
4272 return true;
4273 }
4274
4275 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
4276 return false;
4277
4278 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4279 (TARGET_VFP_DOUBLE || !is_double));
4280 }
4281
4282 /* Return true if an argument whose type is TYPE, or mode is MODE, is
4283 suitable for passing or returning in VFP registers for the PCS
4284 variant selected. If it is, then *BASE_MODE is updated to contain
4285 a machine mode describing each element of the argument's type and
4286 *COUNT to hold the number of such elements. */
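/* E.g. (illustrative): with no type information, a _Complex double argument
   (MODE == DCmode) yields *BASE_MODE == DFmode and *COUNT == 2, while a
   bare SFmode scalar yields SFmode and a count of 1.  */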
4287 static bool
4288 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
4289 enum machine_mode mode, const_tree type,
4290 enum machine_mode *base_mode, int *count)
4291 {
4292 enum machine_mode new_mode = VOIDmode;
4293
4294 /* If we have the type information, prefer that to working things
4295 out from the mode. */
4296 if (type)
4297 {
4298 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
4299
4300 if (ag_count > 0 && ag_count <= 4)
4301 *count = ag_count;
4302 else
4303 return false;
4304 }
4305 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
4306 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4307 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
4308 {
4309 *count = 1;
4310 new_mode = mode;
4311 }
4312 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
4313 {
4314 *count = 2;
4315 new_mode = (mode == DCmode ? DFmode : SFmode);
4316 }
4317 else
4318 return false;
4319
4320
4321 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
4322 return false;
4323
4324 *base_mode = new_mode;
4325 return true;
4326 }
4327
4328 static bool
4329 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
4330 enum machine_mode mode, const_tree type)
4331 {
4332 int count ATTRIBUTE_UNUSED;
4333 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
4334
4335 if (!use_vfp_abi (pcs_variant, false))
4336 return false;
4337 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4338 &ag_mode, &count);
4339 }
4340
4341 static bool
4342 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4343 const_tree type)
4344 {
4345 if (!use_vfp_abi (pcum->pcs_variant, false))
4346 return false;
4347
4348 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
4349 &pcum->aapcs_vfp_rmode,
4350 &pcum->aapcs_vfp_rcount);
4351 }
4352
4353 static bool
4354 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4355 const_tree type ATTRIBUTE_UNUSED)
4356 {
4357 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
4358 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
4359 int regno;
4360
4361 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
4362 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
4363 {
4364 pcum->aapcs_vfp_reg_alloc = mask << regno;
4365 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4366 {
4367 int i;
4368 int rcount = pcum->aapcs_vfp_rcount;
4369 int rshift = shift;
4370 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
4371 rtx par;
4372 if (!TARGET_NEON)
4373 {
4374 /* Avoid using unsupported vector modes. */
4375 if (rmode == V2SImode)
4376 rmode = DImode;
4377 else if (rmode == V4SImode)
4378 {
4379 rmode = DImode;
4380 rcount *= 2;
4381 rshift /= 2;
4382 }
4383 }
4384 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
4385 for (i = 0; i < rcount; i++)
4386 {
4387 rtx tmp = gen_rtx_REG (rmode,
4388 FIRST_VFP_REGNUM + regno + i * rshift);
4389 tmp = gen_rtx_EXPR_LIST
4390 (VOIDmode, tmp,
4391 GEN_INT (i * GET_MODE_SIZE (rmode)));
4392 XVECEXP (par, 0, i) = tmp;
4393 }
4394
4395 pcum->aapcs_reg = par;
4396 }
4397 else
4398 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
4399 return true;
4400 }
4401 return false;
4402 }
4403
4404 static rtx
4405 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
4406 enum machine_mode mode,
4407 const_tree type ATTRIBUTE_UNUSED)
4408 {
4409 if (!use_vfp_abi (pcs_variant, false))
4410 return NULL;
4411
4412 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
4413 {
4414 int count;
4415 enum machine_mode ag_mode;
4416 int i;
4417 rtx par;
4418 int shift;
4419
4420 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
4421 &ag_mode, &count);
4422
4423 if (!TARGET_NEON)
4424 {
4425 if (ag_mode == V2SImode)
4426 ag_mode = DImode;
4427 else if (ag_mode == V4SImode)
4428 {
4429 ag_mode = DImode;
4430 count *= 2;
4431 }
4432 }
4433 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
4434 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
4435 for (i = 0; i < count; i++)
4436 {
4437 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
4438 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
4439 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
4440 XVECEXP (par, 0, i) = tmp;
4441 }
4442
4443 return par;
4444 }
4445
4446 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
4447 }
4448
4449 static void
4450 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4451 enum machine_mode mode ATTRIBUTE_UNUSED,
4452 const_tree type ATTRIBUTE_UNUSED)
4453 {
4454 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
4455 pcum->aapcs_vfp_reg_alloc = 0;
4456 return;
4457 }
4458
4459 #define AAPCS_CP(X) \
4460 { \
4461 aapcs_ ## X ## _cum_init, \
4462 aapcs_ ## X ## _is_call_candidate, \
4463 aapcs_ ## X ## _allocate, \
4464 aapcs_ ## X ## _is_return_candidate, \
4465 aapcs_ ## X ## _allocate_return_reg, \
4466 aapcs_ ## X ## _advance \
4467 }
4468
4469 /* Table of co-processors that can be used to pass arguments in
4470 registers. Ideally no argument should be a candidate for more than
4471 one co-processor table entry, but the table is processed in order
4472 and stops after the first match. If that entry then fails to put
4473 the argument into a co-processor register, the argument will go on
4474 the stack. */
4475 static struct
4476 {
4477 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4478 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
4479
4480 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4481 BLKmode) is a candidate for this co-processor's registers; this
4482 function should ignore any position-dependent state in
4483 CUMULATIVE_ARGS and only use call-type dependent information. */
4484 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4485
4486 /* Return true if the argument does get a co-processor register; it
4487 should set aapcs_reg to an RTX of the register allocated as is
4488 required for a return from FUNCTION_ARG. */
4489 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4490
4491 /* Return true if a result of mode MODE (or type TYPE if MODE is
4492 BLKmode) can be returned in this co-processor's registers. */
4493 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
4494
4495 /* Allocate and return an RTX element to hold the return type of a
4496 call, this routine must not fail and will only be called if
4497 is_return_candidate returned true with the same parameters. */
4498 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
4499
4500 /* Finish processing this argument and prepare to start processing
4501 the next one. */
4502 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
4503 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
4504 {
4505 AAPCS_CP(vfp)
4506 };
4507
4508 #undef AAPCS_CP
4509
4510 static int
4511 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4512 const_tree type)
4513 {
4514 int i;
4515
4516 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4517 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4518 return i;
4519
4520 return -1;
4521 }
4522
4523 static int
4524 aapcs_select_return_coproc (const_tree type, const_tree fntype)
4525 {
4526 /* We aren't passed a decl, so we can't check that a call is local.
4527 However, it isn't clear that that would be a win anyway, since it
4528 might limit some tail-calling opportunities. */
4529 enum arm_pcs pcs_variant;
4530
4531 if (fntype)
4532 {
4533 const_tree fndecl = NULL_TREE;
4534
4535 if (TREE_CODE (fntype) == FUNCTION_DECL)
4536 {
4537 fndecl = fntype;
4538 fntype = TREE_TYPE (fntype);
4539 }
4540
4541 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4542 }
4543 else
4544 pcs_variant = arm_pcs_default;
4545
4546 if (pcs_variant != ARM_PCS_AAPCS)
4547 {
4548 int i;
4549
4550 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4551 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
4552 TYPE_MODE (type),
4553 type))
4554 return i;
4555 }
4556 return -1;
4557 }
4558
4559 static rtx
4560 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
4561 const_tree fntype)
4562 {
4563 /* We aren't passed a decl, so we can't check that a call is local.
4564 However, it isn't clear that that would be a win anyway, since it
4565 might limit some tail-calling opportunities. */
4566 enum arm_pcs pcs_variant;
4567 int unsignedp ATTRIBUTE_UNUSED;
4568
4569 if (fntype)
4570 {
4571 const_tree fndecl = NULL_TREE;
4572
4573 if (TREE_CODE (fntype) == FUNCTION_DECL)
4574 {
4575 fndecl = fntype;
4576 fntype = TREE_TYPE (fntype);
4577 }
4578
4579 pcs_variant = arm_get_pcs_model (fntype, fndecl);
4580 }
4581 else
4582 pcs_variant = arm_pcs_default;
4583
4584 /* Promote integer types. */
4585 if (type && INTEGRAL_TYPE_P (type))
4586 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
4587
4588 if (pcs_variant != ARM_PCS_AAPCS)
4589 {
4590 int i;
4591
4592 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4593 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
4594 type))
4595 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
4596 mode, type);
4597 }
4598
4599 /* Promote small structs returned in a register to full-word size
4600 for big-endian AAPCS. */
4601 if (type && arm_return_in_msb (type))
4602 {
4603 HOST_WIDE_INT size = int_size_in_bytes (type);
4604 if (size % UNITS_PER_WORD != 0)
4605 {
4606 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4607 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4608 }
4609 }
4610
4611 return gen_rtx_REG (mode, R0_REGNUM);
4612 }
4613
4614 static rtx
4615 aapcs_libcall_value (enum machine_mode mode)
4616 {
4617 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
4618 && GET_MODE_SIZE (mode) <= 4)
4619 mode = SImode;
4620
4621 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
4622 }
4623
4624 /* Lay out a function argument using the AAPCS rules. The rule
4625 numbers referred to here are those in the AAPCS. */
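/* A worked example (illustrative only): for "f (int a, double d)" under the
   base (soft-float) rules, A takes r0 (C4); D requires doubleword alignment,
   so C3 rounds the NCRN up to 2 and D occupies r2-r3; a further integer
   argument would then be placed on the stack (C6-C8).  */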
4626 static void
4627 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4628 const_tree type, bool named)
4629 {
4630 int nregs, nregs2;
4631 int ncrn;
4632
4633 /* We only need to do this once per argument. */
4634 if (pcum->aapcs_arg_processed)
4635 return;
4636
4637 pcum->aapcs_arg_processed = true;
4638
4639 /* Special case: if named is false then we are handling an incoming
4640 anonymous argument which is on the stack. */
4641 if (!named)
4642 return;
4643
4644 /* Is this a potential co-processor register candidate? */
4645 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4646 {
4647 int slot = aapcs_select_call_coproc (pcum, mode, type);
4648 pcum->aapcs_cprc_slot = slot;
4649
4650 /* We don't have to apply any of the rules from part B of the
4651 preparation phase, these are handled elsewhere in the
4652 compiler. */
4653
4654 if (slot >= 0)
4655 {
4656 /* A Co-processor register candidate goes either in its own
4657 class of registers or on the stack. */
4658 if (!pcum->aapcs_cprc_failed[slot])
4659 {
4660 /* C1.cp - Try to allocate the argument to co-processor
4661 registers. */
4662 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
4663 return;
4664
4665 /* C2.cp - Put the argument on the stack and note that we
4666 can't assign any more candidates in this slot. We also
4667 need to note that we have allocated stack space, so that
4668 we won't later try to split a non-cprc candidate between
4669 core registers and the stack. */
4670 pcum->aapcs_cprc_failed[slot] = true;
4671 pcum->can_split = false;
4672 }
4673
4674 /* We didn't get a register, so this argument goes on the
4675 stack. */
4676 gcc_assert (pcum->can_split == false);
4677 return;
4678 }
4679 }
4680
4681 /* C3 - For double-word aligned arguments, round the NCRN up to the
4682 next even number. */
4683 ncrn = pcum->aapcs_ncrn;
4684 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
4685 ncrn++;
4686
4687 nregs = ARM_NUM_REGS2(mode, type);
4688
4689 /* Sigh, this test should really assert that nregs > 0, but a GCC
4690 extension allows empty structs and then gives them zero size; it
4691 then allows such a structure to be passed by value. For some of
4692 the code below we have to pretend that such an argument has
4693 non-zero size so that we 'locate' it correctly either in
4694 registers or on the stack. */
4695 gcc_assert (nregs >= 0);
4696
4697 nregs2 = nregs ? nregs : 1;
4698
4699 /* C4 - Argument fits entirely in core registers. */
4700 if (ncrn + nregs2 <= NUM_ARG_REGS)
4701 {
4702 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4703 pcum->aapcs_next_ncrn = ncrn + nregs;
4704 return;
4705 }
4706
4707 /* C5 - Some core registers left and there are no arguments already
4708 on the stack: split this argument between the remaining core
4709 registers and the stack. */
4710 if (ncrn < NUM_ARG_REGS && pcum->can_split)
4711 {
4712 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
4713 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4714 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
4715 return;
4716 }
4717
4718 /* C6 - NCRN is set to 4. */
4719 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
4720
4721 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
4722 return;
4723 }
4724
4725 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4726 for a call to a function whose data type is FNTYPE.
4727 For a library call, FNTYPE is NULL. */
4728 void
4729 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
4730 rtx libname,
4731 tree fndecl ATTRIBUTE_UNUSED)
4732 {
4733 /* Determine the calling convention to use for this call. */
4734 if (fntype)
4735 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
4736 else
4737 pcum->pcs_variant = arm_pcs_default;
4738
4739 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4740 {
4741 if (arm_libcall_uses_aapcs_base (libname))
4742 pcum->pcs_variant = ARM_PCS_AAPCS;
4743
4744 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
4745 pcum->aapcs_reg = NULL_RTX;
4746 pcum->aapcs_partial = 0;
4747 pcum->aapcs_arg_processed = false;
4748 pcum->aapcs_cprc_slot = -1;
4749 pcum->can_split = true;
4750
4751 if (pcum->pcs_variant != ARM_PCS_AAPCS)
4752 {
4753 int i;
4754
4755 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4756 {
4757 pcum->aapcs_cprc_failed[i] = false;
4758 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
4759 }
4760 }
4761 return;
4762 }
4763
4764 /* Legacy ABIs */
4765
4766 /* On the ARM, the offset starts at 0. */
4767 pcum->nregs = 0;
4768 pcum->iwmmxt_nregs = 0;
4769 pcum->can_split = true;
4770
4771 /* Varargs vectors are treated the same as long long.
4772 named_count avoids having to change the way arm handles 'named'. */
4773 pcum->named_count = 0;
4774 pcum->nargs = 0;
4775
4776 if (TARGET_REALLY_IWMMXT && fntype)
4777 {
4778 tree fn_arg;
4779
4780 for (fn_arg = TYPE_ARG_TYPES (fntype);
4781 fn_arg;
4782 fn_arg = TREE_CHAIN (fn_arg))
4783 pcum->named_count += 1;
4784
4785 if (! pcum->named_count)
4786 pcum->named_count = INT_MAX;
4787 }
4788 }
4789
4790
4791 /* Return true if mode/type need doubleword alignment. */
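/* Under AAPCS this is the case for, e.g., "long long" and "double" (their
   64-bit natural alignment exceeds the 32-bit PARM_BOUNDARY) and for any
   type carrying __attribute__ ((aligned (8))); illustrative, not
   exhaustive.  */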
4792 static bool
4793 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4794 {
4795 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4796 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4797 }
4798
4799
4800 /* Determine where to put an argument to a function.
4801 Value is zero to push the argument on the stack,
4802 or a hard register in which to store the argument.
4803
4804 MODE is the argument's machine mode.
4805 TYPE is the data type of the argument (as a tree).
4806 This is null for libcalls where that information may
4807 not be available.
4808 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4809 the preceding args and about the function being called.
4810 NAMED is nonzero if this argument is a named parameter
4811 (otherwise it is an extra parameter matching an ellipsis).
4812
4813 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4814 other arguments are passed on the stack. If (NAMED == 0) (which happens
4815 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4816 defined), say it is passed on the stack (function_prologue will
4817 indeed make it be passed on the stack if necessary). */
4818
4819 static rtx
4820 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
4821 const_tree type, bool named)
4822 {
4823 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4824 int nregs;
4825
4826 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4827 a call insn (op3 of a call_value insn). */
4828 if (mode == VOIDmode)
4829 return const0_rtx;
4830
4831 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4832 {
4833 aapcs_layout_arg (pcum, mode, type, named);
4834 return pcum->aapcs_reg;
4835 }
4836
4837 /* Varargs vectors are treated the same as long long.
4838 named_count avoids having to change the way arm handles 'named'. */
4839 if (TARGET_IWMMXT_ABI
4840 && arm_vector_mode_supported_p (mode)
4841 && pcum->named_count > pcum->nargs + 1)
4842 {
4843 if (pcum->iwmmxt_nregs <= 9)
4844 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
4845 else
4846 {
4847 pcum->can_split = false;
4848 return NULL_RTX;
4849 }
4850 }
4851
4852 /* Put doubleword aligned quantities in even register pairs. */
4853 if (pcum->nregs & 1
4854 && ARM_DOUBLEWORD_ALIGN
4855 && arm_needs_doubleword_align (mode, type))
4856 pcum->nregs++;
4857
4858 /* Only allow splitting an arg between regs and memory if all preceding
4859 args were allocated to regs. For args passed by reference we only count
4860 the reference pointer. */
4861 if (pcum->can_split)
4862 nregs = 1;
4863 else
4864 nregs = ARM_NUM_REGS2 (mode, type);
4865
4866 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
4867 return NULL_RTX;
4868
4869 return gen_rtx_REG (mode, pcum->nregs);
4870 }
4871
4872 static unsigned int
4873 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4874 {
4875 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4876 ? DOUBLEWORD_ALIGNMENT
4877 : PARM_BOUNDARY);
4878 }
4879
4880 static int
4881 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
4882 tree type, bool named)
4883 {
4884 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4885 int nregs = pcum->nregs;
4886
4887 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4888 {
4889 aapcs_layout_arg (pcum, mode, type, named);
4890 return pcum->aapcs_partial;
4891 }
4892
4893 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
4894 return 0;
4895
4896 if (NUM_ARG_REGS > nregs
4897 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
4898 && pcum->can_split)
4899 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4900
4901 return 0;
4902 }
4903
4904 /* Update the data in PCUM to advance over an argument
4905 of mode MODE and data type TYPE.
4906 (TYPE is null for libcalls where that information may not be available.) */
4907
4908 static void
4909 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
4910 const_tree type, bool named)
4911 {
4912 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
4913
4914 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4915 {
4916 aapcs_layout_arg (pcum, mode, type, named);
4917
4918 if (pcum->aapcs_cprc_slot >= 0)
4919 {
4920 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
4921 type);
4922 pcum->aapcs_cprc_slot = -1;
4923 }
4924
4925 /* Generic stuff. */
4926 pcum->aapcs_arg_processed = false;
4927 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
4928 pcum->aapcs_reg = NULL_RTX;
4929 pcum->aapcs_partial = 0;
4930 }
4931 else
4932 {
4933 pcum->nargs += 1;
4934 if (arm_vector_mode_supported_p (mode)
4935 && pcum->named_count > pcum->nargs
4936 && TARGET_IWMMXT_ABI)
4937 pcum->iwmmxt_nregs += 1;
4938 else
4939 pcum->nregs += ARM_NUM_REGS2 (mode, type);
4940 }
4941 }
4942
4943 /* Variable sized types are passed by reference. This is a GCC
4944 extension to the ARM ABI. */
4945
4946 static bool
4947 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
4948 enum machine_mode mode ATTRIBUTE_UNUSED,
4949 const_tree type, bool named ATTRIBUTE_UNUSED)
4950 {
4951 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4952 }
4953 \f
4954 /* Encode the current state of the #pragma [no_]long_calls. */
4955 typedef enum
4956 {
4957 OFF, /* No #pragma [no_]long_calls is in effect. */
4958 LONG, /* #pragma long_calls is in effect. */
4959 SHORT /* #pragma no_long_calls is in effect. */
4960 } arm_pragma_enum;
4961
4962 static arm_pragma_enum arm_pragma_long_calls = OFF;
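
/* Typical usage (illustrative):

     #pragma long_calls
     void far_away (void);         -- declarations here get "long_call"
     #pragma long_calls_off

   "#pragma no_long_calls" instead marks subsequent declarations
   "short_call".  */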
4963
4964 void
4965 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4966 {
4967 arm_pragma_long_calls = LONG;
4968 }
4969
4970 void
4971 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4972 {
4973 arm_pragma_long_calls = SHORT;
4974 }
4975
4976 void
4977 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
4978 {
4979 arm_pragma_long_calls = OFF;
4980 }
4981 \f
4982 /* Handle an attribute requiring a FUNCTION_DECL;
4983 arguments as in struct attribute_spec.handler. */
4984 static tree
4985 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
4986 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
4987 {
4988 if (TREE_CODE (*node) != FUNCTION_DECL)
4989 {
4990 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4991 name);
4992 *no_add_attrs = true;
4993 }
4994
4995 return NULL_TREE;
4996 }
4997
4998 /* Handle an "interrupt" or "isr" attribute;
4999 arguments as in struct attribute_spec.handler. */
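/* Typical user-level spelling (illustrative):
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
   the optional string argument is decoded by arm_isr_value.  */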
5000 static tree
5001 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5002 bool *no_add_attrs)
5003 {
5004 if (DECL_P (*node))
5005 {
5006 if (TREE_CODE (*node) != FUNCTION_DECL)
5007 {
5008 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5009 name);
5010 *no_add_attrs = true;
5011 }
5012 /* FIXME: the argument if any is checked for type attributes;
5013 should it be checked for decl ones? */
5014 }
5015 else
5016 {
5017 if (TREE_CODE (*node) == FUNCTION_TYPE
5018 || TREE_CODE (*node) == METHOD_TYPE)
5019 {
5020 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5021 {
5022 warning (OPT_Wattributes, "%qE attribute ignored",
5023 name);
5024 *no_add_attrs = true;
5025 }
5026 }
5027 else if (TREE_CODE (*node) == POINTER_TYPE
5028 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5029 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5030 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5031 {
5032 *node = build_variant_type_copy (*node);
5033 TREE_TYPE (*node) = build_type_attribute_variant
5034 (TREE_TYPE (*node),
5035 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5036 *no_add_attrs = true;
5037 }
5038 else
5039 {
5040 /* Possibly pass this attribute on from the type to a decl. */
5041 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5042 | (int) ATTR_FLAG_FUNCTION_NEXT
5043 | (int) ATTR_FLAG_ARRAY_NEXT))
5044 {
5045 *no_add_attrs = true;
5046 return tree_cons (name, args, NULL_TREE);
5047 }
5048 else
5049 {
5050 warning (OPT_Wattributes, "%qE attribute ignored",
5051 name);
5052 }
5053 }
5054 }
5055
5056 return NULL_TREE;
5057 }
5058
5059 /* Handle a "pcs" attribute; arguments as in struct
5060 attribute_spec.handler. */
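/* E.g. (illustrative): "double f (double) __attribute__ ((pcs ("aapcs")));"
   requests the base (soft-float) argument-passing variant for F even when
   the default is "aapcs-vfp".  */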
5061 static tree
5062 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5063 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5064 {
5065 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5066 {
5067 warning (OPT_Wattributes, "%qE attribute ignored", name);
5068 *no_add_attrs = true;
5069 }
5070 return NULL_TREE;
5071 }
5072
5073 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5074 /* Handle the "notshared" attribute. This attribute is another way of
5075 requesting hidden visibility. ARM's compiler supports
5076 "__declspec(notshared)"; we support the same thing via an
5077 attribute. */
5078
5079 static tree
5080 arm_handle_notshared_attribute (tree *node,
5081 tree name ATTRIBUTE_UNUSED,
5082 tree args ATTRIBUTE_UNUSED,
5083 int flags ATTRIBUTE_UNUSED,
5084 bool *no_add_attrs)
5085 {
5086 tree decl = TYPE_NAME (*node);
5087
5088 if (decl)
5089 {
5090 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5091 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5092 *no_add_attrs = false;
5093 }
5094 return NULL_TREE;
5095 }
5096 #endif
5097
5098 /* Return 0 if the attributes for two types are incompatible, 1 if they
5099 are compatible, and 2 if they are nearly compatible (which causes a
5100 warning to be generated). */
5101 static int
5102 arm_comp_type_attributes (const_tree type1, const_tree type2)
5103 {
5104 int l1, l2, s1, s2;
5105
5106 /* Check for mismatch of non-default calling convention. */
5107 if (TREE_CODE (type1) != FUNCTION_TYPE)
5108 return 1;
5109
5110 /* Check for mismatched call attributes. */
5111 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5112 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5113 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5114 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5115
5116 /* Only bother to check if an attribute is defined. */
5117 if (l1 | l2 | s1 | s2)
5118 {
5119 /* If one type has an attribute, the other must have the same attribute. */
5120 if ((l1 != l2) || (s1 != s2))
5121 return 0;
5122
5123 /* Disallow mixed attributes. */
5124 if ((l1 & s2) || (l2 & s1))
5125 return 0;
5126 }
5127
5128 /* Check for mismatched ISR attribute. */
5129 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5130 if (! l1)
5131 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5132 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5133 if (! l2)
5134 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5135 if (l1 != l2)
5136 return 0;
5137
5138 return 1;
5139 }
5140
5141 /* Assign default attributes to a newly defined type. This is used to
5142 set short_call/long_call attributes for function types of
5143 functions defined inside corresponding #pragma scopes. */
5144 static void
5145 arm_set_default_type_attributes (tree type)
5146 {
5147 /* Add __attribute__ ((long_call)) to all functions when inside
5148 #pragma long_calls, or __attribute__ ((short_call)) when inside
5149 #pragma no_long_calls. */
5150 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5151 {
5152 tree type_attr_list, attr_name;
5153 type_attr_list = TYPE_ATTRIBUTES (type);
5154
5155 if (arm_pragma_long_calls == LONG)
5156 attr_name = get_identifier ("long_call");
5157 else if (arm_pragma_long_calls == SHORT)
5158 attr_name = get_identifier ("short_call");
5159 else
5160 return;
5161
5162 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5163 TYPE_ATTRIBUTES (type) = type_attr_list;
5164 }
5165 }
5166 \f
5167 /* Return true if DECL is known to be linked into section SECTION. */
5168
5169 static bool
5170 arm_function_in_section_p (tree decl, section *section)
5171 {
5172 /* We can only be certain about functions defined in the same
5173 compilation unit. */
5174 if (!TREE_STATIC (decl))
5175 return false;
5176
5177 /* Make sure that SYMBOL always binds to the definition in this
5178 compilation unit. */
5179 if (!targetm.binds_local_p (decl))
5180 return false;
5181
5182 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
5183 if (!DECL_SECTION_NAME (decl))
5184 {
5185 /* Make sure that we will not create a unique section for DECL. */
5186 if (flag_function_sections || DECL_ONE_ONLY (decl))
5187 return false;
5188 }
5189
5190 return function_section (decl) == section;
5191 }
5192
5193 /* Return nonzero if a 32-bit "long_call" should be generated for
5194 a call from the current function to DECL. We generate a long_call
5195 if the function:
5196
5197 a. has an __attribute__ ((long_call))
5198 or b. is within the scope of a #pragma long_calls
5199 or c. the -mlong-calls command line switch has been specified
5200
5201 However we do not generate a long call if the function:
5202
5203 d. has an __attribute__ ((short_call))
5204 or e. is inside the scope of a #pragma no_long_calls
5205 or f. is defined in the same section as the current function. */
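/* Criterion (a) is typically triggered by a declaration such as
   (illustrative): extern void far_func (void) __attribute__ ((long_call)); */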
5206
5207 bool
5208 arm_is_long_call_p (tree decl)
5209 {
5210 tree attrs;
5211
5212 if (!decl)
5213 return TARGET_LONG_CALLS;
5214
5215 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
5216 if (lookup_attribute ("short_call", attrs))
5217 return false;
5218
5219 /* For "f", be conservative, and only cater for cases in which the
5220 whole of the current function is placed in the same section. */
5221 if (!flag_reorder_blocks_and_partition
5222 && TREE_CODE (decl) == FUNCTION_DECL
5223 && arm_function_in_section_p (decl, current_function_section ()))
5224 return false;
5225
5226 if (lookup_attribute ("long_call", attrs))
5227 return true;
5228
5229 return TARGET_LONG_CALLS;
5230 }
5231
5232 /* Return nonzero if it is ok to make a tail-call to DECL. */
5233 static bool
5234 arm_function_ok_for_sibcall (tree decl, tree exp)
5235 {
5236 unsigned long func_type;
5237
5238 if (cfun->machine->sibcall_blocked)
5239 return false;
5240
5241 /* Never tailcall something for which we have no decl, or if we
5242 are generating code for Thumb-1. */
5243 if (decl == NULL || TARGET_THUMB1)
5244 return false;
5245
5246 /* The PIC register is live on entry to VxWorks PLT entries, so we
5247 must make the call before restoring the PIC register. */
5248 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
5249 return false;
5250
5251 /* Cannot tail-call to long calls, since these are out of range of
5252 a branch instruction. */
5253 if (arm_is_long_call_p (decl))
5254 return false;
5255
5256 /* If we are interworking and the function is not declared static
5257 then we can't tail-call it unless we know that it exists in this
5258 compilation unit (since it might be a Thumb routine). */
5259 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
5260 return false;
5261
5262 func_type = arm_current_func_type ();
5263 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5264 if (IS_INTERRUPT (func_type))
5265 return false;
5266
5267 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5268 {
5269 /* Check that the return value locations are the same. For
5270 example that we aren't returning a value from the sibling in
5271 a VFP register but then need to transfer it to a core
5272 register. */
5273 rtx a, b;
5274
5275 a = arm_function_value (TREE_TYPE (exp), decl, false);
5276 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5277 cfun->decl, false);
5278 if (!rtx_equal_p (a, b))
5279 return false;
5280 }
5281
5282 /* Never tailcall if function may be called with a misaligned SP. */
5283 if (IS_STACKALIGN (func_type))
5284 return false;
5285
5286 /* The AAPCS says that, on bare-metal, calls to unresolved weak
5287 references should become a NOP. Don't convert such calls into
5288 sibling calls. */
5289 if (TARGET_AAPCS_BASED
5290 && arm_abi == ARM_ABI_AAPCS
5291 && DECL_WEAK (decl))
5292 return false;
5293
5294 /* Everything else is ok. */
5295 return true;
5296 }
5297
5298 \f
5299 /* Addressing mode support functions. */
5300
5301 /* Return nonzero if X is a legitimate immediate operand when compiling
5302 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5303 int
5304 legitimate_pic_operand_p (rtx x)
5305 {
5306 if (GET_CODE (x) == SYMBOL_REF
5307 || (GET_CODE (x) == CONST
5308 && GET_CODE (XEXP (x, 0)) == PLUS
5309 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5310 return 0;
5311
5312 return 1;
5313 }
5314
5315 /* Record that the current function needs a PIC register. Initialize
5316 cfun->machine->pic_reg if we have not already done so. */
5317
5318 static void
5319 require_pic_register (void)
5320 {
5321 /* A lot of the logic here is made obscure by the fact that this
5322 routine gets called as part of the rtx cost estimation process.
5323 We don't want those calls to affect any assumptions about the real
5324 function; and further, we can't call entry_of_function() until we
5325 start the real expansion process. */
5326 if (!crtl->uses_pic_offset_table)
5327 {
5328 gcc_assert (can_create_pseudo_p ());
5329 if (arm_pic_register != INVALID_REGNUM)
5330 {
5331 if (!cfun->machine->pic_reg)
5332 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
5333
5334 /* Play games to avoid marking the function as needing pic
5335 if we are being called as part of the cost-estimation
5336 process. */
5337 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5338 crtl->uses_pic_offset_table = 1;
5339 }
5340 else
5341 {
5342 rtx seq, insn;
5343
5344 if (!cfun->machine->pic_reg)
5345 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
5346
5347 /* Play games to avoid marking the function as needing pic
5348 if we are being called as part of the cost-estimation
5349 process. */
5350 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
5351 {
5352 crtl->uses_pic_offset_table = 1;
5353 start_sequence ();
5354
5355 arm_load_pic_register (0UL);
5356
5357 seq = get_insns ();
5358 end_sequence ();
5359
5360 for (insn = seq; insn; insn = NEXT_INSN (insn))
5361 if (INSN_P (insn))
5362 INSN_LOCATOR (insn) = prologue_locator;
5363
5364 /* We can be called during expansion of PHI nodes, where
5365 we can't yet emit instructions directly in the final
5366 insn stream. Queue the insns on the entry edge, they will
5367 be committed after everything else is expanded. */
5368 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
5369 }
5370 }
5371 }
5372 }
5373
5374 rtx
5375 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
5376 {
5377 if (GET_CODE (orig) == SYMBOL_REF
5378 || GET_CODE (orig) == LABEL_REF)
5379 {
5380 rtx insn;
5381
5382 if (reg == 0)
5383 {
5384 gcc_assert (can_create_pseudo_p ());
5385 reg = gen_reg_rtx (Pmode);
5386 }
5387
5388 /* VxWorks does not impose a fixed gap between segments; the run-time
5389 gap can be different from the object-file gap. We therefore can't
5390 use GOTOFF unless we are absolutely sure that the symbol is in the
5391 same segment as the GOT. Unfortunately, the flexibility of linker
5392 scripts means that we can't be sure of that in general, so assume
5393 that GOTOFF is never valid on VxWorks. */
5394 if ((GET_CODE (orig) == LABEL_REF
5395 || (GET_CODE (orig) == SYMBOL_REF &&
5396 SYMBOL_REF_LOCAL_P (orig)))
5397 && NEED_GOT_RELOC
5398 && !TARGET_VXWORKS_RTP)
5399 insn = arm_pic_static_addr (orig, reg);
5400 else
5401 {
5402 rtx pat;
5403 rtx mem;
5404
5405 /* If this function doesn't have a pic register, create one now. */
5406 require_pic_register ();
5407
5408 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
5409
5410 /* Make the MEM as close to a constant as possible. */
5411 mem = SET_SRC (pat);
5412 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
5413 MEM_READONLY_P (mem) = 1;
5414 MEM_NOTRAP_P (mem) = 1;
5415
5416 insn = emit_insn (pat);
5417 }
5418
5419 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5420 by the loop optimizer. */
5421 set_unique_reg_note (insn, REG_EQUAL, orig);
5422
5423 return reg;
5424 }
5425 else if (GET_CODE (orig) == CONST)
5426 {
5427 rtx base, offset;
5428
5429 if (GET_CODE (XEXP (orig, 0)) == PLUS
5430 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
5431 return orig;
5432
5433 /* Handle the case where we have: const (UNSPEC_TLS). */
5434 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
5435 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
5436 return orig;
5437
5438 /* Handle the case where we have:
5439 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5440 CONST_INT. */
5441 if (GET_CODE (XEXP (orig, 0)) == PLUS
5442 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
5443 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
5444 {
5445 gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
5446 return orig;
5447 }
5448
5449 if (reg == 0)
5450 {
5451 gcc_assert (can_create_pseudo_p ());
5452 reg = gen_reg_rtx (Pmode);
5453 }
5454
5455 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
5456
5457 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
5458 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
5459 base == reg ? 0 : reg);
5460
5461 if (GET_CODE (offset) == CONST_INT)
5462 {
5463 /* The base register doesn't really matter, we only want to
5464 test the index for the appropriate mode. */
5465 if (!arm_legitimate_index_p (mode, offset, SET, 0))
5466 {
5467 gcc_assert (can_create_pseudo_p ());
5468 offset = force_reg (Pmode, offset);
5469 }
5470
5471 if (GET_CODE (offset) == CONST_INT)
5472 return plus_constant (Pmode, base, INTVAL (offset));
5473 }
5474
5475 if (GET_MODE_SIZE (mode) > 4
5476 && (GET_MODE_CLASS (mode) == MODE_INT
5477 || TARGET_SOFT_FLOAT))
5478 {
5479 emit_insn (gen_addsi3 (reg, base, offset));
5480 return reg;
5481 }
5482
5483 return gen_rtx_PLUS (Pmode, base, offset);
5484 }
5485
5486 return orig;
5487 }
5488
5489
5490 /* Find a spare register to use during the prolog of a function. */
5491
5492 static int
5493 thumb_find_work_register (unsigned long pushed_regs_mask)
5494 {
5495 int reg;
5496
5497 /* Check the argument registers first as these are call-used. The
5498 register allocation order means that sometimes r3 might be used
5499 but earlier argument registers might not, so check them all. */
5500 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
5501 if (!df_regs_ever_live_p (reg))
5502 return reg;
5503
5504 /* Before going on to check the call-saved registers we can try a couple
5505 more ways of deducing that r3 is available. The first is when we are
5506 pushing anonymous arguments onto the stack and we have fewer than 4
5507 registers' worth of fixed arguments (*). In this case r3 will be part of
5508 the variable argument list and so we can be sure that it will be
5509 pushed right at the start of the function. Hence it will be available
5510 for the rest of the prologue.
5511 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
5512 if (cfun->machine->uses_anonymous_args
5513 && crtl->args.pretend_args_size > 0)
5514 return LAST_ARG_REGNUM;
5515
5516 /* The other case is when we have fixed arguments but fewer than 4 registers'
5517 worth. In this case r3 might be used in the body of the function, but
5518 it is not being used to convey an argument into the function. In theory
5519 we could just check crtl->args.size to see how many bytes are
5520 being passed in argument registers, but that seems to be unreliable.
5521 Sometimes it will have the value 0 when in fact arguments are being
5522 passed. (See testcase execute/20021111-1.c for an example). So we also
5523 check the args_info.nregs field. The problem with this field is
5524 that it makes no allowances for arguments that are passed to the
5525 function but which are not used. Hence we could miss an opportunity
5526 when a function has an unused argument in r3. But it is better to be
5527 safe than sorry. */
5528 if (! cfun->machine->uses_anonymous_args
5529 && crtl->args.size >= 0
5530 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
5531 && crtl->args.info.nregs < 4)
5532 return LAST_ARG_REGNUM;
5533
5534 /* Otherwise look for a call-saved register that is going to be pushed. */
5535 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
5536 if (pushed_regs_mask & (1 << reg))
5537 return reg;
5538
5539 if (TARGET_THUMB2)
5540 {
5541 /* Thumb-2 can use high regs. */
5542 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
5543 if (pushed_regs_mask & (1 << reg))
5544 return reg;
5545 }
5546 /* Something went wrong - thumb_compute_save_reg_mask()
5547 should have arranged for a suitable register to be pushed. */
5548 gcc_unreachable ();
5549 }
5550
5551 static GTY(()) int pic_labelno;
5552
5553 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5554 low register. */
5555
5556 void
5557 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
5558 {
5559 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
5560
5561 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
5562 return;
5563
5564 gcc_assert (flag_pic);
5565
5566 pic_reg = cfun->machine->pic_reg;
5567 if (TARGET_VXWORKS_RTP)
5568 {
5569 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5570 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5571 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5572
5573 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5574
5575 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5576 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5577 }
5578 else
5579 {
5580 /* We use an UNSPEC rather than a LABEL_REF because this label
5581 never appears in the code stream. */
5582
5583 labelno = GEN_INT (pic_labelno++);
5584 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5585 l1 = gen_rtx_CONST (VOIDmode, l1);
5586
5587 /* On the ARM the PC register contains 'dot + 8' at the time of the
5588 addition, on the Thumb it is 'dot + 4'. */
5589 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5590 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5591 UNSPEC_GOTSYM_OFF);
5592 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5593
5594 if (TARGET_32BIT)
5595 {
5596 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5597 }
5598 else /* TARGET_THUMB1 */
5599 {
5600 if (arm_pic_register != INVALID_REGNUM
5601 && REGNO (pic_reg) > LAST_LO_REGNUM)
5602 {
5603 /* We will have pushed the pic register, so we should always be
5604 able to find a work register. */
5605 pic_tmp = gen_rtx_REG (SImode,
5606 thumb_find_work_register (saved_regs));
5607 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
5608 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
5609 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5610 }
5611 else
5612 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
5613 }
5614 }
5615
5616 /* Need to emit this whether or not we obey regdecls,
5617 since setjmp/longjmp can cause life info to screw up. */
5618 emit_use (pic_reg);
5619 }
5620
5621 /* Generate code to load the address of a static var when flag_pic is set. */
5622 static rtx
5623 arm_pic_static_addr (rtx orig, rtx reg)
5624 {
5625 rtx l1, labelno, offset_rtx, insn;
5626
5627 gcc_assert (flag_pic);
5628
5629 /* We use an UNSPEC rather than a LABEL_REF because this label
5630 never appears in the code stream. */
5631 labelno = GEN_INT (pic_labelno++);
5632 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5633 l1 = gen_rtx_CONST (VOIDmode, l1);
5634
5635 /* On the ARM the PC register contains 'dot + 8' at the time of the
5636 addition, on the Thumb it is 'dot + 4'. */
5637 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
5638 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5639 UNSPEC_SYMBOL_OFFSET);
5640 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5641
5642 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
5643 return insn;
5644 }
5645
5646 /* Return nonzero if X is valid as an ARM state addressing register. */
5647 static int
5648 arm_address_register_rtx_p (rtx x, int strict_p)
5649 {
5650 int regno;
5651
5652 if (GET_CODE (x) != REG)
5653 return 0;
5654
5655 regno = REGNO (x);
5656
5657 if (strict_p)
5658 return ARM_REGNO_OK_FOR_BASE_P (regno);
5659
5660 return (regno <= LAST_ARM_REGNUM
5661 || regno >= FIRST_PSEUDO_REGISTER
5662 || regno == FRAME_POINTER_REGNUM
5663 || regno == ARG_POINTER_REGNUM);
5664 }
5665
5666 /* Return TRUE if this rtx is the difference of a symbol and a label,
5667 and will reduce to a PC-relative relocation in the object file.
5668 Expressions like this can be left alone when generating PIC, rather
5669 than forced through the GOT. */
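/* A minimal instance of such an rtx is (minus (symbol_ref "sym") (label_ref L)),
   i.e. the offset of a symbol from a local label.  */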
5670 static int
5671 pcrel_constant_p (rtx x)
5672 {
5673 if (GET_CODE (x) == MINUS)
5674 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5675
5676 return FALSE;
5677 }
5678
5679 /* Return true if X will surely end up in an index register after next
5680 splitting pass. */
5681 static bool
5682 will_be_in_index_register (const_rtx x)
5683 {
5684 /* arm.md: calculate_pic_address will split this into a register. */
5685 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
5686 }
5687
5688 /* Return nonzero if X is a valid ARM state address operand. */
5689 int
5690 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5691 int strict_p)
5692 {
5693 bool use_ldrd;
5694 enum rtx_code code = GET_CODE (x);
5695
5696 if (arm_address_register_rtx_p (x, strict_p))
5697 return 1;
5698
5699 use_ldrd = (TARGET_LDRD
5700 && (mode == DImode
5701 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5702
5703 if (code == POST_INC || code == PRE_DEC
5704 || ((code == PRE_INC || code == POST_DEC)
5705 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5706 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5707
5708 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5709 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5710 && GET_CODE (XEXP (x, 1)) == PLUS
5711 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5712 {
5713 rtx addend = XEXP (XEXP (x, 1), 1);
5714
5715 /* Don't allow ldrd post increment by register because it's hard
5716 to fixup invalid register choices. */
5717 if (use_ldrd
5718 && GET_CODE (x) == POST_MODIFY
5719 && GET_CODE (addend) == REG)
5720 return 0;
5721
5722 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
5723 && arm_legitimate_index_p (mode, addend, outer, strict_p));
5724 }
5725
5726 /* After reload constants split into minipools will have addresses
5727 from a LABEL_REF. */
5728 else if (reload_completed
5729 && (code == LABEL_REF
5730 || (code == CONST
5731 && GET_CODE (XEXP (x, 0)) == PLUS
5732 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5733 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5734 return 1;
5735
5736 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5737 return 0;
5738
5739 else if (code == PLUS)
5740 {
5741 rtx xop0 = XEXP (x, 0);
5742 rtx xop1 = XEXP (x, 1);
5743
5744 return ((arm_address_register_rtx_p (xop0, strict_p)
5745 && ((GET_CODE(xop1) == CONST_INT
5746 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5747 || (!strict_p && will_be_in_index_register (xop1))))
5748 || (arm_address_register_rtx_p (xop1, strict_p)
5749 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5750 }
5751
5752 #if 0
5753 /* Reload currently can't handle MINUS, so disable this for now */
5754 else if (GET_CODE (x) == MINUS)
5755 {
5756 rtx xop0 = XEXP (x, 0);
5757 rtx xop1 = XEXP (x, 1);
5758
5759 return (arm_address_register_rtx_p (xop0, strict_p)
5760 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
5761 }
5762 #endif
5763
5764 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5765 && code == SYMBOL_REF
5766 && CONSTANT_POOL_ADDRESS_P (x)
5767 && ! (flag_pic
5768 && symbol_mentioned_p (get_pool_constant (x))
5769 && ! pcrel_constant_p (get_pool_constant (x))))
5770 return 1;
5771
5772 return 0;
5773 }
5774
5775 /* Return nonzero if X is a valid Thumb-2 address operand. */
5776 static int
5777 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
5778 {
5779 bool use_ldrd;
5780 enum rtx_code code = GET_CODE (x);
5781
5782 if (arm_address_register_rtx_p (x, strict_p))
5783 return 1;
5784
5785 use_ldrd = (TARGET_LDRD
5786 && (mode == DImode
5787 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
5788
5789 if (code == POST_INC || code == PRE_DEC
5790 || ((code == PRE_INC || code == POST_DEC)
5791 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
5792 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
5793
5794 else if ((code == POST_MODIFY || code == PRE_MODIFY)
5795 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
5796 && GET_CODE (XEXP (x, 1)) == PLUS
5797 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
5798 {
5799 /* Thumb-2 only has autoincrement by constant. */
5800 rtx addend = XEXP (XEXP (x, 1), 1);
5801 HOST_WIDE_INT offset;
5802
5803 if (GET_CODE (addend) != CONST_INT)
5804 return 0;
5805
5806 offset = INTVAL(addend);
5807 if (GET_MODE_SIZE (mode) <= 4)
5808 return (offset > -256 && offset < 256);
5809
5810 return (use_ldrd && offset > -1024 && offset < 1024
5811 && (offset & 3) == 0);
5812 }
5813
5814 /* After reload constants split into minipools will have addresses
5815 from a LABEL_REF. */
5816 else if (reload_completed
5817 && (code == LABEL_REF
5818 || (code == CONST
5819 && GET_CODE (XEXP (x, 0)) == PLUS
5820 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5821 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
5822 return 1;
5823
5824 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
5825 return 0;
5826
5827 else if (code == PLUS)
5828 {
5829 rtx xop0 = XEXP (x, 0);
5830 rtx xop1 = XEXP (x, 1);
5831
5832 return ((arm_address_register_rtx_p (xop0, strict_p)
5833 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5834 || (!strict_p && will_be_in_index_register (xop1))))
5835 || (arm_address_register_rtx_p (xop1, strict_p)
5836 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5837 }
5838
5839 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5840 && code == SYMBOL_REF
5841 && CONSTANT_POOL_ADDRESS_P (x)
5842 && ! (flag_pic
5843 && symbol_mentioned_p (get_pool_constant (x))
5844 && ! pcrel_constant_p (get_pool_constant (x))))
5845 return 1;
5846
5847 return 0;
5848 }
5849
5850 /* Return nonzero if INDEX is valid for an address index operand in
5851 ARM state. */
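/* For example, for a word-sized access this accepts the immediate form
   [Rn, #+/-imm] with imm up to 4095, the register form [Rn, +/-Rm], and
   scaled forms such as [Rn, Rm, LSL #2]; halfword accesses on ARMv4 and
   later are limited to [Rn, #+/-imm] with imm up to 255.  */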
5852 static int
5853 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
5854 int strict_p)
5855 {
5856 HOST_WIDE_INT range;
5857 enum rtx_code code = GET_CODE (index);
5858
5859 /* Standard coprocessor addressing modes. */
5860 if (TARGET_HARD_FLOAT
5861 && TARGET_VFP
5862 && (mode == SFmode || mode == DFmode))
5863 return (code == CONST_INT && INTVAL (index) < 1024
5864 && INTVAL (index) > -1024
5865 && (INTVAL (index) & 3) == 0);
5866
5867 /* For quad modes, we restrict the constant offset to be slightly less
5868 than what the instruction format permits. We do this because for
5869 quad mode moves, we will actually decompose them into two separate
5870 double-mode reads or writes. INDEX must therefore be a valid
5871 (double-mode) offset and so should INDEX+8. */
5872 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5873 return (code == CONST_INT
5874 && INTVAL (index) < 1016
5875 && INTVAL (index) > -1024
5876 && (INTVAL (index) & 3) == 0);
5877
5878 /* We have no such constraint on double mode offsets, so we permit the
5879 full range of the instruction format. */
5880 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5881 return (code == CONST_INT
5882 && INTVAL (index) < 1024
5883 && INTVAL (index) > -1024
5884 && (INTVAL (index) & 3) == 0);
5885
5886 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5887 return (code == CONST_INT
5888 && INTVAL (index) < 1024
5889 && INTVAL (index) > -1024
5890 && (INTVAL (index) & 3) == 0);
5891
5892 if (arm_address_register_rtx_p (index, strict_p)
5893 && (GET_MODE_SIZE (mode) <= 4))
5894 return 1;
5895
5896 if (mode == DImode || mode == DFmode)
5897 {
5898 if (code == CONST_INT)
5899 {
5900 HOST_WIDE_INT val = INTVAL (index);
5901
5902 if (TARGET_LDRD)
5903 return val > -256 && val < 256;
5904 else
5905 return val > -4096 && val < 4092;
5906 }
5907
5908 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
5909 }
5910
5911 if (GET_MODE_SIZE (mode) <= 4
5912 && ! (arm_arch4
5913 && (mode == HImode
5914 || mode == HFmode
5915 || (mode == QImode && outer == SIGN_EXTEND))))
5916 {
5917 if (code == MULT)
5918 {
5919 rtx xiop0 = XEXP (index, 0);
5920 rtx xiop1 = XEXP (index, 1);
5921
5922 return ((arm_address_register_rtx_p (xiop0, strict_p)
5923 && power_of_two_operand (xiop1, SImode))
5924 || (arm_address_register_rtx_p (xiop1, strict_p)
5925 && power_of_two_operand (xiop0, SImode)));
5926 }
5927 else if (code == LSHIFTRT || code == ASHIFTRT
5928 || code == ASHIFT || code == ROTATERT)
5929 {
5930 rtx op = XEXP (index, 1);
5931
5932 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
5933 && GET_CODE (op) == CONST_INT
5934 && INTVAL (op) > 0
5935 && INTVAL (op) <= 31);
5936 }
5937 }
5938
5939 /* For ARM v4 we may be doing a sign-extend operation during the
5940 load. */
5941 if (arm_arch4)
5942 {
5943 if (mode == HImode
5944 || mode == HFmode
5945 || (outer == SIGN_EXTEND && mode == QImode))
5946 range = 256;
5947 else
5948 range = 4096;
5949 }
5950 else
5951 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
5952
5953 return (code == CONST_INT
5954 && INTVAL (index) < range
5955 && INTVAL (index) > -range);
5956 }
5957
5958 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
5959 index operand, i.e. 1, 2, 4 or 8. */
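/* For example, (mult (reg Rm) (const_int 4)) as the index part of an address
   maps onto the Thumb-2 scaled register offset form [Rn, Rm, LSL #2]; the
   accepted scales 1, 2, 4 and 8 correspond to LSL #0..#3.  */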
5960 static bool
5961 thumb2_index_mul_operand (rtx op)
5962 {
5963 HOST_WIDE_INT val;
5964
5965 if (GET_CODE(op) != CONST_INT)
5966 return false;
5967
5968 val = INTVAL(op);
5969 return (val == 1 || val == 2 || val == 4 || val == 8);
5970 }
5971
5972 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5973 static int
5974 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
5975 {
5976 enum rtx_code code = GET_CODE (index);
5977
5978 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5979 /* Standard coprocessor addressing modes. */
5980 if (TARGET_HARD_FLOAT
5981 && TARGET_VFP
5982 && (mode == SFmode || mode == DFmode))
5983 return (code == CONST_INT && INTVAL (index) < 1024
5984 /* Thumb-2 allows only an index range greater than -256 for its core register
5985 load/stores. Since we allow SF/DF in core registers, we have
5986 to use the intersection of the -256~4096 (core) and -1024~1024
5987 (coprocessor) ranges. */
5988 && INTVAL (index) > -256
5989 && (INTVAL (index) & 3) == 0);
5990
5991 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5992 {
5993 /* For DImode assume values will usually live in core regs
5994 and only allow LDRD addressing modes. */
5995 if (!TARGET_LDRD || mode != DImode)
5996 return (code == CONST_INT
5997 && INTVAL (index) < 1024
5998 && INTVAL (index) > -1024
5999 && (INTVAL (index) & 3) == 0);
6000 }
6001
6002 /* For quad modes, we restrict the constant offset to be slightly less
6003 than what the instruction format permits. We do this because for
6004 quad mode moves, we will actually decompose them into two separate
6005 double-mode reads or writes. INDEX must therefore be a valid
6006 (double-mode) offset and so should INDEX+8. */
6007 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6008 return (code == CONST_INT
6009 && INTVAL (index) < 1016
6010 && INTVAL (index) > -1024
6011 && (INTVAL (index) & 3) == 0);
6012
6013 /* We have no such constraint on double mode offsets, so we permit the
6014 full range of the instruction format. */
6015 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6016 return (code == CONST_INT
6017 && INTVAL (index) < 1024
6018 && INTVAL (index) > -1024
6019 && (INTVAL (index) & 3) == 0);
6020
6021 if (arm_address_register_rtx_p (index, strict_p)
6022 && (GET_MODE_SIZE (mode) <= 4))
6023 return 1;
6024
6025 if (mode == DImode || mode == DFmode)
6026 {
6027 if (code == CONST_INT)
6028 {
6029 HOST_WIDE_INT val = INTVAL (index);
6030 /* ??? Can we assume ldrd for thumb2? */
6031 /* Thumb-2 ldrd only has reg+const addressing modes. */
6032 /* ldrd supports offsets of +-1020.
6033 However the ldr fallback does not. */
6034 return val > -256 && val < 256 && (val & 3) == 0;
6035 }
6036 else
6037 return 0;
6038 }
6039
6040 if (code == MULT)
6041 {
6042 rtx xiop0 = XEXP (index, 0);
6043 rtx xiop1 = XEXP (index, 1);
6044
6045 return ((arm_address_register_rtx_p (xiop0, strict_p)
6046 && thumb2_index_mul_operand (xiop1))
6047 || (arm_address_register_rtx_p (xiop1, strict_p)
6048 && thumb2_index_mul_operand (xiop0)));
6049 }
6050 else if (code == ASHIFT)
6051 {
6052 rtx op = XEXP (index, 1);
6053
6054 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6055 && GET_CODE (op) == CONST_INT
6056 && INTVAL (op) > 0
6057 && INTVAL (op) <= 3);
6058 }
6059
6060 return (code == CONST_INT
6061 && INTVAL (index) < 4096
6062 && INTVAL (index) > -256);
6063 }
6064
6065 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6066 static int
6067 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6068 {
6069 int regno;
6070
6071 if (GET_CODE (x) != REG)
6072 return 0;
6073
6074 regno = REGNO (x);
6075
6076 if (strict_p)
6077 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6078
6079 return (regno <= LAST_LO_REGNUM
6080 || regno > LAST_VIRTUAL_REGISTER
6081 || regno == FRAME_POINTER_REGNUM
6082 || (GET_MODE_SIZE (mode) >= 4
6083 && (regno == STACK_POINTER_REGNUM
6084 || regno >= FIRST_PSEUDO_REGISTER
6085 || x == hard_frame_pointer_rtx
6086 || x == arg_pointer_rtx)));
6087 }
6088
6089 /* Return nonzero if x is a legitimate index register. This is the case
6090 for any base register that can access a QImode object. */
6091 inline static int
6092 thumb1_index_register_rtx_p (rtx x, int strict_p)
6093 {
6094 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6095 }
6096
6097 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6098
6099 The AP may be eliminated to either the SP or the FP, so we use the
6100 least common denominator, e.g. SImode, and offsets from 0 to 64.
6101
6102 ??? Verify whether the above is the right approach.
6103
6104 ??? Also, the FP may be eliminated to the SP, so perhaps that
6105 needs special handling also.
6106
6107 ??? Look at how the mips16 port solves this problem. It probably uses
6108 better ways to solve some of these problems.
6109
6110 Although it is not incorrect, we don't accept QImode and HImode
6111 addresses based on the frame pointer or arg pointer until the
6112 reload pass starts. This is so that eliminating such addresses
6113 into stack based ones won't produce impossible code. */
6114 int
6115 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6116 {
6117 /* ??? Not clear if this is right. Experiment. */
6118 if (GET_MODE_SIZE (mode) < 4
6119 && !(reload_in_progress || reload_completed)
6120 && (reg_mentioned_p (frame_pointer_rtx, x)
6121 || reg_mentioned_p (arg_pointer_rtx, x)
6122 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6123 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6124 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6125 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6126 return 0;
6127
6128 /* Accept any base register. SP only in SImode or larger. */
6129 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6130 return 1;
6131
6132 /* This is PC relative data before arm_reorg runs. */
6133 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
6134 && GET_CODE (x) == SYMBOL_REF
6135 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
6136 return 1;
6137
6138 /* This is PC relative data after arm_reorg runs. */
6139 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
6140 && reload_completed
6141 && (GET_CODE (x) == LABEL_REF
6142 || (GET_CODE (x) == CONST
6143 && GET_CODE (XEXP (x, 0)) == PLUS
6144 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6145 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
6146 return 1;
6147
6148 /* Post-inc indexing only supported for SImode and larger. */
6149 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
6150 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
6151 return 1;
6152
6153 else if (GET_CODE (x) == PLUS)
6154 {
6155 /* REG+REG address can be any two index registers. */
6156 /* We disallow FRAME+REG addressing since we know that FRAME
6157 will be replaced with STACK, and SP relative addressing only
6158 permits SP+OFFSET. */
6159 if (GET_MODE_SIZE (mode) <= 4
6160 && XEXP (x, 0) != frame_pointer_rtx
6161 && XEXP (x, 1) != frame_pointer_rtx
6162 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6163 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
6164 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
6165 return 1;
6166
6167 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
6168 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
6169 || XEXP (x, 0) == arg_pointer_rtx)
6170 && GET_CODE (XEXP (x, 1)) == CONST_INT
6171 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6172 return 1;
6173
6174 /* REG+const has a 10-bit offset for SP, but only SImode and
6175 larger are supported. */
6176 /* ??? Should probably check for DI/DFmode overflow here
6177 just like GO_IF_LEGITIMATE_OFFSET does. */
6178 else if (GET_CODE (XEXP (x, 0)) == REG
6179 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
6180 && GET_MODE_SIZE (mode) >= 4
6181 && GET_CODE (XEXP (x, 1)) == CONST_INT
6182 && INTVAL (XEXP (x, 1)) >= 0
6183 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
6184 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6185 return 1;
6186
6187 else if (GET_CODE (XEXP (x, 0)) == REG
6188 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
6189 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
6190 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
6191 && REGNO (XEXP (x, 0))
6192 <= LAST_VIRTUAL_POINTER_REGISTER))
6193 && GET_MODE_SIZE (mode) >= 4
6194 && GET_CODE (XEXP (x, 1)) == CONST_INT
6195 && (INTVAL (XEXP (x, 1)) & 3) == 0)
6196 return 1;
6197 }
6198
6199 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6200 && GET_MODE_SIZE (mode) == 4
6201 && GET_CODE (x) == SYMBOL_REF
6202 && CONSTANT_POOL_ADDRESS_P (x)
6203 && ! (flag_pic
6204 && symbol_mentioned_p (get_pool_constant (x))
6205 && ! pcrel_constant_p (get_pool_constant (x))))
6206 return 1;
6207
6208 return 0;
6209 }
6210
6211 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
6212 instruction of mode MODE. */
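/* For example, these ranges match the unsigned scaled immediates of the
   16-bit Thumb loads and stores: 0..31 for LDRB/STRB, 0..62 (even) for
   LDRH/STRH, and 0..124 (word-aligned) for LDR/STR of an SImode value.  */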
6213 int
6214 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
6215 {
6216 switch (GET_MODE_SIZE (mode))
6217 {
6218 case 1:
6219 return val >= 0 && val < 32;
6220
6221 case 2:
6222 return val >= 0 && val < 64 && (val & 1) == 0;
6223
6224 default:
6225 return (val >= 0
6226 && (val + GET_MODE_SIZE (mode)) <= 128
6227 && (val & 3) == 0);
6228 }
6229 }
6230
6231 bool
6232 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
6233 {
6234 if (TARGET_ARM)
6235 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
6236 else if (TARGET_THUMB2)
6237 return thumb2_legitimate_address_p (mode, x, strict_p);
6238 else /* if (TARGET_THUMB1) */
6239 return thumb1_legitimate_address_p (mode, x, strict_p);
6240 }
6241
6242 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
6243
6244 Given an rtx X being reloaded into a reg required to be
6245 in class CLASS, return the class of reg to actually use.
6246 In general this is just CLASS, but for the Thumb core registers and
6247 immediate constants we prefer a LO_REGS class or a subset. */
6248
6249 static reg_class_t
6250 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
6251 {
6252 if (TARGET_32BIT)
6253 return rclass;
6254 else
6255 {
6256 if (rclass == GENERAL_REGS
6257 || rclass == HI_REGS
6258 || rclass == NO_REGS
6259 || rclass == STACK_REG)
6260 return LO_REGS;
6261 else
6262 return rclass;
6263 }
6264 }
6265
6266 /* Build the SYMBOL_REF for __tls_get_addr. */
6267
6268 static GTY(()) rtx tls_get_addr_libfunc;
6269
6270 static rtx
6271 get_tls_get_addr (void)
6272 {
6273 if (!tls_get_addr_libfunc)
6274 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
6275 return tls_get_addr_libfunc;
6276 }
6277
6278 static rtx
6279 arm_load_tp (rtx target)
6280 {
6281 if (!target)
6282 target = gen_reg_rtx (SImode);
6283
6284 if (TARGET_HARD_TP)
6285 {
6286 /* Can return in any reg. */
6287 emit_insn (gen_load_tp_hard (target));
6288 }
6289 else
6290 {
6291 /* Always returned in r0. Immediately copy the result into a pseudo,
6292 otherwise other uses of r0 (e.g. setting up function arguments) may
6293 clobber the value. */
6294
6295 rtx tmp;
6296
6297 emit_insn (gen_load_tp_soft ());
6298
6299 tmp = gen_rtx_REG (SImode, 0);
6300 emit_move_insn (target, tmp);
6301 }
6302 return target;
6303 }
6304
6305 static rtx
6306 load_tls_operand (rtx x, rtx reg)
6307 {
6308 rtx tmp;
6309
6310 if (reg == NULL_RTX)
6311 reg = gen_reg_rtx (SImode);
6312
6313 tmp = gen_rtx_CONST (SImode, x);
6314
6315 emit_move_insn (reg, tmp);
6316
6317 return reg;
6318 }
6319
6320 static rtx
6321 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
6322 {
6323 rtx insns, label, labelno, sum;
6324
6325 gcc_assert (reloc != TLS_DESCSEQ);
6326 start_sequence ();
6327
6328 labelno = GEN_INT (pic_labelno++);
6329 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6330 label = gen_rtx_CONST (VOIDmode, label);
6331
6332 sum = gen_rtx_UNSPEC (Pmode,
6333 gen_rtvec (4, x, GEN_INT (reloc), label,
6334 GEN_INT (TARGET_ARM ? 8 : 4)),
6335 UNSPEC_TLS);
6336 reg = load_tls_operand (sum, reg);
6337
6338 if (TARGET_ARM)
6339 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
6340 else
6341 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6342
6343 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
6344 LCT_PURE, /* LCT_CONST? */
6345 Pmode, 1, reg, Pmode);
6346
6347 insns = get_insns ();
6348 end_sequence ();
6349
6350 return insns;
6351 }
6352
6353 static rtx
6354 arm_tls_descseq_addr (rtx x, rtx reg)
6355 {
6356 rtx labelno = GEN_INT (pic_labelno++);
6357 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6358 rtx sum = gen_rtx_UNSPEC (Pmode,
6359 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
6360 gen_rtx_CONST (VOIDmode, label),
6361 GEN_INT (!TARGET_ARM)),
6362 UNSPEC_TLS);
6363 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
6364
6365 emit_insn (gen_tlscall (x, labelno));
6366 if (!reg)
6367 reg = gen_reg_rtx (SImode);
6368 else
6369 gcc_assert (REGNO (reg) != 0);
6370
6371 emit_move_insn (reg, reg0);
6372
6373 return reg;
6374 }
6375
6376 rtx
6377 legitimize_tls_address (rtx x, rtx reg)
6378 {
6379 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
6380 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
6381
6382 switch (model)
6383 {
6384 case TLS_MODEL_GLOBAL_DYNAMIC:
6385 if (TARGET_GNU2_TLS)
6386 {
6387 reg = arm_tls_descseq_addr (x, reg);
6388
6389 tp = arm_load_tp (NULL_RTX);
6390
6391 dest = gen_rtx_PLUS (Pmode, tp, reg);
6392 }
6393 else
6394 {
6395 /* Original scheme */
6396 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
6397 dest = gen_reg_rtx (Pmode);
6398 emit_libcall_block (insns, dest, ret, x);
6399 }
6400 return dest;
6401
6402 case TLS_MODEL_LOCAL_DYNAMIC:
6403 if (TARGET_GNU2_TLS)
6404 {
6405 reg = arm_tls_descseq_addr (x, reg);
6406
6407 tp = arm_load_tp (NULL_RTX);
6408
6409 dest = gen_rtx_PLUS (Pmode, tp, reg);
6410 }
6411 else
6412 {
6413 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
6414
6415 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6416 share the LDM result with other LD model accesses. */
6417 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
6418 UNSPEC_TLS);
6419 dest = gen_reg_rtx (Pmode);
6420 emit_libcall_block (insns, dest, ret, eqv);
6421
6422 /* Load the addend. */
6423 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
6424 GEN_INT (TLS_LDO32)),
6425 UNSPEC_TLS);
6426 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
6427 dest = gen_rtx_PLUS (Pmode, dest, addend);
6428 }
6429 return dest;
6430
6431 case TLS_MODEL_INITIAL_EXEC:
6432 labelno = GEN_INT (pic_labelno++);
6433 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6434 label = gen_rtx_CONST (VOIDmode, label);
6435 sum = gen_rtx_UNSPEC (Pmode,
6436 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
6437 GEN_INT (TARGET_ARM ? 8 : 4)),
6438 UNSPEC_TLS);
6439 reg = load_tls_operand (sum, reg);
6440
6441 if (TARGET_ARM)
6442 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
6443 else if (TARGET_THUMB2)
6444 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
6445 else
6446 {
6447 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
6448 emit_move_insn (reg, gen_const_mem (SImode, reg));
6449 }
6450
6451 tp = arm_load_tp (NULL_RTX);
6452
6453 return gen_rtx_PLUS (Pmode, tp, reg);
6454
6455 case TLS_MODEL_LOCAL_EXEC:
6456 tp = arm_load_tp (NULL_RTX);
6457
6458 reg = gen_rtx_UNSPEC (Pmode,
6459 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
6460 UNSPEC_TLS);
6461 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
6462
6463 return gen_rtx_PLUS (Pmode, tp, reg);
6464
6465 default:
6466 abort ();
6467 }
6468 }
6469
6470 /* Try machine-dependent ways of modifying an illegitimate address
6471 to be legitimate. If we find one, return the new, valid address. */
6472 rtx
6473 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6474 {
6475 if (!TARGET_ARM)
6476 {
6477 /* TODO: legitimize_address for Thumb2. */
6478 if (TARGET_THUMB2)
6479 return x;
6480 return thumb_legitimize_address (x, orig_x, mode);
6481 }
6482
6483 if (arm_tls_symbol_p (x))
6484 return legitimize_tls_address (x, NULL_RTX);
6485
6486 if (GET_CODE (x) == PLUS)
6487 {
6488 rtx xop0 = XEXP (x, 0);
6489 rtx xop1 = XEXP (x, 1);
6490
6491 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
6492 xop0 = force_reg (SImode, xop0);
6493
6494 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
6495 xop1 = force_reg (SImode, xop1);
6496
6497 if (ARM_BASE_REGISTER_RTX_P (xop0)
6498 && GET_CODE (xop1) == CONST_INT)
6499 {
6500 HOST_WIDE_INT n, low_n;
6501 rtx base_reg, val;
6502 n = INTVAL (xop1);
6503
6504 /* VFP addressing modes actually allow greater offsets, but for
6505 now we just stick with the lowest common denominator. */
6506 if (mode == DImode
6507 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
6508 {
6509 low_n = n & 0x0f;
6510 n &= ~0x0f;
6511 if (low_n > 4)
6512 {
6513 n += 16;
6514 low_n -= 16;
6515 }
6516 }
6517 else
6518 {
6519 low_n = ((mode) == TImode ? 0
6520 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
6521 n -= low_n;
6522 }
6523
6524 base_reg = gen_reg_rtx (SImode);
6525 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
6526 emit_move_insn (base_reg, val);
6527 x = plus_constant (Pmode, base_reg, low_n);
6528 }
6529 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6530 x = gen_rtx_PLUS (SImode, xop0, xop1);
6531 }
6532
6533 /* XXX We don't allow MINUS any more -- see comment in
6534 arm_legitimate_address_outer_p (). */
6535 else if (GET_CODE (x) == MINUS)
6536 {
6537 rtx xop0 = XEXP (x, 0);
6538 rtx xop1 = XEXP (x, 1);
6539
6540 if (CONSTANT_P (xop0))
6541 xop0 = force_reg (SImode, xop0);
6542
6543 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
6544 xop1 = force_reg (SImode, xop1);
6545
6546 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
6547 x = gen_rtx_MINUS (SImode, xop0, xop1);
6548 }
6549
6550 /* Make sure to take full advantage of the pre-indexed addressing mode
6551 with absolute addresses, which often allows the base register to
6552 be factored out across multiple adjacent memory references, and might
6553 even allow the minipool to be avoided entirely. */
6554 else if (GET_CODE (x) == CONST_INT && optimize > 0)
6555 {
6556 unsigned int bits;
6557 HOST_WIDE_INT mask, base, index;
6558 rtx base_reg;
6559
6560 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6561 use an 8-bit index. So let's use a 12-bit index for SImode only and
6562 hope that arm_gen_constant will enable ldrb to use more bits. */
6563 bits = (mode == SImode) ? 12 : 8;
6564 mask = (1 << bits) - 1;
6565 base = INTVAL (x) & ~mask;
6566 index = INTVAL (x) & mask;
6567 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
6568 {
6569 /* It'll most probably be more efficient to generate the base
6570 with more bits set and use a negative index instead. */
6571 base |= mask;
6572 index -= mask;
6573 }
6574 base_reg = force_reg (SImode, GEN_INT (base));
6575 x = plus_constant (Pmode, base_reg, index);
6576 }
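/* A worked example of the split above, using an illustrative address:
   for an SImode access to the absolute address 0x1234, bits is 12, so
   base becomes 0x1000 (forced into a register) and index becomes 0x234,
   letting neighbouring absolute accesses share the same base register.  */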
6577
6578 if (flag_pic)
6579 {
6580 /* We need to find and carefully transform any SYMBOL and LABEL
6581 references; so go back to the original address expression. */
6582 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6583
6584 if (new_x != orig_x)
6585 x = new_x;
6586 }
6587
6588 return x;
6589 }
6590
6591
6592 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6593 to be legitimate. If we find one, return the new, valid address. */
6594 rtx
6595 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
6596 {
6597 if (arm_tls_symbol_p (x))
6598 return legitimize_tls_address (x, NULL_RTX);
6599
6600 if (GET_CODE (x) == PLUS
6601 && GET_CODE (XEXP (x, 1)) == CONST_INT
6602 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
6603 || INTVAL (XEXP (x, 1)) < 0))
6604 {
6605 rtx xop0 = XEXP (x, 0);
6606 rtx xop1 = XEXP (x, 1);
6607 HOST_WIDE_INT offset = INTVAL (xop1);
6608
6609 /* Try and fold the offset into a biasing of the base register and
6610 then offsetting that. Don't do this when optimizing for space
6611 since it can cause too many CSEs. */
6612 if (optimize_size && offset >= 0
6613 && offset < 256 + 31 * GET_MODE_SIZE (mode))
6614 {
6615 HOST_WIDE_INT delta;
6616
6617 if (offset >= 256)
6618 delta = offset - (256 - GET_MODE_SIZE (mode));
6619 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
6620 delta = 31 * GET_MODE_SIZE (mode);
6621 else
6622 delta = offset & (~31 * GET_MODE_SIZE (mode));
6623
6624 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
6625 NULL_RTX);
6626 x = plus_constant (Pmode, xop0, delta);
6627 }
6628 else if (offset < 0 && offset > -256)
6629 /* Small negative offsets are best done with a subtract before the
6630 dereference; forcing these into a register normally takes two
6631 instructions. */
6632 x = force_operand (x, NULL_RTX);
6633 else
6634 {
6635 /* For the remaining cases, force the constant into a register. */
6636 xop1 = force_reg (SImode, xop1);
6637 x = gen_rtx_PLUS (SImode, xop0, xop1);
6638 }
6639 }
6640 else if (GET_CODE (x) == PLUS
6641 && s_register_operand (XEXP (x, 1), SImode)
6642 && !s_register_operand (XEXP (x, 0), SImode))
6643 {
6644 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
6645
6646 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
6647 }
6648
6649 if (flag_pic)
6650 {
6651 /* We need to find and carefully transform any SYMBOL and LABEL
6652 references; so go back to the original address expression. */
6653 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
6654
6655 if (new_x != orig_x)
6656 x = new_x;
6657 }
6658
6659 return x;
6660 }
6661
6662 bool
6663 arm_legitimize_reload_address (rtx *p,
6664 enum machine_mode mode,
6665 int opnum, int type,
6666 int ind_levels ATTRIBUTE_UNUSED)
6667 {
6668 /* We must recognize output that we have already generated ourselves. */
6669 if (GET_CODE (*p) == PLUS
6670 && GET_CODE (XEXP (*p, 0)) == PLUS
6671 && GET_CODE (XEXP (XEXP (*p, 0), 0)) == REG
6672 && GET_CODE (XEXP (XEXP (*p, 0), 1)) == CONST_INT
6673 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6674 {
6675 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6676 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6677 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6678 return true;
6679 }
6680
6681 if (GET_CODE (*p) == PLUS
6682 && GET_CODE (XEXP (*p, 0)) == REG
6683 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6684 /* If the base register is equivalent to a constant, let the generic
6685 code handle it. Otherwise we will run into problems if a future
6686 reload pass decides to rematerialize the constant. */
6687 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
6688 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6689 {
6690 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6691 HOST_WIDE_INT low, high;
6692
6693 /* Detect coprocessor load/stores. */
6694 bool coproc_p = ((TARGET_HARD_FLOAT
6695 && TARGET_VFP
6696 && (mode == SFmode || mode == DFmode))
6697 || (TARGET_REALLY_IWMMXT
6698 && VALID_IWMMXT_REG_MODE (mode))
6699 || (TARGET_NEON
6700 && (VALID_NEON_DREG_MODE (mode)
6701 || VALID_NEON_QREG_MODE (mode))));
6702
6703 /* For some cases, bail out when the offset is not word-aligned (lower two bits set). */
6704 if ((val & 0x3) != 0
6705 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6706 && (coproc_p
6707 /* For DI, and DF under soft-float: */
6708 || ((mode == DImode || mode == DFmode)
6709 /* Without ldrd, we use stm/ldm, which does not
6710 fare well with unaligned bits. */
6711 && (! TARGET_LDRD
6712 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6713 || TARGET_THUMB2))))
6714 return false;
6715
6716 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6717 where the (reg+high) part gets turned into a reload add insn,
6718 we try to decompose the index into high/low values that can often
6719 also lead to better reload CSE.
6720 For example:
6721 ldr r0, [r2, #4100] // Offset too large
6722 ldr r1, [r2, #4104] // Offset too large
6723
6724 is best reloaded as:
6725 add t1, r2, #4096
6726 ldr r0, [t1, #4]
6727 add t2, r2, #4096
6728 ldr r1, [t2, #8]
6729
6730 which post-reload CSE can simplify in most cases to eliminate the
6731 second add instruction:
6732 add t1, r2, #4096
6733 ldr r0, [t1, #4]
6734 ldr r1, [t1, #8]
6735
6736 The idea here is that we want to split out the bits of the constant
6737 as a mask, rather than as subtracting the maximum offset that the
6738 respective type of load/store used can handle.
6739
6740 We can still make use of a negative low offset even if
6741 the overall offset is positive; sometimes this may lead to an immediate
6742 that can be constructed with fewer instructions.
6743 For example:
6744 ldr r0, [r2, #0x3FFFFC]
6745
6746 This is best reloaded as:
6747 add t1, r2, #0x400000
6748 ldr r0, [t1, #-4]
6749
6750 The trick for spotting this for a load insn with N bits of offset
6751 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
6752 negative offset that is going to make bit N and all the bits below
6753 it become zero in the remainder part.
6754
6755 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6756 to sign-magnitude addressing (i.e. a separate +/- bit, or 1's complement),
6757 used in most cases of ARM load/store instructions. */
6758
6759 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6760 (((VAL) & ((1 << (N)) - 1)) \
6761 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6762 : 0)
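/* As a sanity check of the macro against the example above:
   SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12) yields -4, leaving 0x400000
   as the high part to be loaded into the base register.  */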
6763
6764 if (coproc_p)
6765 {
6766 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
6767
6768 /* NEON quad-word load/stores are made of two double-word accesses,
6769 so the valid index range is reduced by 8. Treat as 9-bit range if
6770 we go over it. */
6771 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
6772 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
6773 }
6774 else if (GET_MODE_SIZE (mode) == 8)
6775 {
6776 if (TARGET_LDRD)
6777 low = (TARGET_THUMB2
6778 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
6779 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
6780 else
6781 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6782 to access doublewords. The supported load/store offsets are
6783 -8, -4, and 4, which we try to produce here. */
6784 low = ((val & 0xf) ^ 0x8) - 0x8;
6785 }
6786 else if (GET_MODE_SIZE (mode) < 8)
6787 {
6788 /* NEON element load/stores do not have an offset. */
6789 if (TARGET_NEON_FP16 && mode == HFmode)
6790 return false;
6791
6792 if (TARGET_THUMB2)
6793 {
6794 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6795 Try the wider 12-bit range first, and re-try if the result
6796 is out of range. */
6797 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6798 if (low < -255)
6799 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6800 }
6801 else
6802 {
6803 if (mode == HImode || mode == HFmode)
6804 {
6805 if (arm_arch4)
6806 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
6807 else
6808 {
6809 /* The storehi/movhi_bytes fallbacks can use only
6810 [-4094,+4094] of the full ldrb/strb index range. */
6811 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6812 if (low == 4095 || low == -4095)
6813 return false;
6814 }
6815 }
6816 else
6817 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
6818 }
6819 }
6820 else
6821 return false;
6822
6823 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6824 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6825 - (unsigned HOST_WIDE_INT) 0x80000000);
6826 /* Check for overflow or zero. */
6827 if (low == 0 || high == 0 || (high + low != val))
6828 return false;
6829
6830 /* Reload the high part into a base reg; leave the low part
6831 in the mem. */
6832 *p = gen_rtx_PLUS (GET_MODE (*p),
6833 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6834 GEN_INT (high)),
6835 GEN_INT (low));
6836 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6837 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6838 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6839 return true;
6840 }
6841
6842 return false;
6843 }
6844
6845 rtx
6846 thumb_legitimize_reload_address (rtx *x_p,
6847 enum machine_mode mode,
6848 int opnum, int type,
6849 int ind_levels ATTRIBUTE_UNUSED)
6850 {
6851 rtx x = *x_p;
6852
6853 if (GET_CODE (x) == PLUS
6854 && GET_MODE_SIZE (mode) < 4
6855 && REG_P (XEXP (x, 0))
6856 && XEXP (x, 0) == stack_pointer_rtx
6857 && GET_CODE (XEXP (x, 1)) == CONST_INT
6858 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
6859 {
6860 rtx orig_x = x;
6861
6862 x = copy_rtx (x);
6863 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6864 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6865 return x;
6866 }
6867
6868 /* If both registers are hi-regs, then it's better to reload the
6869 entire expression rather than each register individually. That
6870 only requires one reload register rather than two. */
6871 if (GET_CODE (x) == PLUS
6872 && REG_P (XEXP (x, 0))
6873 && REG_P (XEXP (x, 1))
6874 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
6875 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
6876 {
6877 rtx orig_x = x;
6878
6879 x = copy_rtx (x);
6880 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
6881 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
6882 return x;
6883 }
6884
6885 return NULL;
6886 }
6887
6888 /* Test for various thread-local symbols. */
6889
6890 /* Return TRUE if X is a thread-local symbol. */
6891
6892 static bool
6893 arm_tls_symbol_p (rtx x)
6894 {
6895 if (! TARGET_HAVE_TLS)
6896 return false;
6897
6898 if (GET_CODE (x) != SYMBOL_REF)
6899 return false;
6900
6901 return SYMBOL_REF_TLS_MODEL (x) != 0;
6902 }
6903
6904 /* Helper for arm_tls_referenced_p. */
6905
6906 static int
6907 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
6908 {
6909 if (GET_CODE (*x) == SYMBOL_REF)
6910 return SYMBOL_REF_TLS_MODEL (*x) != 0;
6911
6912 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6913 TLS offsets, not real symbol references. */
6914 if (GET_CODE (*x) == UNSPEC
6915 && XINT (*x, 1) == UNSPEC_TLS)
6916 return -1;
6917
6918 return 0;
6919 }
6920
6921 /* Return TRUE if X contains any TLS symbol references. */
6922
6923 bool
6924 arm_tls_referenced_p (rtx x)
6925 {
6926 if (! TARGET_HAVE_TLS)
6927 return false;
6928
6929 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
6930 }
6931
6932 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6933
6934 On the ARM, allow any integer (invalid ones are removed later by insn
6935 patterns), nice doubles and symbol_refs which refer to the function's
6936 constant pool XXX.
6937
6938 When generating pic allow anything. */
6939
6940 static bool
6941 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
6942 {
6943 /* At present, we have no support for Neon structure constants, so forbid
6944 them here. It might be possible to handle simple cases like 0 and -1
6945 in future. */
6946 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
6947 return false;
6948
6949 return flag_pic || !label_mentioned_p (x);
6950 }
6951
6952 static bool
6953 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6954 {
6955 return (GET_CODE (x) == CONST_INT
6956 || GET_CODE (x) == CONST_DOUBLE
6957 || CONSTANT_ADDRESS_P (x)
6958 || flag_pic);
6959 }
6960
6961 static bool
6962 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
6963 {
6964 return (!arm_cannot_force_const_mem (mode, x)
6965 && (TARGET_32BIT
6966 ? arm_legitimate_constant_p_1 (mode, x)
6967 : thumb_legitimate_constant_p (mode, x)));
6968 }
6969
6970 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6971
6972 static bool
6973 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
6974 {
6975 rtx base, offset;
6976
6977 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
6978 {
6979 split_const (x, &base, &offset);
6980 if (GET_CODE (base) == SYMBOL_REF
6981 && !offset_within_block_p (base, INTVAL (offset)))
6982 return true;
6983 }
6984 return arm_tls_referenced_p (x);
6985 }
6986 \f
6987 #define REG_OR_SUBREG_REG(X) \
6988 (GET_CODE (X) == REG \
6989 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6990
6991 #define REG_OR_SUBREG_RTX(X) \
6992 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6993
6994 static inline int
6995 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6996 {
6997 enum machine_mode mode = GET_MODE (x);
6998 int total;
6999
7000 switch (code)
7001 {
7002 case ASHIFT:
7003 case ASHIFTRT:
7004 case LSHIFTRT:
7005 case ROTATERT:
7006 case PLUS:
7007 case MINUS:
7008 case COMPARE:
7009 case NEG:
7010 case NOT:
7011 return COSTS_N_INSNS (1);
7012
7013 case MULT:
7014 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7015 {
7016 int cycles = 0;
7017 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7018
7019 while (i)
7020 {
7021 i >>= 2;
7022 cycles++;
7023 }
7024 return COSTS_N_INSNS (2) + cycles;
7025 }
7026 return COSTS_N_INSNS (1) + 16;
7027
7028 case SET:
7029 return (COSTS_N_INSNS (1)
7030 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7031 + (GET_CODE (SET_DEST (x)) == MEM)));
7032
7033 case CONST_INT:
7034 if (outer == SET)
7035 {
7036 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7037 return 0;
7038 if (thumb_shiftable_const (INTVAL (x)))
7039 return COSTS_N_INSNS (2);
7040 return COSTS_N_INSNS (3);
7041 }
7042 else if ((outer == PLUS || outer == COMPARE)
7043 && INTVAL (x) < 256 && INTVAL (x) > -256)
7044 return 0;
7045 else if ((outer == IOR || outer == XOR || outer == AND)
7046 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7047 return COSTS_N_INSNS (1);
7048 else if (outer == AND)
7049 {
7050 int i;
7051 /* This duplicates the tests in the andsi3 expander. */
7052 for (i = 9; i <= 31; i++)
7053 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7054 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7055 return COSTS_N_INSNS (2);
7056 }
7057 else if (outer == ASHIFT || outer == ASHIFTRT
7058 || outer == LSHIFTRT)
7059 return 0;
7060 return COSTS_N_INSNS (2);
7061
7062 case CONST:
7063 case CONST_DOUBLE:
7064 case LABEL_REF:
7065 case SYMBOL_REF:
7066 return COSTS_N_INSNS (3);
7067
7068 case UDIV:
7069 case UMOD:
7070 case DIV:
7071 case MOD:
7072 return 100;
7073
7074 case TRUNCATE:
7075 return 99;
7076
7077 case AND:
7078 case XOR:
7079 case IOR:
7080 /* XXX guess. */
7081 return 8;
7082
7083 case MEM:
7084 /* XXX another guess. */
7085 /* Memory costs quite a lot for the first word, but subsequent words
7086 load at the equivalent of a single insn each. */
7087 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7088 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7089 ? 4 : 0));
7090
7091 case IF_THEN_ELSE:
7092 /* XXX a guess. */
7093 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7094 return 14;
7095 return 2;
7096
7097 case SIGN_EXTEND:
7098 case ZERO_EXTEND:
7099 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7100 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7101
7102 if (mode == SImode)
7103 return total;
7104
7105 if (arm_arch6)
7106 return total + COSTS_N_INSNS (1);
7107
7108 /* Assume a two-shift sequence. Increase the cost slightly so
7109 we prefer actual shifts over an extend operation. */
7110 return total + 1 + COSTS_N_INSNS (2);
7111
7112 default:
7113 return 99;
7114 }
7115 }
7116
7117 static inline bool
7118 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7119 {
7120 enum machine_mode mode = GET_MODE (x);
7121 enum rtx_code subcode;
7122 rtx operand;
7123 enum rtx_code code = GET_CODE (x);
7124 *total = 0;
7125
7126 switch (code)
7127 {
7128 case MEM:
7129 /* Memory costs quite a lot for the first word, but subsequent words
7130 load at the equivalent of a single insn each. */
7131 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7132 return true;
7133
7134 case DIV:
7135 case MOD:
7136 case UDIV:
7137 case UMOD:
7138 if (TARGET_HARD_FLOAT && mode == SFmode)
7139 *total = COSTS_N_INSNS (2);
7140 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
7141 *total = COSTS_N_INSNS (4);
7142 else
7143 *total = COSTS_N_INSNS (20);
7144 return false;
7145
7146 case ROTATE:
7147 if (GET_CODE (XEXP (x, 1)) == REG)
7148 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
7149 else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7150 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
7151
7152 /* Fall through */
7153 case ROTATERT:
7154 if (mode != SImode)
7155 {
7156 *total += COSTS_N_INSNS (4);
7157 return true;
7158 }
7159
7160 /* Fall through */
7161 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
7162 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7163 if (mode == DImode)
7164 {
7165 *total += COSTS_N_INSNS (3);
7166 return true;
7167 }
7168
7169 *total += COSTS_N_INSNS (1);
7170 /* Increase the cost of complex shifts because they aren't any faster,
7171 and reduce dual issue opportunities. */
7172 if (arm_tune_cortex_a9
7173 && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
7174 ++*total;
7175
7176 return true;
7177
7178 case MINUS:
7179 if (mode == DImode)
7180 {
7181 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7182 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7183 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7184 {
7185 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7186 return true;
7187 }
7188
7189 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7190 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
7191 {
7192 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7193 return true;
7194 }
7195
7196 return false;
7197 }
7198
7199 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7200 {
7201 if (TARGET_HARD_FLOAT
7202 && (mode == SFmode
7203 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7204 {
7205 *total = COSTS_N_INSNS (1);
7206 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
7207 && arm_const_double_rtx (XEXP (x, 0)))
7208 {
7209 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7210 return true;
7211 }
7212
7213 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7214 && arm_const_double_rtx (XEXP (x, 1)))
7215 {
7216 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7217 return true;
7218 }
7219
7220 return false;
7221 }
7222 *total = COSTS_N_INSNS (20);
7223 return false;
7224 }
7225
7226 *total = COSTS_N_INSNS (1);
7227 if (GET_CODE (XEXP (x, 0)) == CONST_INT
7228 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
7229 {
7230 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7231 return true;
7232 }
7233
7234 subcode = GET_CODE (XEXP (x, 1));
7235 if (subcode == ASHIFT || subcode == ASHIFTRT
7236 || subcode == LSHIFTRT
7237 || subcode == ROTATE || subcode == ROTATERT)
7238 {
7239 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7240 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7241 return true;
7242 }
7243
7244 /* A shift as a part of RSB costs no more than RSB itself. */
7245 if (GET_CODE (XEXP (x, 0)) == MULT
7246 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7247 {
7248 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
7249 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7250 return true;
7251 }
7252
7253 if (subcode == MULT
7254 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
7255 {
7256 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7257 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
7258 return true;
7259 }
7260
7261 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
7262 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
7263 {
7264 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7265 if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
7266 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
7267 *total += COSTS_N_INSNS (1);
7268
7269 return true;
7270 }
7271
7272 /* Fall through */
7273
7274 case PLUS:
7275 if (code == PLUS && arm_arch6 && mode == SImode
7276 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
7277 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
7278 {
7279 *total = COSTS_N_INSNS (1);
7280 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
7281 0, speed);
7282 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7283 return true;
7284 }
7285
7286 /* MLA: All arguments must be registers. We filter out
7287 multiplication by a power of two, so that we fall through to
7288 the code below. */
7289 if (GET_CODE (XEXP (x, 0)) == MULT
7290 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7291 {
7292 /* The cost comes from the cost of the multiply. */
7293 return false;
7294 }
7295
7296 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7297 {
7298 if (TARGET_HARD_FLOAT
7299 && (mode == SFmode
7300 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7301 {
7302 *total = COSTS_N_INSNS (1);
7303 if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
7304 && arm_const_double_rtx (XEXP (x, 1)))
7305 {
7306 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7307 return true;
7308 }
7309
7310 return false;
7311 }
7312
7313 *total = COSTS_N_INSNS (20);
7314 return false;
7315 }
7316
7317 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
7318 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
7319 {
7320 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
7321 if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7322 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
7323 *total += COSTS_N_INSNS (1);
7324 return true;
7325 }
7326
7327 /* Fall through */
7328
7329 case AND: case XOR: case IOR:
7330
7331 /* Normally the frame registers will be split into reg+const during
7332 reload, so it is a bad idea to combine them with other instructions,
7333 since then they might not be moved outside of loops. As a compromise
7334 we allow integration with ops that have a constant as their second
7335 operand. */
7336 if (REG_OR_SUBREG_REG (XEXP (x, 0))
7337 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
7338 && GET_CODE (XEXP (x, 1)) != CONST_INT)
7339 *total = COSTS_N_INSNS (1);
7340
7341 if (mode == DImode)
7342 {
7343 *total += COSTS_N_INSNS (2);
7344 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7345 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7346 {
7347 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7348 return true;
7349 }
7350
7351 return false;
7352 }
7353
7354 *total += COSTS_N_INSNS (1);
7355 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7356 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7357 {
7358 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7359 return true;
7360 }
7361 subcode = GET_CODE (XEXP (x, 0));
7362 if (subcode == ASHIFT || subcode == ASHIFTRT
7363 || subcode == LSHIFTRT
7364 || subcode == ROTATE || subcode == ROTATERT)
7365 {
7366 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7367 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7368 return true;
7369 }
7370
7371 if (subcode == MULT
7372 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7373 {
7374 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7375 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7376 return true;
7377 }
7378
7379 if (subcode == UMIN || subcode == UMAX
7380 || subcode == SMIN || subcode == SMAX)
7381 {
7382 *total = COSTS_N_INSNS (3);
7383 return true;
7384 }
7385
7386 return false;
7387
7388 case MULT:
7389 /* This should have been handled by the CPU specific routines. */
7390 gcc_unreachable ();
7391
7392 case TRUNCATE:
7393 if (arm_arch3m && mode == SImode
7394 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
7395 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7396 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
7397 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
7398 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
7399 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
7400 {
7401 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
7402 return true;
7403 }
7404 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7405 return false;
7406
7407 case NEG:
7408 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7409 {
7410 if (TARGET_HARD_FLOAT
7411 && (mode == SFmode
7412 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7413 {
7414 *total = COSTS_N_INSNS (1);
7415 return false;
7416 }
7417 *total = COSTS_N_INSNS (2);
7418 return false;
7419 }
7420
7421 /* Fall through */
7422 case NOT:
7423 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
7424 if (mode == SImode && code == NOT)
7425 {
7426 subcode = GET_CODE (XEXP (x, 0));
7427 if (subcode == ASHIFT || subcode == ASHIFTRT
7428 || subcode == LSHIFTRT
7429 || subcode == ROTATE || subcode == ROTATERT
7430 || (subcode == MULT
7431 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
7432 {
7433 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7434 /* Register shifts cost an extra cycle. */
7435 if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
7436 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
7437 subcode, 1, speed);
7438 return true;
7439 }
7440 }
7441
7442 return false;
7443
7444 case IF_THEN_ELSE:
7445 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7446 {
7447 *total = COSTS_N_INSNS (4);
7448 return true;
7449 }
7450
7451 operand = XEXP (x, 0);
7452
7453 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
7454 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
7455 && GET_CODE (XEXP (operand, 0)) == REG
7456 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
7457 *total += COSTS_N_INSNS (1);
7458 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
7459 + rtx_cost (XEXP (x, 2), code, 2, speed));
7460 return true;
7461
7462 case NE:
7463 if (mode == SImode && XEXP (x, 1) == const0_rtx)
7464 {
7465 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7466 return true;
7467 }
7468 goto scc_insn;
7469
7470 case GE:
7471 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7472 && mode == SImode && XEXP (x, 1) == const0_rtx)
7473 {
7474 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7475 return true;
7476 }
7477 goto scc_insn;
7478
7479 case LT:
7480 if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
7481 && mode == SImode && XEXP (x, 1) == const0_rtx)
7482 {
7483 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7484 return true;
7485 }
7486 goto scc_insn;
7487
7488 case EQ:
7489 case GT:
7490 case LE:
7491 case GEU:
7492 case LTU:
7493 case GTU:
7494 case LEU:
7495 case UNORDERED:
7496 case ORDERED:
7497 case UNEQ:
7498 case UNGE:
7499 case UNLT:
7500 case UNGT:
7501 case UNLE:
7502 scc_insn:
7503 /* SCC insns. If the comparison has already been performed, they
7504 cost 2 instructions. Otherwise they need an additional comparison
7505 before them. */
7506 *total = COSTS_N_INSNS (2);
7507 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7508 {
7509 return true;
7510 }
7511
7512 /* Fall through */
7513 case COMPARE:
7514 if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
7515 {
7516 *total = 0;
7517 return true;
7518 }
7519
7520 *total += COSTS_N_INSNS (1);
7521 if (GET_CODE (XEXP (x, 1)) == CONST_INT
7522 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
7523 {
7524 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7525 return true;
7526 }
7527
7528 subcode = GET_CODE (XEXP (x, 0));
7529 if (subcode == ASHIFT || subcode == ASHIFTRT
7530 || subcode == LSHIFTRT
7531 || subcode == ROTATE || subcode == ROTATERT)
7532 {
7533 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7534 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7535 return true;
7536 }
7537
7538 if (subcode == MULT
7539 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7540 {
7541 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7542 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
7543 return true;
7544 }
7545
7546 return false;
7547
7548 case UMIN:
7549 case UMAX:
7550 case SMIN:
7551 case SMAX:
7552 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
7553 if (GET_CODE (XEXP (x, 1)) != CONST_INT
7554 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
7555 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
7556 return true;
7557
7558 case ABS:
7559 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
7560 {
7561 if (TARGET_HARD_FLOAT
7562 && (mode == SFmode
7563 || (mode == DFmode && !TARGET_VFP_SINGLE)))
7564 {
7565 *total = COSTS_N_INSNS (1);
7566 return false;
7567 }
7568 *total = COSTS_N_INSNS (20);
7569 return false;
7570 }
7571 *total = COSTS_N_INSNS (1);
7572 if (mode == DImode)
7573 *total += COSTS_N_INSNS (3);
7574 return false;
7575
7576 case SIGN_EXTEND:
7577 case ZERO_EXTEND:
7578 *total = 0;
7579 if (GET_MODE_CLASS (mode) == MODE_INT)
7580 {
7581 rtx op = XEXP (x, 0);
7582 enum machine_mode opmode = GET_MODE (op);
7583
7584 if (mode == DImode)
7585 *total += COSTS_N_INSNS (1);
7586
7587 if (opmode != SImode)
7588 {
7589 if (MEM_P (op))
7590 {
7591 /* If !arm_arch4, we use one of the extendhisi2_mem
7592 or movhi_bytes patterns for HImode. For a QImode
7593 sign extension, we first zero-extend from memory
7594 and then perform a shift sequence. */
7595 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7596 *total += COSTS_N_INSNS (2);
7597 }
7598 else if (arm_arch6)
7599 *total += COSTS_N_INSNS (1);
7600
7601 /* We don't have the necessary insn, so we need to perform some
7602 other operation. */
7603 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7604 /* An and with constant 255. */
7605 *total += COSTS_N_INSNS (1);
7606 else
7607 /* A shift sequence. Increase costs slightly to avoid
7608 combining two shifts into an extend operation. */
7609 *total += COSTS_N_INSNS (2) + 1;
7610 }
7611
7612 return false;
7613 }
7614
7615 switch (GET_MODE (XEXP (x, 0)))
7616 {
7617 case V8QImode:
7618 case V4HImode:
7619 case V2SImode:
7620 case V4QImode:
7621 case V2HImode:
7622 *total = COSTS_N_INSNS (1);
7623 return false;
7624
7625 default:
7626 gcc_unreachable ();
7627 }
7628 gcc_unreachable ();
7629
7630 case ZERO_EXTRACT:
7631 case SIGN_EXTRACT:
7632 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
7633 return true;
7634
7635 case CONST_INT:
7636 if (const_ok_for_arm (INTVAL (x))
7637 || const_ok_for_arm (~INTVAL (x)))
7638 *total = COSTS_N_INSNS (1);
7639 else
7640 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
7641 INTVAL (x), NULL_RTX,
7642 NULL_RTX, 0, 0));
7643 return true;
7644
7645 case CONST:
7646 case LABEL_REF:
7647 case SYMBOL_REF:
7648 *total = COSTS_N_INSNS (3);
7649 return true;
7650
7651 case HIGH:
7652 *total = COSTS_N_INSNS (1);
7653 return true;
7654
7655 case LO_SUM:
7656 *total = COSTS_N_INSNS (1);
7657 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
7658 return true;
7659
7660 case CONST_DOUBLE:
7661 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
7662 && (mode == SFmode || !TARGET_VFP_SINGLE))
7663 *total = COSTS_N_INSNS (1);
7664 else
7665 *total = COSTS_N_INSNS (4);
7666 return true;
7667
7668 case SET:
7669 return false;
7670
7671 case UNSPEC:
7672 /* We cost this as high as our memory costs to allow this to
7673 be hoisted from loops. */
7674 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
7675 {
7676 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
7677 }
7678 return true;
7679
7680 case CONST_VECTOR:
7681 if (TARGET_NEON
7682 && TARGET_HARD_FLOAT
7683 && outer == SET
7684 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
7685 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
7686 *total = COSTS_N_INSNS (1);
7687 else
7688 *total = COSTS_N_INSNS (4);
7689 return true;
7690
7691 default:
7692 *total = COSTS_N_INSNS (4);
7693 return false;
7694 }
7695 }
7696
7697 /* Estimates the size cost of thumb1 instructions.
7698 For now most of the code is copied from thumb1_rtx_costs. We need more
7699 fine-grained tuning when we have more related test cases. */
7700 static inline int
7701 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7702 {
7703 enum machine_mode mode = GET_MODE (x);
7704
7705 switch (code)
7706 {
7707 case ASHIFT:
7708 case ASHIFTRT:
7709 case LSHIFTRT:
7710 case ROTATERT:
7711 case PLUS:
7712 case MINUS:
7713 case COMPARE:
7714 case NEG:
7715 case NOT:
7716 return COSTS_N_INSNS (1);
7717
7718 case MULT:
7719 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7720 {
7721 /* The Thumb-1 mul instruction can't operate on a constant. We must
7722 load it into a register first. */
7723 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
7724 return COSTS_N_INSNS (1) + const_size;
7725 }
7726 return COSTS_N_INSNS (1);
7727
7728 case SET:
7729 return (COSTS_N_INSNS (1)
7730 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
7731 + (GET_CODE (SET_DEST (x)) == MEM)));
7732
7733 case CONST_INT:
7734 if (outer == SET)
7735 {
7736 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7737 return COSTS_N_INSNS (1);
7738 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7739 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7740 return COSTS_N_INSNS (2);
7741 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7742 if (thumb_shiftable_const (INTVAL (x)))
7743 return COSTS_N_INSNS (2);
7744 return COSTS_N_INSNS (3);
7745 }
7746 else if ((outer == PLUS || outer == COMPARE)
7747 && INTVAL (x) < 256 && INTVAL (x) > -256)
7748 return 0;
7749 else if ((outer == IOR || outer == XOR || outer == AND)
7750 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7751 return COSTS_N_INSNS (1);
7752 else if (outer == AND)
7753 {
7754 int i;
7755 /* This duplicates the tests in the andsi3 expander. */
7756 for (i = 9; i <= 31; i++)
7757 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7758 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7759 return COSTS_N_INSNS (2);
7760 }
7761 else if (outer == ASHIFT || outer == ASHIFTRT
7762 || outer == LSHIFTRT)
7763 return 0;
7764 return COSTS_N_INSNS (2);
7765
7766 case CONST:
7767 case CONST_DOUBLE:
7768 case LABEL_REF:
7769 case SYMBOL_REF:
7770 return COSTS_N_INSNS (3);
7771
7772 case UDIV:
7773 case UMOD:
7774 case DIV:
7775 case MOD:
7776 return 100;
7777
7778 case TRUNCATE:
7779 return 99;
7780
7781 case AND:
7782 case XOR:
7783 case IOR:
7784 /* XXX guess. */
7785 return 8;
7786
7787 case MEM:
7788 /* XXX another guess. */
7789 /* Memory costs quite a lot for the first word, but subsequent words
7790 load at the equivalent of a single insn each. */
7791 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7792 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7793 ? 4 : 0));
7794
7795 case IF_THEN_ELSE:
7796 /* XXX a guess. */
7797 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7798 return 14;
7799 return 2;
7800
7801 case ZERO_EXTEND:
7802 /* XXX still guessing. */
7803 switch (GET_MODE (XEXP (x, 0)))
7804 {
7805 case QImode:
7806 return (1 + (mode == DImode ? 4 : 0)
7807 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7808
7809 case HImode:
7810 return (4 + (mode == DImode ? 4 : 0)
7811 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7812
7813 case SImode:
7814 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7815
7816 default:
7817 return 99;
7818 }
7819
7820 default:
7821 return 99;
7822 }
7823 }
7824
7825 /* RTX costs when optimizing for size. */
7826 static bool
7827 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7828 int *total)
7829 {
7830 enum machine_mode mode = GET_MODE (x);
7831 if (TARGET_THUMB1)
7832 {
7833 *total = thumb1_size_rtx_costs (x, code, outer_code);
7834 return true;
7835 }
7836
7837 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7838 switch (code)
7839 {
7840 case MEM:
7841 /* A memory access costs 1 insn if the mode is small or the address is
7842 a single register; otherwise it costs one insn per word. */
7843 if (REG_P (XEXP (x, 0)))
7844 *total = COSTS_N_INSNS (1);
7845 else if (flag_pic
7846 && GET_CODE (XEXP (x, 0)) == PLUS
7847 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7848 /* This will be split into two instructions.
7849 See arm.md:calculate_pic_address. */
7850 *total = COSTS_N_INSNS (2);
7851 else
7852 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7853 return true;
7854
7855 case DIV:
7856 case MOD:
7857 case UDIV:
7858 case UMOD:
7859 /* Needs a libcall, so it costs about this. */
7860 *total = COSTS_N_INSNS (2);
7861 return false;
7862
7863 case ROTATE:
7864 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
7865 {
7866 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
7867 return true;
7868 }
7869 /* Fall through */
7870 case ROTATERT:
7871 case ASHIFT:
7872 case LSHIFTRT:
7873 case ASHIFTRT:
7874 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
7875 {
7876 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
7877 return true;
7878 }
7879 else if (mode == SImode)
7880 {
7881 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
7882 /* Slightly disparage register shifts, but not by much. */
7883 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
7884 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
7885 return true;
7886 }
7887
7888 /* Needs a libcall. */
7889 *total = COSTS_N_INSNS (2);
7890 return false;
7891
7892 case MINUS:
7893 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7894 && (mode == SFmode || !TARGET_VFP_SINGLE))
7895 {
7896 *total = COSTS_N_INSNS (1);
7897 return false;
7898 }
7899
7900 if (mode == SImode)
7901 {
7902 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
7903 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
7904
7905 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
7906 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
7907 || subcode1 == ROTATE || subcode1 == ROTATERT
7908 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
7909 || subcode1 == ASHIFTRT)
7910 {
7911 /* It's just the cost of the two operands. */
7912 *total = 0;
7913 return false;
7914 }
7915
7916 *total = COSTS_N_INSNS (1);
7917 return false;
7918 }
7919
7920 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7921 return false;
7922
7923 case PLUS:
7924 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7925 && (mode == SFmode || !TARGET_VFP_SINGLE))
7926 {
7927 *total = COSTS_N_INSNS (1);
7928 return false;
7929 }
7930
7931 /* A shift as a part of ADD costs nothing. */
7932 if (GET_CODE (XEXP (x, 0)) == MULT
7933 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
7934 {
7935 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
7936 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
7937 *total += rtx_cost (XEXP (x, 1), code, 1, false);
7938 return true;
7939 }
7940
7941 /* Fall through */
7942 case AND: case XOR: case IOR:
7943 if (mode == SImode)
7944 {
7945 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
7946
7947 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
7948 || subcode == LSHIFTRT || subcode == ASHIFTRT
7949 || (code == AND && subcode == NOT))
7950 {
7951 /* It's just the cost of the two operands. */
7952 *total = 0;
7953 return false;
7954 }
7955 }
7956
7957 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7958 return false;
7959
7960 case MULT:
7961 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7962 return false;
7963
7964 case NEG:
7965 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7966 && (mode == SFmode || !TARGET_VFP_SINGLE))
7967 {
7968 *total = COSTS_N_INSNS (1);
7969 return false;
7970 }
7971
7972 /* Fall through */
7973 case NOT:
7974 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7975
7976 return false;
7977
7978 case IF_THEN_ELSE:
7979 *total = 0;
7980 return false;
7981
7982 case COMPARE:
7983 if (cc_register (XEXP (x, 0), VOIDmode))
7984 * total = 0;
7985 else
7986 *total = COSTS_N_INSNS (1);
7987 return false;
7988
7989 case ABS:
7990 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
7991 && (mode == SFmode || !TARGET_VFP_SINGLE))
7992 *total = COSTS_N_INSNS (1);
7993 else
7994 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7995 return false;
7996
7997 case SIGN_EXTEND:
7998 case ZERO_EXTEND:
7999 return arm_rtx_costs_1 (x, outer_code, total, 0);
8000
8001 case CONST_INT:
8002 if (const_ok_for_arm (INTVAL (x)))
8003 /* A multiplication by a constant requires another instruction
8004 to load the constant to a register. */
8005 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8006 ? 1 : 0);
8007 else if (const_ok_for_arm (~INTVAL (x)))
8008 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8009 else if (const_ok_for_arm (-INTVAL (x)))
8010 {
8011 if (outer_code == COMPARE || outer_code == PLUS
8012 || outer_code == MINUS)
8013 *total = 0;
8014 else
8015 *total = COSTS_N_INSNS (1);
8016 }
8017 else
8018 *total = COSTS_N_INSNS (2);
8019 return true;
8020
8021 case CONST:
8022 case LABEL_REF:
8023 case SYMBOL_REF:
8024 *total = COSTS_N_INSNS (2);
8025 return true;
8026
8027 case CONST_DOUBLE:
8028 *total = COSTS_N_INSNS (4);
8029 return true;
8030
8031 case CONST_VECTOR:
8032 if (TARGET_NEON
8033 && TARGET_HARD_FLOAT
8034 && outer_code == SET
8035 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8036 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8037 *total = COSTS_N_INSNS (1);
8038 else
8039 *total = COSTS_N_INSNS (4);
8040 return true;
8041
8042 case HIGH:
8043 case LO_SUM:
8044 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8045 cost of these slightly. */
8046 *total = COSTS_N_INSNS (1) + 1;
8047 return true;
8048
8049 case SET:
8050 return false;
8051
8052 default:
8053 if (mode != VOIDmode)
8054 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8055 else
8056 *total = COSTS_N_INSNS (4); /* Who knows? */
8057 return false;
8058 }
8059 }
8060
8061 /* RTX costs. Dispatch to the size-based or per-core speed cost routines. */
8062 static bool
8063 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
8064 int *total, bool speed)
8065 {
8066 if (!speed)
8067 return arm_size_rtx_costs (x, (enum rtx_code) code,
8068 (enum rtx_code) outer_code, total);
8069 else
8070 return current_tune->rtx_costs (x, (enum rtx_code) code,
8071 (enum rtx_code) outer_code,
8072 total, speed);
8073 }
8074
8075 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
8076 supported on any "slowmul" cores, so it can be ignored. */
8077
8078 static bool
8079 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8080 int *total, bool speed)
8081 {
8082 enum machine_mode mode = GET_MODE (x);
8083
8084 if (TARGET_THUMB)
8085 {
8086 *total = thumb1_rtx_costs (x, code, outer_code);
8087 return true;
8088 }
8089
8090 switch (code)
8091 {
8092 case MULT:
8093 if (GET_MODE_CLASS (mode) == MODE_FLOAT
8094 || mode == DImode)
8095 {
8096 *total = COSTS_N_INSNS (20);
8097 return false;
8098 }
8099
8100 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8101 {
8102 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8103 & (unsigned HOST_WIDE_INT) 0xffffffff);
8104 int cost, const_ok = const_ok_for_arm (i);
8105 int j, booth_unit_size;
8106
8107 /* Tune as appropriate. */
8108 cost = const_ok ? 4 : 8;
8109 booth_unit_size = 2;
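/* The loop below models the slow multiplier's 2-bit Booth steps, adding
one to COST per step; e.g. a multiply by 0xff adds four steps on top of
the base cost of 4 (0xff is a valid ARM immediate). */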
8110 for (j = 0; i && j < 32; j += booth_unit_size)
8111 {
8112 i >>= booth_unit_size;
8113 cost++;
8114 }
8115
8116 *total = COSTS_N_INSNS (cost);
8117 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8118 return true;
8119 }
8120
8121 *total = COSTS_N_INSNS (20);
8122 return false;
8123
8124 default:
8125 return arm_rtx_costs_1 (x, outer_code, total, speed);
8126 }
8127 }
8128
8129
8130 /* RTX cost for cores with a fast multiply unit (M variants). */
8131
8132 static bool
8133 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8134 int *total, bool speed)
8135 {
8136 enum machine_mode mode = GET_MODE (x);
8137
8138 if (TARGET_THUMB1)
8139 {
8140 *total = thumb1_rtx_costs (x, code, outer_code);
8141 return true;
8142 }
8143
8144 /* ??? should thumb2 use different costs? */
8145 switch (code)
8146 {
8147 case MULT:
8148 /* There is no point basing this on the tuning, since it is always the
8149 fast variant if it exists at all. */
8150 if (mode == DImode
8151 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8152 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8153 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8154 {
8155 *total = COSTS_N_INSNS(2);
8156 return false;
8157 }
8158
8159
8160 if (mode == DImode)
8161 {
8162 *total = COSTS_N_INSNS (5);
8163 return false;
8164 }
8165
8166 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8167 {
8168 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
8169 & (unsigned HOST_WIDE_INT) 0xffffffff);
8170 int cost, const_ok = const_ok_for_arm (i);
8171 int j, booth_unit_size;
8172
8173 /* Tune as appropriate. */
8174 cost = const_ok ? 4 : 8;
8175 booth_unit_size = 8;
8176 for (j = 0; i && j < 32; j += booth_unit_size)
8177 {
8178 i >>= booth_unit_size;
8179 cost++;
8180 }
8181
8182 *total = COSTS_N_INSNS(cost);
8183 return false;
8184 }
8185
8186 if (mode == SImode)
8187 {
8188 *total = COSTS_N_INSNS (4);
8189 return false;
8190 }
8191
8192 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8193 {
8194 if (TARGET_HARD_FLOAT
8195 && (mode == SFmode
8196 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8197 {
8198 *total = COSTS_N_INSNS (1);
8199 return false;
8200 }
8201 }
8202
8203 /* Requires a lib call */
8204 *total = COSTS_N_INSNS (20);
8205 return false;
8206
8207 default:
8208 return arm_rtx_costs_1 (x, outer_code, total, speed);
8209 }
8210 }
8211
8212
8213 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
8214 so it can be ignored. */
8215
8216 static bool
8217 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8218 int *total, bool speed)
8219 {
8220 enum machine_mode mode = GET_MODE (x);
8221
8222 if (TARGET_THUMB)
8223 {
8224 *total = thumb1_rtx_costs (x, code, outer_code);
8225 return true;
8226 }
8227
8228 switch (code)
8229 {
8230 case COMPARE:
8231 if (GET_CODE (XEXP (x, 0)) != MULT)
8232 return arm_rtx_costs_1 (x, outer_code, total, speed);
8233
8234 /* A COMPARE of a MULT is slow on XScale; the muls instruction
8235 will stall until the multiplication is complete. */
8236 *total = COSTS_N_INSNS (3);
8237 return false;
8238
8239 case MULT:
8240 /* There is no point basing this on the tuning, since it is always the
8241 fast variant if it exists at all. */
8242 if (mode == DImode
8243 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8244 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8245 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8246 {
8247 *total = COSTS_N_INSNS (2);
8248 return false;
8249 }
8250
8251
8252 if (mode == DImode)
8253 {
8254 *total = COSTS_N_INSNS (5);
8255 return false;
8256 }
8257
8258 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8259 {
8260 /* If operand 1 is a constant we can more accurately
8261 calculate the cost of the multiply. The multiplier can
8262 retire 15 bits on the first cycle and a further 12 on the
8263 second. We do, of course, have to load the constant into
8264 a register first. */
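/* Worked example of the masking below: a constant that fits in the low
15 bits, such as 0x4000, costs only the 1-cycle overhead; 0x12345 has
bits above bit 14 and costs 2 cycles; only constants with bits in the
top five bits (after inverting a negative value) reach 3 cycles. */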
8265 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8266 /* There's a general overhead of one cycle. */
8267 int cost = 1;
8268 unsigned HOST_WIDE_INT masked_const;
8269
8270 if (i & 0x80000000)
8271 i = ~i;
8272
8273 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
8274
8275 masked_const = i & 0xffff8000;
8276 if (masked_const != 0)
8277 {
8278 cost++;
8279 masked_const = i & 0xf8000000;
8280 if (masked_const != 0)
8281 cost++;
8282 }
8283 *total = COSTS_N_INSNS (cost);
8284 return false;
8285 }
8286
8287 if (mode == SImode)
8288 {
8289 *total = COSTS_N_INSNS (3);
8290 return false;
8291 }
8292
8293 /* Requires a lib call */
8294 *total = COSTS_N_INSNS (20);
8295 return false;
8296
8297 default:
8298 return arm_rtx_costs_1 (x, outer_code, total, speed);
8299 }
8300 }
8301
8302
8303 /* RTX costs for 9e (and later) cores. */
8304
8305 static bool
8306 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8307 int *total, bool speed)
8308 {
8309 enum machine_mode mode = GET_MODE (x);
8310
8311 if (TARGET_THUMB1)
8312 {
8313 switch (code)
8314 {
8315 case MULT:
8316 *total = COSTS_N_INSNS (3);
8317 return true;
8318
8319 default:
8320 *total = thumb1_rtx_costs (x, code, outer_code);
8321 return true;
8322 }
8323 }
8324
8325 switch (code)
8326 {
8327 case MULT:
8328 /* There is no point basing this on the tuning, since it is always the
8329 fast variant if it exists at all. */
8330 if (mode == DImode
8331 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
8332 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8333 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8334 {
8335 *total = COSTS_N_INSNS (2);
8336 return false;
8337 }
8338
8339
8340 if (mode == DImode)
8341 {
8342 *total = COSTS_N_INSNS (5);
8343 return false;
8344 }
8345
8346 if (mode == SImode)
8347 {
8348 *total = COSTS_N_INSNS (2);
8349 return false;
8350 }
8351
8352 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8353 {
8354 if (TARGET_HARD_FLOAT
8355 && (mode == SFmode
8356 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8357 {
8358 *total = COSTS_N_INSNS (1);
8359 return false;
8360 }
8361 }
8362
8363 *total = COSTS_N_INSNS (20);
8364 return false;
8365
8366 default:
8367 return arm_rtx_costs_1 (x, outer_code, total, speed);
8368 }
8369 }
8370 /* All address computations that can be done are free, but rtx cost returns
8371 the same for practically all of them. So we weight the different types
8372 of address here in the order (most preferred first):
8373 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
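/* So, for example, a post-increment address returns 0, reg+const
returns 2 and a bare SYMBOL_REF returns 10. */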
8374 static inline int
8375 arm_arm_address_cost (rtx x)
8376 {
8377 enum rtx_code c = GET_CODE (x);
8378
8379 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
8380 return 0;
8381 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
8382 return 10;
8383
8384 if (c == PLUS)
8385 {
8386 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8387 return 2;
8388
8389 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
8390 return 3;
8391
8392 return 4;
8393 }
8394
8395 return 6;
8396 }
8397
8398 static inline int
8399 arm_thumb_address_cost (rtx x)
8400 {
8401 enum rtx_code c = GET_CODE (x);
8402
8403 if (c == REG)
8404 return 1;
8405 if (c == PLUS
8406 && GET_CODE (XEXP (x, 0)) == REG
8407 && GET_CODE (XEXP (x, 1)) == CONST_INT)
8408 return 1;
8409
8410 return 2;
8411 }
8412
8413 static int
8414 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8415 {
8416 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
8417 }
8418
8419 /* Adjust cost hook for XScale. */
8420 static bool
8421 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8422 {
8423 /* Some true dependencies can have a higher cost depending
8424 on precisely how certain input operands are used. */
8425 if (REG_NOTE_KIND(link) == 0
8426 && recog_memoized (insn) >= 0
8427 && recog_memoized (dep) >= 0)
8428 {
8429 int shift_opnum = get_attr_shift (insn);
8430 enum attr_type attr_type = get_attr_type (dep);
8431
8432 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8433 operand for INSN. If we have a shifted input operand and the
8434 instruction we depend on is another ALU instruction, then we may
8435 have to account for an additional stall. */
8436 if (shift_opnum != 0
8437 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
8438 {
8439 rtx shifted_operand;
8440 int opno;
8441
8442 /* Get the shifted operand. */
8443 extract_insn (insn);
8444 shifted_operand = recog_data.operand[shift_opnum];
8445
8446 /* Iterate over all the operands in DEP. If we write an operand
8447 that overlaps with SHIFTED_OPERAND, then we have to increase the
8448 cost of this dependency. */
8449 extract_insn (dep);
8450 preprocess_constraints ();
8451 for (opno = 0; opno < recog_data.n_operands; opno++)
8452 {
8453 /* We can ignore strict inputs. */
8454 if (recog_data.operand_type[opno] == OP_IN)
8455 continue;
8456
8457 if (reg_overlap_mentioned_p (recog_data.operand[opno],
8458 shifted_operand))
8459 {
8460 *cost = 2;
8461 return false;
8462 }
8463 }
8464 }
8465 }
8466 return true;
8467 }
8468
8469 /* Adjust cost hook for Cortex A9. */
8470 static bool
8471 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8472 {
8473 switch (REG_NOTE_KIND (link))
8474 {
8475 case REG_DEP_ANTI:
8476 *cost = 0;
8477 return false;
8478
8479 case REG_DEP_TRUE:
8480 case REG_DEP_OUTPUT:
8481 if (recog_memoized (insn) >= 0
8482 && recog_memoized (dep) >= 0)
8483 {
8484 if (GET_CODE (PATTERN (insn)) == SET)
8485 {
8486 if (GET_MODE_CLASS
8487 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8488 || GET_MODE_CLASS
8489 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8490 {
8491 enum attr_type attr_type_insn = get_attr_type (insn);
8492 enum attr_type attr_type_dep = get_attr_type (dep);
8493
8494 /* By default all dependencies of the form
8495 s0 = s0 <op> s1
8496 s0 = s0 <op> s2
8497 have an extra latency of 1 cycle because
8498 of the input and output dependency in this
8499 case. However this gets modeled as a true
8500 dependency and hence all these checks. */
8501 if (REG_P (SET_DEST (PATTERN (insn)))
8502 && REG_P (SET_DEST (PATTERN (dep)))
8503 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8504 SET_DEST (PATTERN (dep))))
8505 {
8506 /* FMACS is a special case where the dependent
8507 instruction can be issued 3 cycles before
8508 the normal latency in case of an output
8509 dependency. */
8510 if ((attr_type_insn == TYPE_FMACS
8511 || attr_type_insn == TYPE_FMACD)
8512 && (attr_type_dep == TYPE_FMACS
8513 || attr_type_dep == TYPE_FMACD))
8514 {
8515 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8516 *cost = insn_default_latency (dep) - 3;
8517 else
8518 *cost = insn_default_latency (dep);
8519 return false;
8520 }
8521 else
8522 {
8523 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8524 *cost = insn_default_latency (dep) + 1;
8525 else
8526 *cost = insn_default_latency (dep);
8527 }
8528 return false;
8529 }
8530 }
8531 }
8532 }
8533 break;
8534
8535 default:
8536 gcc_unreachable ();
8537 }
8538
8539 return true;
8540 }
8541
8542 /* Adjust cost hook for FA726TE. */
8543 static bool
8544 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8545 {
8546 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
8547 followed by a predicated one) has a penalty of 3. */
8548 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8549 && recog_memoized (insn) >= 0
8550 && recog_memoized (dep) >= 0
8551 && get_attr_conds (dep) == CONDS_SET)
8552 {
8553 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8554 if (get_attr_conds (insn) == CONDS_USE
8555 && get_attr_type (insn) != TYPE_BRANCH)
8556 {
8557 *cost = 3;
8558 return false;
8559 }
8560
8561 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8562 || get_attr_conds (insn) == CONDS_USE)
8563 {
8564 *cost = 0;
8565 return false;
8566 }
8567 }
8568
8569 return true;
8570 }
8571
8572 /* Implement TARGET_REGISTER_MOVE_COST.
8573
8574 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
8575 it is typically more expensive than a single memory access. We set
8576 the cost to less than two memory accesses so that floating
8577 point to integer conversion does not go through memory. */
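/* (arm_memory_move_cost below returns 10 for 32-bit targets, so the
value of 15 used here stays below the 20 of a store/load round trip.) */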
8578
8579 int
8580 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
8581 reg_class_t from, reg_class_t to)
8582 {
8583 if (TARGET_32BIT)
8584 {
8585 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
8586 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
8587 return 15;
8588 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
8589 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
8590 return 4;
8591 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
8592 return 20;
8593 else
8594 return 2;
8595 }
8596 else
8597 {
8598 if (from == HI_REGS || to == HI_REGS)
8599 return 4;
8600 else
8601 return 2;
8602 }
8603 }
8604
8605 /* Implement TARGET_MEMORY_MOVE_COST. */
8606
8607 int
8608 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
8609 bool in ATTRIBUTE_UNUSED)
8610 {
8611 if (TARGET_32BIT)
8612 return 10;
8613 else
8614 {
8615 if (GET_MODE_SIZE (mode) < 4)
8616 return 8;
8617 else
8618 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
8619 }
8620 }
8621
8622 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8623 It corrects the value of COST based on the relationship between
8624 INSN and DEP through the dependence LINK. It returns the new
8625 value. There is a per-core adjust_cost hook to adjust scheduler costs
8626 and the per-core hook can choose to completely override the generic
8627 adjust_cost function. Only put bits of code into arm_adjust_cost that
8628 are common across all cores. */
8629 static int
8630 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8631 {
8632 rtx i_pat, d_pat;
8633
8634 /* When generating Thumb-1 code, we want to place flag-setting operations
8635 close to a conditional branch which depends on them, so that we can
8636 omit the comparison. */
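/* Returning 0 here lets the scheduler keep, for example, a flag-setting
adds immediately before the conditional branch that consumes its flags,
so no separate compare is needed. */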
8637 if (TARGET_THUMB1
8638 && REG_NOTE_KIND (link) == 0
8639 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8640 && recog_memoized (dep) >= 0
8641 && get_attr_conds (dep) == CONDS_SET)
8642 return 0;
8643
8644 if (current_tune->sched_adjust_cost != NULL)
8645 {
8646 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8647 return cost;
8648 }
8649
8650 /* XXX Is this strictly true? */
8651 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8652 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8653 return 0;
8654
8655 /* Call insns don't incur a stall, even if they follow a load. */
8656 if (REG_NOTE_KIND (link) == 0
8657 && GET_CODE (insn) == CALL_INSN)
8658 return 1;
8659
8660 if ((i_pat = single_set (insn)) != NULL
8661 && GET_CODE (SET_SRC (i_pat)) == MEM
8662 && (d_pat = single_set (dep)) != NULL
8663 && GET_CODE (SET_DEST (d_pat)) == MEM)
8664 {
8665 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
8666 /* This is a load after a store; there is no conflict if the load reads
8667 from a cached area. Assume that loads from the stack, and from the
8668 constant pool are cached, and that others will miss. This is a
8669 hack. */
8670
8671 if ((GET_CODE (src_mem) == SYMBOL_REF
8672 && CONSTANT_POOL_ADDRESS_P (src_mem))
8673 || reg_mentioned_p (stack_pointer_rtx, src_mem)
8674 || reg_mentioned_p (frame_pointer_rtx, src_mem)
8675 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
8676 return 1;
8677 }
8678
8679 return cost;
8680 }
8681
8682 static int
8683 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
8684 {
8685 if (TARGET_32BIT)
8686 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
8687 else
8688 return (optimize > 0) ? 2 : 0;
8689 }
8690
8691 static int
8692 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
8693 {
8694 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
8695 }
8696
8697 static bool fp_consts_inited = false;
8698
8699 static REAL_VALUE_TYPE value_fp0;
8700
8701 static void
8702 init_fp_table (void)
8703 {
8704 REAL_VALUE_TYPE r;
8705
8706 r = REAL_VALUE_ATOF ("0", DFmode);
8707 value_fp0 = r;
8708 fp_consts_inited = true;
8709 }
8710
8711 /* Return TRUE if rtx X is a valid immediate FP constant. */
8712 int
8713 arm_const_double_rtx (rtx x)
8714 {
8715 REAL_VALUE_TYPE r;
8716
8717 if (!fp_consts_inited)
8718 init_fp_table ();
8719
8720 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8721 if (REAL_VALUE_MINUS_ZERO (r))
8722 return 0;
8723
8724 if (REAL_VALUES_EQUAL (r, value_fp0))
8725 return 1;
8726
8727 return 0;
8728 }
8729
8730 /* VFPv3 has a fairly wide range of representable immediates, formed from
8731 "quarter-precision" floating-point values. These can be evaluated using this
8732 formula (with ^ for exponentiation):
8733
8734 -1^s * n * 2^-r
8735
8736 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8737 16 <= n <= 31 and 0 <= r <= 7.
8738
8739 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8740
8741 - A (most-significant) is the sign bit.
8742 - BCD are the exponent (encoded as r XOR 3).
8743 - EFGH are the mantissa (encoded as n - 16).
8744 */
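/* Worked example (illustrative): 1.0 = -1^0 * 16 * 2^-4, i.e. s = 0,
n = 16, r = 4, so the encoding below is
(0 << 7) | ((4 ^ 3) << 4) | (16 - 16) == 0x70;
similarly 0.5 uses r = 5 and encodes as 0x60. */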
8745
8746 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8747 fconst[sd] instruction, or -1 if X isn't suitable. */
8748 static int
8749 vfp3_const_double_index (rtx x)
8750 {
8751 REAL_VALUE_TYPE r, m;
8752 int sign, exponent;
8753 unsigned HOST_WIDE_INT mantissa, mant_hi;
8754 unsigned HOST_WIDE_INT mask;
8755 HOST_WIDE_INT m1, m2;
8756 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8757
8758 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
8759 return -1;
8760
8761 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8762
8763 /* We can't represent these things, so detect them first. */
8764 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
8765 return -1;
8766
8767 /* Extract sign, exponent and mantissa. */
8768 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
8769 r = real_value_abs (&r);
8770 exponent = REAL_EXP (&r);
8771 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8772 highest (sign) bit, with a fixed binary point at bit point_pos.
8773 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8774 bits for the mantissa, this may fail (low bits would be lost). */
8775 real_ldexp (&m, &r, point_pos - exponent);
8776 REAL_VALUE_TO_INT (&m1, &m2, m);
8777 mantissa = m1;
8778 mant_hi = m2;
8779
8780 /* If there are bits set in the low part of the mantissa, we can't
8781 represent this value. */
8782 if (mantissa != 0)
8783 return -1;
8784
8785 /* Now make it so that mantissa contains the most-significant bits, and move
8786 the point_pos to indicate that the least-significant bits have been
8787 discarded. */
8788 point_pos -= HOST_BITS_PER_WIDE_INT;
8789 mantissa = mant_hi;
8790
8791 /* We can permit four significant bits of mantissa only, plus a high bit
8792 which is always 1. */
8793 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8794 if ((mantissa & mask) != 0)
8795 return -1;
8796
8797 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8798 mantissa >>= point_pos - 5;
8799
8800 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8801 floating-point immediate zero with Neon using an integer-zero load, but
8802 that case is handled elsewhere.) */
8803 if (mantissa == 0)
8804 return -1;
8805
8806 gcc_assert (mantissa >= 16 && mantissa <= 31);
8807
8808 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8809 normalized significands are in the range [1, 2). (Our mantissa is shifted
8810 left 4 places at this point relative to normalized IEEE754 values). GCC
8811 internally uses [0.5, 1) (see real.c), so the exponent returned from
8812 REAL_EXP must be altered. */
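/* For example, GCC represents 1.0 as 0.5 * 2^1, so REAL_EXP returns 1 and
the adjusted exponent becomes 5 - 1 = 4, matching 1.0 = 16 * 2^-4 in the
encoding formula above. */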
8813 exponent = 5 - exponent;
8814
8815 if (exponent < 0 || exponent > 7)
8816 return -1;
8817
8818 /* Sign, mantissa and exponent are now in the correct form to plug into the
8819 formula described in the comment above. */
8820 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
8821 }
8822
8823 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8824 int
8825 vfp3_const_double_rtx (rtx x)
8826 {
8827 if (!TARGET_VFP3)
8828 return 0;
8829
8830 return vfp3_const_double_index (x) != -1;
8831 }
8832
8833 /* Recognize immediates which can be used in various Neon instructions. Legal
8834 immediates are described by the following table (for VMVN variants, the
8835 bitwise inverse of the constant shown is recognized. In either case, VMOV
8836 is output and the correct instruction to use for a given constant is chosen
8837 by the assembler). The constant shown is replicated across all elements of
8838 the destination vector.
8839
8840 insn elems variant constant (binary)
8841 ---- ----- ------- -----------------
8842 vmov i32 0 00000000 00000000 00000000 abcdefgh
8843 vmov i32 1 00000000 00000000 abcdefgh 00000000
8844 vmov i32 2 00000000 abcdefgh 00000000 00000000
8845 vmov i32 3 abcdefgh 00000000 00000000 00000000
8846 vmov i16 4 00000000 abcdefgh
8847 vmov i16 5 abcdefgh 00000000
8848 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8849 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8850 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8851 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8852 vmvn i16 10 00000000 abcdefgh
8853 vmvn i16 11 abcdefgh 00000000
8854 vmov i32 12 00000000 00000000 abcdefgh 11111111
8855 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8856 vmov i32 14 00000000 abcdefgh 11111111 11111111
8857 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8858 vmov i8 16 abcdefgh
8859 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8860 eeeeeeee ffffffff gggggggg hhhhhhhh
8861 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8862 vmov f32 19 00000000 00000000 00000000 00000000
8863
8864 For case 18, B = !b. Representable values are exactly those accepted by
8865 vfp3_const_double_index, but are output as floating-point numbers rather
8866 than indices.
8867
8868 For case 19, we will change it to vmov.i32 when assembling.
8869
8870 Variants 0-5 (inclusive) may also be used as immediates for the second
8871 operand of VORR/VBIC instructions.
8872
8873 The INVERSE argument causes the bitwise inverse of the given operand to be
8874 recognized instead (used for recognizing legal immediates for the VAND/VORN
8875 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8876 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8877 output, rather than the real insns vbic/vorr).
8878
8879 INVERSE makes no difference to the recognition of float vectors.
8880
8881 The return value is the variant of immediate as shown in the above table, or
8882 -1 if the given value doesn't match any of the listed patterns.
8883 */
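/* For instance (illustrative), a vector whose 32-bit elements all equal
0x0000005a matches variant 0: *MODCONST is set to 0x5a and *ELEMENTWIDTH
to 32. Its bitwise inverse, with elements 0xffffffa5, matches variant 6. */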
8884 static int
8885 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
8886 rtx *modconst, int *elementwidth)
8887 {
8888 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8889 matches = 1; \
8890 for (i = 0; i < idx; i += (STRIDE)) \
8891 if (!(TEST)) \
8892 matches = 0; \
8893 if (matches) \
8894 { \
8895 immtype = (CLASS); \
8896 elsize = (ELSIZE); \
8897 break; \
8898 }
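/* CHECK is expanded only inside the do { ... } while (0) block below; it
scans the byte image of the constant with the given STRIDE and, on a
match, records the variant number (CLASS) and element size (ELSIZE)
before breaking out of the block. */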
8899
8900 unsigned int i, elsize = 0, idx = 0, n_elts;
8901 unsigned int innersize;
8902 unsigned char bytes[16];
8903 int immtype = -1, matches;
8904 unsigned int invmask = inverse ? 0xff : 0;
8905 bool vector = GET_CODE (op) == CONST_VECTOR;
8906
8907 if (vector)
8908 {
8909 n_elts = CONST_VECTOR_NUNITS (op);
8910 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8911 }
8912 else
8913 {
8914 n_elts = 1;
8915 if (mode == VOIDmode)
8916 mode = DImode;
8917 innersize = GET_MODE_SIZE (mode);
8918 }
8919
8920 /* Vectors of float constants. */
8921 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8922 {
8923 rtx el0 = CONST_VECTOR_ELT (op, 0);
8924 REAL_VALUE_TYPE r0;
8925
8926 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
8927 return -1;
8928
8929 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
8930
8931 for (i = 1; i < n_elts; i++)
8932 {
8933 rtx elt = CONST_VECTOR_ELT (op, i);
8934 REAL_VALUE_TYPE re;
8935
8936 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
8937
8938 if (!REAL_VALUES_EQUAL (r0, re))
8939 return -1;
8940 }
8941
8942 if (modconst)
8943 *modconst = CONST_VECTOR_ELT (op, 0);
8944
8945 if (elementwidth)
8946 *elementwidth = 0;
8947
8948 if (el0 == CONST0_RTX (GET_MODE (el0)))
8949 return 19;
8950 else
8951 return 18;
8952 }
8953
8954 /* Splat vector constant out into a byte vector. */
8955 for (i = 0; i < n_elts; i++)
8956 {
8957 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
8958 unsigned HOST_WIDE_INT elpart;
8959 unsigned int part, parts;
8960
8961 if (GET_CODE (el) == CONST_INT)
8962 {
8963 elpart = INTVAL (el);
8964 parts = 1;
8965 }
8966 else if (GET_CODE (el) == CONST_DOUBLE)
8967 {
8968 elpart = CONST_DOUBLE_LOW (el);
8969 parts = 2;
8970 }
8971 else
8972 gcc_unreachable ();
8973
8974 for (part = 0; part < parts; part++)
8975 {
8976 unsigned int byte;
8977 for (byte = 0; byte < innersize; byte++)
8978 {
8979 bytes[idx++] = (elpart & 0xff) ^ invmask;
8980 elpart >>= BITS_PER_UNIT;
8981 }
8982 if (GET_CODE (el) == CONST_DOUBLE)
8983 elpart = CONST_DOUBLE_HIGH (el);
8984 }
8985 }
8986
8987 /* Sanity check. */
8988 gcc_assert (idx == GET_MODE_SIZE (mode));
8989
8990 do
8991 {
8992 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8993 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8994
8995 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8996 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
8997
8998 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8999 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9000
9001 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
9002 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
9003
9004 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
9005
9006 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
9007
9008 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
9009 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9010
9011 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9012 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9013
9014 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
9015 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9016
9017 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
9018 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
9019
9020 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
9021
9022 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
9023
9024 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
9025 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
9026
9027 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
9028 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
9029
9030 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
9031 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
9032
9033 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
9034 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
9035
9036 CHECK (1, 8, 16, bytes[i] == bytes[0]);
9037
9038 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
9039 && bytes[i] == bytes[(i + 8) % idx]);
9040 }
9041 while (0);
9042
9043 if (immtype == -1)
9044 return -1;
9045
9046 if (elementwidth)
9047 *elementwidth = elsize;
9048
9049 if (modconst)
9050 {
9051 unsigned HOST_WIDE_INT imm = 0;
9052
9053 /* Un-invert bytes of recognized vector, if necessary. */
9054 if (invmask != 0)
9055 for (i = 0; i < idx; i++)
9056 bytes[i] ^= invmask;
9057
9058 if (immtype == 17)
9059 {
9060 /* FIXME: Broken on 32-bit H_W_I hosts. */
9061 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
9062
9063 for (i = 0; i < 8; i++)
9064 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
9065 << (i * BITS_PER_UNIT);
9066
9067 *modconst = GEN_INT (imm);
9068 }
9069 else
9070 {
9071 unsigned HOST_WIDE_INT imm = 0;
9072
9073 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
9074 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
9075
9076 *modconst = GEN_INT (imm);
9077 }
9078 }
9079
9080 return immtype;
9081 #undef CHECK
9082 }
9083
9084 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
9085 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
9086 float elements), and a modified constant (whatever should be output for a
9087 VMOV) in *MODCONST. */
9088
9089 int
9090 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
9091 rtx *modconst, int *elementwidth)
9092 {
9093 rtx tmpconst;
9094 int tmpwidth;
9095 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
9096
9097 if (retval == -1)
9098 return 0;
9099
9100 if (modconst)
9101 *modconst = tmpconst;
9102
9103 if (elementwidth)
9104 *elementwidth = tmpwidth;
9105
9106 return 1;
9107 }
9108
9109 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
9110 the immediate is valid, write a constant suitable for using as an operand
9111 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
9112 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
9113
9114 int
9115 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
9116 rtx *modconst, int *elementwidth)
9117 {
9118 rtx tmpconst;
9119 int tmpwidth;
9120 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
9121
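/* Only immediate types 0-5 (a single set byte per 16-bit or 32-bit element,
   zero elsewhere) can be encoded as a VORR/VBIC/VAND/VORN immediate; types
   6 and above involve 0xff padding, replicated bytes or 64-bit masks, hence
   the range check below.  */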
9122 if (retval < 0 || retval > 5)
9123 return 0;
9124
9125 if (modconst)
9126 *modconst = tmpconst;
9127
9128 if (elementwidth)
9129 *elementwidth = tmpwidth;
9130
9131 return 1;
9132 }
9133
9134 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
9135 the immediate is valid, write a constant suitable for using as an operand
9136 to VSHR/VSHL to *MODCONST and the corresponding element width to
9137    *ELEMENTWIDTH.  ISLEFTSHIFT indicates whether this is a left shift or a
9138    right shift, because the two have different limits.  */
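/* For example, with V8QImode (8-bit elements) a valid left-shift count is
   0..7 while a valid right-shift count is 1..8, per the checks against
   MAXSHIFT below.  */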
9139
9140 int
9141 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
9142 rtx *modconst, int *elementwidth,
9143 bool isleftshift)
9144 {
9145 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
9146 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
9147 unsigned HOST_WIDE_INT last_elt = 0;
9148 unsigned HOST_WIDE_INT maxshift;
9149
9150 /* Split vector constant out into a byte vector. */
9151 for (i = 0; i < n_elts; i++)
9152 {
9153 rtx el = CONST_VECTOR_ELT (op, i);
9154 unsigned HOST_WIDE_INT elpart;
9155
9156 if (GET_CODE (el) == CONST_INT)
9157 elpart = INTVAL (el);
9158 else if (GET_CODE (el) == CONST_DOUBLE)
9159 return 0;
9160 else
9161 gcc_unreachable ();
9162
9163 if (i != 0 && elpart != last_elt)
9164 return 0;
9165
9166 last_elt = elpart;
9167 }
9168
9169 /* Shift less than element size. */
9170 maxshift = innersize * 8;
9171
9172 if (isleftshift)
9173 {
9174 /* Left shift immediate value can be from 0 to <size>-1. */
9175 if (last_elt >= maxshift)
9176 return 0;
9177 }
9178 else
9179 {
9180 /* Right shift immediate value can be from 1 to <size>. */
9181 if (last_elt == 0 || last_elt > maxshift)
9182 return 0;
9183 }
9184
9185 if (elementwidth)
9186 *elementwidth = innersize * 8;
9187
9188 if (modconst)
9189 *modconst = CONST_VECTOR_ELT (op, 0);
9190
9191 return 1;
9192 }
9193
9194 /* Return a string suitable for output of Neon immediate logic operation
9195 MNEM. */
9196
9197 char *
9198 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
9199 int inverse, int quad)
9200 {
9201 int width, is_valid;
9202 static char templ[40];
9203
9204 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
9205
9206 gcc_assert (is_valid != 0);
9207
9208 if (quad)
9209 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
9210 else
9211 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
9212
9213 return templ;
9214 }
9215
9216 /* Return a string suitable for output of Neon immediate shift operation
9217 (VSHR or VSHL) MNEM. */
9218
9219 char *
9220 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
9221 enum machine_mode mode, int quad,
9222 bool isleftshift)
9223 {
9224 int width, is_valid;
9225 static char templ[40];
9226
9227 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
9228 gcc_assert (is_valid != 0);
9229
9230 if (quad)
9231 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
9232 else
9233 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
9234
9235 return templ;
9236 }
9237
9238 /* Output a sequence of pairwise operations to implement a reduction.
9239 NOTE: We do "too much work" here, because pairwise operations work on two
9240    registers-worth of operands in one go.  Unfortunately we do not think those
9241    extra calculations can be exploited to do the full operation in fewer steps.
9242 Although all vector elements of the result but the first are ignored, we
9243 actually calculate the same result in each of the elements. An alternative
9244 such as initially loading a vector with zero to use as each of the second
9245 operands would use up an additional register and take an extra instruction,
9246 for no particular gain. */
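/* For instance, with a pairwise-add REDUC and a 4-element vector
   { a0, a1, a2, a3 }, the loop below emits two steps:
     step 1: { a0+a1, a2+a3, a0+a1, a2+a3 }
     step 2: every element == (a0+a1) + (a2+a3)
   and only element 0 of the final result is of interest.  */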
9247
9248 void
9249 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
9250 rtx (*reduc) (rtx, rtx, rtx))
9251 {
9252 enum machine_mode inner = GET_MODE_INNER (mode);
9253 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
9254 rtx tmpsum = op1;
9255
9256 for (i = parts / 2; i >= 1; i /= 2)
9257 {
9258 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
9259 emit_insn (reduc (dest, tmpsum, tmpsum));
9260 tmpsum = dest;
9261 }
9262 }
9263
9264 /* If VALS is a vector constant that can be loaded into a register
9265 using VDUP, generate instructions to do so and return an RTX to
9266 assign to the register. Otherwise return NULL_RTX. */
9267
9268 static rtx
9269 neon_vdup_constant (rtx vals)
9270 {
9271 enum machine_mode mode = GET_MODE (vals);
9272 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9273 int n_elts = GET_MODE_NUNITS (mode);
9274 bool all_same = true;
9275 rtx x;
9276 int i;
9277
9278 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
9279 return NULL_RTX;
9280
9281 for (i = 0; i < n_elts; ++i)
9282 {
9283 x = XVECEXP (vals, 0, i);
9284 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9285 all_same = false;
9286 }
9287
9288 if (!all_same)
9289 /* The elements are not all the same. We could handle repeating
9290 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
9291 {0, C, 0, C, 0, C, 0, C} which can be loaded using
9292 vdup.i16). */
9293 return NULL_RTX;
9294
9295 /* We can load this constant by using VDUP and a constant in a
9296 single ARM register. This will be cheaper than a vector
9297 load. */
9298
9299 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9300 return gen_rtx_VEC_DUPLICATE (mode, x);
9301 }
9302
9303 /* Generate code to load VALS, which is a PARALLEL containing only
9304 constants (for vec_init) or CONST_VECTOR, efficiently into a
9305 register. Returns an RTX to copy into the register, or NULL_RTX
9306 for a PARALLEL that can not be converted into a CONST_VECTOR. */
9307
9308 rtx
9309 neon_make_constant (rtx vals)
9310 {
9311 enum machine_mode mode = GET_MODE (vals);
9312 rtx target;
9313 rtx const_vec = NULL_RTX;
9314 int n_elts = GET_MODE_NUNITS (mode);
9315 int n_const = 0;
9316 int i;
9317
9318 if (GET_CODE (vals) == CONST_VECTOR)
9319 const_vec = vals;
9320 else if (GET_CODE (vals) == PARALLEL)
9321 {
9322 /* A CONST_VECTOR must contain only CONST_INTs and
9323 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9324 Only store valid constants in a CONST_VECTOR. */
9325 for (i = 0; i < n_elts; ++i)
9326 {
9327 rtx x = XVECEXP (vals, 0, i);
9328 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
9329 n_const++;
9330 }
9331 if (n_const == n_elts)
9332 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
9333 }
9334 else
9335 gcc_unreachable ();
9336
9337 if (const_vec != NULL
9338 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
9339 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9340 return const_vec;
9341 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
9342 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9343 pipeline cycle; creating the constant takes one or two ARM
9344 pipeline cycles. */
9345 return target;
9346 else if (const_vec != NULL_RTX)
9347 /* Load from constant pool. On Cortex-A8 this takes two cycles
9348 (for either double or quad vectors). We can not take advantage
9349 of single-cycle VLD1 because we need a PC-relative addressing
9350 mode. */
9351 return const_vec;
9352 else
9353 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9354 We can not construct an initializer. */
9355 return NULL_RTX;
9356 }
9357
9358 /* Initialize vector TARGET to VALS. */
9359
9360 void
9361 neon_expand_vector_init (rtx target, rtx vals)
9362 {
9363 enum machine_mode mode = GET_MODE (target);
9364 enum machine_mode inner_mode = GET_MODE_INNER (mode);
9365 int n_elts = GET_MODE_NUNITS (mode);
9366 int n_var = 0, one_var = -1;
9367 bool all_same = true;
9368 rtx x, mem;
9369 int i;
9370
9371 for (i = 0; i < n_elts; ++i)
9372 {
9373 x = XVECEXP (vals, 0, i);
9374 if (!CONSTANT_P (x))
9375 ++n_var, one_var = i;
9376
9377 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
9378 all_same = false;
9379 }
9380
9381 if (n_var == 0)
9382 {
9383 rtx constant = neon_make_constant (vals);
9384 if (constant != NULL_RTX)
9385 {
9386 emit_move_insn (target, constant);
9387 return;
9388 }
9389 }
9390
9391 /* Splat a single non-constant element if we can. */
9392 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
9393 {
9394 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
9395 emit_insn (gen_rtx_SET (VOIDmode, target,
9396 gen_rtx_VEC_DUPLICATE (mode, x)));
9397 return;
9398 }
9399
9400 /* One field is non-constant. Load constant then overwrite varying
9401 field. This is more efficient than using the stack. */
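/* For example, { 1, x, 3, 4 } is built by first initializing the target to
   { 1, 3, 3, 4 } (the varying slot temporarily takes its neighbour's value)
   and then inserting X into lane 1 with a vset_lane insn.  */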
9402 if (n_var == 1)
9403 {
9404 rtx copy = copy_rtx (vals);
9405 rtx index = GEN_INT (one_var);
9406
9407 /* Load constant part of vector, substitute neighboring value for
9408 varying element. */
9409 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
9410 neon_expand_vector_init (target, copy);
9411
9412 /* Insert variable. */
9413 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
9414 switch (mode)
9415 {
9416 case V8QImode:
9417 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
9418 break;
9419 case V16QImode:
9420 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
9421 break;
9422 case V4HImode:
9423 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
9424 break;
9425 case V8HImode:
9426 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
9427 break;
9428 case V2SImode:
9429 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
9430 break;
9431 case V4SImode:
9432 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
9433 break;
9434 case V2SFmode:
9435 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
9436 break;
9437 case V4SFmode:
9438 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
9439 break;
9440 case V2DImode:
9441 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
9442 break;
9443 default:
9444 gcc_unreachable ();
9445 }
9446 return;
9447 }
9448
9449 /* Construct the vector in memory one field at a time
9450 and load the whole vector. */
9451 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
9452 for (i = 0; i < n_elts; i++)
9453 emit_move_insn (adjust_address_nv (mem, inner_mode,
9454 i * GET_MODE_SIZE (inner_mode)),
9455 XVECEXP (vals, 0, i));
9456 emit_move_insn (target, mem);
9457 }
9458
9459 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9460 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9461 reported source locations are bogus. */
9462
9463 static void
9464 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
9465 const char *err)
9466 {
9467 HOST_WIDE_INT lane;
9468
9469 gcc_assert (GET_CODE (operand) == CONST_INT);
9470
9471 lane = INTVAL (operand);
9472
9473 if (lane < low || lane >= high)
9474 error (err);
9475 }
9476
9477 /* Bounds-check lanes. */
9478
9479 void
9480 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9481 {
9482 bounds_check (operand, low, high, "lane out of range");
9483 }
9484
9485 /* Bounds-check constants. */
9486
9487 void
9488 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
9489 {
9490 bounds_check (operand, low, high, "constant out of range");
9491 }
9492
9493 HOST_WIDE_INT
9494 neon_element_bits (enum machine_mode mode)
9495 {
9496 if (mode == DImode)
9497 return GET_MODE_BITSIZE (mode);
9498 else
9499 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
9500 }
9501
9502 \f
9503 /* Predicates for `match_operand' and `match_operator'. */
9504
9505 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9506 WB is true if full writeback address modes are allowed and is false
9507 if limited writeback address modes (POST_INC and PRE_DEC) are
9508 allowed. */
9509
9510 int
9511 arm_coproc_mem_operand (rtx op, bool wb)
9512 {
9513 rtx ind;
9514
9515 /* Reject eliminable registers. */
9516 if (! (reload_in_progress || reload_completed)
9517 && ( reg_mentioned_p (frame_pointer_rtx, op)
9518 || reg_mentioned_p (arg_pointer_rtx, op)
9519 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9520 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9521 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9522 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9523 return FALSE;
9524
9525 /* Constants are converted into offsets from labels. */
9526 if (GET_CODE (op) != MEM)
9527 return FALSE;
9528
9529 ind = XEXP (op, 0);
9530
9531 if (reload_completed
9532 && (GET_CODE (ind) == LABEL_REF
9533 || (GET_CODE (ind) == CONST
9534 && GET_CODE (XEXP (ind, 0)) == PLUS
9535 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9536 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9537 return TRUE;
9538
9539 /* Match: (mem (reg)). */
9540 if (GET_CODE (ind) == REG)
9541 return arm_address_register_rtx_p (ind, 0);
9542
9543   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
9544 acceptable in any case (subject to verification by
9545 arm_address_register_rtx_p). We need WB to be true to accept
9546 PRE_INC and POST_DEC. */
9547 if (GET_CODE (ind) == POST_INC
9548 || GET_CODE (ind) == PRE_DEC
9549 || (wb
9550 && (GET_CODE (ind) == PRE_INC
9551 || GET_CODE (ind) == POST_DEC)))
9552 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9553
9554 if (wb
9555 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
9556 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
9557 && GET_CODE (XEXP (ind, 1)) == PLUS
9558 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
9559 ind = XEXP (ind, 1);
9560
9561 /* Match:
9562 (plus (reg)
9563 (const)). */
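/* The offset must be a multiple of 4 in the range -1020..1020, roughly the
   +/-(imm8 * 4) offsets that VLDR/VSTR-class coprocessor addressing can
   encode.  */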
9564 if (GET_CODE (ind) == PLUS
9565 && GET_CODE (XEXP (ind, 0)) == REG
9566 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9567 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9568 && INTVAL (XEXP (ind, 1)) > -1024
9569 && INTVAL (XEXP (ind, 1)) < 1024
9570 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9571 return TRUE;
9572
9573 return FALSE;
9574 }
9575
9576 /* Return TRUE if OP is a memory operand which we can load or store a vector
9577 to/from. TYPE is one of the following values:
9578      0 - Vector load/store (vldr)
9579 1 - Core registers (ldm)
9580 2 - Element/structure loads (vld1)
9581 */
9582 int
9583 neon_vector_mem_operand (rtx op, int type)
9584 {
9585 rtx ind;
9586
9587 /* Reject eliminable registers. */
9588 if (! (reload_in_progress || reload_completed)
9589 && ( reg_mentioned_p (frame_pointer_rtx, op)
9590 || reg_mentioned_p (arg_pointer_rtx, op)
9591 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9592 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9593 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9594 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9595 return FALSE;
9596
9597 /* Constants are converted into offsets from labels. */
9598 if (GET_CODE (op) != MEM)
9599 return FALSE;
9600
9601 ind = XEXP (op, 0);
9602
9603 if (reload_completed
9604 && (GET_CODE (ind) == LABEL_REF
9605 || (GET_CODE (ind) == CONST
9606 && GET_CODE (XEXP (ind, 0)) == PLUS
9607 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9608 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9609 return TRUE;
9610
9611 /* Match: (mem (reg)). */
9612 if (GET_CODE (ind) == REG)
9613 return arm_address_register_rtx_p (ind, 0);
9614
9615 /* Allow post-increment with Neon registers. */
9616 if ((type != 1 && GET_CODE (ind) == POST_INC)
9617 || (type == 0 && GET_CODE (ind) == PRE_DEC))
9618 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9619
9620 /* FIXME: vld1 allows register post-modify. */
9621
9622 /* Match:
9623 (plus (reg)
9624 (const)). */
9625 if (type == 0
9626 && GET_CODE (ind) == PLUS
9627 && GET_CODE (XEXP (ind, 0)) == REG
9628 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
9629 && GET_CODE (XEXP (ind, 1)) == CONST_INT
9630 && INTVAL (XEXP (ind, 1)) > -1024
9631 && INTVAL (XEXP (ind, 1)) < 1016
9632 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
9633 return TRUE;
9634
9635 return FALSE;
9636 }
9637
9638 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
9639 type. */
9640 int
9641 neon_struct_mem_operand (rtx op)
9642 {
9643 rtx ind;
9644
9645 /* Reject eliminable registers. */
9646 if (! (reload_in_progress || reload_completed)
9647 && ( reg_mentioned_p (frame_pointer_rtx, op)
9648 || reg_mentioned_p (arg_pointer_rtx, op)
9649 || reg_mentioned_p (virtual_incoming_args_rtx, op)
9650 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
9651 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
9652 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
9653 return FALSE;
9654
9655 /* Constants are converted into offsets from labels. */
9656 if (GET_CODE (op) != MEM)
9657 return FALSE;
9658
9659 ind = XEXP (op, 0);
9660
9661 if (reload_completed
9662 && (GET_CODE (ind) == LABEL_REF
9663 || (GET_CODE (ind) == CONST
9664 && GET_CODE (XEXP (ind, 0)) == PLUS
9665 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
9666 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
9667 return TRUE;
9668
9669 /* Match: (mem (reg)). */
9670 if (GET_CODE (ind) == REG)
9671 return arm_address_register_rtx_p (ind, 0);
9672
9673 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9674 if (GET_CODE (ind) == POST_INC
9675 || GET_CODE (ind) == PRE_DEC)
9676 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
9677
9678 return FALSE;
9679 }
9680
9681 /* Return true if X is a register that will be eliminated later on. */
9682 int
9683 arm_eliminable_register (rtx x)
9684 {
9685 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
9686 || REGNO (x) == ARG_POINTER_REGNUM
9687 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
9688 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
9689 }
9690
9691 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
9692 coprocessor registers. Otherwise return NO_REGS. */
9693
9694 enum reg_class
9695 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
9696 {
9697 if (mode == HFmode)
9698 {
9699 if (!TARGET_NEON_FP16)
9700 return GENERAL_REGS;
9701 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
9702 return NO_REGS;
9703 return GENERAL_REGS;
9704 }
9705
9706 /* The neon move patterns handle all legitimate vector and struct
9707 addresses. */
9708 if (TARGET_NEON
9709 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
9710 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
9711 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
9712 || VALID_NEON_STRUCT_MODE (mode)))
9713 return NO_REGS;
9714
9715 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
9716 return NO_REGS;
9717
9718 return GENERAL_REGS;
9719 }
9720
9721 /* Values which must be returned in the most-significant end of the return
9722 register. */
9723
9724 static bool
9725 arm_return_in_msb (const_tree valtype)
9726 {
9727 return (TARGET_AAPCS_BASED
9728 && BYTES_BIG_ENDIAN
9729 && (AGGREGATE_TYPE_P (valtype)
9730 || TREE_CODE (valtype) == COMPLEX_TYPE
9731 || FIXED_POINT_TYPE_P (valtype)));
9732 }
9733
9734 /* Return TRUE if X references a SYMBOL_REF. */
9735 int
9736 symbol_mentioned_p (rtx x)
9737 {
9738 const char * fmt;
9739 int i;
9740
9741 if (GET_CODE (x) == SYMBOL_REF)
9742 return 1;
9743
9744 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9745 are constant offsets, not symbols. */
9746 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9747 return 0;
9748
9749 fmt = GET_RTX_FORMAT (GET_CODE (x));
9750
9751 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9752 {
9753 if (fmt[i] == 'E')
9754 {
9755 int j;
9756
9757 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9758 if (symbol_mentioned_p (XVECEXP (x, i, j)))
9759 return 1;
9760 }
9761 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
9762 return 1;
9763 }
9764
9765 return 0;
9766 }
9767
9768 /* Return TRUE if X references a LABEL_REF. */
9769 int
9770 label_mentioned_p (rtx x)
9771 {
9772 const char * fmt;
9773 int i;
9774
9775 if (GET_CODE (x) == LABEL_REF)
9776 return 1;
9777
9778 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9779 instruction, but they are constant offsets, not symbols. */
9780 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
9781 return 0;
9782
9783 fmt = GET_RTX_FORMAT (GET_CODE (x));
9784 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9785 {
9786 if (fmt[i] == 'E')
9787 {
9788 int j;
9789
9790 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9791 if (label_mentioned_p (XVECEXP (x, i, j)))
9792 return 1;
9793 }
9794 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
9795 return 1;
9796 }
9797
9798 return 0;
9799 }
9800
9801 int
9802 tls_mentioned_p (rtx x)
9803 {
9804 switch (GET_CODE (x))
9805 {
9806 case CONST:
9807 return tls_mentioned_p (XEXP (x, 0));
9808
9809 case UNSPEC:
9810 if (XINT (x, 1) == UNSPEC_TLS)
9811 return 1;
9812
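      /* Fall through.  */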
9813 default:
9814 return 0;
9815 }
9816 }
9817
9818 /* Must not copy any rtx that uses a pc-relative address. */
9819
9820 static int
9821 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
9822 {
9823 if (GET_CODE (*x) == UNSPEC
9824 && (XINT (*x, 1) == UNSPEC_PIC_BASE
9825 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
9826 return 1;
9827 return 0;
9828 }
9829
9830 static bool
9831 arm_cannot_copy_insn_p (rtx insn)
9832 {
9833 /* The tls call insn cannot be copied, as it is paired with a data
9834 word. */
9835 if (recog_memoized (insn) == CODE_FOR_tlscall)
9836 return true;
9837
9838 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
9839 }
9840
9841 enum rtx_code
9842 minmax_code (rtx x)
9843 {
9844 enum rtx_code code = GET_CODE (x);
9845
9846 switch (code)
9847 {
9848 case SMAX:
9849 return GE;
9850 case SMIN:
9851 return LE;
9852 case UMIN:
9853 return LEU;
9854 case UMAX:
9855 return GEU;
9856 default:
9857 gcc_unreachable ();
9858 }
9859 }
9860
9861 /* Match pair of min/max operators that can be implemented via usat/ssat. */
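/* For example, clamping to [0, 255] gives *MASK == 8 and a usat #8, since
   255 + 1 is 1 << 8 and the low bound is zero; clamping to [-128, 127] also
   gives *MASK == 8 but an ssat #8, since 127 + 1 is 1 << 7 and
   -128 == -127 - 1.  */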
9862
9863 bool
9864 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
9865 int *mask, bool *signed_sat)
9866 {
9867 /* The high bound must be a power of two minus one. */
9868 int log = exact_log2 (INTVAL (hi_bound) + 1);
9869 if (log == -1)
9870 return false;
9871
9872 /* The low bound is either zero (for usat) or one less than the
9873 negation of the high bound (for ssat). */
9874 if (INTVAL (lo_bound) == 0)
9875 {
9876 if (mask)
9877 *mask = log;
9878 if (signed_sat)
9879 *signed_sat = false;
9880
9881 return true;
9882 }
9883
9884 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
9885 {
9886 if (mask)
9887 *mask = log + 1;
9888 if (signed_sat)
9889 *signed_sat = true;
9890
9891 return true;
9892 }
9893
9894 return false;
9895 }
9896
9897 /* Return 1 if memory locations are adjacent. */
9898 int
9899 adjacent_mem_locations (rtx a, rtx b)
9900 {
9901 /* We don't guarantee to preserve the order of these memory refs. */
9902 if (volatile_refs_p (a) || volatile_refs_p (b))
9903 return 0;
9904
9905 if ((GET_CODE (XEXP (a, 0)) == REG
9906 || (GET_CODE (XEXP (a, 0)) == PLUS
9907 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
9908 && (GET_CODE (XEXP (b, 0)) == REG
9909 || (GET_CODE (XEXP (b, 0)) == PLUS
9910 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
9911 {
9912 HOST_WIDE_INT val0 = 0, val1 = 0;
9913 rtx reg0, reg1;
9914 int val_diff;
9915
9916 if (GET_CODE (XEXP (a, 0)) == PLUS)
9917 {
9918 reg0 = XEXP (XEXP (a, 0), 0);
9919 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
9920 }
9921 else
9922 reg0 = XEXP (a, 0);
9923
9924 if (GET_CODE (XEXP (b, 0)) == PLUS)
9925 {
9926 reg1 = XEXP (XEXP (b, 0), 0);
9927 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
9928 }
9929 else
9930 reg1 = XEXP (b, 0);
9931
9932 /* Don't accept any offset that will require multiple
9933 instructions to handle, since this would cause the
9934 arith_adjacentmem pattern to output an overlong sequence. */
9935 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
9936 return 0;
9937
9938 /* Don't allow an eliminable register: register elimination can make
9939 the offset too large. */
9940 if (arm_eliminable_register (reg0))
9941 return 0;
9942
9943 val_diff = val1 - val0;
9944
9945 if (arm_ld_sched)
9946 {
9947 /* If the target has load delay slots, then there's no benefit
9948 to using an ldm instruction unless the offset is zero and
9949 we are optimizing for size. */
9950 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
9951 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
9952 && (val_diff == 4 || val_diff == -4));
9953 }
9954
9955 return ((REGNO (reg0) == REGNO (reg1))
9956 && (val_diff == 4 || val_diff == -4));
9957 }
9958
9959 return 0;
9960 }
9961
9962 /* Return true if OP is a valid load or store multiple operation. LOAD is true
9963 for load operations, false for store operations. CONSECUTIVE is true
9964 if the register numbers in the operation must be consecutive in the register
9965    bank.  RETURN_PC is true if the value is to be loaded into PC.
9966 The pattern we are trying to match for load is:
9967 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
9968 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
9969 :
9970 :
9971 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
9972 ]
9973 where
9974 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
9975 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
9976 3. If consecutive is TRUE, then for kth register being loaded,
9977 REGNO (R_dk) = REGNO (R_d0) + k.
9978 The pattern for store is similar. */
9979 bool
9980 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
9981 bool consecutive, bool return_pc)
9982 {
9983 HOST_WIDE_INT count = XVECLEN (op, 0);
9984 rtx reg, mem, addr;
9985 unsigned regno;
9986 unsigned first_regno;
9987 HOST_WIDE_INT i = 1, base = 0, offset = 0;
9988 rtx elt;
9989 bool addr_reg_in_reglist = false;
9990 bool update = false;
9991 int reg_increment;
9992 int offset_adj;
9993 int regs_per_val;
9994
9995 /* If not in SImode, then registers must be consecutive
9996 (e.g., VLDM instructions for DFmode). */
9997 gcc_assert ((mode == SImode) || consecutive);
9998 /* Setting return_pc for stores is illegal. */
9999 gcc_assert (!return_pc || load);
10000
10001 /* Set up the increments and the regs per val based on the mode. */
10002 reg_increment = GET_MODE_SIZE (mode);
10003 regs_per_val = reg_increment / 4;
10004 offset_adj = return_pc ? 1 : 0;
10005
10006 if (count <= 1
10007 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
10008 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
10009 return false;
10010
10011 /* Check if this is a write-back. */
10012 elt = XVECEXP (op, 0, offset_adj);
10013 if (GET_CODE (SET_SRC (elt)) == PLUS)
10014 {
10015 i++;
10016 base = 1;
10017 update = true;
10018
10019 /* The offset adjustment must be the number of registers being
10020 popped times the size of a single register. */
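/* E.g. a write-back load of three SImode registers must adjust the base
   register by 3 * 4 == 12 bytes.  */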
10021 if (!REG_P (SET_DEST (elt))
10022 || !REG_P (XEXP (SET_SRC (elt), 0))
10023 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
10024 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
10025 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
10026 ((count - 1 - offset_adj) * reg_increment))
10027 return false;
10028 }
10029
10030 i = i + offset_adj;
10031 base = base + offset_adj;
10032 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
10033 success depends on the type: VLDM can do just one reg,
10034 LDM must do at least two. */
10035 if ((count <= i) && (mode == SImode))
10036 return false;
10037
10038 elt = XVECEXP (op, 0, i - 1);
10039 if (GET_CODE (elt) != SET)
10040 return false;
10041
10042 if (load)
10043 {
10044 reg = SET_DEST (elt);
10045 mem = SET_SRC (elt);
10046 }
10047 else
10048 {
10049 reg = SET_SRC (elt);
10050 mem = SET_DEST (elt);
10051 }
10052
10053 if (!REG_P (reg) || !MEM_P (mem))
10054 return false;
10055
10056 regno = REGNO (reg);
10057 first_regno = regno;
10058 addr = XEXP (mem, 0);
10059 if (GET_CODE (addr) == PLUS)
10060 {
10061 if (!CONST_INT_P (XEXP (addr, 1)))
10062 return false;
10063
10064 offset = INTVAL (XEXP (addr, 1));
10065 addr = XEXP (addr, 0);
10066 }
10067
10068 if (!REG_P (addr))
10069 return false;
10070
10071   /* Don't allow SP to be loaded unless it is also the base register.  This
10072      guarantees that SP is reset correctly when an LDM instruction
10073      is interrupted.  Otherwise, we might end up with a corrupt stack.  */
10074 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10075 return false;
10076
10077 for (; i < count; i++)
10078 {
10079 elt = XVECEXP (op, 0, i);
10080 if (GET_CODE (elt) != SET)
10081 return false;
10082
10083 if (load)
10084 {
10085 reg = SET_DEST (elt);
10086 mem = SET_SRC (elt);
10087 }
10088 else
10089 {
10090 reg = SET_SRC (elt);
10091 mem = SET_DEST (elt);
10092 }
10093
10094 if (!REG_P (reg)
10095 || GET_MODE (reg) != mode
10096 || REGNO (reg) <= regno
10097 || (consecutive
10098 && (REGNO (reg) !=
10099 (unsigned int) (first_regno + regs_per_val * (i - base))))
10100 /* Don't allow SP to be loaded unless it is also the base register. It
10101 guarantees that SP is reset correctly when an LDM instruction
10102 is interrupted. Otherwise, we might end up with a corrupt stack. */
10103 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
10104 || !MEM_P (mem)
10105 || GET_MODE (mem) != mode
10106 || ((GET_CODE (XEXP (mem, 0)) != PLUS
10107 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
10108 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
10109 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
10110 offset + (i - base) * reg_increment))
10111 && (!REG_P (XEXP (mem, 0))
10112 || offset + (i - base) * reg_increment != 0)))
10113 return false;
10114
10115 regno = REGNO (reg);
10116 if (regno == REGNO (addr))
10117 addr_reg_in_reglist = true;
10118 }
10119
10120 if (load)
10121 {
10122 if (update && addr_reg_in_reglist)
10123 return false;
10124
10125 /* For Thumb-1, address register is always modified - either by write-back
10126 or by explicit load. If the pattern does not describe an update,
10127 then the address register must be in the list of loaded registers. */
10128 if (TARGET_THUMB1)
10129 return update || addr_reg_in_reglist;
10130 }
10131
10132 return true;
10133 }
10134
10135 /* Return true iff it would be profitable to turn a sequence of NOPS loads
10136 or stores (depending on IS_STORE) into a load-multiple or store-multiple
10137 instruction. ADD_OFFSET is nonzero if the base address register needs
10138 to be modified with an add instruction before we can use it. */
10139
10140 static bool
10141 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
10142 int nops, HOST_WIDE_INT add_offset)
10143 {
10144 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
10145 if the offset isn't small enough. The reason 2 ldrs are faster
10146 is because these ARMs are able to do more than one cache access
10147 in a single cycle. The ARM9 and StrongARM have Harvard caches,
10148 whilst the ARM8 has a double bandwidth cache. This means that
10149 these cores can do both an instruction fetch and a data fetch in
10150 a single cycle, so the trick of calculating the address into a
10151 scratch register (one of the result regs) and then doing a load
10152 multiple actually becomes slower (and no smaller in code size).
10153 That is the transformation
10154
10155 ldr rd1, [rbase + offset]
10156 ldr rd2, [rbase + offset + 4]
10157
10158 to
10159
10160 add rd1, rbase, offset
10161 ldmia rd1, {rd1, rd2}
10162
10163 produces worse code -- '3 cycles + any stalls on rd2' instead of
10164 '2 cycles + any stalls on rd2'. On ARMs with only one cache
10165 access per cycle, the first sequence could never complete in less
10166 than 6 cycles, whereas the ldm sequence would only take 5 and
10167 would make better use of sequential accesses if not hitting the
10168 cache.
10169
10170 We cheat here and test 'arm_ld_sched' which we currently know to
10171 only be true for the ARM8, ARM9 and StrongARM. If this ever
10172 changes, then the test below needs to be reworked. */
10173 if (nops == 2 && arm_ld_sched && add_offset != 0)
10174 return false;
10175
10176 /* XScale has load-store double instructions, but they have stricter
10177 alignment requirements than load-store multiple, so we cannot
10178 use them.
10179
10180 For XScale ldm requires 2 + NREGS cycles to complete and blocks
10181 the pipeline until completion.
10182
10183 NREGS CYCLES
10184 1 3
10185 2 4
10186 3 5
10187 4 6
10188
10189 An ldr instruction takes 1-3 cycles, but does not block the
10190 pipeline.
10191
10192 NREGS CYCLES
10193 1 1-3
10194 2 2-6
10195 3 3-9
10196 4 4-12
10197
10198 Best case ldr will always win. However, the more ldr instructions
10199 we issue, the less likely we are to be able to schedule them well.
10200 Using ldr instructions also increases code size.
10201
10202 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
10203 for counts of 3 or 4 regs. */
10204 if (nops <= 2 && arm_tune_xscale && !optimize_size)
10205 return false;
10206 return true;
10207 }
10208
10209 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
10210 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
10211 an array ORDER which describes the sequence to use when accessing the
10212 offsets that produces an ascending order. In this sequence, each
10213 offset must be larger by exactly 4 than the previous one. ORDER[0]
10214 must have been filled in with the lowest offset by the caller.
10215 If UNSORTED_REGS is nonnull, it is an array of register numbers that
10216 we use to verify that ORDER produces an ascending order of registers.
10217 Return true if it was possible to construct such an order, false if
10218 not. */
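/* For example, UNSORTED_OFFSETS == { 8, 0, 4, 12 } with ORDER[0] == 1
   yields ORDER == { 1, 2, 0, 3 }.  */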
10219
10220 static bool
10221 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
10222 int *unsorted_regs)
10223 {
10224 int i;
10225 for (i = 1; i < nops; i++)
10226 {
10227 int j;
10228
10229 order[i] = order[i - 1];
10230 for (j = 0; j < nops; j++)
10231 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
10232 {
10233 /* We must find exactly one offset that is higher than the
10234 previous one by 4. */
10235 if (order[i] != order[i - 1])
10236 return false;
10237 order[i] = j;
10238 }
10239 if (order[i] == order[i - 1])
10240 return false;
10241 /* The register numbers must be ascending. */
10242 if (unsorted_regs != NULL
10243 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
10244 return false;
10245 }
10246 return true;
10247 }
10248
10249 /* Used to determine in a peephole whether a sequence of load
10250 instructions can be changed into a load-multiple instruction.
10251 NOPS is the number of separate load instructions we are examining. The
10252 first NOPS entries in OPERANDS are the destination registers, the
10253 next NOPS entries are memory operands. If this function is
10254 successful, *BASE is set to the common base register of the memory
10255 accesses; *LOAD_OFFSET is set to the first memory location's offset
10256 from that base register.
10257 REGS is an array filled in with the destination register numbers.
10258    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
10259    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
10260 the sequence of registers in REGS matches the loads from ascending memory
10261 locations, and the function verifies that the register numbers are
10262 themselves ascending. If CHECK_REGS is false, the register numbers
10263 are stored in the order they are found in the operands. */
10264 static int
10265 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
10266 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
10267 {
10268 int unsorted_regs[MAX_LDM_STM_OPS];
10269 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10270 int order[MAX_LDM_STM_OPS];
10271 rtx base_reg_rtx = NULL;
10272 int base_reg = -1;
10273 int i, ldm_case;
10274
10275 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10276 easily extended if required. */
10277 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10278
10279 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10280
10281 /* Loop over the operands and check that the memory references are
10282 suitable (i.e. immediate offsets from the same base register). At
10283 the same time, extract the target register, and the memory
10284 offsets. */
10285 for (i = 0; i < nops; i++)
10286 {
10287 rtx reg;
10288 rtx offset;
10289
10290 /* Convert a subreg of a mem into the mem itself. */
10291 if (GET_CODE (operands[nops + i]) == SUBREG)
10292 operands[nops + i] = alter_subreg (operands + (nops + i));
10293
10294 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10295
10296 /* Don't reorder volatile memory references; it doesn't seem worth
10297 looking for the case where the order is ok anyway. */
10298 if (MEM_VOLATILE_P (operands[nops + i]))
10299 return 0;
10300
10301 offset = const0_rtx;
10302
10303 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10304 || (GET_CODE (reg) == SUBREG
10305 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10306 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10307 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10308 == REG)
10309 || (GET_CODE (reg) == SUBREG
10310 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10311 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10312 == CONST_INT)))
10313 {
10314 if (i == 0)
10315 {
10316 base_reg = REGNO (reg);
10317 base_reg_rtx = reg;
10318 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10319 return 0;
10320 }
10321 else if (base_reg != (int) REGNO (reg))
10322 /* Not addressed from the same base register. */
10323 return 0;
10324
10325 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
10326 ? REGNO (operands[i])
10327 : REGNO (SUBREG_REG (operands[i])));
10328
10329 /* If it isn't an integer register, or if it overwrites the
10330 base register but isn't the last insn in the list, then
10331 we can't do this. */
10332 if (unsorted_regs[i] < 0
10333 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10334 || unsorted_regs[i] > 14
10335 || (i != nops - 1 && unsorted_regs[i] == base_reg))
10336 return 0;
10337
10338 unsorted_offsets[i] = INTVAL (offset);
10339 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10340 order[0] = i;
10341 }
10342 else
10343 /* Not a suitable memory address. */
10344 return 0;
10345 }
10346
10347 /* All the useful information has now been extracted from the
10348 operands into unsorted_regs and unsorted_offsets; additionally,
10349 order[0] has been set to the lowest offset in the list. Sort
10350 the offsets into order, verifying that they are adjacent, and
10351 check that the register numbers are ascending. */
10352 if (!compute_offset_order (nops, unsorted_offsets, order,
10353 check_regs ? unsorted_regs : NULL))
10354 return 0;
10355
10356 if (saved_order)
10357 memcpy (saved_order, order, sizeof order);
10358
10359 if (base)
10360 {
10361 *base = base_reg;
10362
10363 for (i = 0; i < nops; i++)
10364 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10365
10366 *load_offset = unsorted_offsets[order[0]];
10367 }
10368
10369 if (TARGET_THUMB1
10370 && !peep2_reg_dead_p (nops, base_reg_rtx))
10371 return 0;
10372
10373 if (unsorted_offsets[order[0]] == 0)
10374 ldm_case = 1; /* ldmia */
10375 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10376 ldm_case = 2; /* ldmib */
10377 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10378 ldm_case = 3; /* ldmda */
10379 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10380 ldm_case = 4; /* ldmdb */
10381 else if (const_ok_for_arm (unsorted_offsets[order[0]])
10382 || const_ok_for_arm (-unsorted_offsets[order[0]]))
10383 ldm_case = 5;
10384 else
10385 return 0;
10386
10387 if (!multiple_operation_profitable_p (false, nops,
10388 ldm_case == 5
10389 ? unsorted_offsets[order[0]] : 0))
10390 return 0;
10391
10392 return ldm_case;
10393 }
10394
10395 /* Used to determine in a peephole whether a sequence of store instructions can
10396 be changed into a store-multiple instruction.
10397 NOPS is the number of separate store instructions we are examining.
10398 NOPS_TOTAL is the total number of instructions recognized by the peephole
10399 pattern.
10400 The first NOPS entries in OPERANDS are the source registers, the next
10401 NOPS entries are memory operands. If this function is successful, *BASE is
10402 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10403 to the first memory location's offset from that base register. REGS is an
10404 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10405 likewise filled with the corresponding rtx's.
10406    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
10407 numbers to an ascending order of stores.
10408 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10409 from ascending memory locations, and the function verifies that the register
10410 numbers are themselves ascending. If CHECK_REGS is false, the register
10411 numbers are stored in the order they are found in the operands. */
10412 static int
10413 store_multiple_sequence (rtx *operands, int nops, int nops_total,
10414 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
10415 HOST_WIDE_INT *load_offset, bool check_regs)
10416 {
10417 int unsorted_regs[MAX_LDM_STM_OPS];
10418 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
10419 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
10420 int order[MAX_LDM_STM_OPS];
10421 int base_reg = -1;
10422 rtx base_reg_rtx = NULL;
10423 int i, stm_case;
10424
10425   /* Write-back of the base register is currently only supported for Thumb-1.  */
10426 int base_writeback = TARGET_THUMB1;
10427
10428 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10429 easily extended if required. */
10430 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
10431
10432 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
10433
10434 /* Loop over the operands and check that the memory references are
10435 suitable (i.e. immediate offsets from the same base register). At
10436      the same time, extract the source register, and the memory
10437 offsets. */
10438 for (i = 0; i < nops; i++)
10439 {
10440 rtx reg;
10441 rtx offset;
10442
10443 /* Convert a subreg of a mem into the mem itself. */
10444 if (GET_CODE (operands[nops + i]) == SUBREG)
10445 operands[nops + i] = alter_subreg (operands + (nops + i));
10446
10447 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
10448
10449 /* Don't reorder volatile memory references; it doesn't seem worth
10450 looking for the case where the order is ok anyway. */
10451 if (MEM_VOLATILE_P (operands[nops + i]))
10452 return 0;
10453
10454 offset = const0_rtx;
10455
10456 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
10457 || (GET_CODE (reg) == SUBREG
10458 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10459 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
10460 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
10461 == REG)
10462 || (GET_CODE (reg) == SUBREG
10463 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
10464 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
10465 == CONST_INT)))
10466 {
10467 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
10468 ? operands[i] : SUBREG_REG (operands[i]));
10469 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
10470
10471 if (i == 0)
10472 {
10473 base_reg = REGNO (reg);
10474 base_reg_rtx = reg;
10475 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
10476 return 0;
10477 }
10478 else if (base_reg != (int) REGNO (reg))
10479 /* Not addressed from the same base register. */
10480 return 0;
10481
10482 /* If it isn't an integer register, then we can't do this. */
10483 if (unsorted_regs[i] < 0
10484 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
10485 /* The effects are unpredictable if the base register is
10486 both updated and stored. */
10487 || (base_writeback && unsorted_regs[i] == base_reg)
10488 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
10489 || unsorted_regs[i] > 14)
10490 return 0;
10491
10492 unsorted_offsets[i] = INTVAL (offset);
10493 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
10494 order[0] = i;
10495 }
10496 else
10497 /* Not a suitable memory address. */
10498 return 0;
10499 }
10500
10501 /* All the useful information has now been extracted from the
10502 operands into unsorted_regs and unsorted_offsets; additionally,
10503 order[0] has been set to the lowest offset in the list. Sort
10504 the offsets into order, verifying that they are adjacent, and
10505 check that the register numbers are ascending. */
10506 if (!compute_offset_order (nops, unsorted_offsets, order,
10507 check_regs ? unsorted_regs : NULL))
10508 return 0;
10509
10510 if (saved_order)
10511 memcpy (saved_order, order, sizeof order);
10512
10513 if (base)
10514 {
10515 *base = base_reg;
10516
10517 for (i = 0; i < nops; i++)
10518 {
10519 regs[i] = unsorted_regs[check_regs ? order[i] : i];
10520 if (reg_rtxs)
10521 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
10522 }
10523
10524 *load_offset = unsorted_offsets[order[0]];
10525 }
10526
10527 if (TARGET_THUMB1
10528 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
10529 return 0;
10530
10531 if (unsorted_offsets[order[0]] == 0)
10532 stm_case = 1; /* stmia */
10533 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
10534 stm_case = 2; /* stmib */
10535 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
10536 stm_case = 3; /* stmda */
10537 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
10538 stm_case = 4; /* stmdb */
10539 else
10540 return 0;
10541
10542 if (!multiple_operation_profitable_p (false, nops, 0))
10543 return 0;
10544
10545 return stm_case;
10546 }
10547 \f
10548 /* Routines for use in generating RTL. */
10549
10550 /* Generate a load-multiple instruction. COUNT is the number of loads in
10551 the instruction; REGS and MEMS are arrays containing the operands.
10552 BASEREG is the base register to be used in addressing the memory operands.
10553 WBACK_OFFSET is nonzero if the instruction should update the base
10554 register. */
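/* For example, COUNT == 2, REGS == { 4, 5 } and WBACK_OFFSET == 8 produce,
   informally,
     (parallel [(set basereg (plus basereg (const_int 8)))
                (set (reg:SI 4) (mem ...))
                (set (reg:SI 5) (mem ...))])
   when a load-multiple is deemed profitable.  */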
10555
10556 static rtx
10557 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10558 HOST_WIDE_INT wback_offset)
10559 {
10560 int i = 0, j;
10561 rtx result;
10562
10563 if (!multiple_operation_profitable_p (false, count, 0))
10564 {
10565 rtx seq;
10566
10567 start_sequence ();
10568
10569 for (i = 0; i < count; i++)
10570 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
10571
10572 if (wback_offset != 0)
10573 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10574
10575 seq = get_insns ();
10576 end_sequence ();
10577
10578 return seq;
10579 }
10580
10581 result = gen_rtx_PARALLEL (VOIDmode,
10582 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10583 if (wback_offset != 0)
10584 {
10585 XVECEXP (result, 0, 0)
10586 = gen_rtx_SET (VOIDmode, basereg,
10587 plus_constant (Pmode, basereg, wback_offset));
10588 i = 1;
10589 count++;
10590 }
10591
10592 for (j = 0; i < count; i++, j++)
10593 XVECEXP (result, 0, i)
10594 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
10595
10596 return result;
10597 }
10598
10599 /* Generate a store-multiple instruction. COUNT is the number of stores in
10600 the instruction; REGS and MEMS are arrays containing the operands.
10601 BASEREG is the base register to be used in addressing the memory operands.
10602 WBACK_OFFSET is nonzero if the instruction should update the base
10603 register. */
10604
10605 static rtx
10606 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
10607 HOST_WIDE_INT wback_offset)
10608 {
10609 int i = 0, j;
10610 rtx result;
10611
10612 if (GET_CODE (basereg) == PLUS)
10613 basereg = XEXP (basereg, 0);
10614
10615 if (!multiple_operation_profitable_p (false, count, 0))
10616 {
10617 rtx seq;
10618
10619 start_sequence ();
10620
10621 for (i = 0; i < count; i++)
10622 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10623
10624 if (wback_offset != 0)
10625 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
10626
10627 seq = get_insns ();
10628 end_sequence ();
10629
10630 return seq;
10631 }
10632
10633 result = gen_rtx_PARALLEL (VOIDmode,
10634 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10635 if (wback_offset != 0)
10636 {
10637 XVECEXP (result, 0, 0)
10638 = gen_rtx_SET (VOIDmode, basereg,
10639 plus_constant (Pmode, basereg, wback_offset));
10640 i = 1;
10641 count++;
10642 }
10643
10644 for (j = 0; i < count; i++, j++)
10645 XVECEXP (result, 0, i)
10646 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10647
10648 return result;
10649 }
10650
10651 /* Generate either a load-multiple or a store-multiple instruction. This
10652 function can be used in situations where we can start with a single MEM
10653 rtx and adjust its address upwards.
10654 COUNT is the number of operations in the instruction, not counting a
10655 possible update of the base register. REGS is an array containing the
10656 register operands.
10657 BASEREG is the base register to be used in addressing the memory operands,
10658 which are constructed from BASEMEM.
10659 WRITE_BACK specifies whether the generated instruction should include an
10660 update of the base register.
10661 OFFSETP is used to pass an offset to and from this function; this offset
10662 is not used when constructing the address (instead BASEMEM should have an
10663 appropriate offset in its address), it is used only for setting
10664    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
10665
10666 static rtx
10667 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10668 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10669 {
10670 rtx mems[MAX_LDM_STM_OPS];
10671 HOST_WIDE_INT offset = *offsetp;
10672 int i;
10673
10674 gcc_assert (count <= MAX_LDM_STM_OPS);
10675
10676 if (GET_CODE (basereg) == PLUS)
10677 basereg = XEXP (basereg, 0);
10678
10679 for (i = 0; i < count; i++)
10680 {
10681 rtx addr = plus_constant (Pmode, basereg, i * 4);
10682 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10683 offset += 4;
10684 }
10685
10686 if (write_back)
10687 *offsetp = offset;
10688
10689 if (is_load)
10690 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10691 write_back ? 4 * count : 0);
10692 else
10693 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10694 write_back ? 4 * count : 0);
10695 }
10696
10697 rtx
10698 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
10699 rtx basemem, HOST_WIDE_INT *offsetp)
10700 {
10701 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
10702 offsetp);
10703 }
10704
10705 rtx
10706 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
10707 rtx basemem, HOST_WIDE_INT *offsetp)
10708 {
10709 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
10710 offsetp);
10711 }
10712
10713 /* Called from a peephole2 expander to turn a sequence of loads into an
10714 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10715 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10716    is true if we can reorder the registers because they are subsequently
10717    used commutatively.
10718 Returns true iff we could generate a new instruction. */
10719
10720 bool
10721 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
10722 {
10723 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10724 rtx mems[MAX_LDM_STM_OPS];
10725 int i, j, base_reg;
10726 rtx base_reg_rtx;
10727 HOST_WIDE_INT offset;
10728 int write_back = FALSE;
10729 int ldm_case;
10730 rtx addr;
10731
10732 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
10733 &base_reg, &offset, !sort_regs);
10734
10735 if (ldm_case == 0)
10736 return false;
10737
10738 if (sort_regs)
10739 for (i = 0; i < nops - 1; i++)
10740 for (j = i + 1; j < nops; j++)
10741 if (regs[i] > regs[j])
10742 {
10743 int t = regs[i];
10744 regs[i] = regs[j];
10745 regs[j] = t;
10746 }
10747 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10748
10749 if (TARGET_THUMB1)
10750 {
10751 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
10752 gcc_assert (ldm_case == 1 || ldm_case == 5);
10753 write_back = TRUE;
10754 }
10755
10756 if (ldm_case == 5)
10757 {
10758 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10759 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10760 offset = 0;
10761 if (!TARGET_THUMB1)
10762 {
10763 base_reg = regs[0];
10764 base_reg_rtx = newbase;
10765 }
10766 }
10767
10768 for (i = 0; i < nops; i++)
10769 {
10770 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10771 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10772 SImode, addr, 0);
10773 }
10774 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10775 write_back ? offset + i * 4 : 0));
10776 return true;
10777 }
10778
10779 /* Called from a peephole2 expander to turn a sequence of stores into an
10780 STM instruction. OPERANDS are the operands found by the peephole matcher;
10781 NOPS indicates how many separate stores we are trying to combine.
10782 Returns true iff we could generate a new instruction. */
10783
10784 bool
10785 gen_stm_seq (rtx *operands, int nops)
10786 {
10787 int i;
10788 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10789 rtx mems[MAX_LDM_STM_OPS];
10790 int base_reg;
10791 rtx base_reg_rtx;
10792 HOST_WIDE_INT offset;
10793 int write_back = FALSE;
10794 int stm_case;
10795 rtx addr;
10796 bool base_reg_dies;
10797
10798 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10799 mem_order, &base_reg, &offset, true);
10800
10801 if (stm_case == 0)
10802 return false;
10803
10804 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10805
10806 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10807 if (TARGET_THUMB1)
10808 {
10809 gcc_assert (base_reg_dies);
10810 write_back = TRUE;
10811 }
10812
10813 if (stm_case == 5)
10814 {
10815 gcc_assert (base_reg_dies);
10816 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10817 offset = 0;
10818 }
10819
10820 addr = plus_constant (Pmode, base_reg_rtx, offset);
10821
10822 for (i = 0; i < nops; i++)
10823 {
10824 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10825 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10826 SImode, addr, 0);
10827 }
10828 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10829 write_back ? offset + i * 4 : 0));
10830 return true;
10831 }
10832
10833 /* Called from a peephole2 expander to turn a sequence of stores that are
10834 preceded by constant loads into an STM instruction. OPERANDS are the
10835 operands found by the peephole matcher; NOPS indicates how many
10836 separate stores we are trying to combine; there are 2 * NOPS
10837 instructions in the peephole.
10838 Returns true iff we could generate a new instruction. */
10839
10840 bool
10841 gen_const_stm_seq (rtx *operands, int nops)
10842 {
10843 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10844 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10845 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10846 rtx mems[MAX_LDM_STM_OPS];
10847 int base_reg;
10848 rtx base_reg_rtx;
10849 HOST_WIDE_INT offset;
10850 int write_back = FALSE;
10851 int stm_case;
10852 rtx addr;
10853 bool base_reg_dies;
10854 int i, j;
10855 HARD_REG_SET allocated;
10856
10857 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10858 mem_order, &base_reg, &offset, false);
10859
10860 if (stm_case == 0)
10861 return false;
10862
10863 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10864
10865 /* If the same register is used more than once, try to find a free
10866 register. */
10867 CLEAR_HARD_REG_SET (allocated);
10868 for (i = 0; i < nops; i++)
10869 {
10870 for (j = i + 1; j < nops; j++)
10871 if (regs[i] == regs[j])
10872 {
10873 rtx t = peep2_find_free_register (0, nops * 2,
10874 TARGET_THUMB1 ? "l" : "r",
10875 SImode, &allocated);
10876 if (t == NULL_RTX)
10877 return false;
10878 reg_rtxs[i] = t;
10879 regs[i] = REGNO (t);
10880 }
10881 }
10882
10883 /* Compute an ordering that maps the register numbers to an ascending
10884 sequence. */
10885 reg_order[0] = 0;
10886 for (i = 0; i < nops; i++)
10887 if (regs[i] < regs[reg_order[0]])
10888 reg_order[0] = i;
10889
10890 for (i = 1; i < nops; i++)
10891 {
10892 int this_order = reg_order[i - 1];
10893 for (j = 0; j < nops; j++)
10894 if (regs[j] > regs[reg_order[i - 1]]
10895 && (this_order == reg_order[i - 1]
10896 || regs[j] < regs[this_order]))
10897 this_order = j;
10898 reg_order[i] = this_order;
10899 }
10900
10901 /* Ensure that registers that must be live after the instruction end
10902 up with the correct value. */
10903 for (i = 0; i < nops; i++)
10904 {
10905 int this_order = reg_order[i];
10906 if ((this_order != mem_order[i]
10907 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10908 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10909 return false;
10910 }
10911
10912 /* Load the constants. */
10913 for (i = 0; i < nops; i++)
10914 {
10915 rtx op = operands[2 * nops + mem_order[i]];
10916 sorted_regs[i] = regs[reg_order[i]];
10917 emit_move_insn (reg_rtxs[reg_order[i]], op);
10918 }
10919
10920 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10921
10922 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10923 if (TARGET_THUMB1)
10924 {
10925 gcc_assert (base_reg_dies);
10926 write_back = TRUE;
10927 }
10928
10929 if (stm_case == 5)
10930 {
10931 gcc_assert (base_reg_dies);
10932 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10933 offset = 0;
10934 }
10935
10936 addr = plus_constant (Pmode, base_reg_rtx, offset);
10937
10938 for (i = 0; i < nops; i++)
10939 {
10940 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
10941 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10942 SImode, addr, 0);
10943 }
10944 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10945 write_back ? offset + i * 4 : 0));
10946 return true;
10947 }
10948
10949 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
10950 unaligned copies on processors which support unaligned semantics for those
10951 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
10952 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
10953 An interleave factor of 1 (the minimum) will perform no interleaving.
10954 Load/store multiple are used for aligned addresses where possible. */
10955
10956 static void
10957 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
10958 HOST_WIDE_INT length,
10959 unsigned int interleave_factor)
10960 {
10961 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
10962 int *regnos = XALLOCAVEC (int, interleave_factor);
10963 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
10964 HOST_WIDE_INT i, j;
10965 HOST_WIDE_INT remaining = length, words;
10966 rtx halfword_tmp = NULL, byte_tmp = NULL;
10967 rtx dst, src;
10968 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
10969 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
10970 HOST_WIDE_INT srcoffset, dstoffset;
10971 HOST_WIDE_INT src_autoinc, dst_autoinc;
10972 rtx mem, addr;
10973
10974 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
10975
10976 /* Use hard registers if we have aligned source or destination so we can use
10977 load/store multiple with contiguous registers. */
10978 if (dst_aligned || src_aligned)
10979 for (i = 0; i < interleave_factor; i++)
10980 regs[i] = gen_rtx_REG (SImode, i);
10981 else
10982 for (i = 0; i < interleave_factor; i++)
10983 regs[i] = gen_reg_rtx (SImode);
10984
10985 dst = copy_addr_to_reg (XEXP (dstbase, 0));
10986 src = copy_addr_to_reg (XEXP (srcbase, 0));
10987
10988 srcoffset = dstoffset = 0;
10989
10990 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
10991 For copying the last bytes we want to subtract this offset again. */
10992 src_autoinc = dst_autoinc = 0;
10993
10994 for (i = 0; i < interleave_factor; i++)
10995 regnos[i] = i;
10996
10997 /* Copy BLOCK_SIZE_BYTES chunks. */
10998
10999 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
11000 {
11001 /* Load words. */
11002 if (src_aligned && interleave_factor > 1)
11003 {
11004 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
11005 TRUE, srcbase, &srcoffset));
11006 src_autoinc += UNITS_PER_WORD * interleave_factor;
11007 }
11008 else
11009 {
11010 for (j = 0; j < interleave_factor; j++)
11011 {
11012 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
11013 - src_autoinc));
11014 mem = adjust_automodify_address (srcbase, SImode, addr,
11015 srcoffset + j * UNITS_PER_WORD);
11016 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11017 }
11018 srcoffset += block_size_bytes;
11019 }
11020
11021 /* Store words. */
11022 if (dst_aligned && interleave_factor > 1)
11023 {
11024 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
11025 TRUE, dstbase, &dstoffset));
11026 dst_autoinc += UNITS_PER_WORD * interleave_factor;
11027 }
11028 else
11029 {
11030 for (j = 0; j < interleave_factor; j++)
11031 {
11032 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
11033 - dst_autoinc));
11034 mem = adjust_automodify_address (dstbase, SImode, addr,
11035 dstoffset + j * UNITS_PER_WORD);
11036 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11037 }
11038 dstoffset += block_size_bytes;
11039 }
11040
11041 remaining -= block_size_bytes;
11042 }
11043
11044 /* Copy any whole words left (note these aren't interleaved with any
11045 subsequent halfword/byte load/stores in the interests of simplicity). */
11046
11047 words = remaining / UNITS_PER_WORD;
11048
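/* The main loop above consumed every complete block, so fewer than
   INTERLEAVE_FACTOR whole words can be left over.  */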
11049 gcc_assert (words < interleave_factor);
11050
11051 if (src_aligned && words > 1)
11052 {
11053 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
11054 &srcoffset));
11055 src_autoinc += UNITS_PER_WORD * words;
11056 }
11057 else
11058 {
11059 for (j = 0; j < words; j++)
11060 {
11061 addr = plus_constant (Pmode, src,
11062 srcoffset + j * UNITS_PER_WORD - src_autoinc);
11063 mem = adjust_automodify_address (srcbase, SImode, addr,
11064 srcoffset + j * UNITS_PER_WORD);
11065 emit_insn (gen_unaligned_loadsi (regs[j], mem));
11066 }
11067 srcoffset += words * UNITS_PER_WORD;
11068 }
11069
11070 if (dst_aligned && words > 1)
11071 {
11072 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
11073 &dstoffset));
11074 dst_autoinc += words * UNITS_PER_WORD;
11075 }
11076 else
11077 {
11078 for (j = 0; j < words; j++)
11079 {
11080 addr = plus_constant (Pmode, dst,
11081 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
11082 mem = adjust_automodify_address (dstbase, SImode, addr,
11083 dstoffset + j * UNITS_PER_WORD);
11084 emit_insn (gen_unaligned_storesi (mem, regs[j]));
11085 }
11086 dstoffset += words * UNITS_PER_WORD;
11087 }
11088
11089 remaining -= words * UNITS_PER_WORD;
11090
11091 gcc_assert (remaining < 4);
11092
11093 /* Copy a halfword if necessary. */
11094
11095 if (remaining >= 2)
11096 {
11097 halfword_tmp = gen_reg_rtx (SImode);
11098
11099 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11100 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
11101 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
11102
11103 /* Either write out immediately, or delay until we've loaded the last
11104 byte, depending on interleave factor. */
11105 if (interleave_factor == 1)
11106 {
11107 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11108 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11109 emit_insn (gen_unaligned_storehi (mem,
11110 gen_lowpart (HImode, halfword_tmp)));
11111 halfword_tmp = NULL;
11112 dstoffset += 2;
11113 }
11114
11115 remaining -= 2;
11116 srcoffset += 2;
11117 }
11118
11119 gcc_assert (remaining < 2);
11120
11121 /* Copy last byte. */
11122
11123 if ((remaining & 1) != 0)
11124 {
11125 byte_tmp = gen_reg_rtx (SImode);
11126
11127 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
11128 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
11129 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
11130
11131 if (interleave_factor == 1)
11132 {
11133 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11134 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11135 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11136 byte_tmp = NULL;
11137 dstoffset++;
11138 }
11139
11140 remaining--;
11141 srcoffset++;
11142 }
11143
11144 /* Store last halfword if we haven't done so already. */
11145
11146 if (halfword_tmp)
11147 {
11148 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11149 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
11150 emit_insn (gen_unaligned_storehi (mem,
11151 gen_lowpart (HImode, halfword_tmp)));
11152 dstoffset += 2;
11153 }
11154
11155 /* Likewise for last byte. */
11156
11157 if (byte_tmp)
11158 {
11159 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
11160 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
11161 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
11162 dstoffset++;
11163 }
11164
11165 gcc_assert (remaining == 0 && srcoffset == dstoffset);
11166 }
11167
11168 /* From mips_adjust_block_mem:
11169
11170 Helper function for doing a loop-based block operation on memory
11171 reference MEM. Each iteration of the loop will operate on LENGTH
11172 bytes of MEM.
11173
11174 Create a new base register for use within the loop and point it to
11175 the start of MEM. Create a new memory reference that uses this
11176 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
11177
11178 static void
11179 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
11180 rtx *loop_mem)
11181 {
11182 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
11183
11184 /* Although the new mem does not refer to a known location,
11185 it does keep up to LENGTH bytes of alignment. */
11186 *loop_mem = change_address (mem, BLKmode, *loop_reg);
11187 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
11188 }
11189
11190 /* From mips_block_move_loop:
11191
11192 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
11193 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
11194 the memory regions do not overlap. */
11195
11196 static void
11197 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
11198 unsigned int interleave_factor,
11199 HOST_WIDE_INT bytes_per_iter)
11200 {
11201 rtx label, src_reg, dest_reg, final_src, test;
11202 HOST_WIDE_INT leftover;
11203
11204 leftover = length % bytes_per_iter;
11205 length -= leftover;
11206
11207 /* Create registers and memory references for use within the loop. */
11208 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
11209 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
11210
11211 /* Calculate the value that SRC_REG should have after the last iteration of
11212 the loop. */
11213 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
11214 0, 0, OPTAB_WIDEN);
11215
11216 /* Emit the start of the loop. */
11217 label = gen_label_rtx ();
11218 emit_label (label);
11219
11220 /* Emit the loop body. */
11221 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
11222 interleave_factor);
11223
11224 /* Move on to the next block. */
11225 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
11226 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
11227
11228 /* Emit the loop condition. */
11229 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
11230 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
11231
11232 /* Mop up any left-over bytes. */
11233 if (leftover)
11234 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
11235 }
11236
11237 /* Emit a block move when either the source or destination is unaligned (not
11238 aligned to a four-byte boundary). This may need further tuning depending on
11239 core type, optimize_size setting, etc. */
11240
11241 static int
11242 arm_movmemqi_unaligned (rtx *operands)
11243 {
11244 HOST_WIDE_INT length = INTVAL (operands[2]);
11245
11246 if (optimize_size)
11247 {
11248 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
11249 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
11250 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
11251 size of code if optimizing for size. We'll use ldm/stm if src_aligned
11252 or dst_aligned though: allow more interleaving in those cases since the
11253 resulting code can be smaller. */
11254 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
11255 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
11256
11257 if (length > 12)
11258 arm_block_move_unaligned_loop (operands[0], operands[1], length,
11259 interleave_factor, bytes_per_iter);
11260 else
11261 arm_block_move_unaligned_straight (operands[0], operands[1], length,
11262 interleave_factor);
11263 }
11264 else
11265 {
11266 /* Note that the loop created by arm_block_move_unaligned_loop may be
11267 subject to loop unrolling, which makes tuning this condition a little
11268 redundant. */
11269 if (length > 32)
11270 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
11271 else
11272 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
11273 }
11274
11275 return 1;
11276 }
11277
11278 int
11279 arm_gen_movmemqi (rtx *operands)
11280 {
11281 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
11282 HOST_WIDE_INT srcoffset, dstoffset;
11283 int i;
11284 rtx src, dst, srcbase, dstbase;
11285 rtx part_bytes_reg = NULL;
11286 rtx mem;
11287
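/* Only expand small copies of constant length and known alignment inline;
   for anything else, fail so the caller can fall back (typically to a
   library call).  */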
11288 if (GET_CODE (operands[2]) != CONST_INT
11289 || GET_CODE (operands[3]) != CONST_INT
11290 || INTVAL (operands[2]) > 64)
11291 return 0;
11292
11293 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
11294 return arm_movmemqi_unaligned (operands);
11295
11296 if (INTVAL (operands[3]) & 3)
11297 return 0;
11298
11299 dstbase = operands[0];
11300 srcbase = operands[1];
11301
11302 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
11303 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
11304
11305 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
11306 out_words_to_go = INTVAL (operands[2]) / 4;
11307 last_bytes = INTVAL (operands[2]) & 3;
11308 dstoffset = srcoffset = 0;
11309
11310 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
11311 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
11312
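/* Copy the bulk of the data in groups of up to four words using
   load/store multiple.  The final load omits the base write-back; the
   final store writes the base back only if trailing bytes still need to
   be copied.  */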
11313 for (i = 0; in_words_to_go >= 2; i+=4)
11314 {
11315 if (in_words_to_go > 4)
11316 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
11317 TRUE, srcbase, &srcoffset));
11318 else
11319 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
11320 src, FALSE, srcbase,
11321 &srcoffset));
11322
11323 if (out_words_to_go)
11324 {
11325 if (out_words_to_go > 4)
11326 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
11327 TRUE, dstbase, &dstoffset));
11328 else if (out_words_to_go != 1)
11329 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
11330 out_words_to_go, dst,
11331 (last_bytes == 0
11332 ? FALSE : TRUE),
11333 dstbase, &dstoffset));
11334 else
11335 {
11336 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11337 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
11338 if (last_bytes != 0)
11339 {
11340 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
11341 dstoffset += 4;
11342 }
11343 }
11344 }
11345
11346 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
11347 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
11348 }
11349
11350 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
11351 if (out_words_to_go)
11352 {
11353 rtx sreg;
11354
11355 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11356 sreg = copy_to_reg (mem);
11357
11358 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
11359 emit_move_insn (mem, sreg);
11360 in_words_to_go--;
11361
11362 gcc_assert (!in_words_to_go); /* Sanity check */
11363 }
11364
11365 if (in_words_to_go)
11366 {
11367 gcc_assert (in_words_to_go > 0);
11368
11369 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
11370 part_bytes_reg = copy_to_mode_reg (SImode, mem);
11371 }
11372
11373 gcc_assert (!last_bytes || part_bytes_reg);
11374
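/* Store the trailing bytes.  On a big-endian target the residual data
   sits in the most significant end of PART_BYTES_REG, so shift it down
   and store it a byte at a time; otherwise store a half-word and/or a
   byte from the low end.  */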
11375 if (BYTES_BIG_ENDIAN && last_bytes)
11376 {
11377 rtx tmp = gen_reg_rtx (SImode);
11378
11379 /* The bytes we want are in the top end of the word. */
11380 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
11381 GEN_INT (8 * (4 - last_bytes))));
11382 part_bytes_reg = tmp;
11383
11384 while (last_bytes)
11385 {
11386 mem = adjust_automodify_address (dstbase, QImode,
11387 plus_constant (Pmode, dst,
11388 last_bytes - 1),
11389 dstoffset + last_bytes - 1);
11390 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11391
11392 if (--last_bytes)
11393 {
11394 tmp = gen_reg_rtx (SImode);
11395 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
11396 part_bytes_reg = tmp;
11397 }
11398 }
11399
11400 }
11401 else
11402 {
11403 if (last_bytes > 1)
11404 {
11405 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
11406 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
11407 last_bytes -= 2;
11408 if (last_bytes)
11409 {
11410 rtx tmp = gen_reg_rtx (SImode);
11411 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
11412 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
11413 part_bytes_reg = tmp;
11414 dstoffset += 2;
11415 }
11416 }
11417
11418 if (last_bytes)
11419 {
11420 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
11421 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
11422 }
11423 }
11424
11425 return 1;
11426 }
11427
11428 /* Select a dominance comparison mode if possible for a test of the general
11429 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
11430 COND_OR == DOM_CC_X_AND_Y => (X && Y)
11431 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
11432 COND_OR == DOM_CC_X_OR_Y => (X || Y)
11433 In all cases OP will be either EQ or NE, but we don't need to know which
11434 here. If we are unable to support a dominance comparison we return
11435 CC mode. This will then fail to match for the RTL expressions that
11436 generate this call. */
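/* For instance, an (X || Y) test in which both X and Y are GE comparisons
   of simple SImode operands maps to CC_DGEmode.  */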
11437 enum machine_mode
11438 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
11439 {
11440 enum rtx_code cond1, cond2;
11441 int swapped = 0;
11442
11443 /* Currently we will probably get the wrong result if the individual
11444 comparisons are not simple. This also ensures that it is safe to
11445 reverse a comparison if necessary. */
11446 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
11447 != CCmode)
11448 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
11449 != CCmode))
11450 return CCmode;
11451
11452 /* The if_then_else variant of this tests the second condition if the
11453 first passes, but is true if the first fails. Reverse the first
11454 condition to get a true "inclusive-or" expression. */
11455 if (cond_or == DOM_CC_NX_OR_Y)
11456 cond1 = reverse_condition (cond1);
11457
11458 /* If the comparisons are not equal, and one doesn't dominate the other,
11459 then we can't do this. */
11460 if (cond1 != cond2
11461 && !comparison_dominates_p (cond1, cond2)
11462 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
11463 return CCmode;
11464
11465 if (swapped)
11466 {
11467 enum rtx_code temp = cond1;
11468 cond1 = cond2;
11469 cond2 = temp;
11470 }
11471
11472 switch (cond1)
11473 {
11474 case EQ:
11475 if (cond_or == DOM_CC_X_AND_Y)
11476 return CC_DEQmode;
11477
11478 switch (cond2)
11479 {
11480 case EQ: return CC_DEQmode;
11481 case LE: return CC_DLEmode;
11482 case LEU: return CC_DLEUmode;
11483 case GE: return CC_DGEmode;
11484 case GEU: return CC_DGEUmode;
11485 default: gcc_unreachable ();
11486 }
11487
11488 case LT:
11489 if (cond_or == DOM_CC_X_AND_Y)
11490 return CC_DLTmode;
11491
11492 switch (cond2)
11493 {
11494 case LT:
11495 return CC_DLTmode;
11496 case LE:
11497 return CC_DLEmode;
11498 case NE:
11499 return CC_DNEmode;
11500 default:
11501 gcc_unreachable ();
11502 }
11503
11504 case GT:
11505 if (cond_or == DOM_CC_X_AND_Y)
11506 return CC_DGTmode;
11507
11508 switch (cond2)
11509 {
11510 case GT:
11511 return CC_DGTmode;
11512 case GE:
11513 return CC_DGEmode;
11514 case NE:
11515 return CC_DNEmode;
11516 default:
11517 gcc_unreachable ();
11518 }
11519
11520 case LTU:
11521 if (cond_or == DOM_CC_X_AND_Y)
11522 return CC_DLTUmode;
11523
11524 switch (cond2)
11525 {
11526 case LTU:
11527 return CC_DLTUmode;
11528 case LEU:
11529 return CC_DLEUmode;
11530 case NE:
11531 return CC_DNEmode;
11532 default:
11533 gcc_unreachable ();
11534 }
11535
11536 case GTU:
11537 if (cond_or == DOM_CC_X_AND_Y)
11538 return CC_DGTUmode;
11539
11540 switch (cond2)
11541 {
11542 case GTU:
11543 return CC_DGTUmode;
11544 case GEU:
11545 return CC_DGEUmode;
11546 case NE:
11547 return CC_DNEmode;
11548 default:
11549 gcc_unreachable ();
11550 }
11551
11552 /* The remaining cases only occur when both comparisons are the
11553 same. */
11554 case NE:
11555 gcc_assert (cond1 == cond2);
11556 return CC_DNEmode;
11557
11558 case LE:
11559 gcc_assert (cond1 == cond2);
11560 return CC_DLEmode;
11561
11562 case GE:
11563 gcc_assert (cond1 == cond2);
11564 return CC_DGEmode;
11565
11566 case LEU:
11567 gcc_assert (cond1 == cond2);
11568 return CC_DLEUmode;
11569
11570 case GEU:
11571 gcc_assert (cond1 == cond2);
11572 return CC_DGEUmode;
11573
11574 default:
11575 gcc_unreachable ();
11576 }
11577 }
11578
11579 enum machine_mode
11580 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
11581 {
11582 /* All floating point compares return CCFP if it is an equality
11583 comparison, and CCFPE otherwise. */
11584 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11585 {
11586 switch (op)
11587 {
11588 case EQ:
11589 case NE:
11590 case UNORDERED:
11591 case ORDERED:
11592 case UNLT:
11593 case UNLE:
11594 case UNGT:
11595 case UNGE:
11596 case UNEQ:
11597 case LTGT:
11598 return CCFPmode;
11599
11600 case LT:
11601 case LE:
11602 case GT:
11603 case GE:
11604 return CCFPEmode;
11605
11606 default:
11607 gcc_unreachable ();
11608 }
11609 }
11610
11611 /* A compare with a shifted operand. Because of canonicalization, the
11612 comparison will have to be swapped when we emit the assembler. */
11613 if (GET_MODE (y) == SImode
11614 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11615 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11616 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
11617 || GET_CODE (x) == ROTATERT))
11618 return CC_SWPmode;
11619
11620 /* This operation is performed swapped, but since we only rely on the Z
11621 flag we don't need an additional mode. */
11622 if (GET_MODE (y) == SImode
11623 && (REG_P (y) || (GET_CODE (y) == SUBREG))
11624 && GET_CODE (x) == NEG
11625 && (op == EQ || op == NE))
11626 return CC_Zmode;
11627
11628 /* This is a special case that is used by combine to allow a
11629 comparison of a shifted byte load to be split into a zero-extend
11630 followed by a comparison of the shifted integer (only valid for
11631 equalities and unsigned inequalities). */
11632 if (GET_MODE (x) == SImode
11633 && GET_CODE (x) == ASHIFT
11634 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
11635 && GET_CODE (XEXP (x, 0)) == SUBREG
11636 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
11637 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
11638 && (op == EQ || op == NE
11639 || op == GEU || op == GTU || op == LTU || op == LEU)
11640 && GET_CODE (y) == CONST_INT)
11641 return CC_Zmode;
11642
11643 /* A construct for a conditional compare: if the false arm contains
11644 0, then both conditions must be true; otherwise either condition
11645 must be true. Not all conditions are possible, so CCmode is
11646 returned if it can't be done. */
11647 if (GET_CODE (x) == IF_THEN_ELSE
11648 && (XEXP (x, 2) == const0_rtx
11649 || XEXP (x, 2) == const1_rtx)
11650 && COMPARISON_P (XEXP (x, 0))
11651 && COMPARISON_P (XEXP (x, 1)))
11652 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11653 INTVAL (XEXP (x, 2)));
11654
11655 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
11656 if (GET_CODE (x) == AND
11657 && (op == EQ || op == NE)
11658 && COMPARISON_P (XEXP (x, 0))
11659 && COMPARISON_P (XEXP (x, 1)))
11660 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11661 DOM_CC_X_AND_Y);
11662
11663 if (GET_CODE (x) == IOR
11664 && (op == EQ || op == NE)
11665 && COMPARISON_P (XEXP (x, 0))
11666 && COMPARISON_P (XEXP (x, 1)))
11667 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
11668 DOM_CC_X_OR_Y);
11669
11670 /* An operation (on Thumb) where we want to test for a single bit.
11671 This is done by shifting that bit up into the top bit of a
11672 scratch register; we can then branch on the sign bit. */
11673 if (TARGET_THUMB1
11674 && GET_MODE (x) == SImode
11675 && (op == EQ || op == NE)
11676 && GET_CODE (x) == ZERO_EXTRACT
11677 && XEXP (x, 1) == const1_rtx)
11678 return CC_Nmode;
11679
11680 /* An operation that sets the condition codes as a side-effect does
11681 not set the V flag correctly, so we can only use comparisons where
11682 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11683 instead.) */
11684 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11685 if (GET_MODE (x) == SImode
11686 && y == const0_rtx
11687 && (op == EQ || op == NE || op == LT || op == GE)
11688 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
11689 || GET_CODE (x) == AND || GET_CODE (x) == IOR
11690 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
11691 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
11692 || GET_CODE (x) == LSHIFTRT
11693 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
11694 || GET_CODE (x) == ROTATERT
11695 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
11696 return CC_NOOVmode;
11697
11698 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
11699 return CC_Zmode;
11700
11701 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
11702 && GET_CODE (x) == PLUS
11703 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
11704 return CC_Cmode;
11705
11706 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
11707 {
11708 switch (op)
11709 {
11710 case EQ:
11711 case NE:
11712 /* A DImode comparison against zero can be implemented by
11713 or'ing the two halves together. */
11714 if (y == const0_rtx)
11715 return CC_Zmode;
11716
11717 /* We can do an equality test in three Thumb instructions. */
11718 if (!TARGET_32BIT)
11719 return CC_Zmode;
11720
11721 /* FALLTHROUGH */
11722
11723 case LTU:
11724 case LEU:
11725 case GTU:
11726 case GEU:
11727 /* DImode unsigned comparisons can be implemented by cmp +
11728 cmpeq without a scratch register. Not worth doing in
11729 Thumb-2. */
11730 if (TARGET_32BIT)
11731 return CC_CZmode;
11732
11733 /* FALLTHROUGH */
11734
11735 case LT:
11736 case LE:
11737 case GT:
11738 case GE:
11739 /* DImode signed and unsigned comparisons can be implemented
11740 by cmp + sbcs with a scratch register, but that does not
11741 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11742 gcc_assert (op != EQ && op != NE);
11743 return CC_NCVmode;
11744
11745 default:
11746 gcc_unreachable ();
11747 }
11748 }
11749
11750 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
11751 return GET_MODE (x);
11752
11753 return CCmode;
11754 }
11755
11756 /* X and Y are two things to compare using CODE. Emit the compare insn and
11757 return the rtx for the CC register in the proper mode. SCRATCH, if
11758 non-null, is an SImode scratch register used for certain DImode comparisons. */
11759 rtx
11760 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
11761 {
11762 enum machine_mode mode;
11763 rtx cc_reg;
11764 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
11765
11766 /* We might have X as a constant, Y as a register because of the predicates
11767 used for cmpdi. If so, force X to a register here. */
11768 if (dimode_comparison && !REG_P (x))
11769 x = force_reg (DImode, x);
11770
11771 mode = SELECT_CC_MODE (code, x, y);
11772 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
11773
11774 if (dimode_comparison
11775 && mode != CC_CZmode)
11776 {
11777 rtx clobber, set;
11778
11779 /* To compare two non-zero values for equality, XOR them and
11780 then compare against zero. Not used for ARM mode; there
11781 CC_CZmode is cheaper. */
11782 if (mode == CC_Zmode && y != const0_rtx)
11783 {
11784 gcc_assert (!reload_completed);
11785 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
11786 y = const0_rtx;
11787 }
11788
11789 /* A scratch register is required. */
11790 if (reload_completed)
11791 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
11792 else
11793 scratch = gen_rtx_SCRATCH (SImode);
11794
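/* Emit the CC-register set and the scratch clobber as a single PARALLEL
   so that the scratch stays tied to the comparison insn.  */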
11795 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
11796 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
11797 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
11798 }
11799 else
11800 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
11801
11802 return cc_reg;
11803 }
11804
11805 /* Generate a sequence of insns that will generate the correct return
11806 address mask depending on the physical architecture that the program
11807 is running on. */
11808 rtx
11809 arm_gen_return_addr_mask (void)
11810 {
11811 rtx reg = gen_reg_rtx (Pmode);
11812
11813 emit_insn (gen_return_addr_mask (reg));
11814 return reg;
11815 }
11816
11817 void
11818 arm_reload_in_hi (rtx *operands)
11819 {
11820 rtx ref = operands[1];
11821 rtx base, scratch;
11822 HOST_WIDE_INT offset = 0;
11823
11824 if (GET_CODE (ref) == SUBREG)
11825 {
11826 offset = SUBREG_BYTE (ref);
11827 ref = SUBREG_REG (ref);
11828 }
11829
11830 if (GET_CODE (ref) == REG)
11831 {
11832 /* We have a pseudo which has been spilt onto the stack; there
11833 are two cases here: the first where there is a simple
11834 stack-slot replacement and a second where the stack-slot is
11835 out of range, or is used as a subreg. */
11836 if (reg_equiv_mem (REGNO (ref)))
11837 {
11838 ref = reg_equiv_mem (REGNO (ref));
11839 base = find_replacement (&XEXP (ref, 0));
11840 }
11841 else
11842 /* The slot is out of range, or was dressed up in a SUBREG. */
11843 base = reg_equiv_address (REGNO (ref));
11844 }
11845 else
11846 base = find_replacement (&XEXP (ref, 0));
11847
11848 /* Handle the case where the address is too complex to be offset by 1. */
11849 if (GET_CODE (base) == MINUS
11850 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11851 {
11852 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11853
11854 emit_set_insn (base_plus, base);
11855 base = base_plus;
11856 }
11857 else if (GET_CODE (base) == PLUS)
11858 {
11859 /* The addend must be CONST_INT, or we would have dealt with it above. */
11860 HOST_WIDE_INT hi, lo;
11861
11862 offset += INTVAL (XEXP (base, 1));
11863 base = XEXP (base, 0);
11864
11865 /* Rework the address into a legal sequence of insns. */
11866 /* Valid range for lo is -4095 -> 4095 */
11867 lo = (offset >= 0
11868 ? (offset & 0xfff)
11869 : -((-offset) & 0xfff));
11870
11871 /* Corner case: if lo is the maximum offset then we would be out of range
11872 once we have added the additional 1 below, so bump the msb into the
11873 pre-loading insn(s). */
11874 if (lo == 4095)
11875 lo &= 0x7ff;
11876
11877 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
11878 ^ (HOST_WIDE_INT) 0x80000000)
11879 - (HOST_WIDE_INT) 0x80000000);
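/* For example, an offset of 0x1234 splits as hi = 0x1000, lo = 0x234;
   0x1fff splits as hi = 0x1800, lo = 0x7ff because of the corner case
   above.  */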
11880
11881 gcc_assert (hi + lo == offset);
11882
11883 if (hi != 0)
11884 {
11885 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11886
11887 /* Get the base address; addsi3 knows how to handle constants
11888 that require more than one insn. */
11889 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
11890 base = base_plus;
11891 offset = lo;
11892 }
11893 }
11894
11895 /* Operands[2] may overlap operands[0] (though it won't overlap
11896 operands[1]); that's why we asked for a DImode reg -- so we can
11897 use the half that does not overlap. */
11898 if (REGNO (operands[2]) == REGNO (operands[0]))
11899 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11900 else
11901 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11902
11903 emit_insn (gen_zero_extendqisi2 (scratch,
11904 gen_rtx_MEM (QImode,
11905 plus_constant (Pmode, base,
11906 offset))));
11907 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
11908 gen_rtx_MEM (QImode,
11909 plus_constant (Pmode, base,
11910 offset + 1))));
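/* Combine the two loaded bytes into a half-word: shift the byte that
   belongs in bits 8-15 left by eight and OR in the other byte.  */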
11911 if (!BYTES_BIG_ENDIAN)
11912 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11913 gen_rtx_IOR (SImode,
11914 gen_rtx_ASHIFT
11915 (SImode,
11916 gen_rtx_SUBREG (SImode, operands[0], 0),
11917 GEN_INT (8)),
11918 scratch));
11919 else
11920 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
11921 gen_rtx_IOR (SImode,
11922 gen_rtx_ASHIFT (SImode, scratch,
11923 GEN_INT (8)),
11924 gen_rtx_SUBREG (SImode, operands[0], 0)));
11925 }
11926
11927 /* Handle storing a half-word to memory during reload by synthesizing as two
11928 byte stores. Take care not to clobber the input values until after we
11929 have moved them somewhere safe. This code assumes that if the DImode
11930 scratch in operands[2] overlaps either the input value or output address
11931 in some way, then that value must die in this insn (we absolutely need
11932 two scratch registers for some corner cases). */
11933 void
11934 arm_reload_out_hi (rtx *operands)
11935 {
11936 rtx ref = operands[0];
11937 rtx outval = operands[1];
11938 rtx base, scratch;
11939 HOST_WIDE_INT offset = 0;
11940
11941 if (GET_CODE (ref) == SUBREG)
11942 {
11943 offset = SUBREG_BYTE (ref);
11944 ref = SUBREG_REG (ref);
11945 }
11946
11947 if (GET_CODE (ref) == REG)
11948 {
11949 /* We have a pseudo which has been spilt onto the stack; there
11950 are two cases here: the first where there is a simple
11951 stack-slot replacement and a second where the stack-slot is
11952 out of range, or is used as a subreg. */
11953 if (reg_equiv_mem (REGNO (ref)))
11954 {
11955 ref = reg_equiv_mem (REGNO (ref));
11956 base = find_replacement (&XEXP (ref, 0));
11957 }
11958 else
11959 /* The slot is out of range, or was dressed up in a SUBREG. */
11960 base = reg_equiv_address (REGNO (ref));
11961 }
11962 else
11963 base = find_replacement (&XEXP (ref, 0));
11964
11965 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
11966
11967 /* Handle the case where the address is too complex to be offset by 1. */
11968 if (GET_CODE (base) == MINUS
11969 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
11970 {
11971 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
11972
11973 /* Be careful not to destroy OUTVAL. */
11974 if (reg_overlap_mentioned_p (base_plus, outval))
11975 {
11976 /* Updating base_plus might destroy outval, see if we can
11977 swap the scratch and base_plus. */
11978 if (!reg_overlap_mentioned_p (scratch, outval))
11979 {
11980 rtx tmp = scratch;
11981 scratch = base_plus;
11982 base_plus = tmp;
11983 }
11984 else
11985 {
11986 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
11987
11988 /* Be conservative and copy OUTVAL into the scratch now;
11989 this should only be necessary if outval is a subreg
11990 of something larger than a word. */
11991 /* XXX Might this clobber base? I can't see how it can,
11992 since scratch is known to overlap with OUTVAL, and
11993 must be wider than a word. */
11994 emit_insn (gen_movhi (scratch_hi, outval));
11995 outval = scratch_hi;
11996 }
11997 }
11998
11999 emit_set_insn (base_plus, base);
12000 base = base_plus;
12001 }
12002 else if (GET_CODE (base) == PLUS)
12003 {
12004 /* The addend must be CONST_INT, or we would have dealt with it above. */
12005 HOST_WIDE_INT hi, lo;
12006
12007 offset += INTVAL (XEXP (base, 1));
12008 base = XEXP (base, 0);
12009
12010 /* Rework the address into a legal sequence of insns. */
12011 /* Valid range for lo is -4095 -> 4095 */
12012 lo = (offset >= 0
12013 ? (offset & 0xfff)
12014 : -((-offset) & 0xfff));
12015
12016 /* Corner case: if lo is the maximum offset then we would be out of range
12017 once we have added the additional 1 below, so bump the msb into the
12018 pre-loading insn(s). */
12019 if (lo == 4095)
12020 lo &= 0x7ff;
12021
12022 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
12023 ^ (HOST_WIDE_INT) 0x80000000)
12024 - (HOST_WIDE_INT) 0x80000000);
12025
12026 gcc_assert (hi + lo == offset);
12027
12028 if (hi != 0)
12029 {
12030 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
12031
12032 /* Be careful not to destroy OUTVAL. */
12033 if (reg_overlap_mentioned_p (base_plus, outval))
12034 {
12035 /* Updating base_plus might destroy outval, see if we
12036 can swap the scratch and base_plus. */
12037 if (!reg_overlap_mentioned_p (scratch, outval))
12038 {
12039 rtx tmp = scratch;
12040 scratch = base_plus;
12041 base_plus = tmp;
12042 }
12043 else
12044 {
12045 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
12046
12047 /* Be conservative and copy outval into scratch now;
12048 this should only be necessary if outval is a
12049 subreg of something larger than a word. */
12050 /* XXX Might this clobber base? I can't see how it
12051 can, since scratch is known to overlap with
12052 outval. */
12053 emit_insn (gen_movhi (scratch_hi, outval));
12054 outval = scratch_hi;
12055 }
12056 }
12057
12058 /* Get the base address; addsi3 knows how to handle constants
12059 that require more than one insn. */
12060 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
12061 base = base_plus;
12062 offset = lo;
12063 }
12064 }
12065
12066 if (BYTES_BIG_ENDIAN)
12067 {
12068 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12069 plus_constant (Pmode, base,
12070 offset + 1)),
12071 gen_lowpart (QImode, outval)));
12072 emit_insn (gen_lshrsi3 (scratch,
12073 gen_rtx_SUBREG (SImode, outval, 0),
12074 GEN_INT (8)));
12075 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12076 offset)),
12077 gen_lowpart (QImode, scratch)));
12078 }
12079 else
12080 {
12081 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
12082 offset)),
12083 gen_lowpart (QImode, outval)));
12084 emit_insn (gen_lshrsi3 (scratch,
12085 gen_rtx_SUBREG (SImode, outval, 0),
12086 GEN_INT (8)));
12087 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
12088 plus_constant (Pmode, base,
12089 offset + 1)),
12090 gen_lowpart (QImode, scratch)));
12091 }
12092 }
12093
12094 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
12095 (padded to the size of a word) should be passed in a register. */
12096
12097 static bool
12098 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
12099 {
12100 if (TARGET_AAPCS_BASED)
12101 return must_pass_in_stack_var_size (mode, type);
12102 else
12103 return must_pass_in_stack_var_size_or_pad (mode, type);
12104 }
12105
12106
12107 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
12108 Return true if an argument passed on the stack should be padded upwards,
12109 i.e. if the least-significant byte has useful data.
12110 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
12111 aggregate types are placed in the lowest memory address. */
12112
12113 bool
12114 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
12115 {
12116 if (!TARGET_AAPCS_BASED)
12117 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
12118
12119 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
12120 return false;
12121
12122 return true;
12123 }
12124
12125
12126 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
12127 Return !BYTES_BIG_ENDIAN if the least significant byte of the
12128 register has useful data, and return the opposite if the most
12129 significant byte does. */
12130
12131 bool
12132 arm_pad_reg_upward (enum machine_mode mode,
12133 tree type, int first ATTRIBUTE_UNUSED)
12134 {
12135 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
12136 {
12137 /* For AAPCS, small aggregates, small fixed-point types,
12138 and small complex types are always padded upwards. */
12139 if (type)
12140 {
12141 if ((AGGREGATE_TYPE_P (type)
12142 || TREE_CODE (type) == COMPLEX_TYPE
12143 || FIXED_POINT_TYPE_P (type))
12144 && int_size_in_bytes (type) <= 4)
12145 return true;
12146 }
12147 else
12148 {
12149 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
12150 && GET_MODE_SIZE (mode) <= 4)
12151 return true;
12152 }
12153 }
12154
12155 /* Otherwise, use default padding. */
12156 return !BYTES_BIG_ENDIAN;
12157 }
12158
12159 \f
12160 /* Print a symbolic form of X to the debug file, F. */
12161 static void
12162 arm_print_value (FILE *f, rtx x)
12163 {
12164 switch (GET_CODE (x))
12165 {
12166 case CONST_INT:
12167 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
12168 return;
12169
12170 case CONST_DOUBLE:
12171 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
12172 return;
12173
12174 case CONST_VECTOR:
12175 {
12176 int i;
12177
12178 fprintf (f, "<");
12179 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
12180 {
12181 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
12182 if (i < (CONST_VECTOR_NUNITS (x) - 1))
12183 fputc (',', f);
12184 }
12185 fprintf (f, ">");
12186 }
12187 return;
12188
12189 case CONST_STRING:
12190 fprintf (f, "\"%s\"", XSTR (x, 0));
12191 return;
12192
12193 case SYMBOL_REF:
12194 fprintf (f, "`%s'", XSTR (x, 0));
12195 return;
12196
12197 case LABEL_REF:
12198 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
12199 return;
12200
12201 case CONST:
12202 arm_print_value (f, XEXP (x, 0));
12203 return;
12204
12205 case PLUS:
12206 arm_print_value (f, XEXP (x, 0));
12207 fprintf (f, "+");
12208 arm_print_value (f, XEXP (x, 1));
12209 return;
12210
12211 case PC:
12212 fprintf (f, "pc");
12213 return;
12214
12215 default:
12216 fprintf (f, "????");
12217 return;
12218 }
12219 }
12220 \f
12221 /* Routines for manipulation of the constant pool. */
12222
12223 /* Arm instructions cannot load a large constant directly into a
12224 register; such a constant has to come from a pc relative load. The constant
12225 must therefore be placed in the addressable range of the pc
12226 relative load. Depending on the precise pc relative load
12227 instruction the range is somewhere between 256 bytes and 4k. This
12228 means that we often have to dump a constant inside a function, and
12229 generate code to branch around it.
12230
12231 It is important to minimize this, since the branches will slow
12232 things down and make the code larger.
12233
12234 Normally we can hide the table after an existing unconditional
12235 branch so that there is no interruption of the flow, but in the
12236 worst case the code looks like this:
12237
12238 ldr rn, L1
12239 ...
12240 b L2
12241 align
12242 L1: .long value
12243 L2:
12244 ...
12245
12246 ldr rn, L3
12247 ...
12248 b L4
12249 align
12250 L3: .long value
12251 L4:
12252 ...
12253
12254 We fix this by performing a scan after scheduling, which notices
12255 which instructions need to have their operands fetched from the
12256 constant table and builds the table.
12257
12258 The algorithm starts by building a table of all the constants that
12259 need fixing up and all the natural barriers in the function (places
12260 where a constant table can be dropped without breaking the flow).
12261 For each fixup we note how far the pc-relative replacement will be
12262 able to reach and the offset of the instruction into the function.
12263
12264 Having built the table we then group the fixes together to form
12265 tables that are as large as possible (subject to addressing
12266 constraints) and emit each table of constants after the last
12267 barrier that is within range of all the instructions in the group.
12268 If a group does not contain a barrier, then we forcibly create one
12269 by inserting a jump instruction into the flow. Once the table has
12270 been inserted, the insns are then modified to reference the
12271 relevant entry in the pool.
12272
12273 Possible enhancements to the algorithm (not implemented) are:
12274
12275 1) For some processors and object formats, there may be benefit in
12276 aligning the pools to the start of cache lines; this alignment
12277 would need to be taken into account when calculating addressability
12278 of a pool. */
12279
12280 /* These typedefs are located at the start of this file, so that
12281 they can be used in the prototypes there. This comment is to
12282 remind readers of that fact so that the following structures
12283 can be understood more easily.
12284
12285 typedef struct minipool_node Mnode;
12286 typedef struct minipool_fixup Mfix; */
12287
12288 struct minipool_node
12289 {
12290 /* Doubly linked chain of entries. */
12291 Mnode * next;
12292 Mnode * prev;
12293 /* The maximum offset into the code at which this entry can be placed. While
12294 pushing fixes for forward references, all entries are sorted in order
12295 of increasing max_address. */
12296 HOST_WIDE_INT max_address;
12297 /* Similarly for an entry inserted for a backwards ref. */
12298 HOST_WIDE_INT min_address;
12299 /* The number of fixes referencing this entry. This can become zero
12300 if we "unpush" an entry. In this case we ignore the entry when we
12301 come to emit the code. */
12302 int refcount;
12303 /* The offset from the start of the minipool. */
12304 HOST_WIDE_INT offset;
12305 /* The value in the table. */
12306 rtx value;
12307 /* The mode of value. */
12308 enum machine_mode mode;
12309 /* The size of the value. With iWMMXt enabled,
12310 sizes > 4 also imply an alignment of 8 bytes. */
12311 int fix_size;
12312 };
12313
12314 struct minipool_fixup
12315 {
12316 Mfix * next;
12317 rtx insn;
12318 HOST_WIDE_INT address;
12319 rtx * loc;
12320 enum machine_mode mode;
12321 int fix_size;
12322 rtx value;
12323 Mnode * minipool;
12324 HOST_WIDE_INT forwards;
12325 HOST_WIDE_INT backwards;
12326 };
12327
12328 /* Fixes less than a word need padding out to a word boundary. */
12329 #define MINIPOOL_FIX_SIZE(mode) \
12330 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
12331
12332 static Mnode * minipool_vector_head;
12333 static Mnode * minipool_vector_tail;
12334 static rtx minipool_vector_label;
12335 static int minipool_pad;
12336
12337 /* The linked list of all minipool fixes required for this function. */
12338 Mfix * minipool_fix_head;
12339 Mfix * minipool_fix_tail;
12340 /* The fix entry for the current minipool, once it has been placed. */
12341 Mfix * minipool_barrier;
12342
12343 /* Determines if INSN is the start of a jump table. Returns the end
12344 of the TABLE or NULL_RTX. */
12345 static rtx
12346 is_jump_table (rtx insn)
12347 {
12348 rtx table;
12349
12350 if (jump_to_label_p (insn)
12351 && ((table = next_real_insn (JUMP_LABEL (insn)))
12352 == next_real_insn (insn))
12353 && table != NULL
12354 && GET_CODE (table) == JUMP_INSN
12355 && (GET_CODE (PATTERN (table)) == ADDR_VEC
12356 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
12357 return table;
12358
12359 return NULL_RTX;
12360 }
12361
12362 #ifndef JUMP_TABLES_IN_TEXT_SECTION
12363 #define JUMP_TABLES_IN_TEXT_SECTION 0
12364 #endif
12365
12366 static HOST_WIDE_INT
12367 get_jump_table_size (rtx insn)
12368 {
12369 /* ADDR_VECs only take room if read-only data goes into the text
12370 section. */
12371 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
12372 {
12373 rtx body = PATTERN (insn);
12374 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
12375 HOST_WIDE_INT size;
12376 HOST_WIDE_INT modesize;
12377
12378 modesize = GET_MODE_SIZE (GET_MODE (body));
12379 size = modesize * XVECLEN (body, elt);
12380 switch (modesize)
12381 {
12382 case 1:
12383 /* Round up size of TBB table to a halfword boundary. */
12384 size = (size + 1) & ~(HOST_WIDE_INT)1;
12385 break;
12386 case 2:
12387 /* No padding necessary for TBH. */
12388 break;
12389 case 4:
12390 /* Add two bytes for alignment on Thumb. */
12391 if (TARGET_THUMB)
12392 size += 2;
12393 break;
12394 default:
12395 gcc_unreachable ();
12396 }
12397 return size;
12398 }
12399
12400 return 0;
12401 }
12402
12403 /* Return the maximum amount of padding that will be inserted before
12404 label LABEL. */
12405
12406 static HOST_WIDE_INT
12407 get_label_padding (rtx label)
12408 {
12409 HOST_WIDE_INT align, min_insn_size;
12410
12411 align = 1 << label_to_alignment (label);
12412 min_insn_size = TARGET_THUMB ? 2 : 4;
12413 return align > min_insn_size ? align - min_insn_size : 0;
12414 }
12415
12416 /* Move a minipool fix MP from its current location to before MAX_MP.
12417 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
12418 constraints may need updating. */
12419 static Mnode *
12420 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
12421 HOST_WIDE_INT max_address)
12422 {
12423 /* The code below assumes these are different. */
12424 gcc_assert (mp != max_mp);
12425
12426 if (max_mp == NULL)
12427 {
12428 if (max_address < mp->max_address)
12429 mp->max_address = max_address;
12430 }
12431 else
12432 {
12433 if (max_address > max_mp->max_address - mp->fix_size)
12434 mp->max_address = max_mp->max_address - mp->fix_size;
12435 else
12436 mp->max_address = max_address;
12437
12438 /* Unlink MP from its current position. Since max_mp is non-null,
12439 mp->prev must be non-null. */
12440 mp->prev->next = mp->next;
12441 if (mp->next != NULL)
12442 mp->next->prev = mp->prev;
12443 else
12444 minipool_vector_tail = mp->prev;
12445
12446 /* Re-insert it before MAX_MP. */
12447 mp->next = max_mp;
12448 mp->prev = max_mp->prev;
12449 max_mp->prev = mp;
12450
12451 if (mp->prev != NULL)
12452 mp->prev->next = mp;
12453 else
12454 minipool_vector_head = mp;
12455 }
12456
12457 /* Save the new entry. */
12458 max_mp = mp;
12459
12460 /* Scan over the preceding entries and adjust their addresses as
12461 required. */
12462 while (mp->prev != NULL
12463 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12464 {
12465 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12466 mp = mp->prev;
12467 }
12468
12469 return max_mp;
12470 }
12471
12472 /* Add a constant to the minipool for a forward reference. Returns the
12473 node added or NULL if the constant will not fit in this pool. */
12474 static Mnode *
12475 add_minipool_forward_ref (Mfix *fix)
12476 {
12477 /* If set, max_mp is the first pool_entry that has a lower
12478 constraint than the one we are trying to add. */
12479 Mnode * max_mp = NULL;
12480 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
12481 Mnode * mp;
12482
12483 /* If the minipool starts before the end of FIX->INSN then this FIX
12484 cannot be placed into the current pool. Furthermore, adding the
12485 new constant pool entry may cause the pool to start FIX_SIZE bytes
12486 earlier. */
12487 if (minipool_vector_head &&
12488 (fix->address + get_attr_length (fix->insn)
12489 >= minipool_vector_head->max_address - fix->fix_size))
12490 return NULL;
12491
12492 /* Scan the pool to see if a constant with the same value has
12493 already been added. While we are doing this, also note the
12494 location where we must insert the constant if it doesn't already
12495 exist. */
12496 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12497 {
12498 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12499 && fix->mode == mp->mode
12500 && (GET_CODE (fix->value) != CODE_LABEL
12501 || (CODE_LABEL_NUMBER (fix->value)
12502 == CODE_LABEL_NUMBER (mp->value)))
12503 && rtx_equal_p (fix->value, mp->value))
12504 {
12505 /* More than one fix references this entry. */
12506 mp->refcount++;
12507 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
12508 }
12509
12510 /* Note the insertion point if necessary. */
12511 if (max_mp == NULL
12512 && mp->max_address > max_address)
12513 max_mp = mp;
12514
12515 /* If we are inserting an 8-byte aligned quantity and
12516 we have not already found an insertion point, then
12517 make sure that all such 8-byte aligned quantities are
12518 placed at the start of the pool. */
12519 if (ARM_DOUBLEWORD_ALIGN
12520 && max_mp == NULL
12521 && fix->fix_size >= 8
12522 && mp->fix_size < 8)
12523 {
12524 max_mp = mp;
12525 max_address = mp->max_address;
12526 }
12527 }
12528
12529 /* The value is not currently in the minipool, so we need to create
12530 a new entry for it. If MAX_MP is NULL, the entry will be put on
12531 the end of the list since the placement is less constrained than
12532 any existing entry. Otherwise, we insert the new fix before
12533 MAX_MP and, if necessary, adjust the constraints on the other
12534 entries. */
12535 mp = XNEW (Mnode);
12536 mp->fix_size = fix->fix_size;
12537 mp->mode = fix->mode;
12538 mp->value = fix->value;
12539 mp->refcount = 1;
12540 /* Not yet required for a backwards ref. */
12541 mp->min_address = -65536;
12542
12543 if (max_mp == NULL)
12544 {
12545 mp->max_address = max_address;
12546 mp->next = NULL;
12547 mp->prev = minipool_vector_tail;
12548
12549 if (mp->prev == NULL)
12550 {
12551 minipool_vector_head = mp;
12552 minipool_vector_label = gen_label_rtx ();
12553 }
12554 else
12555 mp->prev->next = mp;
12556
12557 minipool_vector_tail = mp;
12558 }
12559 else
12560 {
12561 if (max_address > max_mp->max_address - mp->fix_size)
12562 mp->max_address = max_mp->max_address - mp->fix_size;
12563 else
12564 mp->max_address = max_address;
12565
12566 mp->next = max_mp;
12567 mp->prev = max_mp->prev;
12568 max_mp->prev = mp;
12569 if (mp->prev != NULL)
12570 mp->prev->next = mp;
12571 else
12572 minipool_vector_head = mp;
12573 }
12574
12575 /* Save the new entry. */
12576 max_mp = mp;
12577
12578 /* Scan over the preceding entries and adjust their addresses as
12579 required. */
12580 while (mp->prev != NULL
12581 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
12582 {
12583 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
12584 mp = mp->prev;
12585 }
12586
12587 return max_mp;
12588 }
12589
12590 static Mnode *
12591 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
12592 HOST_WIDE_INT min_address)
12593 {
12594 HOST_WIDE_INT offset;
12595
12596 /* The code below assumes these are different. */
12597 gcc_assert (mp != min_mp);
12598
12599 if (min_mp == NULL)
12600 {
12601 if (min_address > mp->min_address)
12602 mp->min_address = min_address;
12603 }
12604 else
12605 {
12606 /* We will adjust this below if it is too loose. */
12607 mp->min_address = min_address;
12608
12609 /* Unlink MP from its current position. Since min_mp is non-null,
12610 mp->next must be non-null. */
12611 mp->next->prev = mp->prev;
12612 if (mp->prev != NULL)
12613 mp->prev->next = mp->next;
12614 else
12615 minipool_vector_head = mp->next;
12616
12617 /* Reinsert it after MIN_MP. */
12618 mp->prev = min_mp;
12619 mp->next = min_mp->next;
12620 min_mp->next = mp;
12621 if (mp->next != NULL)
12622 mp->next->prev = mp;
12623 else
12624 minipool_vector_tail = mp;
12625 }
12626
12627 min_mp = mp;
12628
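/* Recompute the offset of every entry from the head of the pool
   (entries whose refcount has dropped to zero take no space) and
   propagate the min_address constraints forward through the list.  */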
12629 offset = 0;
12630 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12631 {
12632 mp->offset = offset;
12633 if (mp->refcount > 0)
12634 offset += mp->fix_size;
12635
12636 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
12637 mp->next->min_address = mp->min_address + mp->fix_size;
12638 }
12639
12640 return min_mp;
12641 }
12642
12643 /* Add a constant to the minipool for a backward reference. Returns the
12644 node added or NULL if the constant will not fit in this pool.
12645
12646 Note that the code for inserting a backwards reference can be
12647 somewhat confusing, because the calculated offsets for each fix do
12648 not take into account the size of the pool (which is still under
12649 construction).  */
12650 static Mnode *
12651 add_minipool_backward_ref (Mfix *fix)
12652 {
12653 /* If set, min_mp is the last pool_entry that has a lower constraint
12654 than the one we are trying to add. */
12655 Mnode *min_mp = NULL;
12656 /* This can be negative, since it is only a constraint. */
12657 HOST_WIDE_INT min_address = fix->address - fix->backwards;
12658 Mnode *mp;
12659
12660 /* If we can't reach the current pool from this insn, or if we can't
12661 insert this entry at the end of the pool without pushing other
12662 fixes out of range, then we don't try. This ensures that we
12663 can't fail later on. */
12664 if (min_address >= minipool_barrier->address
12665 || (minipool_vector_tail->min_address + fix->fix_size
12666 >= minipool_barrier->address))
12667 return NULL;
12668
12669 /* Scan the pool to see if a constant with the same value has
12670 already been added. While we are doing this, also note the
12671 location where we must insert the constant if it doesn't already
12672 exist. */
12673 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
12674 {
12675 if (GET_CODE (fix->value) == GET_CODE (mp->value)
12676 && fix->mode == mp->mode
12677 && (GET_CODE (fix->value) != CODE_LABEL
12678 || (CODE_LABEL_NUMBER (fix->value)
12679 == CODE_LABEL_NUMBER (mp->value)))
12680 && rtx_equal_p (fix->value, mp->value)
12681 /* Check that there is enough slack to move this entry to the
12682 end of the table (this is conservative). */
12683 && (mp->max_address
12684 > (minipool_barrier->address
12685 + minipool_vector_tail->offset
12686 + minipool_vector_tail->fix_size)))
12687 {
12688 mp->refcount++;
12689 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
12690 }
12691
12692 if (min_mp != NULL)
12693 mp->min_address += fix->fix_size;
12694 else
12695 {
12696 /* Note the insertion point if necessary. */
12697 if (mp->min_address < min_address)
12698 {
12699 /* For now, we do not allow the insertion of nodes requiring 8-byte
12700 alignment anywhere but at the start of the pool. */
12701 if (ARM_DOUBLEWORD_ALIGN
12702 && fix->fix_size >= 8 && mp->fix_size < 8)
12703 return NULL;
12704 else
12705 min_mp = mp;
12706 }
12707 else if (mp->max_address
12708 < minipool_barrier->address + mp->offset + fix->fix_size)
12709 {
12710 /* Inserting before this entry would push the fix beyond
12711 its maximum address (which can happen if we have
12712 re-located a forwards fix); force the new fix to come
12713 after it. */
12714 if (ARM_DOUBLEWORD_ALIGN
12715 && fix->fix_size >= 8 && mp->fix_size < 8)
12716 return NULL;
12717 else
12718 {
12719 min_mp = mp;
12720 min_address = mp->min_address + fix->fix_size;
12721 }
12722 }
12723 /* Do not insert a non-8-byte aligned quantity before 8-byte
12724 aligned quantities. */
12725 else if (ARM_DOUBLEWORD_ALIGN
12726 && fix->fix_size < 8
12727 && mp->fix_size >= 8)
12728 {
12729 min_mp = mp;
12730 min_address = mp->min_address + fix->fix_size;
12731 }
12732 }
12733 }
12734
12735 /* We need to create a new entry. */
12736 mp = XNEW (Mnode);
12737 mp->fix_size = fix->fix_size;
12738 mp->mode = fix->mode;
12739 mp->value = fix->value;
12740 mp->refcount = 1;
12741 mp->max_address = minipool_barrier->address + 65536;
12742
12743 mp->min_address = min_address;
12744
12745 if (min_mp == NULL)
12746 {
12747 mp->prev = NULL;
12748 mp->next = minipool_vector_head;
12749
12750 if (mp->next == NULL)
12751 {
12752 minipool_vector_tail = mp;
12753 minipool_vector_label = gen_label_rtx ();
12754 }
12755 else
12756 mp->next->prev = mp;
12757
12758 minipool_vector_head = mp;
12759 }
12760 else
12761 {
12762 mp->next = min_mp->next;
12763 mp->prev = min_mp;
12764 min_mp->next = mp;
12765
12766 if (mp->next != NULL)
12767 mp->next->prev = mp;
12768 else
12769 minipool_vector_tail = mp;
12770 }
12771
12772 /* Save the new entry. */
12773 min_mp = mp;
12774
12775 if (mp->prev)
12776 mp = mp->prev;
12777 else
12778 mp->offset = 0;
12779
12780 /* Scan over the following entries and adjust their offsets. */
12781 while (mp->next != NULL)
12782 {
12783 if (mp->next->min_address < mp->min_address + mp->fix_size)
12784 mp->next->min_address = mp->min_address + mp->fix_size;
12785
12786 if (mp->refcount)
12787 mp->next->offset = mp->offset + mp->fix_size;
12788 else
12789 mp->next->offset = mp->offset;
12790
12791 mp = mp->next;
12792 }
12793
12794 return min_mp;
12795 }
12796
12797 static void
12798 assign_minipool_offsets (Mfix *barrier)
12799 {
12800 HOST_WIDE_INT offset = 0;
12801 Mnode *mp;
12802
12803 minipool_barrier = barrier;
12804
12805 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12806 {
12807 mp->offset = offset;
12808
12809 if (mp->refcount > 0)
12810 offset += mp->fix_size;
12811 }
12812 }
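/* For instance (a hypothetical pool), three live entries of sizes 4, 8
   and 4 bytes receive offsets 0, 4 and 12 respectively; an entry whose
   refcount has dropped to zero keeps its place in the list but consumes
   no space, so the entry after it gets the same offset.  */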
12813
12814 /* Output the literal table. */
12815 static void
12816 dump_minipool (rtx scan)
12817 {
12818 Mnode * mp;
12819 Mnode * nmp;
12820 int align64 = 0;
12821
12822 if (ARM_DOUBLEWORD_ALIGN)
12823 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
12824 if (mp->refcount > 0 && mp->fix_size >= 8)
12825 {
12826 align64 = 1;
12827 break;
12828 }
12829
12830 if (dump_file)
12831 fprintf (dump_file,
12832 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12833 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
12834
12835 scan = emit_label_after (gen_label_rtx (), scan);
12836 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
12837 scan = emit_label_after (minipool_vector_label, scan);
12838
12839 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
12840 {
12841 if (mp->refcount > 0)
12842 {
12843 if (dump_file)
12844 {
12845 fprintf (dump_file,
12846 ";; Offset %u, min %ld, max %ld ",
12847 (unsigned) mp->offset, (unsigned long) mp->min_address,
12848 (unsigned long) mp->max_address);
12849 arm_print_value (dump_file, mp->value);
12850 fputc ('\n', dump_file);
12851 }
12852
12853 switch (mp->fix_size)
12854 {
12855 #ifdef HAVE_consttable_1
12856 case 1:
12857 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
12858 break;
12859
12860 #endif
12861 #ifdef HAVE_consttable_2
12862 case 2:
12863 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
12864 break;
12865
12866 #endif
12867 #ifdef HAVE_consttable_4
12868 case 4:
12869 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
12870 break;
12871
12872 #endif
12873 #ifdef HAVE_consttable_8
12874 case 8:
12875 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
12876 break;
12877
12878 #endif
12879 #ifdef HAVE_consttable_16
12880 case 16:
12881 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
12882 break;
12883
12884 #endif
12885 default:
12886 gcc_unreachable ();
12887 }
12888 }
12889
12890 nmp = mp->next;
12891 free (mp);
12892 }
12893
12894 minipool_vector_head = minipool_vector_tail = NULL;
12895 scan = emit_insn_after (gen_consttable_end (), scan);
12896 scan = emit_barrier_after (scan);
12897 }
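/* Roughly, the function above emits a sequence like the following
   (values are illustrative; the exact directives come from the
   consttable_* and align_* patterns in the machine description):

	.L<new>:			@ fresh label before the pool
	@ align to 4 bytes, or 8 if any 8-byte entry is present
	.L<minipool_vector_label>:
	.word	0x12345678		@ a consttable_4 entry
	.word	0x0, 0x3ff00000		@ a consttable_8 entry

   followed by the consttable_end marker and a barrier.  */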
12898
12899 /* Return the cost of forcibly inserting a barrier after INSN. */
12900 static int
12901 arm_barrier_cost (rtx insn)
12902 {
12903 /* Basing the location of the pool on the loop depth is preferable,
12904 but at the moment, the basic block information seems to be
12905 corrupted by this stage of the compilation. */
12906 int base_cost = 50;
12907 rtx next = next_nonnote_insn (insn);
12908
12909 if (next != NULL && GET_CODE (next) == CODE_LABEL)
12910 base_cost -= 20;
12911
12912 switch (GET_CODE (insn))
12913 {
12914 case CODE_LABEL:
12915 /* It will always be better to place the table before the label, rather
12916 than after it. */
12917 return 50;
12918
12919 case INSN:
12920 case CALL_INSN:
12921 return base_cost;
12922
12923 case JUMP_INSN:
12924 return base_cost - 10;
12925
12926 default:
12927 return base_cost + 10;
12928 }
12929 }
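/* Example costs produced by the function above: a CODE_LABEL is always
   50; an ordinary INSN or CALL_INSN costs base_cost (50, or 30 when the
   next real insn is a label); a JUMP_INSN costs 10 less than that,
   making jumps the preferred place to force a barrier; anything else
   costs 10 more.  */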
12930
12931 /* Find the best place in the insn stream in the range
12932 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12933 Create the barrier by inserting a jump and add a new fix entry for
12934 it. */
12935 static Mfix *
12936 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
12937 {
12938 HOST_WIDE_INT count = 0;
12939 rtx barrier;
12940 rtx from = fix->insn;
12941 /* The instruction after which we will insert the jump. */
12942 rtx selected = NULL;
12943 int selected_cost;
12944 /* The address at which the jump instruction will be placed. */
12945 HOST_WIDE_INT selected_address;
12946 Mfix * new_fix;
12947 HOST_WIDE_INT max_count = max_address - fix->address;
12948 rtx label = gen_label_rtx ();
12949
12950 selected_cost = arm_barrier_cost (from);
12951 selected_address = fix->address;
12952
12953 while (from && count < max_count)
12954 {
12955 rtx tmp;
12956 int new_cost;
12957
12958 /* This code shouldn't have been called if there was a natural barrier
12959 within range. */
12960 gcc_assert (GET_CODE (from) != BARRIER);
12961
12962 /* Count the length of this insn. This must stay in sync with the
12963 code that pushes minipool fixes. */
12964 if (LABEL_P (from))
12965 count += get_label_padding (from);
12966 else
12967 count += get_attr_length (from);
12968
12969 /* If there is a jump table, add its length. */
12970 tmp = is_jump_table (from);
12971 if (tmp != NULL)
12972 {
12973 count += get_jump_table_size (tmp);
12974
12975 /* Jump tables aren't in a basic block, so base the cost on
12976 the dispatch insn. If we select this location, we will
12977 still put the pool after the table. */
12978 new_cost = arm_barrier_cost (from);
12979
12980 if (count < max_count
12981 && (!selected || new_cost <= selected_cost))
12982 {
12983 selected = tmp;
12984 selected_cost = new_cost;
12985 selected_address = fix->address + count;
12986 }
12987
12988 /* Continue after the dispatch table. */
12989 from = NEXT_INSN (tmp);
12990 continue;
12991 }
12992
12993 new_cost = arm_barrier_cost (from);
12994
12995 if (count < max_count
12996 && (!selected || new_cost <= selected_cost))
12997 {
12998 selected = from;
12999 selected_cost = new_cost;
13000 selected_address = fix->address + count;
13001 }
13002
13003 from = NEXT_INSN (from);
13004 }
13005
13006 /* Make sure that we found a place to insert the jump. */
13007 gcc_assert (selected);
13008
13009 /* Make sure we do not split a call and its corresponding
13010 CALL_ARG_LOCATION note. */
13011 if (CALL_P (selected))
13012 {
13013 rtx next = NEXT_INSN (selected);
13014 if (next && NOTE_P (next)
13015 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
13016 selected = next;
13017 }
13018
13019 /* Create a new JUMP_INSN that branches around a barrier. */
13020 from = emit_jump_insn_after (gen_jump (label), selected);
13021 JUMP_LABEL (from) = label;
13022 barrier = emit_barrier_after (from);
13023 emit_label_after (label, barrier);
13024
13025 /* Create a minipool barrier entry for the new barrier. */
13026 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
13027 new_fix->insn = barrier;
13028 new_fix->address = selected_address;
13029 new_fix->next = fix->next;
13030 fix->next = new_fix;
13031
13032 return new_fix;
13033 }
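/* The code inserted at the selected point looks roughly like this
   (label name illustrative):

	b	.Lskip		@ new jump around the future pool
	@ barrier -- the literal pool is later dumped here
   .Lskip:

   and the barrier is recorded as a new fix entry so that subsequent
   pool placement knows about it.  */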
13034
13035 /* Record that there is a natural barrier in the insn stream at
13036 ADDRESS. */
13037 static void
13038 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
13039 {
13040 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13041
13042 fix->insn = insn;
13043 fix->address = address;
13044
13045 fix->next = NULL;
13046 if (minipool_fix_head != NULL)
13047 minipool_fix_tail->next = fix;
13048 else
13049 minipool_fix_head = fix;
13050
13051 minipool_fix_tail = fix;
13052 }
13053
13054 /* Record INSN, which will need fixing up to load a value from the
13055 minipool. ADDRESS is the offset of the insn since the start of the
13056 function; LOC is a pointer to the part of the insn which requires
13057 fixing; VALUE is the constant that must be loaded, which is of type
13058 MODE. */
13059 static void
13060 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
13061 enum machine_mode mode, rtx value)
13062 {
13063 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
13064
13065 fix->insn = insn;
13066 fix->address = address;
13067 fix->loc = loc;
13068 fix->mode = mode;
13069 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
13070 fix->value = value;
13071 fix->forwards = get_attr_pool_range (insn);
13072 fix->backwards = get_attr_neg_pool_range (insn);
13073 fix->minipool = NULL;
13074
13075 /* If an insn doesn't have a range defined for it, then it isn't
13076 expecting to be reworked by this code. Better to stop now than
13077 to generate duff assembly code. */
13078 gcc_assert (fix->forwards || fix->backwards);
13079
13080 /* If an entry requires 8-byte alignment then assume all constant pools
13081 require 4 bytes of padding. Trying to do this later on a per-pool
13082 basis is awkward because existing pool entries have to be modified. */
13083 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
13084 minipool_pad = 4;
13085
13086 if (dump_file)
13087 {
13088 fprintf (dump_file,
13089 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
13090 GET_MODE_NAME (mode),
13091 INSN_UID (insn), (unsigned long) address,
13092 -1 * (long)fix->backwards, (long)fix->forwards);
13093 arm_print_value (dump_file, fix->value);
13094 fprintf (dump_file, "\n");
13095 }
13096
13097 /* Add it to the chain of fixes. */
13098 fix->next = NULL;
13099
13100 if (minipool_fix_head != NULL)
13101 minipool_fix_tail->next = fix;
13102 else
13103 minipool_fix_head = fix;
13104
13105 minipool_fix_tail = fix;
13106 }
13107
13108 /* Return the cost of synthesizing a 64-bit constant VAL inline.
13109 Returns the number of insns needed, or 99 if we don't know how to
13110 do it. */
13111 int
13112 arm_const_double_inline_cost (rtx val)
13113 {
13114 rtx lowpart, highpart;
13115 enum machine_mode mode;
13116
13117 mode = GET_MODE (val);
13118
13119 if (mode == VOIDmode)
13120 mode = DImode;
13121
13122 gcc_assert (GET_MODE_SIZE (mode) == 8);
13123
13124 lowpart = gen_lowpart (SImode, val);
13125 highpart = gen_highpart_mode (SImode, mode, val);
13126
13127 gcc_assert (GET_CODE (lowpart) == CONST_INT);
13128 gcc_assert (GET_CODE (highpart) == CONST_INT);
13129
13130 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
13131 NULL_RTX, NULL_RTX, 0, 0)
13132 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
13133 NULL_RTX, NULL_RTX, 0, 0));
13134 }
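/* A worked example for the cost function above: the DImode constant
   0x0000000100000001 splits into low part 1 and high part 1, each of
   which arm_gen_constant can load with a single MOV, so the reported
   cost is 2.  A half such as 0x00ff00ff needs a MOV plus an ORR and
   contributes 2 on its own.  */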
13135
13136 /* Return true if it is worthwhile to split a 64-bit constant into two
13137 32-bit operations. This is the case if optimizing for size, or
13138 if we have load delay slots, or if one 32-bit part can be done with
13139 a single data operation. */
13140 bool
13141 arm_const_double_by_parts (rtx val)
13142 {
13143 enum machine_mode mode = GET_MODE (val);
13144 rtx part;
13145
13146 if (optimize_size || arm_ld_sched)
13147 return true;
13148
13149 if (mode == VOIDmode)
13150 mode = DImode;
13151
13152 part = gen_highpart_mode (SImode, mode, val);
13153
13154 gcc_assert (GET_CODE (part) == CONST_INT);
13155
13156 if (const_ok_for_arm (INTVAL (part))
13157 || const_ok_for_arm (~INTVAL (part)))
13158 return true;
13159
13160 part = gen_lowpart (SImode, val);
13161
13162 gcc_assert (GET_CODE (part) == CONST_INT);
13163
13164 if (const_ok_for_arm (INTVAL (part))
13165 || const_ok_for_arm (~INTVAL (part)))
13166 return true;
13167
13168 return false;
13169 }
13170
13171 /* Return true if it is possible to inline both the high and low parts
13172 of a 64-bit constant into 32-bit data processing instructions. */
13173 bool
13174 arm_const_double_by_immediates (rtx val)
13175 {
13176 enum machine_mode mode = GET_MODE (val);
13177 rtx part;
13178
13179 if (mode == VOIDmode)
13180 mode = DImode;
13181
13182 part = gen_highpart_mode (SImode, mode, val);
13183
13184 gcc_assert (GET_CODE (part) == CONST_INT);
13185
13186 if (!const_ok_for_arm (INTVAL (part)))
13187 return false;
13188
13189 part = gen_lowpart (SImode, val);
13190
13191 gcc_assert (GET_CODE (part) == CONST_INT);
13192
13193 if (!const_ok_for_arm (INTVAL (part)))
13194 return false;
13195
13196 return true;
13197 }
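/* For example, 0x000000ff000000ff can be built entirely from immediates
   (each half is the valid ARM immediate 0xff), whereas a constant with
   0x12345678 in either half cannot, since that value is not an 8-bit
   rotated immediate.  */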
13198
13199 /* Scan INSN and note any of its operands that need fixing.
13200 If DO_PUSHES is false we do not actually push any of the fixups
13201 needed. */
13202 static void
13203 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
13204 {
13205 int opno;
13206
13207 extract_insn (insn);
13208
13209 if (!constrain_operands (1))
13210 fatal_insn_not_found (insn);
13211
13212 if (recog_data.n_alternatives == 0)
13213 return;
13214
13215 /* Fill in recog_op_alt with information about the constraints of
13216 this insn. */
13217 preprocess_constraints ();
13218
13219 for (opno = 0; opno < recog_data.n_operands; opno++)
13220 {
13221 /* Things we need to fix can only occur in inputs. */
13222 if (recog_data.operand_type[opno] != OP_IN)
13223 continue;
13224
13225 /* If this alternative is a memory reference, then any mention
13226 of constants in this alternative is really to fool reload
13227 into allowing us to accept one there. We need to fix them up
13228 now so that we output the right code. */
13229 if (recog_op_alt[opno][which_alternative].memory_ok)
13230 {
13231 rtx op = recog_data.operand[opno];
13232
13233 if (CONSTANT_P (op))
13234 {
13235 if (do_pushes)
13236 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
13237 recog_data.operand_mode[opno], op);
13238 }
13239 else if (GET_CODE (op) == MEM
13240 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
13241 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
13242 {
13243 if (do_pushes)
13244 {
13245 rtx cop = avoid_constant_pool_reference (op);
13246
13247 /* Casting the address of something to a mode narrower
13248 than a word can cause avoid_constant_pool_reference()
13249 to return the pool reference itself. That's no good to
13250 us here. Let's just hope that we can use the
13251 constant pool value directly. */
13252 if (op == cop)
13253 cop = get_pool_constant (XEXP (op, 0));
13254
13255 push_minipool_fix (insn, address,
13256 recog_data.operand_loc[opno],
13257 recog_data.operand_mode[opno], cop);
13258 }
13259
13260 }
13261 }
13262 }
13263
13264 return;
13265 }
13266
13267 /* Convert instructions to their cc-clobbering variant if possible, since
13268 that allows us to use smaller encodings. */
13269
13270 static void
13271 thumb2_reorg (void)
13272 {
13273 basic_block bb;
13274 regset_head live;
13275
13276 INIT_REG_SET (&live);
13277
13278 /* We are freeing block_for_insn in the toplev to keep compatibility
13279 with old MDEP_REORGS that are not CFG based. Recompute it now. */
13280 compute_bb_for_insn ();
13281 df_analyze ();
13282
13283 FOR_EACH_BB (bb)
13284 {
13285 rtx insn;
13286
13287 COPY_REG_SET (&live, DF_LR_OUT (bb));
13288 df_simulate_initialize_backwards (bb, &live);
13289 FOR_BB_INSNS_REVERSE (bb, insn)
13290 {
13291 if (NONJUMP_INSN_P (insn)
13292 && !REGNO_REG_SET_P (&live, CC_REGNUM)
13293 && GET_CODE (PATTERN (insn)) == SET)
13294 {
13295 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
13296 rtx pat = PATTERN (insn);
13297 rtx dst = XEXP (pat, 0);
13298 rtx src = XEXP (pat, 1);
13299 rtx op0 = NULL_RTX, op1 = NULL_RTX;
13300
13301 if (!OBJECT_P (src))
13302 op0 = XEXP (src, 0);
13303
13304 if (BINARY_P (src))
13305 op1 = XEXP (src, 1);
13306
13307 if (low_register_operand (dst, SImode))
13308 {
13309 switch (GET_CODE (src))
13310 {
13311 case PLUS:
13312 /* Adding two registers and storing the result
13313 in the first source is already a 16-bit
13314 operation. */
13315 if (rtx_equal_p (dst, op0)
13316 && register_operand (op1, SImode))
13317 break;
13318
13319 if (low_register_operand (op0, SImode))
13320 {
13321 /* ADDS <Rd>,<Rn>,<Rm> */
13322 if (low_register_operand (op1, SImode))
13323 action = CONV;
13324 /* ADDS <Rdn>,#<imm8> */
13325 /* SUBS <Rdn>,#<imm8> */
13326 else if (rtx_equal_p (dst, op0)
13327 && CONST_INT_P (op1)
13328 && IN_RANGE (INTVAL (op1), -255, 255))
13329 action = CONV;
13330 /* ADDS <Rd>,<Rn>,#<imm3> */
13331 /* SUBS <Rd>,<Rn>,#<imm3> */
13332 else if (CONST_INT_P (op1)
13333 && IN_RANGE (INTVAL (op1), -7, 7))
13334 action = CONV;
13335 }
13336 break;
13337
13338 case MINUS:
13339 /* RSBS <Rd>,<Rn>,#0
13340 Not handled here: see NEG below. */
13341 /* SUBS <Rd>,<Rn>,#<imm3>
13342 SUBS <Rdn>,#<imm8>
13343 Not handled here: see PLUS above. */
13344 /* SUBS <Rd>,<Rn>,<Rm> */
13345 if (low_register_operand (op0, SImode)
13346 && low_register_operand (op1, SImode))
13347 action = CONV;
13348 break;
13349
13350 case MULT:
13351 /* MULS <Rdm>,<Rn>,<Rdm>
13352 As an exception to the rule, this is only used
13353 when optimizing for size since MULS is slow on all
13354 known implementations. When optimizing for speed we do not
13355 want to use MULS even in cold code, so we test the global
13356 flag here. */
13357 if (!optimize_size)
13358 break;
13359 /* else fall through. */
13360 case AND:
13361 case IOR:
13362 case XOR:
13363 /* ANDS <Rdn>,<Rm> */
13364 if (rtx_equal_p (dst, op0)
13365 && low_register_operand (op1, SImode))
13366 action = CONV;
13367 else if (rtx_equal_p (dst, op1)
13368 && low_register_operand (op0, SImode))
13369 action = SWAP_CONV;
13370 break;
13371
13372 case ASHIFTRT:
13373 case ASHIFT:
13374 case LSHIFTRT:
13375 /* ASRS <Rdn>,<Rm> */
13376 /* LSRS <Rdn>,<Rm> */
13377 /* LSLS <Rdn>,<Rm> */
13378 if (rtx_equal_p (dst, op0)
13379 && low_register_operand (op1, SImode))
13380 action = CONV;
13381 /* ASRS <Rd>,<Rm>,#<imm5> */
13382 /* LSRS <Rd>,<Rm>,#<imm5> */
13383 /* LSLS <Rd>,<Rm>,#<imm5> */
13384 else if (low_register_operand (op0, SImode)
13385 && CONST_INT_P (op1)
13386 && IN_RANGE (INTVAL (op1), 0, 31))
13387 action = CONV;
13388 break;
13389
13390 case ROTATERT:
13391 /* RORS <Rdn>,<Rm> */
13392 if (rtx_equal_p (dst, op0)
13393 && low_register_operand (op1, SImode))
13394 action = CONV;
13395 break;
13396
13397 case NOT:
13398 case NEG:
13399 /* MVNS <Rd>,<Rm> */
13400 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
13401 if (low_register_operand (op0, SImode))
13402 action = CONV;
13403 break;
13404
13405 case CONST_INT:
13406 /* MOVS <Rd>,#<imm8> */
13407 if (CONST_INT_P (src)
13408 && IN_RANGE (INTVAL (src), 0, 255))
13409 action = CONV;
13410 break;
13411
13412 case REG:
13413 /* MOVS and MOV<c> with registers have different
13414 encodings, so are not relevant here. */
13415 break;
13416
13417 default:
13418 break;
13419 }
13420 }
13421
13422 if (action != SKIP)
13423 {
13424 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
13425 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
13426 rtvec vec;
13427
13428 if (action == SWAP_CONV)
13429 {
13430 src = copy_rtx (src);
13431 XEXP (src, 0) = op1;
13432 XEXP (src, 1) = op0;
13433 pat = gen_rtx_SET (VOIDmode, dst, src);
13434 vec = gen_rtvec (2, pat, clobber);
13435 }
13436 else /* action == CONV */
13437 vec = gen_rtvec (2, pat, clobber);
13438
13439 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
13440 INSN_CODE (insn) = -1;
13441 }
13442 }
13443
13444 if (NONDEBUG_INSN_P (insn))
13445 df_simulate_one_insn_backwards (bb, insn, &live);
13446 }
13447 }
13448
13449 CLEAR_REG_SET (&live);
13450 }
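/* As a sketch of the rewriting above (register numbers illustrative),
   a plain

     (set (reg:SI r0) (plus:SI (reg:SI r1) (reg:SI r2)))

   becomes

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r1) (reg:SI r2)))
		(clobber (reg:CC CC_REGNUM))])

   so that the flag-setting 16-bit "adds r0, r1, r2" encoding can be
   selected when the condition codes are known to be dead.  */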
13451
13452 /* GCC puts the pool in the wrong place for ARM, since we can only
13453 load addresses a limited distance around the pc. We do some
13454 special munging to move the constant pool values to the correct
13455 point in the code. */
13456 static void
13457 arm_reorg (void)
13458 {
13459 rtx insn;
13460 HOST_WIDE_INT address = 0;
13461 Mfix * fix;
13462
13463 if (TARGET_THUMB2)
13464 thumb2_reorg ();
13465
13466 /* Ensure all insns that must be split have been split at this point.
13467 Otherwise, the pool placement code below may compute incorrect
13468 insn lengths. Note that when optimizing, all insns have already
13469 been split at this point. */
13470 if (!optimize)
13471 split_all_insns_noflow ();
13472
13473 minipool_fix_head = minipool_fix_tail = NULL;
13474
13475 /* The first insn must always be a note, or the code below won't
13476 scan it properly. */
13477 insn = get_insns ();
13478 gcc_assert (GET_CODE (insn) == NOTE);
13479 minipool_pad = 0;
13480
13481 /* Scan all the insns and record the operands that will need fixing. */
13482 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
13483 {
13484 if (GET_CODE (insn) == BARRIER)
13485 push_minipool_barrier (insn, address);
13486 else if (INSN_P (insn))
13487 {
13488 rtx table;
13489
13490 note_invalid_constants (insn, address, true);
13491 address += get_attr_length (insn);
13492
13493 /* If the insn is a vector jump, add the size of the table
13494 and skip the table. */
13495 if ((table = is_jump_table (insn)) != NULL)
13496 {
13497 address += get_jump_table_size (table);
13498 insn = table;
13499 }
13500 }
13501 else if (LABEL_P (insn))
13502 /* Add the worst-case padding due to alignment. We don't add
13503 the _current_ padding because the minipool insertions
13504 themselves might change it. */
13505 address += get_label_padding (insn);
13506 }
13507
13508 fix = minipool_fix_head;
13509
13510 /* Now scan the fixups and perform the required changes. */
13511 while (fix)
13512 {
13513 Mfix * ftmp;
13514 Mfix * fdel;
13515 Mfix * last_added_fix;
13516 Mfix * last_barrier = NULL;
13517 Mfix * this_fix;
13518
13519 /* Skip any further barriers before the next fix. */
13520 while (fix && GET_CODE (fix->insn) == BARRIER)
13521 fix = fix->next;
13522
13523 /* No more fixes. */
13524 if (fix == NULL)
13525 break;
13526
13527 last_added_fix = NULL;
13528
13529 for (ftmp = fix; ftmp; ftmp = ftmp->next)
13530 {
13531 if (GET_CODE (ftmp->insn) == BARRIER)
13532 {
13533 if (ftmp->address >= minipool_vector_head->max_address)
13534 break;
13535
13536 last_barrier = ftmp;
13537 }
13538 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
13539 break;
13540
13541 last_added_fix = ftmp; /* Keep track of the last fix added. */
13542 }
13543
13544 /* If we found a barrier, drop back to that; any fixes that we
13545 could have reached but come after the barrier will now go in
13546 the next mini-pool. */
13547 if (last_barrier != NULL)
13548 {
13549 /* Reduce the refcount for those fixes that won't go into this
13550 pool after all. */
13551 for (fdel = last_barrier->next;
13552 fdel && fdel != ftmp;
13553 fdel = fdel->next)
13554 {
13555 fdel->minipool->refcount--;
13556 fdel->minipool = NULL;
13557 }
13558
13559 ftmp = last_barrier;
13560 }
13561 else
13562 {
13563 /* ftmp is the first fix that we can't fit into this pool and
13564 there are no natural barriers that we could use. Insert a
13565 new barrier in the code somewhere between the previous
13566 fix and this one, and arrange to jump around it. */
13567 HOST_WIDE_INT max_address;
13568
13569 /* The last item on the list of fixes must be a barrier, so
13570 we can never run off the end of the list of fixes without
13571 last_barrier being set. */
13572 gcc_assert (ftmp);
13573
13574 max_address = minipool_vector_head->max_address;
13575 /* Check that there isn't another fix that is in range that
13576 we couldn't fit into this pool because the pool was
13577 already too large: we need to put the pool before such an
13578 instruction. The pool itself may come just after the
13579 fix because create_fix_barrier also allows space for a
13580 jump instruction. */
13581 if (ftmp->address < max_address)
13582 max_address = ftmp->address + 1;
13583
13584 last_barrier = create_fix_barrier (last_added_fix, max_address);
13585 }
13586
13587 assign_minipool_offsets (last_barrier);
13588
13589 while (ftmp)
13590 {
13591 if (GET_CODE (ftmp->insn) != BARRIER
13592 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
13593 == NULL))
13594 break;
13595
13596 ftmp = ftmp->next;
13597 }
13598
13599 /* Scan over the fixes we have identified for this pool, fixing them
13600 up and adding the constants to the pool itself. */
13601 for (this_fix = fix; this_fix && ftmp != this_fix;
13602 this_fix = this_fix->next)
13603 if (GET_CODE (this_fix->insn) != BARRIER)
13604 {
13605 rtx addr
13606 = plus_constant (Pmode,
13607 gen_rtx_LABEL_REF (VOIDmode,
13608 minipool_vector_label),
13609 this_fix->minipool->offset);
13610 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
13611 }
13612
13613 dump_minipool (last_barrier->insn);
13614 fix = ftmp;
13615 }
13616
13617 /* From now on we must synthesize any constants that we can't handle
13618 directly. This can happen if the RTL gets split during final
13619 instruction generation. */
13620 after_arm_reorg = 1;
13621
13622 /* Free the minipool memory. */
13623 obstack_free (&minipool_obstack, minipool_startobj);
13624 }
13625 \f
13626 /* Routines to output assembly language. */
13627
13628 /* If the rtx is the correct value then return the string of the number.
13629 In this way we can ensure that valid double constants are generated even
13630 when cross compiling. */
13631 const char *
13632 fp_immediate_constant (rtx x)
13633 {
13634 REAL_VALUE_TYPE r;
13635
13636 if (!fp_consts_inited)
13637 init_fp_table ();
13638
13639 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13640
13641 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
13642 return "0";
13643 }
13644
13645 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
13646 static const char *
13647 fp_const_from_val (REAL_VALUE_TYPE *r)
13648 {
13649 if (!fp_consts_inited)
13650 init_fp_table ();
13651
13652 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
13653 return "0";
13654 }
13655
13656 /* OPERANDS[0] is the entire list of insns that constitute pop,
13657 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
13658 is in the list, UPDATE is true iff the list contains explicit
13659 update of base register. */
13660 void
13661 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
13662 bool update)
13663 {
13664 int i;
13665 char pattern[100];
13666 int offset;
13667 const char *conditional;
13668 int num_saves = XVECLEN (operands[0], 0);
13669 unsigned int regno;
13670 unsigned int regno_base = REGNO (operands[1]);
13671
13672 offset = 0;
13673 offset += update ? 1 : 0;
13674 offset += return_pc ? 1 : 0;
13675
13676 /* Is the base register in the list? */
13677 for (i = offset; i < num_saves; i++)
13678 {
13679 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
13680 /* If SP is in the list, then the base register must be SP. */
13681 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
13682 /* If base register is in the list, there must be no explicit update. */
13683 if (regno == regno_base)
13684 gcc_assert (!update);
13685 }
13686
13687 conditional = reverse ? "%?%D0" : "%?%d0";
13688 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
13689 {
13690 /* Output pop (not ldmfd) because it has a shorter encoding. */
13691 gcc_assert (update);
13692 sprintf (pattern, "pop%s\t{", conditional);
13693 }
13694 else
13695 {
13696 /* Output ldmfd when the base register is SP, otherwise output ldmia.
13697 It's just a convention, their semantics are identical. */
13698 if (regno_base == SP_REGNUM)
13699 sprintf (pattern, "ldm%sfd\t", conditional);
13700 else if (TARGET_UNIFIED_ASM)
13701 sprintf (pattern, "ldmia%s\t", conditional);
13702 else
13703 sprintf (pattern, "ldm%sia\t", conditional);
13704
13705 strcat (pattern, reg_names[regno_base]);
13706 if (update)
13707 strcat (pattern, "!, {");
13708 else
13709 strcat (pattern, ", {");
13710 }
13711
13712 /* Output the first destination register. */
13713 strcat (pattern,
13714 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
13715
13716 /* Output the rest of the destination registers. */
13717 for (i = offset + 1; i < num_saves; i++)
13718 {
13719 strcat (pattern, ", ");
13720 strcat (pattern,
13721 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
13722 }
13723
13724 strcat (pattern, "}");
13725
13726 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
13727 strcat (pattern, "^");
13728
13729 output_asm_insn (pattern, &cond);
13730 }
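/* Illustrative outputs of the function above (register choices are
   made up and the %? condition escapes are omitted): with SP as the
   base and unified syntax it prints "pop {r4, r5, pc}"; with a non-SP
   base such as r7 and an explicit update it prints
   "ldmia r7!, {r4, r5}"; and a "^" is appended when returning from an
   interrupt handler.  */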
13731
13732
13733 /* Output the assembly for a store multiple. */
13734
13735 const char *
13736 vfp_output_fstmd (rtx * operands)
13737 {
13738 char pattern[100];
13739 int p;
13740 int base;
13741 int i;
13742
13743 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
13744 p = strlen (pattern);
13745
13746 gcc_assert (GET_CODE (operands[1]) == REG);
13747
13748 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
13749 for (i = 1; i < XVECLEN (operands[2], 0); i++)
13750 {
13751 p += sprintf (&pattern[p], ", d%d", base + i);
13752 }
13753 strcpy (&pattern[p], "}");
13754
13755 output_asm_insn (pattern, operands);
13756 return "";
13757 }
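/* For example, pushing the three register pairs starting at d8 makes
   the function above emit "fstmfdd sp!, {d8, d9, d10}" (assuming the
   base of the store is the stack pointer, as it is for prologue
   saves).  */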
13758
13759
13760 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
13761 number of bytes pushed. */
13762
13763 static int
13764 vfp_emit_fstmd (int base_reg, int count)
13765 {
13766 rtx par;
13767 rtx dwarf;
13768 rtx tmp, reg;
13769 int i;
13770
13771 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
13772 register pairs are stored by a store multiple insn. We avoid this
13773 by pushing an extra pair. */
13774 if (count == 2 && !arm_arch6)
13775 {
13776 if (base_reg == LAST_VFP_REGNUM - 3)
13777 base_reg -= 2;
13778 count++;
13779 }
13780
13781 /* FSTMD may not store more than 16 doubleword registers at once. Split
13782 larger stores into multiple parts (up to a maximum of two, in
13783 practice). */
13784 if (count > 16)
13785 {
13786 int saved;
13787 /* NOTE: base_reg is an internal register number, so each D register
13788 counts as 2. */
13789 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
13790 saved += vfp_emit_fstmd (base_reg, 16);
13791 return saved;
13792 }
13793
13794 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
13795 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
13796
13797 reg = gen_rtx_REG (DFmode, base_reg);
13798 base_reg += 2;
13799
13800 XVECEXP (par, 0, 0)
13801 = gen_rtx_SET (VOIDmode,
13802 gen_frame_mem
13803 (BLKmode,
13804 gen_rtx_PRE_MODIFY (Pmode,
13805 stack_pointer_rtx,
13806 plus_constant
13807 (Pmode, stack_pointer_rtx,
13808 - (count * 8)))
13809 ),
13810 gen_rtx_UNSPEC (BLKmode,
13811 gen_rtvec (1, reg),
13812 UNSPEC_PUSH_MULT));
13813
13814 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13815 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
13816 RTX_FRAME_RELATED_P (tmp) = 1;
13817 XVECEXP (dwarf, 0, 0) = tmp;
13818
13819 tmp = gen_rtx_SET (VOIDmode,
13820 gen_frame_mem (DFmode, stack_pointer_rtx),
13821 reg);
13822 RTX_FRAME_RELATED_P (tmp) = 1;
13823 XVECEXP (dwarf, 0, 1) = tmp;
13824
13825 for (i = 1; i < count; i++)
13826 {
13827 reg = gen_rtx_REG (DFmode, base_reg);
13828 base_reg += 2;
13829 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
13830
13831 tmp = gen_rtx_SET (VOIDmode,
13832 gen_frame_mem (DFmode,
13833 plus_constant (Pmode,
13834 stack_pointer_rtx,
13835 i * 8)),
13836 reg);
13837 RTX_FRAME_RELATED_P (tmp) = 1;
13838 XVECEXP (dwarf, 0, i + 1) = tmp;
13839 }
13840
13841 par = emit_insn (par);
13842 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
13843 RTX_FRAME_RELATED_P (par) = 1;
13844
13845 return count * 8;
13846 }
13847
13848 /* Emit a call instruction with pattern PAT. ADDR is the address of
13849 the call target. */
13850
13851 void
13852 arm_emit_call_insn (rtx pat, rtx addr)
13853 {
13854 rtx insn;
13855
13856 insn = emit_call_insn (pat);
13857
13858 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13859 If the call might use such an entry, add a use of the PIC register
13860 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13861 if (TARGET_VXWORKS_RTP
13862 && flag_pic
13863 && GET_CODE (addr) == SYMBOL_REF
13864 && (SYMBOL_REF_DECL (addr)
13865 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
13866 : !SYMBOL_REF_LOCAL_P (addr)))
13867 {
13868 require_pic_register ();
13869 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
13870 }
13871 }
13872
13873 /* Output a 'call' insn. */
13874 const char *
13875 output_call (rtx *operands)
13876 {
13877 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
13878
13879 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13880 if (REGNO (operands[0]) == LR_REGNUM)
13881 {
13882 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
13883 output_asm_insn ("mov%?\t%0, %|lr", operands);
13884 }
13885
13886 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13887
13888 if (TARGET_INTERWORK || arm_arch4t)
13889 output_asm_insn ("bx%?\t%0", operands);
13890 else
13891 output_asm_insn ("mov%?\t%|pc, %0", operands);
13892
13893 return "";
13894 }
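/* On a pre-v5 core a call through, say, r2 therefore comes out as

	mov	lr, pc
	bx	r2		@ or "mov pc, r2" without interworking/v4t

   with an extra "mov ip, lr" first when the call target register is lr
   itself.  */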
13895
13896 /* Output a 'call' insn that is a reference in memory. This is
13897 disabled for ARMv5, where we prefer a blx instead, because otherwise
13898 there's a significant performance overhead. */
13899 const char *
13900 output_call_mem (rtx *operands)
13901 {
13902 gcc_assert (!arm_arch5);
13903 if (TARGET_INTERWORK)
13904 {
13905 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13906 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13907 output_asm_insn ("bx%?\t%|ip", operands);
13908 }
13909 else if (regno_use_in (LR_REGNUM, operands[0]))
13910 {
13911 /* LR is used in the memory address. We load the address in the
13912 first instruction. It's safe to use IP as the target of the
13913 load since the call will kill it anyway. */
13914 output_asm_insn ("ldr%?\t%|ip, %0", operands);
13915 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13916 if (arm_arch4t)
13917 output_asm_insn ("bx%?\t%|ip", operands);
13918 else
13919 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
13920 }
13921 else
13922 {
13923 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
13924 output_asm_insn ("ldr%?\t%|pc, %0", operands);
13925 }
13926
13927 return "";
13928 }
13929
13930
13931 /* Output a move from arm registers to arm registers of a long double
13932 OPERANDS[0] is the destination.
13933 OPERANDS[1] is the source. */
13934 const char *
13935 output_mov_long_double_arm_from_arm (rtx *operands)
13936 {
13937 /* We have to be careful here because the two might overlap. */
13938 int dest_start = REGNO (operands[0]);
13939 int src_start = REGNO (operands[1]);
13940 rtx ops[2];
13941 int i;
13942
13943 if (dest_start < src_start)
13944 {
13945 for (i = 0; i < 3; i++)
13946 {
13947 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13948 ops[1] = gen_rtx_REG (SImode, src_start + i);
13949 output_asm_insn ("mov%?\t%0, %1", ops);
13950 }
13951 }
13952 else
13953 {
13954 for (i = 2; i >= 0; i--)
13955 {
13956 ops[0] = gen_rtx_REG (SImode, dest_start + i);
13957 ops[1] = gen_rtx_REG (SImode, src_start + i);
13958 output_asm_insn ("mov%?\t%0, %1", ops);
13959 }
13960 }
13961
13962 return "";
13963 }
13964
13965 void
13966 arm_emit_movpair (rtx dest, rtx src)
13967 {
13968 /* If the src is an immediate, simplify it. */
13969 if (CONST_INT_P (src))
13970 {
13971 HOST_WIDE_INT val = INTVAL (src);
13972 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
13973 if ((val >> 16) & 0x0000ffff)
13974 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
13975 GEN_INT (16)),
13976 GEN_INT ((val >> 16) & 0x0000ffff));
13977 return;
13978 }
13979 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
13980 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
13981 }
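/* For example, arm_emit_movpair with the constant 0x12345678 first sets
   the destination to 0x5678 and then writes 0x1234 into its upper
   halfword through the ZERO_EXTRACT, which normally ends up as a
   movw/movt pair on cores that provide those instructions.  */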
13982
13983 /* Output a move between double words. It must be REG<-MEM
13984 or MEM<-REG. */
13985 const char *
13986 output_move_double (rtx *operands, bool emit, int *count)
13987 {
13988 enum rtx_code code0 = GET_CODE (operands[0]);
13989 enum rtx_code code1 = GET_CODE (operands[1]);
13990 rtx otherops[3];
13991 if (count)
13992 *count = 1;
13993
13994 /* The only case when this might happen is when
13995 you are looking at the length of a DImode instruction
13996 that has an invalid constant in it. */
13997 if (code0 == REG && code1 != MEM)
13998 {
13999 gcc_assert (!emit);
14000 *count = 2;
14001 return "";
14002 }
14003
14004 if (code0 == REG)
14005 {
14006 unsigned int reg0 = REGNO (operands[0]);
14007
14008 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
14009
14010 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
14011
14012 switch (GET_CODE (XEXP (operands[1], 0)))
14013 {
14014 case REG:
14015
14016 if (emit)
14017 {
14018 if (TARGET_LDRD
14019 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
14020 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
14021 else
14022 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14023 }
14024 break;
14025
14026 case PRE_INC:
14027 gcc_assert (TARGET_LDRD);
14028 if (emit)
14029 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
14030 break;
14031
14032 case PRE_DEC:
14033 if (emit)
14034 {
14035 if (TARGET_LDRD)
14036 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
14037 else
14038 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
14039 }
14040 break;
14041
14042 case POST_INC:
14043 if (emit)
14044 {
14045 if (TARGET_LDRD)
14046 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
14047 else
14048 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
14049 }
14050 break;
14051
14052 case POST_DEC:
14053 gcc_assert (TARGET_LDRD);
14054 if (emit)
14055 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
14056 break;
14057
14058 case PRE_MODIFY:
14059 case POST_MODIFY:
14060 /* Autoincrement addressing modes should never have overlapping
14061 base and destination registers, and overlapping index registers
14062 are already prohibited, so this doesn't need to worry about
14063 fix_cm3_ldrd. */
14064 otherops[0] = operands[0];
14065 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
14066 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
14067
14068 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
14069 {
14070 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
14071 {
14072 /* Registers overlap so split out the increment. */
14073 if (emit)
14074 {
14075 output_asm_insn ("add%?\t%1, %1, %2", otherops);
14076 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
14077 }
14078 if (count)
14079 *count = 2;
14080 }
14081 else
14082 {
14083 /* Use a single insn if we can.
14084 FIXME: IWMMXT allows offsets larger than ldrd can
14085 handle, fix these up with a pair of ldr. */
14086 if (TARGET_THUMB2
14087 || GET_CODE (otherops[2]) != CONST_INT
14088 || (INTVAL (otherops[2]) > -256
14089 && INTVAL (otherops[2]) < 256))
14090 {
14091 if (emit)
14092 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
14093 }
14094 else
14095 {
14096 if (emit)
14097 {
14098 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
14099 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14100 }
14101 if (count)
14102 *count = 2;
14103
14104 }
14105 }
14106 }
14107 else
14108 {
14109 /* Use a single insn if we can.
14110 FIXME: IWMMXT allows offsets larger than ldrd can handle,
14111 fix these up with a pair of ldr. */
14112 if (TARGET_THUMB2
14113 || GET_CODE (otherops[2]) != CONST_INT
14114 || (INTVAL (otherops[2]) > -256
14115 && INTVAL (otherops[2]) < 256))
14116 {
14117 if (emit)
14118 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
14119 }
14120 else
14121 {
14122 if (emit)
14123 {
14124 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
14125 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
14126 }
14127 if (count)
14128 *count = 2;
14129 }
14130 }
14131 break;
14132
14133 case LABEL_REF:
14134 case CONST:
14135 /* We might be able to use ldrd %0, %1 here. However, the range is
14136 different to ldr/adr, and it is broken on some ARMv7-M
14137 implementations. */
14138 /* Use the second register of the pair to avoid problematic
14139 overlap. */
14140 otherops[1] = operands[1];
14141 if (emit)
14142 output_asm_insn ("adr%?\t%0, %1", otherops);
14143 operands[1] = otherops[0];
14144 if (emit)
14145 {
14146 if (TARGET_LDRD)
14147 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14148 else
14149 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
14150 }
14151
14152 if (count)
14153 *count = 2;
14154 break;
14155
14156 /* ??? This needs checking for thumb2. */
14157 default:
14158 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
14159 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
14160 {
14161 otherops[0] = operands[0];
14162 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
14163 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
14164
14165 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
14166 {
14167 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14168 {
14169 switch ((int) INTVAL (otherops[2]))
14170 {
14171 case -8:
14172 if (emit)
14173 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
14174 return "";
14175 case -4:
14176 if (TARGET_THUMB2)
14177 break;
14178 if (emit)
14179 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
14180 return "";
14181 case 4:
14182 if (TARGET_THUMB2)
14183 break;
14184 if (emit)
14185 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
14186 return "";
14187 }
14188 }
14189 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
14190 operands[1] = otherops[0];
14191 if (TARGET_LDRD
14192 && (GET_CODE (otherops[2]) == REG
14193 || TARGET_THUMB2
14194 || (GET_CODE (otherops[2]) == CONST_INT
14195 && INTVAL (otherops[2]) > -256
14196 && INTVAL (otherops[2]) < 256)))
14197 {
14198 if (reg_overlap_mentioned_p (operands[0],
14199 otherops[2]))
14200 {
14201 rtx tmp;
14202 /* Swap base and index registers over to
14203 avoid a conflict. */
14204 tmp = otherops[1];
14205 otherops[1] = otherops[2];
14206 otherops[2] = tmp;
14207 }
14208 /* If both registers conflict, it will usually
14209 have been fixed by a splitter. */
14210 if (reg_overlap_mentioned_p (operands[0], otherops[2])
14211 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
14212 {
14213 if (emit)
14214 {
14215 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14216 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
14217 }
14218 if (count)
14219 *count = 2;
14220 }
14221 else
14222 {
14223 otherops[0] = operands[0];
14224 if (emit)
14225 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
14226 }
14227 return "";
14228 }
14229
14230 if (GET_CODE (otherops[2]) == CONST_INT)
14231 {
14232 if (emit)
14233 {
14234 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
14235 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
14236 else
14237 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14238 }
14239 }
14240 else
14241 {
14242 if (emit)
14243 output_asm_insn ("add%?\t%0, %1, %2", otherops);
14244 }
14245 }
14246 else
14247 {
14248 if (emit)
14249 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
14250 }
14251
14252 if (count)
14253 *count = 2;
14254
14255 if (TARGET_LDRD)
14256 return "ldr%(d%)\t%0, [%1]";
14257
14258 return "ldm%(ia%)\t%1, %M0";
14259 }
14260 else
14261 {
14262 otherops[1] = adjust_address (operands[1], SImode, 4);
14263 /* Take care of overlapping base/data reg. */
14264 if (reg_mentioned_p (operands[0], operands[1]))
14265 {
14266 if (emit)
14267 {
14268 output_asm_insn ("ldr%?\t%0, %1", otherops);
14269 output_asm_insn ("ldr%?\t%0, %1", operands);
14270 }
14271 if (count)
14272 *count = 2;
14273
14274 }
14275 else
14276 {
14277 if (emit)
14278 {
14279 output_asm_insn ("ldr%?\t%0, %1", operands);
14280 output_asm_insn ("ldr%?\t%0, %1", otherops);
14281 }
14282 if (count)
14283 *count = 2;
14284 }
14285 }
14286 }
14287 }
14288 else
14289 {
14290 /* Constraints should ensure this. */
14291 gcc_assert (code0 == MEM && code1 == REG);
14292 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
14293
14294 switch (GET_CODE (XEXP (operands[0], 0)))
14295 {
14296 case REG:
14297 if (emit)
14298 {
14299 if (TARGET_LDRD)
14300 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
14301 else
14302 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14303 }
14304 break;
14305
14306 case PRE_INC:
14307 gcc_assert (TARGET_LDRD);
14308 if (emit)
14309 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
14310 break;
14311
14312 case PRE_DEC:
14313 if (emit)
14314 {
14315 if (TARGET_LDRD)
14316 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
14317 else
14318 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
14319 }
14320 break;
14321
14322 case POST_INC:
14323 if (emit)
14324 {
14325 if (TARGET_LDRD)
14326 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
14327 else
14328 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
14329 }
14330 break;
14331
14332 case POST_DEC:
14333 gcc_assert (TARGET_LDRD);
14334 if (emit)
14335 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
14336 break;
14337
14338 case PRE_MODIFY:
14339 case POST_MODIFY:
14340 otherops[0] = operands[1];
14341 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
14342 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
14343
14344 /* IWMMXT allows offsets larger than ldrd can handle,
14345 fix these up with a pair of ldr. */
14346 if (!TARGET_THUMB2
14347 && GET_CODE (otherops[2]) == CONST_INT
14348 && (INTVAL(otherops[2]) <= -256
14349 || INTVAL(otherops[2]) >= 256))
14350 {
14351 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14352 {
14353 if (emit)
14354 {
14355 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
14356 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14357 }
14358 if (count)
14359 *count = 2;
14360 }
14361 else
14362 {
14363 if (emit)
14364 {
14365 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
14366 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
14367 }
14368 if (count)
14369 *count = 2;
14370 }
14371 }
14372 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
14373 {
14374 if (emit)
14375 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
14376 }
14377 else
14378 {
14379 if (emit)
14380 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
14381 }
14382 break;
14383
14384 case PLUS:
14385 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
14386 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
14387 {
14388 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
14389 {
14390 case -8:
14391 if (emit)
14392 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
14393 return "";
14394
14395 case -4:
14396 if (TARGET_THUMB2)
14397 break;
14398 if (emit)
14399 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
14400 return "";
14401
14402 case 4:
14403 if (TARGET_THUMB2)
14404 break;
14405 if (emit)
14406 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
14407 return "";
14408 }
14409 }
14410 if (TARGET_LDRD
14411 && (GET_CODE (otherops[2]) == REG
14412 || TARGET_THUMB2
14413 || (GET_CODE (otherops[2]) == CONST_INT
14414 && INTVAL (otherops[2]) > -256
14415 && INTVAL (otherops[2]) < 256)))
14416 {
14417 otherops[0] = operands[1];
14418 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
14419 if (emit)
14420 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
14421 return "";
14422 }
14423 /* Fall through */
14424
14425 default:
14426 otherops[0] = adjust_address (operands[0], SImode, 4);
14427 otherops[1] = operands[1];
14428 if (emit)
14429 {
14430 output_asm_insn ("str%?\t%1, %0", operands);
14431 output_asm_insn ("str%?\t%H1, %0", otherops);
14432 }
14433 if (count)
14434 *count = 2;
14435 }
14436 }
14437
14438 return "";
14439 }
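/* Two illustrative expansions of the simplest case above (a plain
   register address; register numbers made up): loading a DImode value
   prints "ldrd r4, [r6]" when LDRD is available and the Cortex-M3
   errata workaround does not apply, and "ldmia r6, {r4, r5}" otherwise;
   the store direction mirrors this with strd/stmia.  */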
14440
14441 /* Output a move, load or store for quad-word vectors in ARM registers. Only
14442 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
14443
14444 const char *
14445 output_move_quad (rtx *operands)
14446 {
14447 if (REG_P (operands[0]))
14448 {
14449 /* Load, or reg->reg move. */
14450
14451 if (MEM_P (operands[1]))
14452 {
14453 switch (GET_CODE (XEXP (operands[1], 0)))
14454 {
14455 case REG:
14456 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
14457 break;
14458
14459 case LABEL_REF:
14460 case CONST:
14461 output_asm_insn ("adr%?\t%0, %1", operands);
14462 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
14463 break;
14464
14465 default:
14466 gcc_unreachable ();
14467 }
14468 }
14469 else
14470 {
14471 rtx ops[2];
14472 int dest, src, i;
14473
14474 gcc_assert (REG_P (operands[1]));
14475
14476 dest = REGNO (operands[0]);
14477 src = REGNO (operands[1]);
14478
14479 /* This seems pretty dumb, but hopefully GCC won't try to do it
14480 very often. */
14481 if (dest < src)
14482 for (i = 0; i < 4; i++)
14483 {
14484 ops[0] = gen_rtx_REG (SImode, dest + i);
14485 ops[1] = gen_rtx_REG (SImode, src + i);
14486 output_asm_insn ("mov%?\t%0, %1", ops);
14487 }
14488 else
14489 for (i = 3; i >= 0; i--)
14490 {
14491 ops[0] = gen_rtx_REG (SImode, dest + i);
14492 ops[1] = gen_rtx_REG (SImode, src + i);
14493 output_asm_insn ("mov%?\t%0, %1", ops);
14494 }
14495 }
14496 }
14497 else
14498 {
14499 gcc_assert (MEM_P (operands[0]));
14500 gcc_assert (REG_P (operands[1]));
14501 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
14502
14503 switch (GET_CODE (XEXP (operands[0], 0)))
14504 {
14505 case REG:
14506 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
14507 break;
14508
14509 default:
14510 gcc_unreachable ();
14511 }
14512 }
14513
14514 return "";
14515 }
14516
14517 /* Output a VFP load or store instruction. */
14518
14519 const char *
14520 output_move_vfp (rtx *operands)
14521 {
14522 rtx reg, mem, addr, ops[2];
14523 int load = REG_P (operands[0]);
14524 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
14525 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
14526 const char *templ;
14527 char buff[50];
14528 enum machine_mode mode;
14529
14530 reg = operands[!load];
14531 mem = operands[load];
14532
14533 mode = GET_MODE (reg);
14534
14535 gcc_assert (REG_P (reg));
14536 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
14537 gcc_assert (mode == SFmode
14538 || mode == DFmode
14539 || mode == SImode
14540 || mode == DImode
14541 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
14542 gcc_assert (MEM_P (mem));
14543
14544 addr = XEXP (mem, 0);
14545
14546 switch (GET_CODE (addr))
14547 {
14548 case PRE_DEC:
14549 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
14550 ops[0] = XEXP (addr, 0);
14551 ops[1] = reg;
14552 break;
14553
14554 case POST_INC:
14555 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
14556 ops[0] = XEXP (addr, 0);
14557 ops[1] = reg;
14558 break;
14559
14560 default:
14561 templ = "f%s%c%%?\t%%%s0, %%1%s";
14562 ops[0] = reg;
14563 ops[1] = mem;
14564 break;
14565 }
14566
14567 sprintf (buff, templ,
14568 load ? "ld" : "st",
14569 dp ? 'd' : 's',
14570 dp ? "P" : "",
14571 integer_p ? "\t%@ int" : "");
14572 output_asm_insn (buff, ops);
14573
14574 return "";
14575 }
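/* Illustrative expansions of the templates above (register and address
   choices made up): a DFmode load prints "fldd d7, [r3]", an SFmode
   store prints "fsts s1, [r3]", and a POST_INC double-precision load
   prints "fldmiad r3!, {d7}", with an "@ int" comment appended when the
   mode is an integer one.  */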
14576
14577 /* Output a Neon quad-word load or store, or a load or store for
14578 larger structure modes.
14579
14580 WARNING: The ordering of elements is weird in big-endian mode,
14581 because we use VSTM, as required by the EABI. GCC RTL defines
14582 element ordering based on in-memory order. This can differ
14583 from the architectural ordering of elements within a NEON register.
14584 The intrinsics defined in arm_neon.h use the NEON register element
14585 ordering, not the GCC RTL element ordering.
14586
14587 For example, the in-memory ordering of a big-endian quadword
14588 vector with 16-bit elements when stored from register pair {d0,d1}
14589 will be (lowest address first, d0[N] is NEON register element N):
14590
14591 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
14592
14593 When necessary, quadword registers (dN, dN+1) are moved to ARM
14594 registers from rN in the order:
14595
14596 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
14597
14598 So that STM/LDM can be used on vectors in ARM registers, and the
14599 same memory layout will result as if VSTM/VLDM were used. */
14600
14601 const char *
14602 output_move_neon (rtx *operands)
14603 {
14604 rtx reg, mem, addr, ops[2];
14605 int regno, load = REG_P (operands[0]);
14606 const char *templ;
14607 char buff[50];
14608 enum machine_mode mode;
14609
14610 reg = operands[!load];
14611 mem = operands[load];
14612
14613 mode = GET_MODE (reg);
14614
14615 gcc_assert (REG_P (reg));
14616 regno = REGNO (reg);
14617 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
14618 || NEON_REGNO_OK_FOR_QUAD (regno));
14619 gcc_assert (VALID_NEON_DREG_MODE (mode)
14620 || VALID_NEON_QREG_MODE (mode)
14621 || VALID_NEON_STRUCT_MODE (mode));
14622 gcc_assert (MEM_P (mem));
14623
14624 addr = XEXP (mem, 0);
14625
14626 /* Strip off const from addresses like (const (plus (...))). */
14627 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14628 addr = XEXP (addr, 0);
14629
14630 switch (GET_CODE (addr))
14631 {
14632 case POST_INC:
14633 templ = "v%smia%%?\t%%0!, %%h1";
14634 ops[0] = XEXP (addr, 0);
14635 ops[1] = reg;
14636 break;
14637
14638 case PRE_DEC:
14639 /* FIXME: We should be using vld1/vst1 here in BE mode? */
14640 templ = "v%smdb%%?\t%%0!, %%h1";
14641 ops[0] = XEXP (addr, 0);
14642 ops[1] = reg;
14643 break;
14644
14645 case POST_MODIFY:
14646 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
14647 gcc_unreachable ();
14648
14649 case LABEL_REF:
14650 case PLUS:
14651 {
14652 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14653 int i;
14654 int overlap = -1;
14655 for (i = 0; i < nregs; i++)
14656 {
14657 /* We're only using DImode here because it's a convenient size. */
14658 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
14659 ops[1] = adjust_address (mem, DImode, 8 * i);
14660 if (reg_overlap_mentioned_p (ops[0], mem))
14661 {
14662 gcc_assert (overlap == -1);
14663 overlap = i;
14664 }
14665 else
14666 {
14667 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14668 output_asm_insn (buff, ops);
14669 }
14670 }
14671 if (overlap != -1)
14672 {
14673 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
14674 ops[1] = adjust_address (mem, SImode, 8 * overlap);
14675 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
14676 output_asm_insn (buff, ops);
14677 }
14678
14679 return "";
14680 }
14681
14682 default:
14683 templ = "v%smia%%?\t%%m0, %%h1";
14684 ops[0] = mem;
14685 ops[1] = reg;
14686 }
14687
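/* Illustrative note (not in the original source): for a post-incremented
   quad-word load the template above becomes "vldmia%?\t%0!, %h1", which
   assembles to something along the lines of "vldmia	r0!, {d0-d1}"; the
   store variants use "vstmia"/"vstmdb" in the same way.  */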
14688 sprintf (buff, templ, load ? "ld" : "st");
14689 output_asm_insn (buff, ops);
14690
14691 return "";
14692 }
14693
14694 /* Compute and return the length of neon_mov<mode>, where <mode> is
14695 one of VSTRUCT modes: EI, OI, CI or XI. */
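/* Illustrative note, not part of the original comment: a register-to-register
   move of an OImode value is costed at 8 bytes (two quad-register moves),
   while loading an OImode value from a reg+offset address takes one vldr per
   D register (four instructions, 16 bytes), assuming HARD_REGNO_NREGS counts
   4-byte words as it does elsewhere in this file.  */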
14696 int
14697 arm_attr_length_move_neon (rtx insn)
14698 {
14699 rtx reg, mem, addr;
14700 int load;
14701 enum machine_mode mode;
14702
14703 extract_insn_cached (insn);
14704
14705 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
14706 {
14707 mode = GET_MODE (recog_data.operand[0]);
14708 switch (mode)
14709 {
14710 case EImode:
14711 case OImode:
14712 return 8;
14713 case CImode:
14714 return 12;
14715 case XImode:
14716 return 16;
14717 default:
14718 gcc_unreachable ();
14719 }
14720 }
14721
14722 load = REG_P (recog_data.operand[0]);
14723 reg = recog_data.operand[!load];
14724 mem = recog_data.operand[load];
14725
14726 gcc_assert (MEM_P (mem));
14727
14728 mode = GET_MODE (reg);
14729 addr = XEXP (mem, 0);
14730
14731 /* Strip off const from addresses like (const (plus (...))). */
14732 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
14733 addr = XEXP (addr, 0);
14734
14735 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
14736 {
14737 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
14738 return insns * 4;
14739 }
14740 else
14741 return 4;
14742 }
14743
14744 /* Return nonzero if the offset in the address is an immediate. Otherwise,
14745 return zero. */
14746
14747 int
14748 arm_address_offset_is_imm (rtx insn)
14749 {
14750 rtx mem, addr;
14751
14752 extract_insn_cached (insn);
14753
14754 if (REG_P (recog_data.operand[0]))
14755 return 0;
14756
14757 mem = recog_data.operand[0];
14758
14759 gcc_assert (MEM_P (mem));
14760
14761 addr = XEXP (mem, 0);
14762
14763 if (GET_CODE (addr) == REG
14764 || (GET_CODE (addr) == PLUS
14765 && GET_CODE (XEXP (addr, 0)) == REG
14766 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
14767 return 1;
14768 else
14769 return 0;
14770 }
14771
14772 /* Output an ADD r, s, #n where n may be too big for one instruction.
14773 If adding zero to one register, output nothing. */
14774 const char *
14775 output_add_immediate (rtx *operands)
14776 {
14777 HOST_WIDE_INT n = INTVAL (operands[2]);
14778
14779 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
14780 {
14781 if (n < 0)
14782 output_multi_immediate (operands,
14783 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
14784 -n);
14785 else
14786 output_multi_immediate (operands,
14787 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
14788 n);
14789 }
14790
14791 return "";
14792 }
14793
14794 /* Output a multiple immediate operation.
14795 OPERANDS is the vector of operands referred to in the output patterns.
14796 INSTR1 is the output pattern to use for the first constant.
14797 INSTR2 is the output pattern to use for subsequent constants.
14798 IMMED_OP is the index of the constant slot in OPERANDS.
14799 N is the constant value. */
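/* Illustrative example (not part of the original comment): a call such as

     output_multi_immediate (operands, "add%?\t%0, %1, %2",
			     "add%?\t%0, %0, %2", 2, 0x10004);

   scans N two bits at a time, emitting one instruction per 8-bit chunk, and
   so produces "add r0, r1, #4" followed by "add r0, r0, #65536" (assuming
   operands 0 and 1 are r0 and r1).  */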
14800 static const char *
14801 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
14802 int immed_op, HOST_WIDE_INT n)
14803 {
14804 #if HOST_BITS_PER_WIDE_INT > 32
14805 n &= 0xffffffff;
14806 #endif
14807
14808 if (n == 0)
14809 {
14810 /* Quick and easy output. */
14811 operands[immed_op] = const0_rtx;
14812 output_asm_insn (instr1, operands);
14813 }
14814 else
14815 {
14816 int i;
14817 const char * instr = instr1;
14818
14819 /* Note that n is never zero here (which would give no output). */
14820 for (i = 0; i < 32; i += 2)
14821 {
14822 if (n & (3 << i))
14823 {
14824 operands[immed_op] = GEN_INT (n & (255 << i));
14825 output_asm_insn (instr, operands);
14826 instr = instr2;
14827 i += 6;
14828 }
14829 }
14830 }
14831
14832 return "";
14833 }
14834
14835 /* Return the name of a shifter operation. */
14836 static const char *
14837 arm_shift_nmem(enum rtx_code code)
14838 {
14839 switch (code)
14840 {
14841 case ASHIFT:
14842 return ARM_LSL_NAME;
14843
14844 case ASHIFTRT:
14845 return "asr";
14846
14847 case LSHIFTRT:
14848 return "lsr";
14849
14850 case ROTATERT:
14851 return "ror";
14852
14853 default:
14854 abort();
14855 }
14856 }
14857
14858 /* Return the appropriate ARM instruction for the operation code.
14859 The returned result should not be overwritten. OP is the rtx of the
14860 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14861 was shifted. */
14862 const char *
14863 arithmetic_instr (rtx op, int shift_first_arg)
14864 {
14865 switch (GET_CODE (op))
14866 {
14867 case PLUS:
14868 return "add";
14869
14870 case MINUS:
14871 return shift_first_arg ? "rsb" : "sub";
14872
14873 case IOR:
14874 return "orr";
14875
14876 case XOR:
14877 return "eor";
14878
14879 case AND:
14880 return "and";
14881
14882 case ASHIFT:
14883 case ASHIFTRT:
14884 case LSHIFTRT:
14885 case ROTATERT:
14886 return arm_shift_nmem(GET_CODE(op));
14887
14888 default:
14889 gcc_unreachable ();
14890 }
14891 }
14892
14893 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14894 for the operation code. The returned result should not be overwritten.
14895 OP is the rtx of the shift.
14896 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
14897 constant shift amount otherwise. */
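/* Illustrative note (not in the original comment): (mult (reg) (const_int 8))
   is returned as ARM_LSL_NAME with *AMOUNTP set to 3, while
   (rotate (reg) (const_int 8)) is canonicalized to a rotate-right by 24.  */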
14898 static const char *
14899 shift_op (rtx op, HOST_WIDE_INT *amountp)
14900 {
14901 const char * mnem;
14902 enum rtx_code code = GET_CODE (op);
14903
14904 switch (GET_CODE (XEXP (op, 1)))
14905 {
14906 case REG:
14907 case SUBREG:
14908 *amountp = -1;
14909 break;
14910
14911 case CONST_INT:
14912 *amountp = INTVAL (XEXP (op, 1));
14913 break;
14914
14915 default:
14916 gcc_unreachable ();
14917 }
14918
14919 switch (code)
14920 {
14921 case ROTATE:
14922 gcc_assert (*amountp != -1);
14923 *amountp = 32 - *amountp;
14924 code = ROTATERT;
14925
14926 /* Fall through. */
14927
14928 case ASHIFT:
14929 case ASHIFTRT:
14930 case LSHIFTRT:
14931 case ROTATERT:
14932 mnem = arm_shift_nmem(code);
14933 break;
14934
14935 case MULT:
14936 /* We never have to worry about the amount being other than a
14937 power of 2, since this case can never be reloaded from a reg. */
14938 gcc_assert (*amountp != -1);
14939 *amountp = int_log2 (*amountp);
14940 return ARM_LSL_NAME;
14941
14942 default:
14943 gcc_unreachable ();
14944 }
14945
14946 if (*amountp != -1)
14947 {
14948 /* This is not 100% correct, but follows from the desire to merge
14949 multiplication by a power of 2 with the recognizer for a
14950 shift. >=32 is not a valid shift for "lsl", so we must try and
14951 output a shift that produces the correct arithmetical result.
14952 Using lsr #32 is identical except for the fact that the carry bit
14953 is not set correctly if we set the flags; but we never use the
14954 carry bit from such an operation, so we can ignore that. */
14955 if (code == ROTATERT)
14956 /* Rotate is just modulo 32. */
14957 *amountp &= 31;
14958 else if (*amountp != (*amountp & 31))
14959 {
14960 if (code == ASHIFT)
14961 mnem = "lsr";
14962 *amountp = 32;
14963 }
14964
14965 /* Shifts of 0 are no-ops. */
14966 if (*amountp == 0)
14967 return NULL;
14968 }
14969
14970 return mnem;
14971 }
14972
14973 /* Obtain the shift count (base-2 logarithm) of the power of two POWER. */
14974
14975 static HOST_WIDE_INT
14976 int_log2 (HOST_WIDE_INT power)
14977 {
14978 HOST_WIDE_INT shift = 0;
14979
14980 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
14981 {
14982 gcc_assert (shift <= 31);
14983 shift++;
14984 }
14985
14986 return shift;
14987 }
14988
14989 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14990 because /bin/as is horribly restrictive. The judgement about
14991 whether or not each character is 'printable' (and can be output as
14992 is) or not (and must be printed with an octal escape) must be made
14993 with reference to the *host* character set -- the situation is
14994 similar to that discussed in the comments above pp_c_char in
14995 c-pretty-print.c. */
14996
14997 #define MAX_ASCII_LEN 51
14998
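/* Illustrative example (not in the original source): the three bytes
   'O', 'K', 0 are emitted as

	.ascii	"OK\000"

   and a run longer than MAX_ASCII_LEN characters is split across several
   .ascii directives.  */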
14999 void
15000 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
15001 {
15002 int i;
15003 int len_so_far = 0;
15004
15005 fputs ("\t.ascii\t\"", stream);
15006
15007 for (i = 0; i < len; i++)
15008 {
15009 int c = p[i];
15010
15011 if (len_so_far >= MAX_ASCII_LEN)
15012 {
15013 fputs ("\"\n\t.ascii\t\"", stream);
15014 len_so_far = 0;
15015 }
15016
15017 if (ISPRINT (c))
15018 {
15019 if (c == '\\' || c == '\"')
15020 {
15021 putc ('\\', stream);
15022 len_so_far++;
15023 }
15024 putc (c, stream);
15025 len_so_far++;
15026 }
15027 else
15028 {
15029 fprintf (stream, "\\%03o", c);
15030 len_so_far += 4;
15031 }
15032 }
15033
15034 fputs ("\"\n", stream);
15035 }
15036 \f
15037 /* Compute the register save mask for registers 0 through 12
15038 inclusive. This code is used by arm_compute_save_reg_mask. */
15039
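/* Illustrative note (not part of the original comment): in ARM mode an
   ordinary function that uses the call-saved registers r4 and r7 and needs a
   frame pointer ends up with a mask of (1 << 4) | (1 << 7) | (1 << 11), the
   frame-pointer bit being added by the "normal case" branch below.  */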
15040 static unsigned long
15041 arm_compute_save_reg0_reg12_mask (void)
15042 {
15043 unsigned long func_type = arm_current_func_type ();
15044 unsigned long save_reg_mask = 0;
15045 unsigned int reg;
15046
15047 if (IS_INTERRUPT (func_type))
15048 {
15049 unsigned int max_reg;
15050 /* Interrupt functions must not corrupt any registers,
15051 even call clobbered ones. If this is a leaf function
15052 we can just examine the registers used by the RTL, but
15053 otherwise we have to assume that whatever function is
15054 called might clobber anything, and so we have to save
15055 all the call-clobbered registers as well. */
15056 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
15057 /* FIQ handlers have registers r8 - r12 banked, so
15058 we only need to check r0 - r7. Normal ISRs only
15059 bank r14 and r15, so we must check up to r12.
15060 r13 is the stack pointer which is always preserved,
15061 so we do not need to consider it here. */
15062 max_reg = 7;
15063 else
15064 max_reg = 12;
15065
15066 for (reg = 0; reg <= max_reg; reg++)
15067 if (df_regs_ever_live_p (reg)
15068 || (! crtl->is_leaf && call_used_regs[reg]))
15069 save_reg_mask |= (1 << reg);
15070
15071 /* Also save the pic base register if necessary. */
15072 if (flag_pic
15073 && !TARGET_SINGLE_PIC_BASE
15074 && arm_pic_register != INVALID_REGNUM
15075 && crtl->uses_pic_offset_table)
15076 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15077 }
15078 else if (IS_VOLATILE(func_type))
15079 {
15080 /* For noreturn functions we historically omitted register saves
15081 altogether. However this really messes up debugging. As a
15082 compromise save just the frame pointers. Combined with the link
15083 register saved elsewhere this should be sufficient to get
15084 a backtrace. */
15085 if (frame_pointer_needed)
15086 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15087 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
15088 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15089 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
15090 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
15091 }
15092 else
15093 {
15094 /* In the normal case we only need to save those registers
15095 which are call saved and which are used by this function. */
15096 for (reg = 0; reg <= 11; reg++)
15097 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
15098 save_reg_mask |= (1 << reg);
15099
15100 /* Handle the frame pointer as a special case. */
15101 if (frame_pointer_needed)
15102 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
15103
15104 /* If we aren't loading the PIC register,
15105 don't stack it even though it may be live. */
15106 if (flag_pic
15107 && !TARGET_SINGLE_PIC_BASE
15108 && arm_pic_register != INVALID_REGNUM
15109 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
15110 || crtl->uses_pic_offset_table))
15111 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15112
15113 /* The prologue will copy SP into R0, so save it. */
15114 if (IS_STACKALIGN (func_type))
15115 save_reg_mask |= 1;
15116 }
15117
15118 /* Save registers so the exception handler can modify them. */
15119 if (crtl->calls_eh_return)
15120 {
15121 unsigned int i;
15122
15123 for (i = 0; ; i++)
15124 {
15125 reg = EH_RETURN_DATA_REGNO (i);
15126 if (reg == INVALID_REGNUM)
15127 break;
15128 save_reg_mask |= 1 << reg;
15129 }
15130 }
15131
15132 return save_reg_mask;
15133 }
15134
15135
15136 /* Compute the number of bytes used to store the static chain register on the
15137 stack, above the stack frame. We need to know this accurately to get the
15138 alignment of the rest of the stack frame correct. */
15139
15140 static int arm_compute_static_chain_stack_bytes (void)
15141 {
15142 unsigned long func_type = arm_current_func_type ();
15143 int static_chain_stack_bytes = 0;
15144
15145 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
15146 IS_NESTED (func_type) &&
15147 df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
15148 static_chain_stack_bytes = 4;
15149
15150 return static_chain_stack_bytes;
15151 }
15152
15153
15154 /* Compute a bit mask of which registers need to be
15155 saved on the stack for the current function.
15156 This is used by arm_get_frame_offsets, which may add extra registers. */
15157
15158 static unsigned long
15159 arm_compute_save_reg_mask (void)
15160 {
15161 unsigned int save_reg_mask = 0;
15162 unsigned long func_type = arm_current_func_type ();
15163 unsigned int reg;
15164
15165 if (IS_NAKED (func_type))
15166 /* This should never really happen. */
15167 return 0;
15168
15169 /* If we are creating a stack frame, then we must save the frame pointer,
15170 IP (which will hold the old stack pointer), LR and the PC. */
15171 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
15172 save_reg_mask |=
15173 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
15174 | (1 << IP_REGNUM)
15175 | (1 << LR_REGNUM)
15176 | (1 << PC_REGNUM);
15177
15178 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
15179
15180 /* Decide if we need to save the link register.
15181 Interrupt routines have their own banked link register,
15182 so they never need to save it.
15183 Otherwise if we do not use the link register we do not need to save
15184 it. If we are pushing other registers onto the stack however, we
15185 can save an instruction in the epilogue by pushing the link register
15186 now and then popping it back into the PC. This incurs extra memory
15187 accesses though, so we only do it when optimizing for size, and only
15188 if we know that we will not need a fancy return sequence. */
15189 if (df_regs_ever_live_p (LR_REGNUM)
15190 || (save_reg_mask
15191 && optimize_size
15192 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
15193 && !crtl->calls_eh_return))
15194 save_reg_mask |= 1 << LR_REGNUM;
15195
15196 if (cfun->machine->lr_save_eliminated)
15197 save_reg_mask &= ~ (1 << LR_REGNUM);
15198
15199 if (TARGET_REALLY_IWMMXT
15200 && ((bit_count (save_reg_mask)
15201 + ARM_NUM_INTS (crtl->args.pretend_args_size +
15202 arm_compute_static_chain_stack_bytes())
15203 ) % 2) != 0)
15204 {
15205 /* The total number of registers that are going to be pushed
15206 onto the stack is odd. We need to ensure that the stack
15207 is 64-bit aligned before we start to save iWMMXt registers,
15208 and also before we start to create locals. (A local variable
15209 might be a double or long long which we will load/store using
15210 an iWMMXt instruction). Therefore we need to push another
15211 ARM register, so that the stack will be 64-bit aligned. We
15212 try to avoid using the arg registers (r0 - r3) as they might be
15213 used to pass values in a tail call. */
15214 for (reg = 4; reg <= 12; reg++)
15215 if ((save_reg_mask & (1 << reg)) == 0)
15216 break;
15217
15218 if (reg <= 12)
15219 save_reg_mask |= (1 << reg);
15220 else
15221 {
15222 cfun->machine->sibcall_blocked = 1;
15223 save_reg_mask |= (1 << 3);
15224 }
15225 }
15226
15227 /* We may need to push an additional register for use initializing the
15228 PIC base register. */
15229 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
15230 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
15231 {
15232 reg = thumb_find_work_register (1 << 4);
15233 if (!call_used_regs[reg])
15234 save_reg_mask |= (1 << reg);
15235 }
15236
15237 return save_reg_mask;
15238 }
15239
15240
15241 /* Compute a bit mask of which registers need to be
15242 saved on the stack for the current function. */
15243 static unsigned long
15244 thumb1_compute_save_reg_mask (void)
15245 {
15246 unsigned long mask;
15247 unsigned reg;
15248
15249 mask = 0;
15250 for (reg = 0; reg < 12; reg ++)
15251 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
15252 mask |= 1 << reg;
15253
15254 if (flag_pic
15255 && !TARGET_SINGLE_PIC_BASE
15256 && arm_pic_register != INVALID_REGNUM
15257 && crtl->uses_pic_offset_table)
15258 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
15259
15260 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
15261 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
15262 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
15263
15264 /* LR will also be pushed if any lo regs are pushed. */
15265 if (mask & 0xff || thumb_force_lr_save ())
15266 mask |= (1 << LR_REGNUM);
15267
15268 /* Make sure we have a low work register if we need one.
15269 We will need one if we are going to push a high register,
15270 but we are not currently intending to push a low register. */
15271 if ((mask & 0xff) == 0
15272 && ((mask & 0x0f00) || TARGET_BACKTRACE))
15273 {
15274 /* Use thumb_find_work_register to choose which register
15275 we will use. If the register is live then we will
15276 have to push it. Use LAST_LO_REGNUM as our fallback
15277 choice for the register to select. */
15278 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
15279 /* Make sure the register returned by thumb_find_work_register is
15280 not part of the return value. */
15281 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
15282 reg = LAST_LO_REGNUM;
15283
15284 if (! call_used_regs[reg])
15285 mask |= 1 << reg;
15286 }
15287
15288 /* The 504 below is 8 bytes less than 512 because there are two possible
15289 alignment words. We can't tell here if they will be present or not so we
15290 have to play it safe and assume that they are. */
15291 if ((CALLER_INTERWORKING_SLOT_SIZE +
15292 ROUND_UP_WORD (get_frame_size ()) +
15293 crtl->outgoing_args_size) >= 504)
15294 {
15295 /* This is the same as the code in thumb1_expand_prologue() which
15296 determines which register to use for stack decrement. */
15297 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
15298 if (mask & (1 << reg))
15299 break;
15300
15301 if (reg > LAST_LO_REGNUM)
15302 {
15303 /* Make sure we have a register available for stack decrement. */
15304 mask |= 1 << LAST_LO_REGNUM;
15305 }
15306 }
15307
15308 return mask;
15309 }
15310
15311
15312 /* Return the number of bytes required to save VFP registers. */
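/* Illustrative note (not in the original comment): the loop below counts runs
   of consecutive live call-saved D registers, two GCC register numbers at a
   time; as a workaround for the ARM10 VFPr1 erratum a run of exactly two D
   registers is padded to three on pre-ARMv6 cores, so a function that only
   needs d8 and d9 saved reserves 24 bytes rather than 16.  */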
15313 static int
15314 arm_get_vfp_saved_size (void)
15315 {
15316 unsigned int regno;
15317 int count;
15318 int saved;
15319
15320 saved = 0;
15321 /* Space for saved VFP registers. */
15322 if (TARGET_HARD_FLOAT && TARGET_VFP)
15323 {
15324 count = 0;
15325 for (regno = FIRST_VFP_REGNUM;
15326 regno < LAST_VFP_REGNUM;
15327 regno += 2)
15328 {
15329 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
15330 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
15331 {
15332 if (count > 0)
15333 {
15334 /* Workaround ARM10 VFPr1 bug. */
15335 if (count == 2 && !arm_arch6)
15336 count++;
15337 saved += count * 8;
15338 }
15339 count = 0;
15340 }
15341 else
15342 count++;
15343 }
15344 if (count > 0)
15345 {
15346 if (count == 2 && !arm_arch6)
15347 count++;
15348 saved += count * 8;
15349 }
15350 }
15351 return saved;
15352 }
15353
15354
15355 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
15356 everything bar the final return instruction. If SIMPLE_RETURN is true,
15357 then do not output the epilogue, because it has already been emitted in RTL. */
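/* Illustrative sketch (not part of the original comment): for a plain ARM
   function whose saved registers are just {r4, lr}, REALLY_RETURN lets the
   saved LR slot be reloaded straight into the PC, so the whole exit sequence
   is a single "ldmfd sp!, {r4, pc}" (or "pop {r4, pc}" with unified
   syntax).  */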
15358 const char *
15359 output_return_instruction (rtx operand, bool really_return, bool reverse,
15360 bool simple_return)
15361 {
15362 char conditional[10];
15363 char instr[100];
15364 unsigned reg;
15365 unsigned long live_regs_mask;
15366 unsigned long func_type;
15367 arm_stack_offsets *offsets;
15368
15369 func_type = arm_current_func_type ();
15370
15371 if (IS_NAKED (func_type))
15372 return "";
15373
15374 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
15375 {
15376 /* If this function was declared non-returning, and we have
15377 found a tail call, then we have to trust that the called
15378 function won't return. */
15379 if (really_return)
15380 {
15381 rtx ops[2];
15382
15383 /* Otherwise, trap an attempted return by aborting. */
15384 ops[0] = operand;
15385 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
15386 : "abort");
15387 assemble_external_libcall (ops[1]);
15388 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
15389 }
15390
15391 return "";
15392 }
15393
15394 gcc_assert (!cfun->calls_alloca || really_return);
15395
15396 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
15397
15398 cfun->machine->return_used_this_function = 1;
15399
15400 offsets = arm_get_frame_offsets ();
15401 live_regs_mask = offsets->saved_regs_mask;
15402
15403 if (!simple_return && live_regs_mask)
15404 {
15405 const char * return_reg;
15406
15407 /* If we do not have any special requirements for function exit
15408 (e.g. interworking) then we can load the return address
15409 directly into the PC. Otherwise we must load it into LR. */
15410 if (really_return
15411 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
15412 return_reg = reg_names[PC_REGNUM];
15413 else
15414 return_reg = reg_names[LR_REGNUM];
15415
15416 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
15417 {
15418 /* There are three possible reasons for the IP register
15419 being saved. 1) a stack frame was created, in which case
15420 IP contains the old stack pointer, or 2) an ISR routine
15421 corrupted it, or 3) it was saved to align the stack on
15422 iWMMXt. In case 1, restore IP into SP, otherwise just
15423 restore IP. */
15424 if (frame_pointer_needed)
15425 {
15426 live_regs_mask &= ~ (1 << IP_REGNUM);
15427 live_regs_mask |= (1 << SP_REGNUM);
15428 }
15429 else
15430 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
15431 }
15432
15433 /* On some ARM architectures it is faster to use LDR rather than
15434 LDM to load a single register. On other architectures, the
15435 cost is the same. In 26 bit mode, or for exception handlers,
15436 we have to use LDM to load the PC so that the CPSR is also
15437 restored. */
15438 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
15439 if (live_regs_mask == (1U << reg))
15440 break;
15441
15442 if (reg <= LAST_ARM_REGNUM
15443 && (reg != LR_REGNUM
15444 || ! really_return
15445 || ! IS_INTERRUPT (func_type)))
15446 {
15447 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
15448 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
15449 }
15450 else
15451 {
15452 char *p;
15453 int first = 1;
15454
15455 /* Generate the load multiple instruction to restore the
15456 registers. Note we can get here, even if
15457 frame_pointer_needed is true, but only if sp already
15458 points to the base of the saved core registers. */
15459 if (live_regs_mask & (1 << SP_REGNUM))
15460 {
15461 unsigned HOST_WIDE_INT stack_adjust;
15462
15463 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
15464 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
15465
15466 if (stack_adjust && arm_arch5 && TARGET_ARM)
15467 if (TARGET_UNIFIED_ASM)
15468 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
15469 else
15470 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
15471 else
15472 {
15473 /* If we can't use ldmib (SA110 bug),
15474 then try to pop r3 instead. */
15475 if (stack_adjust)
15476 live_regs_mask |= 1 << 3;
15477
15478 if (TARGET_UNIFIED_ASM)
15479 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
15480 else
15481 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
15482 }
15483 }
15484 else
15485 if (TARGET_UNIFIED_ASM)
15486 sprintf (instr, "pop%s\t{", conditional);
15487 else
15488 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
15489
15490 p = instr + strlen (instr);
15491
15492 for (reg = 0; reg <= SP_REGNUM; reg++)
15493 if (live_regs_mask & (1 << reg))
15494 {
15495 int l = strlen (reg_names[reg]);
15496
15497 if (first)
15498 first = 0;
15499 else
15500 {
15501 memcpy (p, ", ", 2);
15502 p += 2;
15503 }
15504
15505 memcpy (p, "%|", 2);
15506 memcpy (p + 2, reg_names[reg], l);
15507 p += l + 2;
15508 }
15509
15510 if (live_regs_mask & (1 << LR_REGNUM))
15511 {
15512 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
15513 /* If returning from an interrupt, restore the CPSR. */
15514 if (IS_INTERRUPT (func_type))
15515 strcat (p, "^");
15516 }
15517 else
15518 strcpy (p, "}");
15519 }
15520
15521 output_asm_insn (instr, & operand);
15522
15523 /* See if we need to generate an extra instruction to
15524 perform the actual function return. */
15525 if (really_return
15526 && func_type != ARM_FT_INTERWORKED
15527 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
15528 {
15529 /* The return has already been handled
15530 by loading the LR into the PC. */
15531 return "";
15532 }
15533 }
15534
15535 if (really_return)
15536 {
15537 switch ((int) ARM_FUNC_TYPE (func_type))
15538 {
15539 case ARM_FT_ISR:
15540 case ARM_FT_FIQ:
15541 /* ??? This is wrong for unified assembly syntax. */
15542 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
15543 break;
15544
15545 case ARM_FT_INTERWORKED:
15546 sprintf (instr, "bx%s\t%%|lr", conditional);
15547 break;
15548
15549 case ARM_FT_EXCEPTION:
15550 /* ??? This is wrong for unified assembly syntax. */
15551 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
15552 break;
15553
15554 default:
15555 /* Use bx if it's available. */
15556 if (arm_arch5 || arm_arch4t)
15557 sprintf (instr, "bx%s\t%%|lr", conditional);
15558 else
15559 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
15560 break;
15561 }
15562
15563 output_asm_insn (instr, & operand);
15564 }
15565
15566 return "";
15567 }
15568
15569 /* Write the function name into the code section, directly preceding
15570 the function prologue.
15571
15572 Code will be output similar to this:
15573 t0
15574 .ascii "arm_poke_function_name", 0
15575 .align
15576 t1
15577 .word 0xff000000 + (t1 - t0)
15578 arm_poke_function_name
15579 mov ip, sp
15580 stmfd sp!, {fp, ip, lr, pc}
15581 sub fp, ip, #4
15582
15583 When performing a stack backtrace, code can inspect the value
15584 of 'pc' stored at 'fp' + 0. If the trace function then looks
15585 at location pc - 12 and the top 8 bits are set, then we know
15586 that there is a function name embedded immediately preceding this
15587 location, whose length is ((pc[-3]) & ~0xff000000).
15588
15589 We assume that pc is declared as a pointer to an unsigned long.
15590
15591 It is of no benefit to output the function name if we are assembling
15592 a leaf function. These function types will not contain a stack
15593 backtrace structure, therefore it is not possible to determine the
15594 function name. */
15595 void
15596 arm_poke_function_name (FILE *stream, const char *name)
15597 {
15598 unsigned long alignlength;
15599 unsigned long length;
15600 rtx x;
15601
15602 length = strlen (name) + 1;
15603 alignlength = ROUND_UP_WORD (length);
15604
15605 ASM_OUTPUT_ASCII (stream, name, length);
15606 ASM_OUTPUT_ALIGN (stream, 2);
15607 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
15608 assemble_aligned_integer (UNITS_PER_WORD, x);
15609 }
15610
15611 /* Place some comments into the assembler stream
15612 describing the current function. */
15613 static void
15614 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
15615 {
15616 unsigned long func_type;
15617
15618 /* ??? Do we want to print some of the below anyway? */
15619 if (TARGET_THUMB1)
15620 return;
15621
15622 /* Sanity check. */
15623 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
15624
15625 func_type = arm_current_func_type ();
15626
15627 switch ((int) ARM_FUNC_TYPE (func_type))
15628 {
15629 default:
15630 case ARM_FT_NORMAL:
15631 break;
15632 case ARM_FT_INTERWORKED:
15633 asm_fprintf (f, "\t%@ Function supports interworking.\n");
15634 break;
15635 case ARM_FT_ISR:
15636 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
15637 break;
15638 case ARM_FT_FIQ:
15639 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
15640 break;
15641 case ARM_FT_EXCEPTION:
15642 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
15643 break;
15644 }
15645
15646 if (IS_NAKED (func_type))
15647 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
15648
15649 if (IS_VOLATILE (func_type))
15650 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
15651
15652 if (IS_NESTED (func_type))
15653 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
15654 if (IS_STACKALIGN (func_type))
15655 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
15656
15657 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
15658 crtl->args.size,
15659 crtl->args.pretend_args_size, frame_size);
15660
15661 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
15662 frame_pointer_needed,
15663 cfun->machine->uses_anonymous_args);
15664
15665 if (cfun->machine->lr_save_eliminated)
15666 asm_fprintf (f, "\t%@ link register save eliminated.\n");
15667
15668 if (crtl->calls_eh_return)
15669 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
15670
15671 }
15672
15673 static void
15674 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
15675 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
15676 {
15677 arm_stack_offsets *offsets;
15678
15679 if (TARGET_THUMB1)
15680 {
15681 int regno;
15682
15683 /* Emit any call-via-reg trampolines that are needed for v4t support
15684 of call_reg and call_value_reg type insns. */
15685 for (regno = 0; regno < LR_REGNUM; regno++)
15686 {
15687 rtx label = cfun->machine->call_via[regno];
15688
15689 if (label != NULL)
15690 {
15691 switch_to_section (function_section (current_function_decl));
15692 targetm.asm_out.internal_label (asm_out_file, "L",
15693 CODE_LABEL_NUMBER (label));
15694 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
15695 }
15696 }
15697
15698 /* ??? Probably not safe to set this here, since it assumes that a
15699 function will be emitted as assembly immediately after we generate
15700 RTL for it. This does not happen for inline functions. */
15701 cfun->machine->return_used_this_function = 0;
15702 }
15703 else /* TARGET_32BIT */
15704 {
15705 /* We need to take into account any stack-frame rounding. */
15706 offsets = arm_get_frame_offsets ();
15707
15708 gcc_assert (!use_return_insn (FALSE, NULL)
15709 || (cfun->machine->return_used_this_function != 0)
15710 || offsets->saved_regs == offsets->outgoing_args
15711 || frame_pointer_needed);
15712
15713 /* Reset the ARM-specific per-function variables. */
15714 after_arm_reorg = 0;
15715 }
15716 }
15717
15718 /* Generate and emit an insn that we will recognize as a push_multi.
15719 Unfortunately, since this insn does not reflect very well the actual
15720 semantics of the operation, we need to annotate the insn for the benefit
15721 of DWARF2 frame unwind information. */
15722 static rtx
15723 emit_multi_reg_push (unsigned long mask)
15724 {
15725 int num_regs = 0;
15726 int num_dwarf_regs;
15727 int i, j;
15728 rtx par;
15729 rtx dwarf;
15730 int dwarf_par_index;
15731 rtx tmp, reg;
15732
15733 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15734 if (mask & (1 << i))
15735 num_regs++;
15736
15737 gcc_assert (num_regs && num_regs <= 16);
15738
15739 /* We don't record the PC in the dwarf frame information. */
15740 num_dwarf_regs = num_regs;
15741 if (mask & (1 << PC_REGNUM))
15742 num_dwarf_regs--;
15743
15744 /* For the body of the insn we are going to generate an UNSPEC in
15745 parallel with several USEs. This allows the insn to be recognized
15746 by the push_multi pattern in the arm.md file.
15747
15748 The body of the insn looks something like this:
15749
15750 (parallel [
15751 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15752 (const_int:SI <num>)))
15753 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15754 (use (reg:SI XX))
15755 (use (reg:SI YY))
15756 ...
15757 ])
15758
15759 For the frame note however, we try to be more explicit and actually
15760 show each register being stored into the stack frame, plus a (single)
15761 decrement of the stack pointer. We do it this way in order to be
15762 friendly to the stack unwinding code, which only wants to see a single
15763 stack decrement per instruction. The RTL we generate for the note looks
15764 something like this:
15765
15766 (sequence [
15767 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15768 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15769 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15770 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15771 ...
15772 ])
15773
15774 FIXME:: In an ideal world the PRE_MODIFY would not exist and
15775 instead we'd have a parallel expression detailing all
15776 the stores to the various memory addresses so that debug
15777 information is more up-to-date. Remember however while writing
15778 this to take care of the constraints with the push instruction.
15779
15780 Note also that this has to be taken care of for the VFP registers.
15781
15782 For more see PR43399. */
15783
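  /* Illustrative note (not in the original source): a MASK of
     (1 << 4) | (1 << LR_REGNUM), i.e. pushing r4 and lr, gives NUM_REGS == 2
     and a three-element dwarf SEQUENCE: the SP decrement plus one
     (set (mem ...) (reg ...)) note for each pushed register.  */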
15784 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
15785 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
15786 dwarf_par_index = 1;
15787
15788 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15789 {
15790 if (mask & (1 << i))
15791 {
15792 reg = gen_rtx_REG (SImode, i);
15793
15794 XVECEXP (par, 0, 0)
15795 = gen_rtx_SET (VOIDmode,
15796 gen_frame_mem
15797 (BLKmode,
15798 gen_rtx_PRE_MODIFY (Pmode,
15799 stack_pointer_rtx,
15800 plus_constant
15801 (Pmode, stack_pointer_rtx,
15802 -4 * num_regs))
15803 ),
15804 gen_rtx_UNSPEC (BLKmode,
15805 gen_rtvec (1, reg),
15806 UNSPEC_PUSH_MULT));
15807
15808 if (i != PC_REGNUM)
15809 {
15810 tmp = gen_rtx_SET (VOIDmode,
15811 gen_frame_mem (SImode, stack_pointer_rtx),
15812 reg);
15813 RTX_FRAME_RELATED_P (tmp) = 1;
15814 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
15815 dwarf_par_index++;
15816 }
15817
15818 break;
15819 }
15820 }
15821
15822 for (j = 1, i++; j < num_regs; i++)
15823 {
15824 if (mask & (1 << i))
15825 {
15826 reg = gen_rtx_REG (SImode, i);
15827
15828 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
15829
15830 if (i != PC_REGNUM)
15831 {
15832 tmp
15833 = gen_rtx_SET (VOIDmode,
15834 gen_frame_mem
15835 (SImode,
15836 plus_constant (Pmode, stack_pointer_rtx,
15837 4 * j)),
15838 reg);
15839 RTX_FRAME_RELATED_P (tmp) = 1;
15840 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
15841 }
15842
15843 j++;
15844 }
15845 }
15846
15847 par = emit_insn (par);
15848
15849 tmp = gen_rtx_SET (VOIDmode,
15850 stack_pointer_rtx,
15851 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
15852 RTX_FRAME_RELATED_P (tmp) = 1;
15853 XVECEXP (dwarf, 0, 0) = tmp;
15854
15855 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
15856
15857 return par;
15858 }
15859
15860 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
15861 SAVED_REGS_MASK shows which registers need to be restored.
15862
15863 Unfortunately, since this insn does not reflect very well the actual
15864 semantics of the operation, we need to annotate the insn for the benefit
15865 of DWARF2 frame unwind information. */
15866 static void
15867 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
15868 {
15869 int num_regs = 0;
15870 int i, j;
15871 rtx par;
15872 rtx dwarf = NULL_RTX;
15873 rtx tmp, reg;
15874 bool return_in_pc;
15875 int offset_adj;
15876 int emit_update;
15877
15878 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
15879 offset_adj = return_in_pc ? 1 : 0;
15880 for (i = 0; i <= LAST_ARM_REGNUM; i++)
15881 if (saved_regs_mask & (1 << i))
15882 num_regs++;
15883
15884 gcc_assert (num_regs && num_regs <= 16);
15885
15886 /* If SP is in reglist, then we don't emit SP update insn. */
15887 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
15888
15889 /* The parallel needs to hold num_regs SETs
15890 and one SET for the stack update. */
15891 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
15892
15893 if (return_in_pc)
15894 {
15895 tmp = ret_rtx;
15896 XVECEXP (par, 0, 0) = tmp;
15897 }
15898
15899 if (emit_update)
15900 {
15901 /* Increment the stack pointer, based on there being
15902 num_regs 4-byte registers to restore. */
15903 tmp = gen_rtx_SET (VOIDmode,
15904 stack_pointer_rtx,
15905 plus_constant (Pmode,
15906 stack_pointer_rtx,
15907 4 * num_regs));
15908 RTX_FRAME_RELATED_P (tmp) = 1;
15909 XVECEXP (par, 0, offset_adj) = tmp;
15910 }
15911
15912 /* Now restore every reg, which may include PC. */
15913 for (j = 0, i = 0; j < num_regs; i++)
15914 if (saved_regs_mask & (1 << i))
15915 {
15916 reg = gen_rtx_REG (SImode, i);
15917 tmp = gen_rtx_SET (VOIDmode,
15918 reg,
15919 gen_frame_mem
15920 (SImode,
15921 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
15922 RTX_FRAME_RELATED_P (tmp) = 1;
15923 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
15924
15925 /* We need to maintain a sequence for DWARF info too. As dwarf info
15926 should not have PC, skip PC. */
15927 if (i != PC_REGNUM)
15928 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
15929
15930 j++;
15931 }
15932
15933 if (return_in_pc)
15934 par = emit_jump_insn (par);
15935 else
15936 par = emit_insn (par);
15937
15938 REG_NOTES (par) = dwarf;
15939 }
15940
15941 /* Generate and emit an insn pattern that we will recognize as a pop_multi
15942 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
15943
15944 Unfortunately, since this insn does not reflect very well the actual
15945 semantics of the operation, we need to annotate the insn for the benefit
15946 of DWARF2 frame unwind information. */
15947 static void
15948 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
15949 {
15950 int i, j;
15951 rtx par;
15952 rtx dwarf = NULL_RTX;
15953 rtx tmp, reg;
15954
15955 gcc_assert (num_regs && num_regs <= 32);
15956
15957 /* Workaround ARM10 VFPr1 bug. */
15958 if (num_regs == 2 && !arm_arch6)
15959 {
15960 if (first_reg == 15)
15961 first_reg--;
15962
15963 num_regs++;
15964 }
15965
15966 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
15967 there could be up to 32 D-registers to restore.
15968 If there are more than 16 D-registers, make two recursive calls,
15969 each of which emits one pop_multi instruction. */
15970 if (num_regs > 16)
15971 {
15972 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
15973 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
15974 return;
15975 }
15976
15977 /* The parallel needs to hold num_regs SETs
15978 and one SET for the stack update. */
15979 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
15980
15981 /* Increment the stack pointer, based on there being
15982 num_regs 8-byte registers to restore. */
15983 tmp = gen_rtx_SET (VOIDmode,
15984 base_reg,
15985 plus_constant (Pmode, base_reg, 8 * num_regs));
15986 RTX_FRAME_RELATED_P (tmp) = 1;
15987 XVECEXP (par, 0, 0) = tmp;
15988
15989 /* Now show every reg that will be restored, using a SET for each. */
15990 for (j = 0, i=first_reg; j < num_regs; i += 2)
15991 {
15992 reg = gen_rtx_REG (DFmode, i);
15993
15994 tmp = gen_rtx_SET (VOIDmode,
15995 reg,
15996 gen_frame_mem
15997 (DFmode,
15998 plus_constant (Pmode, base_reg, 8 * j)));
15999 RTX_FRAME_RELATED_P (tmp) = 1;
16000 XVECEXP (par, 0, j + 1) = tmp;
16001
16002 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
16003
16004 j++;
16005 }
16006
16007 par = emit_insn (par);
16008 REG_NOTES (par) = dwarf;
16009 }
16010
16011 /* Calculate the size of the return value that is passed in registers. */
16012 static unsigned
16013 arm_size_return_regs (void)
16014 {
16015 enum machine_mode mode;
16016
16017 if (crtl->return_rtx != 0)
16018 mode = GET_MODE (crtl->return_rtx);
16019 else
16020 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16021
16022 return GET_MODE_SIZE (mode);
16023 }
16024
16025 /* Return true if the current function needs to save/restore LR. */
16026 static bool
16027 thumb_force_lr_save (void)
16028 {
16029 return !cfun->machine->lr_save_eliminated
16030 && (!leaf_function_p ()
16031 || thumb_far_jump_used_p ()
16032 || df_regs_ever_live_p (LR_REGNUM));
16033 }
16034
16035
16036 /* Return true if r3 is used by any of the tail call insns in the
16037 current function. */
16038 static bool
16039 any_sibcall_uses_r3 (void)
16040 {
16041 edge_iterator ei;
16042 edge e;
16043
16044 if (!crtl->tail_call_emit)
16045 return false;
16046 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
16047 if (e->flags & EDGE_SIBCALL)
16048 {
16049 rtx call = BB_END (e->src);
16050 if (!CALL_P (call))
16051 call = prev_nonnote_nondebug_insn (call);
16052 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
16053 if (find_regno_fusage (call, USE, 3))
16054 return true;
16055 }
16056 return false;
16057 }
16058
16059
16060 /* Compute the distance from register FROM to register TO.
16061 These can be the arg pointer (26), the soft frame pointer (25),
16062 the stack pointer (13) or the hard frame pointer (11).
16063 In thumb mode r7 is used as the soft frame pointer, if needed.
16064 Typical stack layout looks like this:
16065
16066 old stack pointer -> | |
16067 ----
16068 | | \
16069 | | saved arguments for
16070 | | vararg functions
16071 | | /
16072 --
16073 hard FP & arg pointer -> | | \
16074 | | stack
16075 | | frame
16076 | | /
16077 --
16078 | | \
16079 | | call saved
16080 | | registers
16081 soft frame pointer -> | | /
16082 --
16083 | | \
16084 | | local
16085 | | variables
16086 locals base pointer -> | | /
16087 --
16088 | | \
16089 | | outgoing
16090 | | arguments
16091 current stack pointer -> | | /
16092 --
16093
16094 For a given function some or all of these stack components
16095 may not be needed, giving rise to the possibility of
16096 eliminating some of the registers.
16097
16098 The values returned by this function must reflect the behavior
16099 of arm_expand_prologue() and arm_compute_save_reg_mask().
16100
16101 The sign of the number returned reflects the direction of stack
16102 growth, so the values are positive for all eliminations except
16103 from the soft frame pointer to the hard frame pointer.
16104
16105 SFP may point just inside the local variables block to ensure correct
16106 alignment. */
16107
16108
16109 /* Calculate stack offsets. These are used to calculate register elimination
16110 offsets and in prologue/epilogue code. Also calculates which registers
16111 should be saved. */
16112
16113 static arm_stack_offsets *
16114 arm_get_frame_offsets (void)
16115 {
16116 struct arm_stack_offsets *offsets;
16117 unsigned long func_type;
16118 int leaf;
16119 int saved;
16120 int core_saved;
16121 HOST_WIDE_INT frame_size;
16122 int i;
16123
16124 offsets = &cfun->machine->stack_offsets;
16125
16126 /* We need to know if we are a leaf function. Unfortunately, it
16127 is possible to be called after start_sequence has been called,
16128 which causes get_insns to return the insns for the sequence,
16129 not the function, which will cause leaf_function_p to return
16130 the incorrect result.
16131
16132 Fortunately, we only need to know about leaf functions once reload
16133 has completed, and the frame size cannot be changed after that time,
16134 so we can safely use the cached value. */
16135
16136 if (reload_completed)
16137 return offsets;
16138
16139 /* Initially this is the size of the local variables. It will be translated
16140 into an offset once we have determined the size of preceding data. */
16141 frame_size = ROUND_UP_WORD (get_frame_size ());
16142
16143 leaf = leaf_function_p ();
16144
16145 /* Space for variadic functions. */
16146 offsets->saved_args = crtl->args.pretend_args_size;
16147
16148 /* In Thumb mode this is incorrect, but never used. */
16149 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
16150 arm_compute_static_chain_stack_bytes();
16151
16152 if (TARGET_32BIT)
16153 {
16154 unsigned int regno;
16155
16156 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
16157 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16158 saved = core_saved;
16159
16160 /* We know that SP will be doubleword aligned on entry, and we must
16161 preserve that condition at any subroutine call. We also require the
16162 soft frame pointer to be doubleword aligned. */
16163
16164 if (TARGET_REALLY_IWMMXT)
16165 {
16166 /* Check for the call-saved iWMMXt registers. */
16167 for (regno = FIRST_IWMMXT_REGNUM;
16168 regno <= LAST_IWMMXT_REGNUM;
16169 regno++)
16170 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
16171 saved += 8;
16172 }
16173
16174 func_type = arm_current_func_type ();
16175 /* Space for saved VFP registers. */
16176 if (! IS_VOLATILE (func_type)
16177 && TARGET_HARD_FLOAT && TARGET_VFP)
16178 saved += arm_get_vfp_saved_size ();
16179 }
16180 else /* TARGET_THUMB1 */
16181 {
16182 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
16183 core_saved = bit_count (offsets->saved_regs_mask) * 4;
16184 saved = core_saved;
16185 if (TARGET_BACKTRACE)
16186 saved += 16;
16187 }
16188
16189 /* Saved registers include the stack frame. */
16190 offsets->saved_regs = offsets->saved_args + saved +
16191 arm_compute_static_chain_stack_bytes();
16192 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
16193 /* A leaf function does not need any stack alignment if it has nothing
16194 on the stack. */
16195 if (leaf && frame_size == 0
16196 /* However if it calls alloca(), we have a dynamically allocated
16197 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
16198 && ! cfun->calls_alloca)
16199 {
16200 offsets->outgoing_args = offsets->soft_frame;
16201 offsets->locals_base = offsets->soft_frame;
16202 return offsets;
16203 }
16204
16205 /* Ensure SFP has the correct alignment. */
16206 if (ARM_DOUBLEWORD_ALIGN
16207 && (offsets->soft_frame & 7))
16208 {
16209 offsets->soft_frame += 4;
16210 /* Try to align stack by pushing an extra reg. Don't bother doing this
16211 when there is a stack frame as the alignment will be rolled into
16212 the normal stack adjustment. */
16213 if (frame_size + crtl->outgoing_args_size == 0)
16214 {
16215 int reg = -1;
16216
16217 /* If it is safe to use r3, then do so. This sometimes
16218 generates better code on Thumb-2 by avoiding the need to
16219 use 32-bit push/pop instructions. */
16220 if (! any_sibcall_uses_r3 ()
16221 && arm_size_return_regs () <= 12
16222 && (offsets->saved_regs_mask & (1 << 3)) == 0)
16223 {
16224 reg = 3;
16225 }
16226 else
16227 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
16228 {
16229 /* Avoid fixed registers; they may be changed at
16230 arbitrary times so it's unsafe to restore them
16231 during the epilogue. */
16232 if (!fixed_regs[i]
16233 && (offsets->saved_regs_mask & (1 << i)) == 0)
16234 {
16235 reg = i;
16236 break;
16237 }
16238 }
16239
16240 if (reg != -1)
16241 {
16242 offsets->saved_regs += 4;
16243 offsets->saved_regs_mask |= (1 << reg);
16244 }
16245 }
16246 }
16247
16248 offsets->locals_base = offsets->soft_frame + frame_size;
16249 offsets->outgoing_args = (offsets->locals_base
16250 + crtl->outgoing_args_size);
16251
16252 if (ARM_DOUBLEWORD_ALIGN)
16253 {
16254 /* Ensure SP remains doubleword aligned. */
16255 if (offsets->outgoing_args & 7)
16256 offsets->outgoing_args += 4;
16257 gcc_assert (!(offsets->outgoing_args & 7));
16258 }
16259
16260 return offsets;
16261 }
16262
16263
16264 /* Calculate the relative offsets for the different stack pointers. Positive
16265 offsets are in the direction of stack growth. */
16266
16267 HOST_WIDE_INT
16268 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16269 {
16270 arm_stack_offsets *offsets;
16271
16272 offsets = arm_get_frame_offsets ();
16273
16274 /* OK, now we have enough information to compute the distances.
16275 There must be an entry in these switch tables for each pair
16276 of registers in ELIMINABLE_REGS, even if some of the entries
16277 seem to be redundant or useless. */
16278 switch (from)
16279 {
16280 case ARG_POINTER_REGNUM:
16281 switch (to)
16282 {
16283 case THUMB_HARD_FRAME_POINTER_REGNUM:
16284 return 0;
16285
16286 case FRAME_POINTER_REGNUM:
16287 /* This is the reverse of the soft frame pointer
16288 to hard frame pointer elimination below. */
16289 return offsets->soft_frame - offsets->saved_args;
16290
16291 case ARM_HARD_FRAME_POINTER_REGNUM:
16292 /* This is only non-zero in the case where the static chain register
16293 is stored above the frame. */
16294 return offsets->frame - offsets->saved_args - 4;
16295
16296 case STACK_POINTER_REGNUM:
16297 /* If nothing has been pushed on the stack at all
16298 then this will return -4. This *is* correct! */
16299 return offsets->outgoing_args - (offsets->saved_args + 4);
16300
16301 default:
16302 gcc_unreachable ();
16303 }
16304 gcc_unreachable ();
16305
16306 case FRAME_POINTER_REGNUM:
16307 switch (to)
16308 {
16309 case THUMB_HARD_FRAME_POINTER_REGNUM:
16310 return 0;
16311
16312 case ARM_HARD_FRAME_POINTER_REGNUM:
16313 /* The hard frame pointer points to the top entry in the
16314 stack frame. The soft frame pointer points to the bottom entry
16315 in the stack frame. If there is no stack frame at all,
16316 then they are identical. */
16317
16318 return offsets->frame - offsets->soft_frame;
16319
16320 case STACK_POINTER_REGNUM:
16321 return offsets->outgoing_args - offsets->soft_frame;
16322
16323 default:
16324 gcc_unreachable ();
16325 }
16326 gcc_unreachable ();
16327
16328 default:
16329 /* You cannot eliminate from the stack pointer.
16330 In theory you could eliminate from the hard frame
16331 pointer to the stack pointer, but this will never
16332 happen, since if a stack frame is not needed the
16333 hard frame pointer will never be used. */
16334 gcc_unreachable ();
16335 }
16336 }
16337
16338 /* Given FROM and TO register numbers, say whether this elimination is
16339 allowed. Frame pointer elimination is automatically handled.
16340
16341 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
16342 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
16343 pointer, we must eliminate FRAME_POINTER_REGNUM into
16344 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
16345 ARG_POINTER_REGNUM. */
16346
16347 bool
16348 arm_can_eliminate (const int from, const int to)
16349 {
16350 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
16351 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
16352 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
16353 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
16354 true);
16355 }
16356
16357 /* Emit RTL to save coprocessor registers on function entry. Returns the
16358 number of bytes pushed. */
16359
16360 static int
16361 arm_save_coproc_regs(void)
16362 {
16363 int saved_size = 0;
16364 unsigned reg;
16365 unsigned start_reg;
16366 rtx insn;
16367
16368 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
16369 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
16370 {
16371 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
16372 insn = gen_rtx_MEM (V2SImode, insn);
16373 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
16374 RTX_FRAME_RELATED_P (insn) = 1;
16375 saved_size += 8;
16376 }
16377
16378 if (TARGET_HARD_FLOAT && TARGET_VFP)
16379 {
16380 start_reg = FIRST_VFP_REGNUM;
16381
16382 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
16383 {
16384 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
16385 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
16386 {
16387 if (start_reg != reg)
16388 saved_size += vfp_emit_fstmd (start_reg,
16389 (reg - start_reg) / 2);
16390 start_reg = reg + 2;
16391 }
16392 }
16393 if (start_reg != reg)
16394 saved_size += vfp_emit_fstmd (start_reg,
16395 (reg - start_reg) / 2);
16396 }
16397 return saved_size;
16398 }
16399
16400
16401 /* Set the Thumb frame pointer from the stack pointer. */
16402
16403 static void
16404 thumb_set_frame_pointer (arm_stack_offsets *offsets)
16405 {
16406 HOST_WIDE_INT amount;
16407 rtx insn, dwarf;
16408
16409 amount = offsets->outgoing_args - offsets->locals_base;
16410 if (amount < 1024)
16411 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16412 stack_pointer_rtx, GEN_INT (amount)));
16413 else
16414 {
16415 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
16416 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
16417 expects the first two operands to be the same. */
16418 if (TARGET_THUMB2)
16419 {
16420 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16421 stack_pointer_rtx,
16422 hard_frame_pointer_rtx));
16423 }
16424 else
16425 {
16426 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16427 hard_frame_pointer_rtx,
16428 stack_pointer_rtx));
16429 }
16430 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
16431 plus_constant (Pmode, stack_pointer_rtx, amount));
16432 RTX_FRAME_RELATED_P (dwarf) = 1;
16433 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16434 }
16435
16436 RTX_FRAME_RELATED_P (insn) = 1;
16437 }
16438
16439 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16440 function. */
16441 void
16442 arm_expand_prologue (void)
16443 {
16444 rtx amount;
16445 rtx insn;
16446 rtx ip_rtx;
16447 unsigned long live_regs_mask;
16448 unsigned long func_type;
16449 int fp_offset = 0;
16450 int saved_pretend_args = 0;
16451 int saved_regs = 0;
16452 unsigned HOST_WIDE_INT args_to_push;
16453 arm_stack_offsets *offsets;
16454
16455 func_type = arm_current_func_type ();
16456
16457 /* Naked functions don't have prologues. */
16458 if (IS_NAKED (func_type))
16459 return;
16460
16461 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16462 args_to_push = crtl->args.pretend_args_size;
16463
16464 /* Compute which registers we will have to save onto the stack. */
16465 offsets = arm_get_frame_offsets ();
16466 live_regs_mask = offsets->saved_regs_mask;
16467
16468 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
16469
16470 if (IS_STACKALIGN (func_type))
16471 {
16472 rtx r0, r1;
16473
16474 /* Handle a word-aligned stack pointer. We generate the following:
16475
16476 mov r0, sp
16477 bic r1, r0, #7
16478 mov sp, r1
16479 <save and restore r0 in normal prologue/epilogue>
16480 mov sp, r0
16481 bx lr
16482
16483 The unwinder doesn't need to know about the stack realignment.
16484 Just tell it we saved SP in r0. */
16485 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
16486
16487 r0 = gen_rtx_REG (SImode, 0);
16488 r1 = gen_rtx_REG (SImode, 1);
16489
16490 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
16491 RTX_FRAME_RELATED_P (insn) = 1;
16492 add_reg_note (insn, REG_CFA_REGISTER, NULL);
16493
16494 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
16495
16496 /* ??? The CFA changes here, which may cause GDB to conclude that it
16497 has entered a different function. That said, the unwind info is
16498 correct, individually, before and after this instruction because
16499 we've described the save of SP, which will override the default
16500 handling of SP as restoring from the CFA. */
16501 emit_insn (gen_movsi (stack_pointer_rtx, r1));
16502 }
16503
16504 /* For APCS frames, if the IP register is clobbered
16505 when creating the frame, save that register in a special
16506 way. */
16507 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
16508 {
16509 if (IS_INTERRUPT (func_type))
16510 {
16511 /* Interrupt functions must not corrupt any registers.
16512 Creating a frame pointer however, corrupts the IP
16513 register, so we must push it first. */
16514 emit_multi_reg_push (1 << IP_REGNUM);
16515
16516 /* Do not set RTX_FRAME_RELATED_P on this insn.
16517 The dwarf stack unwinding code only wants to see one
16518 stack decrement per function, and this is not it. If
16519 this instruction is labeled as being part of the frame
16520 creation sequence then dwarf2out_frame_debug_expr will
16521 die when it encounters the assignment of IP to FP
16522 later on, since the use of SP here establishes SP as
16523 the CFA register and not IP.
16524
16525 Anyway this instruction is not really part of the stack
16526 frame creation although it is part of the prologue. */
16527 }
16528 else if (IS_NESTED (func_type))
16529 {
16530 /* The static chain register is the same as the IP register
16531 used as a scratch register during stack frame creation.
16532 To get around this we need to find somewhere to store IP
16533 whilst the frame is being created. We try the following
16534 places in order:
16535
16536 1. The last argument register.
16537 2. A slot on the stack above the frame. (This only
16538 works if the function is not a varargs function).
16539 3. Register r3, after pushing the argument registers
16540 onto the stack.
16541
16542 Note - we only need to tell the dwarf2 backend about the SP
16543 adjustment in the second variant; the static chain register
16544 doesn't need to be unwound, as it doesn't contain a value
16545 inherited from the caller. */
16546
16547 if (df_regs_ever_live_p (3) == false)
16548 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16549 else if (args_to_push == 0)
16550 {
16551 rtx dwarf;
16552
16553 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16554 saved_regs += 4;
16555
16556 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
16557 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
16558 fp_offset = 4;
16559
16560 /* Just tell the dwarf backend that we adjusted SP. */
16561 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16562 plus_constant (Pmode, stack_pointer_rtx,
16563 -fp_offset));
16564 RTX_FRAME_RELATED_P (insn) = 1;
16565 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
16566 }
16567 else
16568 {
16569 /* Store the args on the stack. */
16570 if (cfun->machine->uses_anonymous_args)
16571 insn = emit_multi_reg_push
16572 ((0xf0 >> (args_to_push / 4)) & 0xf);
16573 else
16574 insn = emit_insn
16575 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16576 GEN_INT (- args_to_push)));
16577
16578 RTX_FRAME_RELATED_P (insn) = 1;
16579
16580 saved_pretend_args = 1;
16581 fp_offset = args_to_push;
16582 args_to_push = 0;
16583
16584 /* Now reuse r3 to preserve IP. */
16585 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
16586 }
16587 }
16588
16589 insn = emit_set_insn (ip_rtx,
16590 plus_constant (Pmode, stack_pointer_rtx,
16591 fp_offset));
16592 RTX_FRAME_RELATED_P (insn) = 1;
16593 }
16594
16595 if (args_to_push)
16596 {
16597 /* Push the argument registers, or reserve space for them. */
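 /* For anonymous (varargs) arguments the mask (0xf0 >> (args_to_push / 4)) & 0xf
    selects the highest-numbered argument registers: e.g. 8 bytes of pretend
    arguments gives 0xc, i.e. push {r2, r3}.  (Illustrative note.)  */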
16598 if (cfun->machine->uses_anonymous_args)
16599 insn = emit_multi_reg_push
16600 ((0xf0 >> (args_to_push / 4)) & 0xf);
16601 else
16602 insn = emit_insn
16603 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16604 GEN_INT (- args_to_push)));
16605 RTX_FRAME_RELATED_P (insn) = 1;
16606 }
16607
16608 /* If this is an interrupt service routine, and the link register
16609 is going to be pushed, and we're not generating the extra
16610 push of IP (needed when the frame pointer is required and the
16611 frame layout is APCS), then subtracting four from LR now will mean
16612 that the function return can be done with a single instruction.
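 /* With LR already adjusted, the return address can later be popped
    straight into PC (e.g. as part of the final ldm), instead of needing
    a separate subtract-and-return instruction.  (Illustrative note.)  */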
16613 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
16614 && (live_regs_mask & (1 << LR_REGNUM)) != 0
16615 && !(frame_pointer_needed && TARGET_APCS_FRAME)
16616 && TARGET_ARM)
16617 {
16618 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
16619
16620 emit_set_insn (lr, plus_constant (SImode, lr, -4));
16621 }
16622
16623 if (live_regs_mask)
16624 {
16625 saved_regs += bit_count (live_regs_mask) * 4;
16626 if (optimize_size && !frame_pointer_needed
16627 && saved_regs == offsets->saved_regs - offsets->saved_args)
16628 {
16629 /* If no coprocessor registers are being pushed and we don't have
16630 to worry about a frame pointer then push extra registers to
16631 create the stack frame. This is done in a way that does not
16632 alter the frame layout, so is independent of the epilogue. */
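 /* For example, with r0-r2 dead and a 12-byte local frame, pushing
    r0-r2 as scratch creates the frame within the same push instruction,
    saving the separate stack-pointer subtraction.  (Illustrative note.)  */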
16633 int n;
16634 int frame;
16635 n = 0;
16636 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
16637 n++;
16638 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
16639 if (frame && n * 4 >= frame)
16640 {
16641 n = frame / 4;
16642 live_regs_mask |= (1 << n) - 1;
16643 saved_regs += frame;
16644 }
16645 }
16646 insn = emit_multi_reg_push (live_regs_mask);
16647 RTX_FRAME_RELATED_P (insn) = 1;
16648 }
16649
16650 if (! IS_VOLATILE (func_type))
16651 saved_regs += arm_save_coproc_regs ();
16652
16653 if (frame_pointer_needed && TARGET_ARM)
16654 {
16655 /* Create the new frame pointer. */
16656 if (TARGET_APCS_FRAME)
16657 {
16658 insn = GEN_INT (-(4 + args_to_push + fp_offset));
16659 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
16660 RTX_FRAME_RELATED_P (insn) = 1;
16661
16662 if (IS_NESTED (func_type))
16663 {
16664 /* Recover the static chain register. */
16665 if (!df_regs_ever_live_p (3)
16666 || saved_pretend_args)
16667 insn = gen_rtx_REG (SImode, 3);
16668 else /* if (crtl->args.pretend_args_size == 0) */
16669 {
16670 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
16671 insn = gen_frame_mem (SImode, insn);
16672 }
16673 emit_set_insn (ip_rtx, insn);
16674 /* Add a USE to stop propagate_one_insn() from barfing. */
16675 emit_insn (gen_prologue_use (ip_rtx));
16676 }
16677 }
16678 else
16679 {
16680 insn = GEN_INT (saved_regs - 4);
16681 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
16682 stack_pointer_rtx, insn));
16683 RTX_FRAME_RELATED_P (insn) = 1;
16684 }
16685 }
16686
16687 if (flag_stack_usage_info)
16688 current_function_static_stack_size
16689 = offsets->outgoing_args - offsets->saved_args;
16690
16691 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
16692 {
16693 /* This add can produce multiple insns for a large constant, so we
16694 need to get tricky. */
16695 rtx last = get_last_insn ();
16696
16697 amount = GEN_INT (offsets->saved_args + saved_regs
16698 - offsets->outgoing_args);
16699
16700 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16701 amount));
16702 do
16703 {
16704 last = last ? NEXT_INSN (last) : get_insns ();
16705 RTX_FRAME_RELATED_P (last) = 1;
16706 }
16707 while (last != insn);
16708
16709 /* If the frame pointer is needed, emit a special barrier that
16710 will prevent the scheduler from moving stores to the frame
16711 before the stack adjustment. */
16712 if (frame_pointer_needed)
16713 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
16714 hard_frame_pointer_rtx));
16715 }
16716
16717
16718 if (frame_pointer_needed && TARGET_THUMB2)
16719 thumb_set_frame_pointer (offsets);
16720
16721 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16722 {
16723 unsigned long mask;
16724
16725 mask = live_regs_mask;
16726 mask &= THUMB2_WORK_REGS;
16727 if (!IS_NESTED (func_type))
16728 mask |= (1 << IP_REGNUM);
16729 arm_load_pic_register (mask);
16730 }
16731
16732 /* If we are profiling, make sure no instructions are scheduled before
16733 the call to mcount. Similarly if the user has requested no
16734 scheduling in the prologue. Similarly if we want non-call exceptions
16735 using the EABI unwinder, to prevent faulting instructions from being
16736 swapped with a stack adjustment. */
16737 if (crtl->profile || !TARGET_SCHED_PROLOG
16738 || (arm_except_unwind_info (&global_options) == UI_TARGET
16739 && cfun->can_throw_non_call_exceptions))
16740 emit_insn (gen_blockage ());
16741
16742 /* If the link register is being kept alive, with the return address in it,
16743 then make sure that it does not get reused by the ce2 pass. */
16744 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
16745 cfun->machine->lr_save_eliminated = 1;
16746 }
16747 \f
16748 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16749 static void
16750 arm_print_condition (FILE *stream)
16751 {
16752 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
16753 {
16754 /* Branch conversion is not implemented for Thumb-2. */
16755 if (TARGET_THUMB)
16756 {
16757 output_operand_lossage ("predicated Thumb instruction");
16758 return;
16759 }
16760 if (current_insn_predicate != NULL)
16761 {
16762 output_operand_lossage
16763 ("predicated instruction in conditional sequence");
16764 return;
16765 }
16766
16767 fputs (arm_condition_codes[arm_current_cc], stream);
16768 }
16769 else if (current_insn_predicate)
16770 {
16771 enum arm_cond_code code;
16772
16773 if (TARGET_THUMB1)
16774 {
16775 output_operand_lossage ("predicated Thumb instruction");
16776 return;
16777 }
16778
16779 code = get_arm_condition_code (current_insn_predicate);
16780 fputs (arm_condition_codes[code], stream);
16781 }
16782 }
16783
16784
16785 /* If CODE is 'd', then X is a condition operand and the instruction
16786 should only be executed if the condition is true.
16787 If CODE is 'D', then X is a condition operand and the instruction
16788 should only be executed if the condition is false: however, if the mode
16789 of the comparison is CCFPEmode, then always execute the instruction -- we
16790 do this because in these circumstances !GE does not necessarily imply LT;
16791 in these cases the instruction pattern will take care to make sure that
16792 an instruction containing %d will follow, thereby undoing the effects of
16793 doing this instruction unconditionally.
16794 If CODE is 'N' then X is a floating point operand that must be negated
16795 before output.
16796 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16797 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
16798 static void
16799 arm_print_operand (FILE *stream, rtx x, int code)
16800 {
16801 switch (code)
16802 {
16803 case '@':
16804 fputs (ASM_COMMENT_START, stream);
16805 return;
16806
16807 case '_':
16808 fputs (user_label_prefix, stream);
16809 return;
16810
16811 case '|':
16812 fputs (REGISTER_PREFIX, stream);
16813 return;
16814
16815 case '?':
16816 arm_print_condition (stream);
16817 return;
16818
16819 case '(':
16820 /* Nothing in unified syntax, otherwise the current condition code. */
16821 if (!TARGET_UNIFIED_ASM)
16822 arm_print_condition (stream);
16823 break;
16824
16825 case ')':
16826 /* The current condition code in unified syntax, otherwise nothing. */
16827 if (TARGET_UNIFIED_ASM)
16828 arm_print_condition (stream);
16829 break;
16830
16831 case '.':
16832 /* The current condition code for a condition code setting instruction.
16833 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16834 if (TARGET_UNIFIED_ASM)
16835 {
16836 fputc('s', stream);
16837 arm_print_condition (stream);
16838 }
16839 else
16840 {
16841 arm_print_condition (stream);
16842 fputc('s', stream);
16843 }
16844 return;
16845
16846 case '!':
16847 /* If the instruction is conditionally executed then print
16848 the current condition code, otherwise print 's'. */
16849 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
16850 if (current_insn_predicate)
16851 arm_print_condition (stream);
16852 else
16853 fputc('s', stream);
16854 break;
16855
16856 /* %# is a "break" sequence. It doesn't output anything, but is used to
16857 separate e.g. operand numbers from following text, if that text consists
16858 of further digits which we don't want to be part of the operand
16859 number. */
16860 case '#':
16861 return;
16862
16863 case 'N':
16864 {
16865 REAL_VALUE_TYPE r;
16866 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16867 r = real_value_negate (&r);
16868 fprintf (stream, "%s", fp_const_from_val (&r));
16869 }
16870 return;
16871
16872 /* An integer or symbol address without a preceding # sign. */
16873 case 'c':
16874 switch (GET_CODE (x))
16875 {
16876 case CONST_INT:
16877 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16878 break;
16879
16880 case SYMBOL_REF:
16881 output_addr_const (stream, x);
16882 break;
16883
16884 case CONST:
16885 if (GET_CODE (XEXP (x, 0)) == PLUS
16886 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
16887 {
16888 output_addr_const (stream, x);
16889 break;
16890 }
16891 /* Fall through. */
16892
16893 default:
16894 output_operand_lossage ("Unsupported operand for code '%c'", code);
16895 }
16896 return;
16897
16898 /* An integer that we want to print in HEX. */
16899 case 'x':
16900 switch (GET_CODE (x))
16901 {
16902 case CONST_INT:
16903 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16904 break;
16905
16906 default:
16907 output_operand_lossage ("Unsupported operand for code '%c'", code);
16908 }
16909 return;
16910
16911 case 'B':
16912 if (GET_CODE (x) == CONST_INT)
16913 {
16914 HOST_WIDE_INT val;
16915 val = ARM_SIGN_EXTEND (~INTVAL (x));
16916 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
16917 }
16918 else
16919 {
16920 putc ('~', stream);
16921 output_addr_const (stream, x);
16922 }
16923 return;
16924
16925 case 'L':
16926 /* The low 16 bits of an immediate constant. */
16927 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
16928 return;
16929
16930 case 'i':
16931 fprintf (stream, "%s", arithmetic_instr (x, 1));
16932 return;
16933
16934 case 'I':
16935 fprintf (stream, "%s", arithmetic_instr (x, 0));
16936 return;
16937
16938 case 'S':
16939 {
16940 HOST_WIDE_INT val;
16941 const char *shift;
16942
16943 if (!shift_operator (x, SImode))
16944 {
16945 output_operand_lossage ("invalid shift operand");
16946 break;
16947 }
16948
16949 shift = shift_op (x, &val);
16950
16951 if (shift)
16952 {
16953 fprintf (stream, ", %s ", shift);
16954 if (val == -1)
16955 arm_print_operand (stream, XEXP (x, 1), 0);
16956 else
16957 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
16958 }
16959 }
16960 return;
16961
16962 /* An explanation of the 'Q', 'R' and 'H' register operands:
16963
16964 In a pair of registers containing a DI or DF value the 'Q'
16965 operand returns the register number of the register containing
16966 the least significant part of the value. The 'R' operand returns
16967 the register number of the register containing the most
16968 significant part of the value.
16969
16970 The 'H' operand returns the higher of the two register numbers.
16971 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16972 same as the 'Q' operand, since the most significant part of the
16973 value is held in the lower number register. The reverse is true
16974 on systems where WORDS_BIG_ENDIAN is false.
16975
16976 The purpose of these operands is to distinguish between cases
16977 where the endian-ness of the values is important (for example
16978 when they are added together), and cases where the endian-ness
16979 is irrelevant, but the order of register operations is important.
16980 For example when loading a value from memory into a register
16981 pair, the endian-ness does not matter. Provided that the value
16982 from the lower memory address is put into the lower numbered
16983 register, and the value from the higher address is put into the
16984 higher numbered register, the load will work regardless of whether
16985 the value being loaded is big-wordian or little-wordian. The
16986 order of the two register loads can matter however, if the address
16987 of the memory location is actually held in one of the registers
16988 being overwritten by the load.
16989
16990 The 'Q' and 'R' constraints are also available for 64-bit
16991 constants. */
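 /* For example, for a DImode value held in {r0, r1} on a little-endian
    target, %Q prints r0 (least significant word), %R prints r1 (most
    significant word) and %H prints r1 (the higher register number).
    (Illustrative note.)  */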
16992 case 'Q':
16993 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16994 {
16995 rtx part = gen_lowpart (SImode, x);
16996 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16997 return;
16998 }
16999
17000 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17001 {
17002 output_operand_lossage ("invalid operand for code '%c'", code);
17003 return;
17004 }
17005
17006 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
17007 return;
17008
17009 case 'R':
17010 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
17011 {
17012 enum machine_mode mode = GET_MODE (x);
17013 rtx part;
17014
17015 if (mode == VOIDmode)
17016 mode = DImode;
17017 part = gen_highpart_mode (SImode, mode, x);
17018 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
17019 return;
17020 }
17021
17022 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17023 {
17024 output_operand_lossage ("invalid operand for code '%c'", code);
17025 return;
17026 }
17027
17028 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
17029 return;
17030
17031 case 'H':
17032 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17033 {
17034 output_operand_lossage ("invalid operand for code '%c'", code);
17035 return;
17036 }
17037
17038 asm_fprintf (stream, "%r", REGNO (x) + 1);
17039 return;
17040
17041 case 'J':
17042 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17043 {
17044 output_operand_lossage ("invalid operand for code '%c'", code);
17045 return;
17046 }
17047
17048 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
17049 return;
17050
17051 case 'K':
17052 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
17053 {
17054 output_operand_lossage ("invalid operand for code '%c'", code);
17055 return;
17056 }
17057
17058 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
17059 return;
17060
17061 case 'm':
17062 asm_fprintf (stream, "%r",
17063 GET_CODE (XEXP (x, 0)) == REG
17064 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
17065 return;
17066
17067 case 'M':
17068 asm_fprintf (stream, "{%r-%r}",
17069 REGNO (x),
17070 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
17071 return;
17072
17073 /* Like 'M', but writing doubleword vector registers, for use by Neon
17074 insns. */
17075 case 'h':
17076 {
17077 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
17078 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
17079 if (numregs == 1)
17080 asm_fprintf (stream, "{d%d}", regno);
17081 else
17082 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
17083 }
17084 return;
17085
17086 case 'd':
17087 /* CONST_TRUE_RTX means always -- that's the default. */
17088 if (x == const_true_rtx)
17089 return;
17090
17091 if (!COMPARISON_P (x))
17092 {
17093 output_operand_lossage ("invalid operand for code '%c'", code);
17094 return;
17095 }
17096
17097 fputs (arm_condition_codes[get_arm_condition_code (x)],
17098 stream);
17099 return;
17100
17101 case 'D':
17102 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
17103 want to do that. */
17104 if (x == const_true_rtx)
17105 {
17106 output_operand_lossage ("instruction never executed");
17107 return;
17108 }
17109 if (!COMPARISON_P (x))
17110 {
17111 output_operand_lossage ("invalid operand for code '%c'", code);
17112 return;
17113 }
17114
17115 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
17116 (get_arm_condition_code (x))],
17117 stream);
17118 return;
17119
17120 case 's':
17121 case 'V':
17122 case 'W':
17123 case 'X':
17124 case 'Y':
17125 case 'Z':
17126 /* Former Maverick support, removed after GCC-4.7. */
17127 output_operand_lossage ("obsolete Maverick format code '%c'", code);
17128 return;
17129
17130 case 'U':
17131 if (GET_CODE (x) != REG
17132 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
17133 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
17134 /* Bad value for wCG register number. */
17135 {
17136 output_operand_lossage ("invalid operand for code '%c'", code);
17137 return;
17138 }
17139
17140 else
17141 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
17142 return;
17143
17144 /* Print an iWMMXt control register name. */
17145 case 'w':
17146 if (GET_CODE (x) != CONST_INT
17147 || INTVAL (x) < 0
17148 || INTVAL (x) >= 16)
17149 /* Bad value for wC register number. */
17150 {
17151 output_operand_lossage ("invalid operand for code '%c'", code);
17152 return;
17153 }
17154
17155 else
17156 {
17157 static const char * wc_reg_names [16] =
17158 {
17159 "wCID", "wCon", "wCSSF", "wCASF",
17160 "wC4", "wC5", "wC6", "wC7",
17161 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
17162 "wC12", "wC13", "wC14", "wC15"
17163 };
17164
17165 fputs (wc_reg_names [INTVAL (x)], stream);
17166 }
17167 return;
17168
17169 /* Print the high single-precision register of a VFP double-precision
17170 register. */
17171 case 'p':
17172 {
17173 int mode = GET_MODE (x);
17174 int regno;
17175
17176 if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
17177 {
17178 output_operand_lossage ("invalid operand for code '%c'", code);
17179 return;
17180 }
17181
17182 regno = REGNO (x);
17183 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
17184 {
17185 output_operand_lossage ("invalid operand for code '%c'", code);
17186 return;
17187 }
17188
17189 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
17190 }
17191 return;
17192
17193 /* Print a VFP/Neon double precision or quad precision register name. */
17194 case 'P':
17195 case 'q':
17196 {
17197 int mode = GET_MODE (x);
17198 int is_quad = (code == 'q');
17199 int regno;
17200
17201 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
17202 {
17203 output_operand_lossage ("invalid operand for code '%c'", code);
17204 return;
17205 }
17206
17207 if (GET_CODE (x) != REG
17208 || !IS_VFP_REGNUM (REGNO (x)))
17209 {
17210 output_operand_lossage ("invalid operand for code '%c'", code);
17211 return;
17212 }
17213
17214 regno = REGNO (x);
17215 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
17216 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
17217 {
17218 output_operand_lossage ("invalid operand for code '%c'", code);
17219 return;
17220 }
17221
17222 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
17223 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
17224 }
17225 return;
17226
17227 /* These two codes print the low/high doubleword register of a Neon quad
17228 register, respectively. For pair-structure types, can also print
17229 low/high quadword registers. */
17230 case 'e':
17231 case 'f':
17232 {
17233 int mode = GET_MODE (x);
17234 int regno;
17235
17236 if ((GET_MODE_SIZE (mode) != 16
17237 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
17238 {
17239 output_operand_lossage ("invalid operand for code '%c'", code);
17240 return;
17241 }
17242
17243 regno = REGNO (x);
17244 if (!NEON_REGNO_OK_FOR_QUAD (regno))
17245 {
17246 output_operand_lossage ("invalid operand for code '%c'", code);
17247 return;
17248 }
17249
17250 if (GET_MODE_SIZE (mode) == 16)
17251 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
17252 + (code == 'f' ? 1 : 0));
17253 else
17254 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
17255 + (code == 'f' ? 1 : 0));
17256 }
17257 return;
17258
17259 /* Print a VFPv3 floating-point constant, represented as an integer
17260 index. */
17261 case 'G':
17262 {
17263 int index = vfp3_const_double_index (x);
17264 gcc_assert (index != -1);
17265 fprintf (stream, "%d", index);
17266 }
17267 return;
17268
17269 /* Print bits representing opcode features for Neon.
17270
17271 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
17272 and polynomials as unsigned.
17273
17274 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
17275
17276 Bit 2 is 1 for rounding functions, 0 otherwise. */
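 /* For example, an operand value of 5 (binary 101) makes %T print 's'
    and %O print "r", selecting a signed, rounding variant.
    (Illustrative note.)  */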
17277
17278 /* Identify the type as 's', 'u', 'p' or 'f'. */
17279 case 'T':
17280 {
17281 HOST_WIDE_INT bits = INTVAL (x);
17282 fputc ("uspf"[bits & 3], stream);
17283 }
17284 return;
17285
17286 /* Likewise, but signed and unsigned integers are both 'i'. */
17287 case 'F':
17288 {
17289 HOST_WIDE_INT bits = INTVAL (x);
17290 fputc ("iipf"[bits & 3], stream);
17291 }
17292 return;
17293
17294 /* As for 'T', but emit 'u' instead of 'p'. */
17295 case 't':
17296 {
17297 HOST_WIDE_INT bits = INTVAL (x);
17298 fputc ("usuf"[bits & 3], stream);
17299 }
17300 return;
17301
17302 /* Bit 2: rounding (vs none). */
17303 case 'O':
17304 {
17305 HOST_WIDE_INT bits = INTVAL (x);
17306 fputs ((bits & 4) != 0 ? "r" : "", stream);
17307 }
17308 return;
17309
17310 /* Memory operand for vld1/vst1 instruction. */
17311 case 'A':
17312 {
17313 rtx addr;
17314 bool postinc = FALSE;
17315 unsigned align, memsize, align_bits;
17316
17317 gcc_assert (GET_CODE (x) == MEM);
17318 addr = XEXP (x, 0);
17319 if (GET_CODE (addr) == POST_INC)
17320 {
17321 postinc = 1;
17322 addr = XEXP (addr, 0);
17323 }
17324 asm_fprintf (stream, "[%r", REGNO (addr));
17325
17326 /* We know the alignment of this access, so we can emit a hint in the
17327 instruction (for some alignments) as an aid to the memory subsystem
17328 of the target. */
17329 align = MEM_ALIGN (x) >> 3;
17330 memsize = MEM_SIZE (x);
17331
17332 /* Only certain alignment specifiers are supported by the hardware. */
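 /* For example, a 16-byte access known to be 16-byte aligned gets a
    :128 hint, producing an address operand such as "[r1:128]".
    (Illustrative note.)  */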
17333 if (memsize == 32 && (align % 32) == 0)
17334 align_bits = 256;
17335 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
17336 align_bits = 128;
17337 else if (memsize >= 8 && (align % 8) == 0)
17338 align_bits = 64;
17339 else
17340 align_bits = 0;
17341
17342 if (align_bits != 0)
17343 asm_fprintf (stream, ":%d", align_bits);
17344
17345 asm_fprintf (stream, "]");
17346
17347 if (postinc)
17348 fputs("!", stream);
17349 }
17350 return;
17351
17352 case 'C':
17353 {
17354 rtx addr;
17355
17356 gcc_assert (GET_CODE (x) == MEM);
17357 addr = XEXP (x, 0);
17358 gcc_assert (GET_CODE (addr) == REG);
17359 asm_fprintf (stream, "[%r]", REGNO (addr));
17360 }
17361 return;
17362
17363 /* Translate an S register number into a D register number and element index. */
17364 case 'y':
17365 {
17366 int mode = GET_MODE (x);
17367 int regno;
17368
17369 if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
17370 {
17371 output_operand_lossage ("invalid operand for code '%c'", code);
17372 return;
17373 }
17374
17375 regno = REGNO (x);
17376 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17377 {
17378 output_operand_lossage ("invalid operand for code '%c'", code);
17379 return;
17380 }
17381
17382 regno = regno - FIRST_VFP_REGNUM;
17383 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
17384 }
17385 return;
17386
17387 case 'v':
17388 gcc_assert (GET_CODE (x) == CONST_DOUBLE);
17389 fprintf (stream, "#%d", vfp3_const_double_for_fract_bits (x));
17390 return;
17391
17392 /* Register specifier for vld1.16/vst1.16. Translate the S register
17393 number into a D register number and element index. */
17394 case 'z':
17395 {
17396 int mode = GET_MODE (x);
17397 int regno;
17398
17399 if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
17400 {
17401 output_operand_lossage ("invalid operand for code '%c'", code);
17402 return;
17403 }
17404
17405 regno = REGNO (x);
17406 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
17407 {
17408 output_operand_lossage ("invalid operand for code '%c'", code);
17409 return;
17410 }
17411
17412 regno = regno - FIRST_VFP_REGNUM;
17413 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
17414 }
17415 return;
17416
17417 default:
17418 if (x == 0)
17419 {
17420 output_operand_lossage ("missing operand");
17421 return;
17422 }
17423
17424 switch (GET_CODE (x))
17425 {
17426 case REG:
17427 asm_fprintf (stream, "%r", REGNO (x));
17428 break;
17429
17430 case MEM:
17431 output_memory_reference_mode = GET_MODE (x);
17432 output_address (XEXP (x, 0));
17433 break;
17434
17435 case CONST_DOUBLE:
17436 if (TARGET_NEON)
17437 {
17438 char fpstr[20];
17439 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
17440 sizeof (fpstr), 0, 1);
17441 fprintf (stream, "#%s", fpstr);
17442 }
17443 else
17444 fprintf (stream, "#%s", fp_immediate_constant (x));
17445 break;
17446
17447 default:
17448 gcc_assert (GET_CODE (x) != NEG);
17449 fputc ('#', stream);
17450 if (GET_CODE (x) == HIGH)
17451 {
17452 fputs (":lower16:", stream);
17453 x = XEXP (x, 0);
17454 }
17455
17456 output_addr_const (stream, x);
17457 break;
17458 }
17459 }
17460 }
17461 \f
17462 /* Target hook for printing a memory address. */
17463 static void
17464 arm_print_operand_address (FILE *stream, rtx x)
17465 {
17466 if (TARGET_32BIT)
17467 {
17468 int is_minus = GET_CODE (x) == MINUS;
17469
17470 if (GET_CODE (x) == REG)
17471 asm_fprintf (stream, "[%r]", REGNO (x));
17472 else if (GET_CODE (x) == PLUS || is_minus)
17473 {
17474 rtx base = XEXP (x, 0);
17475 rtx index = XEXP (x, 1);
17476 HOST_WIDE_INT offset = 0;
17477 if (GET_CODE (base) != REG
17478 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
17479 {
17480 /* Ensure that BASE is a register
17481 (one of them must be).
17482 Also ensure that SP is not used as an index register. */
17483 rtx temp = base;
17484 base = index;
17485 index = temp;
17486 }
17487 switch (GET_CODE (index))
17488 {
17489 case CONST_INT:
17490 offset = INTVAL (index);
17491 if (is_minus)
17492 offset = -offset;
17493 asm_fprintf (stream, "[%r, #%wd]",
17494 REGNO (base), offset);
17495 break;
17496
17497 case REG:
17498 asm_fprintf (stream, "[%r, %s%r]",
17499 REGNO (base), is_minus ? "-" : "",
17500 REGNO (index));
17501 break;
17502
17503 case MULT:
17504 case ASHIFTRT:
17505 case LSHIFTRT:
17506 case ASHIFT:
17507 case ROTATERT:
17508 {
17509 asm_fprintf (stream, "[%r, %s%r",
17510 REGNO (base), is_minus ? "-" : "",
17511 REGNO (XEXP (index, 0)));
17512 arm_print_operand (stream, index, 'S');
17513 fputs ("]", stream);
17514 break;
17515 }
17516
17517 default:
17518 gcc_unreachable ();
17519 }
17520 }
17521 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
17522 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
17523 {
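 /* Auto-increment addresses: pre-indexed forms print e.g. "[r2, #4]!"
    and post-indexed forms print e.g. "[r2], #4", where the step is the
    size of the access mode.  (Illustrative note.)  */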
17524 extern enum machine_mode output_memory_reference_mode;
17525
17526 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17527
17528 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
17529 asm_fprintf (stream, "[%r, #%s%d]!",
17530 REGNO (XEXP (x, 0)),
17531 GET_CODE (x) == PRE_DEC ? "-" : "",
17532 GET_MODE_SIZE (output_memory_reference_mode));
17533 else
17534 asm_fprintf (stream, "[%r], #%s%d",
17535 REGNO (XEXP (x, 0)),
17536 GET_CODE (x) == POST_DEC ? "-" : "",
17537 GET_MODE_SIZE (output_memory_reference_mode));
17538 }
17539 else if (GET_CODE (x) == PRE_MODIFY)
17540 {
17541 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
17542 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17543 asm_fprintf (stream, "#%wd]!",
17544 INTVAL (XEXP (XEXP (x, 1), 1)));
17545 else
17546 asm_fprintf (stream, "%r]!",
17547 REGNO (XEXP (XEXP (x, 1), 1)));
17548 }
17549 else if (GET_CODE (x) == POST_MODIFY)
17550 {
17551 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
17552 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
17553 asm_fprintf (stream, "#%wd",
17554 INTVAL (XEXP (XEXP (x, 1), 1)));
17555 else
17556 asm_fprintf (stream, "%r",
17557 REGNO (XEXP (XEXP (x, 1), 1)));
17558 }
17559 else output_addr_const (stream, x);
17560 }
17561 else
17562 {
17563 if (GET_CODE (x) == REG)
17564 asm_fprintf (stream, "[%r]", REGNO (x));
17565 else if (GET_CODE (x) == POST_INC)
17566 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
17567 else if (GET_CODE (x) == PLUS)
17568 {
17569 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
17570 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
17571 asm_fprintf (stream, "[%r, #%wd]",
17572 REGNO (XEXP (x, 0)),
17573 INTVAL (XEXP (x, 1)));
17574 else
17575 asm_fprintf (stream, "[%r, %r]",
17576 REGNO (XEXP (x, 0)),
17577 REGNO (XEXP (x, 1)));
17578 }
17579 else
17580 output_addr_const (stream, x);
17581 }
17582 }
17583 \f
17584 /* Target hook for indicating whether a punctuation character for
17585 TARGET_PRINT_OPERAND is valid. */
17586 static bool
17587 arm_print_operand_punct_valid_p (unsigned char code)
17588 {
17589 return (code == '@' || code == '|' || code == '.'
17590 || code == '(' || code == ')' || code == '#'
17591 || (TARGET_32BIT && (code == '?'))
17592 || (TARGET_THUMB2 && (code == '!'))
17593 || (TARGET_THUMB && (code == '_')));
17594 }
17595 \f
17596 /* Target hook for assembling integer objects. The ARM version needs to
17597 handle word-sized values specially. */
17598 static bool
17599 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
17600 {
17601 enum machine_mode mode;
17602
17603 if (size == UNITS_PER_WORD && aligned_p)
17604 {
17605 fputs ("\t.word\t", asm_out_file);
17606 output_addr_const (asm_out_file, x);
17607
17608 /* Mark symbols as position independent. We only do this in the
17609 .text segment, not in the .data segment. */
17610 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
17611 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
17612 {
17613 /* See legitimize_pic_address for an explanation of the
17614 TARGET_VXWORKS_RTP check. */
17615 if (TARGET_VXWORKS_RTP
17616 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
17617 fputs ("(GOT)", asm_out_file);
17618 else
17619 fputs ("(GOTOFF)", asm_out_file);
17620 }
17621 fputc ('\n', asm_out_file);
17622 return true;
17623 }
17624
17625 mode = GET_MODE (x);
17626
17627 if (arm_vector_mode_supported_p (mode))
17628 {
17629 int i, units;
17630
17631 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17632
17633 units = CONST_VECTOR_NUNITS (x);
17634 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
17635
17636 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17637 for (i = 0; i < units; i++)
17638 {
17639 rtx elt = CONST_VECTOR_ELT (x, i);
17640 assemble_integer
17641 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
17642 }
17643 else
17644 for (i = 0; i < units; i++)
17645 {
17646 rtx elt = CONST_VECTOR_ELT (x, i);
17647 REAL_VALUE_TYPE rval;
17648
17649 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
17650
17651 assemble_real
17652 (rval, GET_MODE_INNER (mode),
17653 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
17654 }
17655
17656 return true;
17657 }
17658
17659 return default_assemble_integer (x, size, aligned_p);
17660 }
17661
17662 static void
17663 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
17664 {
17665 section *s;
17666
17667 if (!TARGET_AAPCS_BASED)
17668 {
17669 (is_ctor ?
17670 default_named_section_asm_out_constructor
17671 : default_named_section_asm_out_destructor) (symbol, priority);
17672 return;
17673 }
17674
17675 /* Put these in the .init_array section, using a special relocation. */
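 /* A non-default priority is encoded in the section name, e.g.
    priority 65 gives ".init_array.00065".  (Illustrative note.)  */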
17676 if (priority != DEFAULT_INIT_PRIORITY)
17677 {
17678 char buf[18];
17679 sprintf (buf, "%s.%.5u",
17680 is_ctor ? ".init_array" : ".fini_array",
17681 priority);
17682 s = get_section (buf, SECTION_WRITE, NULL_TREE);
17683 }
17684 else if (is_ctor)
17685 s = ctors_section;
17686 else
17687 s = dtors_section;
17688
17689 switch_to_section (s);
17690 assemble_align (POINTER_SIZE);
17691 fputs ("\t.word\t", asm_out_file);
17692 output_addr_const (asm_out_file, symbol);
17693 fputs ("(target1)\n", asm_out_file);
17694 }
17695
17696 /* Add a function to the list of static constructors. */
17697
17698 static void
17699 arm_elf_asm_constructor (rtx symbol, int priority)
17700 {
17701 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
17702 }
17703
17704 /* Add a function to the list of static destructors. */
17705
17706 static void
17707 arm_elf_asm_destructor (rtx symbol, int priority)
17708 {
17709 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
17710 }
17711 \f
17712 /* A finite state machine takes care of noticing whether or not instructions
17713 can be conditionally executed, and thus decrease execution time and code
17714 size by deleting branch instructions. The fsm is controlled by
17715 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17716
17717 /* The states of the fsm controlling condition codes are:
17718 0: normal, do nothing special
17719 1: make ASM_OUTPUT_OPCODE not output this instruction
17720 2: make ASM_OUTPUT_OPCODE not output this instruction
17721 3: make instructions conditional
17722 4: make instructions conditional
17723
17724 State transitions (state->state by whom under condition):
17725 0 -> 1 final_prescan_insn if the `target' is a label
17726 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17727 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17728 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17729 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17730 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17731 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17732 (the target insn is arm_target_insn).
17733
17734 If the jump clobbers the conditions then we use states 2 and 4.
17735
17736 A similar thing can be done with conditional return insns.
17737
17738 XXX In case the `target' is an unconditional branch, this conditionalising
17739 of the instructions always reduces code size, but not always execution
17740 time. But then, I want to reduce the code size to somewhere near what
17741 /bin/cc produces. */
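/* For example, a conditional branch around two instructions

     bne .L1
     mov r0, #0
     add r1, r1, #1
   .L1:

   can be output as

     moveq r0, #0
     addeq r1, r1, #1

   with the branch itself suppressed.  (Illustrative note.)  */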
17742
17743 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17744 instructions. When a COND_EXEC instruction is seen the subsequent
17745 instructions are scanned so that multiple conditional instructions can be
17746 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17747 specify the length and true/false mask for the IT block. These will be
17748 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
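/* For example, three conditional insns where the first two use the
   block condition and the third uses its inverse give
   arm_condexec_mask == 0x3 and arm_condexec_masklen == 3, and are
   emitted under a single "itte <cond>" instruction.
   (Illustrative note.)  */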
17749
17750 /* Returns the index of the ARM condition code string in
17751 `arm_condition_codes', or ARM_NV if the comparison is invalid.
17752 COMPARISON should be an rtx like `(eq (...) (...))'. */
17753
17754 enum arm_cond_code
17755 maybe_get_arm_condition_code (rtx comparison)
17756 {
17757 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
17758 enum arm_cond_code code;
17759 enum rtx_code comp_code = GET_CODE (comparison);
17760
17761 if (GET_MODE_CLASS (mode) != MODE_CC)
17762 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
17763 XEXP (comparison, 1));
17764
17765 switch (mode)
17766 {
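    /* The CC_D<cond> modes describe the result of combining two
       comparisons, so only a test against the dominating condition (NE)
       or its inverse (EQ) is meaningful here.  (Illustrative note.)  */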
17767 case CC_DNEmode: code = ARM_NE; goto dominance;
17768 case CC_DEQmode: code = ARM_EQ; goto dominance;
17769 case CC_DGEmode: code = ARM_GE; goto dominance;
17770 case CC_DGTmode: code = ARM_GT; goto dominance;
17771 case CC_DLEmode: code = ARM_LE; goto dominance;
17772 case CC_DLTmode: code = ARM_LT; goto dominance;
17773 case CC_DGEUmode: code = ARM_CS; goto dominance;
17774 case CC_DGTUmode: code = ARM_HI; goto dominance;
17775 case CC_DLEUmode: code = ARM_LS; goto dominance;
17776 case CC_DLTUmode: code = ARM_CC;
17777
17778 dominance:
17779 if (comp_code == EQ)
17780 return ARM_INVERSE_CONDITION_CODE (code);
17781 if (comp_code == NE)
17782 return code;
17783 return ARM_NV;
17784
17785 case CC_NOOVmode:
17786 switch (comp_code)
17787 {
17788 case NE: return ARM_NE;
17789 case EQ: return ARM_EQ;
17790 case GE: return ARM_PL;
17791 case LT: return ARM_MI;
17792 default: return ARM_NV;
17793 }
17794
17795 case CC_Zmode:
17796 switch (comp_code)
17797 {
17798 case NE: return ARM_NE;
17799 case EQ: return ARM_EQ;
17800 default: return ARM_NV;
17801 }
17802
17803 case CC_Nmode:
17804 switch (comp_code)
17805 {
17806 case NE: return ARM_MI;
17807 case EQ: return ARM_PL;
17808 default: return ARM_NV;
17809 }
17810
17811 case CCFPEmode:
17812 case CCFPmode:
17813 /* We can handle all cases except UNEQ and LTGT. */
17814 switch (comp_code)
17815 {
17816 case GE: return ARM_GE;
17817 case GT: return ARM_GT;
17818 case LE: return ARM_LS;
17819 case LT: return ARM_MI;
17820 case NE: return ARM_NE;
17821 case EQ: return ARM_EQ;
17822 case ORDERED: return ARM_VC;
17823 case UNORDERED: return ARM_VS;
17824 case UNLT: return ARM_LT;
17825 case UNLE: return ARM_LE;
17826 case UNGT: return ARM_HI;
17827 case UNGE: return ARM_PL;
17828 /* UNEQ and LTGT do not have a representation. */
17829 case UNEQ: /* Fall through. */
17830 case LTGT: /* Fall through. */
17831 default: return ARM_NV;
17832 }
17833
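    /* The comparison operands were swapped when this CC mode was
       selected, so the non-symmetric conditions must be swapped back
       here (GE <-> LE, GT <-> LT, GEU <-> LEU, GTU <-> LTU).
       (Illustrative note.)  */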
17834 case CC_SWPmode:
17835 switch (comp_code)
17836 {
17837 case NE: return ARM_NE;
17838 case EQ: return ARM_EQ;
17839 case GE: return ARM_LE;
17840 case GT: return ARM_LT;
17841 case LE: return ARM_GE;
17842 case LT: return ARM_GT;
17843 case GEU: return ARM_LS;
17844 case GTU: return ARM_CC;
17845 case LEU: return ARM_CS;
17846 case LTU: return ARM_HI;
17847 default: return ARM_NV;
17848 }
17849
17850 case CC_Cmode:
17851 switch (comp_code)
17852 {
17853 case LTU: return ARM_CS;
17854 case GEU: return ARM_CC;
17855 default: return ARM_NV;
17856 }
17857
17858 case CC_CZmode:
17859 switch (comp_code)
17860 {
17861 case NE: return ARM_NE;
17862 case EQ: return ARM_EQ;
17863 case GEU: return ARM_CS;
17864 case GTU: return ARM_HI;
17865 case LEU: return ARM_LS;
17866 case LTU: return ARM_CC;
17867 default: return ARM_NV;
17868 }
17869
17870 case CC_NCVmode:
17871 switch (comp_code)
17872 {
17873 case GE: return ARM_GE;
17874 case LT: return ARM_LT;
17875 case GEU: return ARM_CS;
17876 case LTU: return ARM_CC;
17877 default: return ARM_NV;
17878 }
17879
17880 case CCmode:
17881 switch (comp_code)
17882 {
17883 case NE: return ARM_NE;
17884 case EQ: return ARM_EQ;
17885 case GE: return ARM_GE;
17886 case GT: return ARM_GT;
17887 case LE: return ARM_LE;
17888 case LT: return ARM_LT;
17889 case GEU: return ARM_CS;
17890 case GTU: return ARM_HI;
17891 case LEU: return ARM_LS;
17892 case LTU: return ARM_CC;
17893 default: return ARM_NV;
17894 }
17895
17896 default: gcc_unreachable ();
17897 }
17898 }
17899
17900 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
17901 static enum arm_cond_code
17902 get_arm_condition_code (rtx comparison)
17903 {
17904 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
17905 gcc_assert (code != ARM_NV);
17906 return code;
17907 }
17908
17909 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17910 instructions. */
17911 void
17912 thumb2_final_prescan_insn (rtx insn)
17913 {
17914 rtx first_insn = insn;
17915 rtx body = PATTERN (insn);
17916 rtx predicate;
17917 enum arm_cond_code code;
17918 int n;
17919 int mask;
17920
17921 /* Remove the previous insn from the count of insns to be output. */
17922 if (arm_condexec_count)
17923 arm_condexec_count--;
17924
17925 /* Nothing to do if we are already inside a conditional block. */
17926 if (arm_condexec_count)
17927 return;
17928
17929 if (GET_CODE (body) != COND_EXEC)
17930 return;
17931
17932 /* Conditional jumps are implemented directly. */
17933 if (GET_CODE (insn) == JUMP_INSN)
17934 return;
17935
17936 predicate = COND_EXEC_TEST (body);
17937 arm_current_cc = get_arm_condition_code (predicate);
17938
17939 n = get_attr_ce_count (insn);
17940 arm_condexec_count = 1;
17941 arm_condexec_mask = (1 << n) - 1;
17942 arm_condexec_masklen = n;
17943 /* See if subsequent instructions can be combined into the same block. */
17944 for (;;)
17945 {
17946 insn = next_nonnote_insn (insn);
17947
17948 /* Jumping into the middle of an IT block is illegal, so a label or
17949 barrier terminates the block. */
17950 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
17951 break;
17952
17953 body = PATTERN (insn);
17954 /* USE and CLOBBER aren't really insns, so just skip them. */
17955 if (GET_CODE (body) == USE
17956 || GET_CODE (body) == CLOBBER)
17957 continue;
17958
17959 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17960 if (GET_CODE (body) != COND_EXEC)
17961 break;
17962 /* Allow up to 4 conditionally executed instructions in a block. */
17963 n = get_attr_ce_count (insn);
17964 if (arm_condexec_masklen + n > 4)
17965 break;
17966
17967 predicate = COND_EXEC_TEST (body);
17968 code = get_arm_condition_code (predicate);
17969 mask = (1 << n) - 1;
17970 if (arm_current_cc == code)
17971 arm_condexec_mask |= (mask << arm_condexec_masklen);
17972 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
17973 break;
17974
17975 arm_condexec_count++;
17976 arm_condexec_masklen += n;
17977
17978 /* A jump must be the last instruction in a conditional block. */
17979 if (GET_CODE(insn) == JUMP_INSN)
17980 break;
17981 }
17982 /* Restore recog_data (getting the attributes of other insns can
17983 destroy this array, but final.c assumes that it remains intact
17984 across this call). */
17985 extract_constrain_insn_cached (first_insn);
17986 }
17987
17988 void
17989 arm_final_prescan_insn (rtx insn)
17990 {
17991 /* BODY will hold the body of INSN. */
17992 rtx body = PATTERN (insn);
17993
17994 /* This will be 1 if trying to repeat the trick, and things need to be
17995 reversed if it appears to fail. */
17996 int reverse = 0;
17997
17998 /* If we start with a return insn, we only succeed if we find another one. */
17999 int seeking_return = 0;
18000 enum rtx_code return_code = UNKNOWN;
18001
18002 /* START_INSN will hold the insn from where we start looking. This is the
18003 first insn after the following code_label if REVERSE is true. */
18004 rtx start_insn = insn;
18005
18006 /* If in state 4, check if the target branch is reached, in order to
18007 change back to state 0. */
18008 if (arm_ccfsm_state == 4)
18009 {
18010 if (insn == arm_target_insn)
18011 {
18012 arm_target_insn = NULL;
18013 arm_ccfsm_state = 0;
18014 }
18015 return;
18016 }
18017
18018 /* If in state 3, it is possible to repeat the trick, if this insn is an
18019 unconditional branch to a label, and immediately following this branch
18020 is the previous target label which is only used once, and the label this
18021 branch jumps to is not too far off. */
18022 if (arm_ccfsm_state == 3)
18023 {
18024 if (simplejump_p (insn))
18025 {
18026 start_insn = next_nonnote_insn (start_insn);
18027 if (GET_CODE (start_insn) == BARRIER)
18028 {
18029 /* XXX Isn't this always a barrier? */
18030 start_insn = next_nonnote_insn (start_insn);
18031 }
18032 if (GET_CODE (start_insn) == CODE_LABEL
18033 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18034 && LABEL_NUSES (start_insn) == 1)
18035 reverse = TRUE;
18036 else
18037 return;
18038 }
18039 else if (ANY_RETURN_P (body))
18040 {
18041 start_insn = next_nonnote_insn (start_insn);
18042 if (GET_CODE (start_insn) == BARRIER)
18043 start_insn = next_nonnote_insn (start_insn);
18044 if (GET_CODE (start_insn) == CODE_LABEL
18045 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
18046 && LABEL_NUSES (start_insn) == 1)
18047 {
18048 reverse = TRUE;
18049 seeking_return = 1;
18050 return_code = GET_CODE (body);
18051 }
18052 else
18053 return;
18054 }
18055 else
18056 return;
18057 }
18058
18059 gcc_assert (!arm_ccfsm_state || reverse);
18060 if (GET_CODE (insn) != JUMP_INSN)
18061 return;
18062
18063 /* This jump might be paralleled with a clobber of the condition codes;
18064 the jump should always come first. */
18065 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
18066 body = XVECEXP (body, 0, 0);
18067
18068 if (reverse
18069 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
18070 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
18071 {
18072 int insns_skipped;
18073 int fail = FALSE, succeed = FALSE;
18074 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
18075 int then_not_else = TRUE;
18076 rtx this_insn = start_insn, label = 0;
18077
18078 /* Register the insn jumped to. */
18079 if (reverse)
18080 {
18081 if (!seeking_return)
18082 label = XEXP (SET_SRC (body), 0);
18083 }
18084 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
18085 label = XEXP (XEXP (SET_SRC (body), 1), 0);
18086 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
18087 {
18088 label = XEXP (XEXP (SET_SRC (body), 2), 0);
18089 then_not_else = FALSE;
18090 }
18091 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
18092 {
18093 seeking_return = 1;
18094 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
18095 }
18096 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
18097 {
18098 seeking_return = 1;
18099 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
18100 then_not_else = FALSE;
18101 }
18102 else
18103 gcc_unreachable ();
18104
18105 /* See how many insns this branch skips, and what kind of insns. If all
18106 insns are okay, and the label or unconditional branch to the same
18107 label is not too far away, succeed. */
18108 for (insns_skipped = 0;
18109 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
18110 {
18111 rtx scanbody;
18112
18113 this_insn = next_nonnote_insn (this_insn);
18114 if (!this_insn)
18115 break;
18116
18117 switch (GET_CODE (this_insn))
18118 {
18119 case CODE_LABEL:
18120 /* Succeed if it is the target label, otherwise fail since
18121 control falls in from somewhere else. */
18122 if (this_insn == label)
18123 {
18124 arm_ccfsm_state = 1;
18125 succeed = TRUE;
18126 }
18127 else
18128 fail = TRUE;
18129 break;
18130
18131 case BARRIER:
18132 /* Succeed if the following insn is the target label.
18133 Otherwise fail.
18134 If return insns are used then the last insn in a function
18135 will be a barrier. */
18136 this_insn = next_nonnote_insn (this_insn);
18137 if (this_insn && this_insn == label)
18138 {
18139 arm_ccfsm_state = 1;
18140 succeed = TRUE;
18141 }
18142 else
18143 fail = TRUE;
18144 break;
18145
18146 case CALL_INSN:
18147 /* The AAPCS says that conditional calls should not be
18148 used since they make interworking inefficient (the
18149 linker can't transform BL<cond> into BLX). That's
18150 only a problem if the machine has BLX. */
18151 if (arm_arch5)
18152 {
18153 fail = TRUE;
18154 break;
18155 }
18156
18157 /* Succeed if the following insn is the target label, or
18158 if the following two insns are a barrier and the
18159 target label. */
18160 this_insn = next_nonnote_insn (this_insn);
18161 if (this_insn && GET_CODE (this_insn) == BARRIER)
18162 this_insn = next_nonnote_insn (this_insn);
18163
18164 if (this_insn && this_insn == label
18165 && insns_skipped < max_insns_skipped)
18166 {
18167 arm_ccfsm_state = 1;
18168 succeed = TRUE;
18169 }
18170 else
18171 fail = TRUE;
18172 break;
18173
18174 case JUMP_INSN:
18175 /* If this is an unconditional branch to the same label, succeed.
18176 If it is to another label, do nothing. If it is conditional,
18177 fail. */
18178 /* XXX Probably, the tests for SET and the PC are
18179 unnecessary. */
18180
18181 scanbody = PATTERN (this_insn);
18182 if (GET_CODE (scanbody) == SET
18183 && GET_CODE (SET_DEST (scanbody)) == PC)
18184 {
18185 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
18186 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
18187 {
18188 arm_ccfsm_state = 2;
18189 succeed = TRUE;
18190 }
18191 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
18192 fail = TRUE;
18193 }
18194 /* Fail if a conditional return is undesirable (e.g. on a
18195 StrongARM), but still allow this if optimizing for size. */
18196 else if (GET_CODE (scanbody) == return_code
18197 && !use_return_insn (TRUE, NULL)
18198 && !optimize_size)
18199 fail = TRUE;
18200 else if (GET_CODE (scanbody) == return_code)
18201 {
18202 arm_ccfsm_state = 2;
18203 succeed = TRUE;
18204 }
18205 else if (GET_CODE (scanbody) == PARALLEL)
18206 {
18207 switch (get_attr_conds (this_insn))
18208 {
18209 case CONDS_NOCOND:
18210 break;
18211 default:
18212 fail = TRUE;
18213 break;
18214 }
18215 }
18216 else
18217 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
18218
18219 break;
18220
18221 case INSN:
18222 /* Instructions using or affecting the condition codes make it
18223 fail. */
18224 scanbody = PATTERN (this_insn);
18225 if (!(GET_CODE (scanbody) == SET
18226 || GET_CODE (scanbody) == PARALLEL)
18227 || get_attr_conds (this_insn) != CONDS_NOCOND)
18228 fail = TRUE;
18229 break;
18230
18231 default:
18232 break;
18233 }
18234 }
18235 if (succeed)
18236 {
18237 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
18238 arm_target_label = CODE_LABEL_NUMBER (label);
18239 else
18240 {
18241 gcc_assert (seeking_return || arm_ccfsm_state == 2);
18242
18243 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
18244 {
18245 this_insn = next_nonnote_insn (this_insn);
18246 gcc_assert (!this_insn
18247 || (GET_CODE (this_insn) != BARRIER
18248 && GET_CODE (this_insn) != CODE_LABEL));
18249 }
18250 if (!this_insn)
18251 {
18252 /* Oh dear! We ran off the end; give up. */
18253 extract_constrain_insn_cached (insn);
18254 arm_ccfsm_state = 0;
18255 arm_target_insn = NULL;
18256 return;
18257 }
18258 arm_target_insn = this_insn;
18259 }
18260
18261 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
18262 what it was. */
18263 if (!reverse)
18264 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
18265
18266 if (reverse || then_not_else)
18267 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
18268 }
18269
18270 /* Restore recog_data (getting the attributes of other insns can
18271 destroy this array, but final.c assumes that it remains intact
18272 across this call). */
18273 extract_constrain_insn_cached (insn);
18274 }
18275 }
18276
18277 /* Output IT instructions. */
18278 void
18279 thumb2_asm_output_opcode (FILE * stream)
18280 {
18281 char buff[5];
18282 int n;
18283
18284 if (arm_condexec_mask)
18285 {
18286 for (n = 0; n < arm_condexec_masklen; n++)
18287 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
18288 buff[n] = 0;
18289 asm_fprintf(stream, "i%s\t%s\n\t", buff,
18290 arm_condition_codes[arm_current_cc]);
18291 arm_condexec_mask = 0;
18292 }
18293 }
18294
18295 /* Returns true if REGNO is a valid register
18296 for holding a quantity of type MODE. */
18297 int
18298 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
18299 {
18300 if (GET_MODE_CLASS (mode) == MODE_CC)
18301 return (regno == CC_REGNUM
18302 || (TARGET_HARD_FLOAT && TARGET_VFP
18303 && regno == VFPCC_REGNUM));
18304
18305 if (TARGET_THUMB1)
18306 /* For the Thumb we only allow values bigger than SImode in
18307 registers 0 - 6, so that there is always a second low
18308 register available to hold the upper part of the value.
18309 We probably ought to ensure that the register is the
18310 start of an even numbered register pair. */
18311 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
18312
18313 if (TARGET_HARD_FLOAT && TARGET_VFP
18314 && IS_VFP_REGNUM (regno))
18315 {
18316 if (mode == SFmode || mode == SImode)
18317 return VFP_REGNO_OK_FOR_SINGLE (regno);
18318
18319 if (mode == DFmode)
18320 return VFP_REGNO_OK_FOR_DOUBLE (regno);
18321
18322 /* VFP registers can hold HFmode values, but there is no point in
18323 putting them there unless we have hardware conversion insns. */
18324 if (mode == HFmode)
18325 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
18326
18327 if (TARGET_NEON)
18328 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
18329 || (VALID_NEON_QREG_MODE (mode)
18330 && NEON_REGNO_OK_FOR_QUAD (regno))
18331 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
18332 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
18333 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
18334 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
18335 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
18336
18337 return FALSE;
18338 }
18339
18340 if (TARGET_REALLY_IWMMXT)
18341 {
18342 if (IS_IWMMXT_GR_REGNUM (regno))
18343 return mode == SImode;
18344
18345 if (IS_IWMMXT_REGNUM (regno))
18346 return VALID_IWMMXT_REG_MODE (mode);
18347 }
18348
18349 /* We allow almost any value to be stored in the general registers.
18350 Restrict doubleword quantities to even register pairs so that we can
18351 use ldrd. Do not allow very large Neon structure opaque modes in
18352 general registers; they would use too many. */
18353 if (regno <= LAST_ARM_REGNUM)
18354 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
18355 && ARM_NUM_REGS (mode) <= 4;
18356
18357 if (regno == FRAME_POINTER_REGNUM
18358 || regno == ARG_POINTER_REGNUM)
18359 /* We only allow integers in the fake hard registers. */
18360 return GET_MODE_CLASS (mode) == MODE_INT;
18361
18362 return FALSE;
18363 }
18364
18365 /* Implement MODES_TIEABLE_P. */
18366
18367 bool
18368 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18369 {
18370 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
18371 return true;
18372
18373 /* We specifically want to allow elements of "structure" modes to
18374 be tieable to the structure. This more general condition allows
18375 other rarer situations too. */
18376 if (TARGET_NEON
18377 && (VALID_NEON_DREG_MODE (mode1)
18378 || VALID_NEON_QREG_MODE (mode1)
18379 || VALID_NEON_STRUCT_MODE (mode1))
18380 && (VALID_NEON_DREG_MODE (mode2)
18381 || VALID_NEON_QREG_MODE (mode2)
18382 || VALID_NEON_STRUCT_MODE (mode2)))
18383 return true;
18384
18385 return false;
18386 }
18387
18388 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
18389 not used in ARM mode. */
18390
18391 enum reg_class
18392 arm_regno_class (int regno)
18393 {
18394 if (TARGET_THUMB1)
18395 {
18396 if (regno == STACK_POINTER_REGNUM)
18397 return STACK_REG;
18398 if (regno == CC_REGNUM)
18399 return CC_REG;
18400 if (regno < 8)
18401 return LO_REGS;
18402 return HI_REGS;
18403 }
18404
18405 if (TARGET_THUMB2 && regno < 8)
18406 return LO_REGS;
18407
18408 if ( regno <= LAST_ARM_REGNUM
18409 || regno == FRAME_POINTER_REGNUM
18410 || regno == ARG_POINTER_REGNUM)
18411 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
18412
18413 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
18414 return TARGET_THUMB2 ? CC_REG : NO_REGS;
18415
18416 if (IS_VFP_REGNUM (regno))
18417 {
18418 if (regno <= D7_VFP_REGNUM)
18419 return VFP_D0_D7_REGS;
18420 else if (regno <= LAST_LO_VFP_REGNUM)
18421 return VFP_LO_REGS;
18422 else
18423 return VFP_HI_REGS;
18424 }
18425
18426 if (IS_IWMMXT_REGNUM (regno))
18427 return IWMMXT_REGS;
18428
18429 if (IS_IWMMXT_GR_REGNUM (regno))
18430 return IWMMXT_GR_REGS;
18431
18432 return NO_REGS;
18433 }
18434
18435 /* Handle a special case when computing the offset
18436 of an argument from the frame pointer. */
18437 int
18438 arm_debugger_arg_offset (int value, rtx addr)
18439 {
18440 rtx insn;
18441
18442 /* We are only interested in the case where dbxout_parms () failed to compute the offset. */
18443 if (value != 0)
18444 return 0;
18445
18446 /* We can only cope with the case where the address is held in a register. */
18447 if (GET_CODE (addr) != REG)
18448 return 0;
18449
18450 /* If we are using the frame pointer to point at the argument, then
18451 an offset of 0 is correct. */
18452 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
18453 return 0;
18454
18455 /* If we are using the stack pointer to point at the
18456 argument, then an offset of 0 is correct. */
18457 /* ??? Check this is consistent with thumb2 frame layout. */
18458 if ((TARGET_THUMB || !frame_pointer_needed)
18459 && REGNO (addr) == SP_REGNUM)
18460 return 0;
18461
18462 /* Oh dear. The argument is pointed to by a register rather
18463 than being held in a register, or being stored at a known
18464 offset from the frame pointer. Since GDB only understands
18465 those two kinds of argument we must translate the address
18466 held in the register into an offset from the frame pointer.
18467 We do this by searching through the insns for the function
18468 looking to see where this register gets its value. If the
18469 register is initialized from the frame pointer plus an offset
18470 then we are in luck and we can continue, otherwise we give up.
18471
18472 This code is exercised by producing debugging information
18473 for a function with arguments like this:
18474
18475 double func (double a, double b, int c, double d) {return d;}
18476
18477 Without this code the stab for parameter 'd' will be set to
18478 an offset of 0 from the frame pointer, rather than 8. */
18479
18480 /* The if() statement says:
18481
18482 If the insn is a normal instruction
18483 and if the insn is setting the value in a register
18484 and if the register being set is the register holding the address of the argument
18485 and if the address is computed by an addition
18486 that involves adding to a register
18487 which is the frame pointer
18488 a constant integer
18489
18490 then... */
18491
18492 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
18493 {
18494 if ( GET_CODE (insn) == INSN
18495 && GET_CODE (PATTERN (insn)) == SET
18496 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
18497 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
18498 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
18499 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18500 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
18501 )
18502 {
18503 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
18504
18505 break;
18506 }
18507 }
18508
18509 if (value == 0)
18510 {
18511 debug_rtx (addr);
18512 warning (0, "unable to compute real location of stacked parameter");
18513 value = 8; /* XXX magic hack */
18514 }
18515
18516 return value;
18517 }
18518 \f
18519 typedef enum {
18520 T_V8QI,
18521 T_V4HI,
18522 T_V2SI,
18523 T_V2SF,
18524 T_DI,
18525 T_V16QI,
18526 T_V8HI,
18527 T_V4SI,
18528 T_V4SF,
18529 T_V2DI,
18530 T_TI,
18531 T_EI,
18532 T_OI,
18533 T_MAX /* Size of enum. Keep last. */
18534 } neon_builtin_type_mode;
18535
18536 #define TYPE_MODE_BIT(X) (1 << (X))
18537
18538 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18539 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18540 | TYPE_MODE_BIT (T_DI))
18541 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18542 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18543 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
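/* Purely as an illustration of the two masks above: TYPE_MODE_BIT gives each
   neon_builtin_type_mode its own bit, so with the enumerator values above

     TB_DREG == 0x01f   (the five 64-bit "doubleword" modes)
     TB_QREG == 0x7e0   (the six 128-bit "quadword" modes, including TImode).  */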
18544
18545 #define v8qi_UP T_V8QI
18546 #define v4hi_UP T_V4HI
18547 #define v2si_UP T_V2SI
18548 #define v2sf_UP T_V2SF
18549 #define di_UP T_DI
18550 #define v16qi_UP T_V16QI
18551 #define v8hi_UP T_V8HI
18552 #define v4si_UP T_V4SI
18553 #define v4sf_UP T_V4SF
18554 #define v2di_UP T_V2DI
18555 #define ti_UP T_TI
18556 #define ei_UP T_EI
18557 #define oi_UP T_OI
18558
18559 #define UP(X) X##_UP
18560
18561 typedef enum {
18562 NEON_BINOP,
18563 NEON_TERNOP,
18564 NEON_UNOP,
18565 NEON_GETLANE,
18566 NEON_SETLANE,
18567 NEON_CREATE,
18568 NEON_DUP,
18569 NEON_DUPLANE,
18570 NEON_COMBINE,
18571 NEON_SPLIT,
18572 NEON_LANEMUL,
18573 NEON_LANEMULL,
18574 NEON_LANEMULH,
18575 NEON_LANEMAC,
18576 NEON_SCALARMUL,
18577 NEON_SCALARMULL,
18578 NEON_SCALARMULH,
18579 NEON_SCALARMAC,
18580 NEON_CONVERT,
18581 NEON_FIXCONV,
18582 NEON_SELECT,
18583 NEON_RESULTPAIR,
18584 NEON_REINTERP,
18585 NEON_VTBL,
18586 NEON_VTBX,
18587 NEON_LOAD1,
18588 NEON_LOAD1LANE,
18589 NEON_STORE1,
18590 NEON_STORE1LANE,
18591 NEON_LOADSTRUCT,
18592 NEON_LOADSTRUCTLANE,
18593 NEON_STORESTRUCT,
18594 NEON_STORESTRUCTLANE,
18595 NEON_LOGICBINOP,
18596 NEON_SHIFTINSERT,
18597 NEON_SHIFTIMM,
18598 NEON_SHIFTACC
18599 } neon_itype;
18600
18601 typedef struct {
18602 const char *name;
18603 const neon_itype itype;
18604 const neon_builtin_type_mode mode;
18605 const enum insn_code code;
18606 unsigned int fcode;
18607 } neon_builtin_datum;
18608
18609 #define CF(N,X) CODE_FOR_neon_##N##X
18610
18611 #define VAR1(T, N, A) \
18612 {#N, NEON_##T, UP (A), CF (N, A), 0}
18613 #define VAR2(T, N, A, B) \
18614 VAR1 (T, N, A), \
18615 {#N, NEON_##T, UP (B), CF (N, B), 0}
18616 #define VAR3(T, N, A, B, C) \
18617 VAR2 (T, N, A, B), \
18618 {#N, NEON_##T, UP (C), CF (N, C), 0}
18619 #define VAR4(T, N, A, B, C, D) \
18620 VAR3 (T, N, A, B, C), \
18621 {#N, NEON_##T, UP (D), CF (N, D), 0}
18622 #define VAR5(T, N, A, B, C, D, E) \
18623 VAR4 (T, N, A, B, C, D), \
18624 {#N, NEON_##T, UP (E), CF (N, E), 0}
18625 #define VAR6(T, N, A, B, C, D, E, F) \
18626 VAR5 (T, N, A, B, C, D, E), \
18627 {#N, NEON_##T, UP (F), CF (N, F), 0}
18628 #define VAR7(T, N, A, B, C, D, E, F, G) \
18629 VAR6 (T, N, A, B, C, D, E, F), \
18630 {#N, NEON_##T, UP (G), CF (N, G), 0}
18631 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18632 VAR7 (T, N, A, B, C, D, E, F, G), \
18633 {#N, NEON_##T, UP (H), CF (N, H), 0}
18634 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18635 VAR8 (T, N, A, B, C, D, E, F, G, H), \
18636 {#N, NEON_##T, UP (I), CF (N, I), 0}
18637 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18638 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18639 {#N, NEON_##T, UP (J), CF (N, J), 0}
18640
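/* To make the VARn/UP/CF machinery above concrete: an entry such as

     VAR2 (BINOP, vadd, v8qi, v4hi)

   expands to the two initializers

     {"vadd", NEON_BINOP, T_V8QI, CODE_FOR_neon_vaddv8qi, 0},
     {"vadd", NEON_BINOP, T_V4HI, CODE_FOR_neon_vaddv4hi, 0},

   with the trailing fcode of 0 overwritten later by arm_init_neon_builtins.  */
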
18641 /* The mode entries in the following table correspond to the "key" type of the
18642 instruction variant, i.e. equivalent to that which would be specified after
18643 the assembler mnemonic, which usually refers to the last vector operand.
18644 (Signed, unsigned and polynomial types are not distinguished, however; they
18645 are all mapped onto the same mode for a given element size.) The modes
18646 listed per instruction should be the same as those defined for that
18647 instruction's pattern in neon.md. */
18648
18649 static neon_builtin_datum neon_builtin_data[] =
18650 {
18651 VAR10 (BINOP, vadd,
18652 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18653 VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
18654 VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
18655 VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18656 VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18657 VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
18658 VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18659 VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18660 VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
18661 VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18662 VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
18663 VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
18664 VAR2 (TERNOP, vqdmlal, v4hi, v2si),
18665 VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
18666 VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
18667 VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
18668 VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
18669 VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
18670 VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
18671 VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
18672 VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
18673 VAR2 (BINOP, vqdmull, v4hi, v2si),
18674 VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18675 VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18676 VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18677 VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
18678 VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
18679 VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
18680 VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18681 VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18682 VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18683 VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
18684 VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18685 VAR10 (BINOP, vsub,
18686 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18687 VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
18688 VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
18689 VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18690 VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18691 VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
18692 VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18693 VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18694 VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18695 VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18696 VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18697 VAR2 (BINOP, vcage, v2sf, v4sf),
18698 VAR2 (BINOP, vcagt, v2sf, v4sf),
18699 VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18700 VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18701 VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
18702 VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18703 VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
18704 VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18705 VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18706 VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
18707 VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18708 VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18709 VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
18710 VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
18711 VAR2 (BINOP, vrecps, v2sf, v4sf),
18712 VAR2 (BINOP, vrsqrts, v2sf, v4sf),
18713 VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18714 VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
18715 VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18716 VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18717 VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18718 VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18719 VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18720 VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18721 VAR2 (UNOP, vcnt, v8qi, v16qi),
18722 VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
18723 VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
18724 VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
18725 /* FIXME: vget_lane supports more variants than this! */
18726 VAR10 (GETLANE, vget_lane,
18727 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18728 VAR10 (SETLANE, vset_lane,
18729 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18730 VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
18731 VAR10 (DUP, vdup_n,
18732 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18733 VAR10 (DUPLANE, vdup_lane,
18734 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18735 VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
18736 VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
18737 VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
18738 VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
18739 VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
18740 VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
18741 VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
18742 VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18743 VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18744 VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
18745 VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
18746 VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18747 VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
18748 VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
18749 VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18750 VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18751 VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
18752 VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
18753 VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18754 VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
18755 VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
18756 VAR10 (BINOP, vext,
18757 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18758 VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18759 VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
18760 VAR2 (UNOP, vrev16, v8qi, v16qi),
18761 VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
18762 VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
18763 VAR10 (SELECT, vbsl,
18764 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18765 VAR1 (VTBL, vtbl1, v8qi),
18766 VAR1 (VTBL, vtbl2, v8qi),
18767 VAR1 (VTBL, vtbl3, v8qi),
18768 VAR1 (VTBL, vtbl4, v8qi),
18769 VAR1 (VTBX, vtbx1, v8qi),
18770 VAR1 (VTBX, vtbx2, v8qi),
18771 VAR1 (VTBX, vtbx3, v8qi),
18772 VAR1 (VTBX, vtbx4, v8qi),
18773 VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18774 VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18775 VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
18776 VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
18777 VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
18778 VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
18779 VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
18780 VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
18781 VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
18782 VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
18783 VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
18784 VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
18785 VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
18786 VAR10 (LOAD1, vld1,
18787 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18788 VAR10 (LOAD1LANE, vld1_lane,
18789 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18790 VAR10 (LOAD1, vld1_dup,
18791 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18792 VAR10 (STORE1, vst1,
18793 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18794 VAR10 (STORE1LANE, vst1_lane,
18795 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18796 VAR9 (LOADSTRUCT,
18797 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18798 VAR7 (LOADSTRUCTLANE, vld2_lane,
18799 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18800 VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
18801 VAR9 (STORESTRUCT, vst2,
18802 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18803 VAR7 (STORESTRUCTLANE, vst2_lane,
18804 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18805 VAR9 (LOADSTRUCT,
18806 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18807 VAR7 (LOADSTRUCTLANE, vld3_lane,
18808 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18809 VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
18810 VAR9 (STORESTRUCT, vst3,
18811 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18812 VAR7 (STORESTRUCTLANE, vst3_lane,
18813 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18814 VAR9 (LOADSTRUCT, vld4,
18815 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18816 VAR7 (LOADSTRUCTLANE, vld4_lane,
18817 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18818 VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
18819 VAR9 (STORESTRUCT, vst4,
18820 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
18821 VAR7 (STORESTRUCTLANE, vst4_lane,
18822 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
18823 VAR10 (LOGICBINOP, vand,
18824 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18825 VAR10 (LOGICBINOP, vorr,
18826 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18827 VAR10 (BINOP, veor,
18828 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18829 VAR10 (LOGICBINOP, vbic,
18830 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
18831 VAR10 (LOGICBINOP, vorn,
18832 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
18833 };
18834
18835 #undef CF
18836 #undef VAR1
18837 #undef VAR2
18838 #undef VAR3
18839 #undef VAR4
18840 #undef VAR5
18841 #undef VAR6
18842 #undef VAR7
18843 #undef VAR8
18844 #undef VAR9
18845 #undef VAR10
18846
18847 /* Neon defines builtins from ARM_BUILTIN_NEON_BASE upwards, though they don't have
18848 symbolic names defined here (which would require too much duplication).
18849 FIXME? */
18850 enum arm_builtins
18851 {
18852 ARM_BUILTIN_GETWCGR0,
18853 ARM_BUILTIN_GETWCGR1,
18854 ARM_BUILTIN_GETWCGR2,
18855 ARM_BUILTIN_GETWCGR3,
18856
18857 ARM_BUILTIN_SETWCGR0,
18858 ARM_BUILTIN_SETWCGR1,
18859 ARM_BUILTIN_SETWCGR2,
18860 ARM_BUILTIN_SETWCGR3,
18861
18862 ARM_BUILTIN_WZERO,
18863
18864 ARM_BUILTIN_WAVG2BR,
18865 ARM_BUILTIN_WAVG2HR,
18866 ARM_BUILTIN_WAVG2B,
18867 ARM_BUILTIN_WAVG2H,
18868
18869 ARM_BUILTIN_WACCB,
18870 ARM_BUILTIN_WACCH,
18871 ARM_BUILTIN_WACCW,
18872
18873 ARM_BUILTIN_WMACS,
18874 ARM_BUILTIN_WMACSZ,
18875 ARM_BUILTIN_WMACU,
18876 ARM_BUILTIN_WMACUZ,
18877
18878 ARM_BUILTIN_WSADB,
18879 ARM_BUILTIN_WSADBZ,
18880 ARM_BUILTIN_WSADH,
18881 ARM_BUILTIN_WSADHZ,
18882
18883 ARM_BUILTIN_WALIGNI,
18884 ARM_BUILTIN_WALIGNR0,
18885 ARM_BUILTIN_WALIGNR1,
18886 ARM_BUILTIN_WALIGNR2,
18887 ARM_BUILTIN_WALIGNR3,
18888
18889 ARM_BUILTIN_TMIA,
18890 ARM_BUILTIN_TMIAPH,
18891 ARM_BUILTIN_TMIABB,
18892 ARM_BUILTIN_TMIABT,
18893 ARM_BUILTIN_TMIATB,
18894 ARM_BUILTIN_TMIATT,
18895
18896 ARM_BUILTIN_TMOVMSKB,
18897 ARM_BUILTIN_TMOVMSKH,
18898 ARM_BUILTIN_TMOVMSKW,
18899
18900 ARM_BUILTIN_TBCSTB,
18901 ARM_BUILTIN_TBCSTH,
18902 ARM_BUILTIN_TBCSTW,
18903
18904 ARM_BUILTIN_WMADDS,
18905 ARM_BUILTIN_WMADDU,
18906
18907 ARM_BUILTIN_WPACKHSS,
18908 ARM_BUILTIN_WPACKWSS,
18909 ARM_BUILTIN_WPACKDSS,
18910 ARM_BUILTIN_WPACKHUS,
18911 ARM_BUILTIN_WPACKWUS,
18912 ARM_BUILTIN_WPACKDUS,
18913
18914 ARM_BUILTIN_WADDB,
18915 ARM_BUILTIN_WADDH,
18916 ARM_BUILTIN_WADDW,
18917 ARM_BUILTIN_WADDSSB,
18918 ARM_BUILTIN_WADDSSH,
18919 ARM_BUILTIN_WADDSSW,
18920 ARM_BUILTIN_WADDUSB,
18921 ARM_BUILTIN_WADDUSH,
18922 ARM_BUILTIN_WADDUSW,
18923 ARM_BUILTIN_WSUBB,
18924 ARM_BUILTIN_WSUBH,
18925 ARM_BUILTIN_WSUBW,
18926 ARM_BUILTIN_WSUBSSB,
18927 ARM_BUILTIN_WSUBSSH,
18928 ARM_BUILTIN_WSUBSSW,
18929 ARM_BUILTIN_WSUBUSB,
18930 ARM_BUILTIN_WSUBUSH,
18931 ARM_BUILTIN_WSUBUSW,
18932
18933 ARM_BUILTIN_WAND,
18934 ARM_BUILTIN_WANDN,
18935 ARM_BUILTIN_WOR,
18936 ARM_BUILTIN_WXOR,
18937
18938 ARM_BUILTIN_WCMPEQB,
18939 ARM_BUILTIN_WCMPEQH,
18940 ARM_BUILTIN_WCMPEQW,
18941 ARM_BUILTIN_WCMPGTUB,
18942 ARM_BUILTIN_WCMPGTUH,
18943 ARM_BUILTIN_WCMPGTUW,
18944 ARM_BUILTIN_WCMPGTSB,
18945 ARM_BUILTIN_WCMPGTSH,
18946 ARM_BUILTIN_WCMPGTSW,
18947
18948 ARM_BUILTIN_TEXTRMSB,
18949 ARM_BUILTIN_TEXTRMSH,
18950 ARM_BUILTIN_TEXTRMSW,
18951 ARM_BUILTIN_TEXTRMUB,
18952 ARM_BUILTIN_TEXTRMUH,
18953 ARM_BUILTIN_TEXTRMUW,
18954 ARM_BUILTIN_TINSRB,
18955 ARM_BUILTIN_TINSRH,
18956 ARM_BUILTIN_TINSRW,
18957
18958 ARM_BUILTIN_WMAXSW,
18959 ARM_BUILTIN_WMAXSH,
18960 ARM_BUILTIN_WMAXSB,
18961 ARM_BUILTIN_WMAXUW,
18962 ARM_BUILTIN_WMAXUH,
18963 ARM_BUILTIN_WMAXUB,
18964 ARM_BUILTIN_WMINSW,
18965 ARM_BUILTIN_WMINSH,
18966 ARM_BUILTIN_WMINSB,
18967 ARM_BUILTIN_WMINUW,
18968 ARM_BUILTIN_WMINUH,
18969 ARM_BUILTIN_WMINUB,
18970
18971 ARM_BUILTIN_WMULUM,
18972 ARM_BUILTIN_WMULSM,
18973 ARM_BUILTIN_WMULUL,
18974
18975 ARM_BUILTIN_PSADBH,
18976 ARM_BUILTIN_WSHUFH,
18977
18978 ARM_BUILTIN_WSLLH,
18979 ARM_BUILTIN_WSLLW,
18980 ARM_BUILTIN_WSLLD,
18981 ARM_BUILTIN_WSRAH,
18982 ARM_BUILTIN_WSRAW,
18983 ARM_BUILTIN_WSRAD,
18984 ARM_BUILTIN_WSRLH,
18985 ARM_BUILTIN_WSRLW,
18986 ARM_BUILTIN_WSRLD,
18987 ARM_BUILTIN_WRORH,
18988 ARM_BUILTIN_WRORW,
18989 ARM_BUILTIN_WRORD,
18990 ARM_BUILTIN_WSLLHI,
18991 ARM_BUILTIN_WSLLWI,
18992 ARM_BUILTIN_WSLLDI,
18993 ARM_BUILTIN_WSRAHI,
18994 ARM_BUILTIN_WSRAWI,
18995 ARM_BUILTIN_WSRADI,
18996 ARM_BUILTIN_WSRLHI,
18997 ARM_BUILTIN_WSRLWI,
18998 ARM_BUILTIN_WSRLDI,
18999 ARM_BUILTIN_WRORHI,
19000 ARM_BUILTIN_WRORWI,
19001 ARM_BUILTIN_WRORDI,
19002
19003 ARM_BUILTIN_WUNPCKIHB,
19004 ARM_BUILTIN_WUNPCKIHH,
19005 ARM_BUILTIN_WUNPCKIHW,
19006 ARM_BUILTIN_WUNPCKILB,
19007 ARM_BUILTIN_WUNPCKILH,
19008 ARM_BUILTIN_WUNPCKILW,
19009
19010 ARM_BUILTIN_WUNPCKEHSB,
19011 ARM_BUILTIN_WUNPCKEHSH,
19012 ARM_BUILTIN_WUNPCKEHSW,
19013 ARM_BUILTIN_WUNPCKEHUB,
19014 ARM_BUILTIN_WUNPCKEHUH,
19015 ARM_BUILTIN_WUNPCKEHUW,
19016 ARM_BUILTIN_WUNPCKELSB,
19017 ARM_BUILTIN_WUNPCKELSH,
19018 ARM_BUILTIN_WUNPCKELSW,
19019 ARM_BUILTIN_WUNPCKELUB,
19020 ARM_BUILTIN_WUNPCKELUH,
19021 ARM_BUILTIN_WUNPCKELUW,
19022
19023 ARM_BUILTIN_WABSB,
19024 ARM_BUILTIN_WABSH,
19025 ARM_BUILTIN_WABSW,
19026
19027 ARM_BUILTIN_WADDSUBHX,
19028 ARM_BUILTIN_WSUBADDHX,
19029
19030 ARM_BUILTIN_WABSDIFFB,
19031 ARM_BUILTIN_WABSDIFFH,
19032 ARM_BUILTIN_WABSDIFFW,
19033
19034 ARM_BUILTIN_WADDCH,
19035 ARM_BUILTIN_WADDCW,
19036
19037 ARM_BUILTIN_WAVG4,
19038 ARM_BUILTIN_WAVG4R,
19039
19040 ARM_BUILTIN_WMADDSX,
19041 ARM_BUILTIN_WMADDUX,
19042
19043 ARM_BUILTIN_WMADDSN,
19044 ARM_BUILTIN_WMADDUN,
19045
19046 ARM_BUILTIN_WMULWSM,
19047 ARM_BUILTIN_WMULWUM,
19048
19049 ARM_BUILTIN_WMULWSMR,
19050 ARM_BUILTIN_WMULWUMR,
19051
19052 ARM_BUILTIN_WMULWL,
19053
19054 ARM_BUILTIN_WMULSMR,
19055 ARM_BUILTIN_WMULUMR,
19056
19057 ARM_BUILTIN_WQMULM,
19058 ARM_BUILTIN_WQMULMR,
19059
19060 ARM_BUILTIN_WQMULWM,
19061 ARM_BUILTIN_WQMULWMR,
19062
19063 ARM_BUILTIN_WADDBHUSM,
19064 ARM_BUILTIN_WADDBHUSL,
19065
19066 ARM_BUILTIN_WQMIABB,
19067 ARM_BUILTIN_WQMIABT,
19068 ARM_BUILTIN_WQMIATB,
19069 ARM_BUILTIN_WQMIATT,
19070
19071 ARM_BUILTIN_WQMIABBN,
19072 ARM_BUILTIN_WQMIABTN,
19073 ARM_BUILTIN_WQMIATBN,
19074 ARM_BUILTIN_WQMIATTN,
19075
19076 ARM_BUILTIN_WMIABB,
19077 ARM_BUILTIN_WMIABT,
19078 ARM_BUILTIN_WMIATB,
19079 ARM_BUILTIN_WMIATT,
19080
19081 ARM_BUILTIN_WMIABBN,
19082 ARM_BUILTIN_WMIABTN,
19083 ARM_BUILTIN_WMIATBN,
19084 ARM_BUILTIN_WMIATTN,
19085
19086 ARM_BUILTIN_WMIAWBB,
19087 ARM_BUILTIN_WMIAWBT,
19088 ARM_BUILTIN_WMIAWTB,
19089 ARM_BUILTIN_WMIAWTT,
19090
19091 ARM_BUILTIN_WMIAWBBN,
19092 ARM_BUILTIN_WMIAWBTN,
19093 ARM_BUILTIN_WMIAWTBN,
19094 ARM_BUILTIN_WMIAWTTN,
19095
19096 ARM_BUILTIN_WMERGE,
19097
19098 ARM_BUILTIN_THREAD_POINTER,
19099
19100 ARM_BUILTIN_NEON_BASE,
19101
19102 ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
19103 };
19104
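/* Note on the numbering implied above: the i-th entry of neon_builtin_data
   receives function code ARM_BUILTIN_NEON_BASE + i when arm_init_neon_builtins
   walks the table, so ARM_BUILTIN_MAX is one past the last NEON builtin.  */
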
19105 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
19106
19107 static void
19108 arm_init_neon_builtins (void)
19109 {
19110 unsigned int i, fcode;
19111 tree decl;
19112
19113 tree neon_intQI_type_node;
19114 tree neon_intHI_type_node;
19115 tree neon_polyQI_type_node;
19116 tree neon_polyHI_type_node;
19117 tree neon_intSI_type_node;
19118 tree neon_intDI_type_node;
19119 tree neon_float_type_node;
19120
19121 tree intQI_pointer_node;
19122 tree intHI_pointer_node;
19123 tree intSI_pointer_node;
19124 tree intDI_pointer_node;
19125 tree float_pointer_node;
19126
19127 tree const_intQI_node;
19128 tree const_intHI_node;
19129 tree const_intSI_node;
19130 tree const_intDI_node;
19131 tree const_float_node;
19132
19133 tree const_intQI_pointer_node;
19134 tree const_intHI_pointer_node;
19135 tree const_intSI_pointer_node;
19136 tree const_intDI_pointer_node;
19137 tree const_float_pointer_node;
19138
19139 tree V8QI_type_node;
19140 tree V4HI_type_node;
19141 tree V2SI_type_node;
19142 tree V2SF_type_node;
19143 tree V16QI_type_node;
19144 tree V8HI_type_node;
19145 tree V4SI_type_node;
19146 tree V4SF_type_node;
19147 tree V2DI_type_node;
19148
19149 tree intUQI_type_node;
19150 tree intUHI_type_node;
19151 tree intUSI_type_node;
19152 tree intUDI_type_node;
19153
19154 tree intEI_type_node;
19155 tree intOI_type_node;
19156 tree intCI_type_node;
19157 tree intXI_type_node;
19158
19159 tree V8QI_pointer_node;
19160 tree V4HI_pointer_node;
19161 tree V2SI_pointer_node;
19162 tree V2SF_pointer_node;
19163 tree V16QI_pointer_node;
19164 tree V8HI_pointer_node;
19165 tree V4SI_pointer_node;
19166 tree V4SF_pointer_node;
19167 tree V2DI_pointer_node;
19168
19169 tree void_ftype_pv8qi_v8qi_v8qi;
19170 tree void_ftype_pv4hi_v4hi_v4hi;
19171 tree void_ftype_pv2si_v2si_v2si;
19172 tree void_ftype_pv2sf_v2sf_v2sf;
19173 tree void_ftype_pdi_di_di;
19174 tree void_ftype_pv16qi_v16qi_v16qi;
19175 tree void_ftype_pv8hi_v8hi_v8hi;
19176 tree void_ftype_pv4si_v4si_v4si;
19177 tree void_ftype_pv4sf_v4sf_v4sf;
19178 tree void_ftype_pv2di_v2di_v2di;
19179
19180 tree reinterp_ftype_dreg[5][5];
19181 tree reinterp_ftype_qreg[5][5];
19182 tree dreg_types[5], qreg_types[5];
19183
19184 /* Create distinguished type nodes for NEON vector element types,
19185 and pointers to values of such types, so we can detect them later. */
19186 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19187 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19188 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
19189 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
19190 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
19191 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
19192 neon_float_type_node = make_node (REAL_TYPE);
19193 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
19194 layout_type (neon_float_type_node);
19195
19196 /* Define typedefs which exactly correspond to the modes we are basing vector
19197 types on. If you change these names you'll need to change
19198 the table used by arm_mangle_type too. */
19199 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
19200 "__builtin_neon_qi");
19201 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
19202 "__builtin_neon_hi");
19203 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
19204 "__builtin_neon_si");
19205 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
19206 "__builtin_neon_sf");
19207 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
19208 "__builtin_neon_di");
19209 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
19210 "__builtin_neon_poly8");
19211 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
19212 "__builtin_neon_poly16");
19213
19214 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
19215 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
19216 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
19217 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
19218 float_pointer_node = build_pointer_type (neon_float_type_node);
19219
19220 /* Next create constant-qualified versions of the above types. */
19221 const_intQI_node = build_qualified_type (neon_intQI_type_node,
19222 TYPE_QUAL_CONST);
19223 const_intHI_node = build_qualified_type (neon_intHI_type_node,
19224 TYPE_QUAL_CONST);
19225 const_intSI_node = build_qualified_type (neon_intSI_type_node,
19226 TYPE_QUAL_CONST);
19227 const_intDI_node = build_qualified_type (neon_intDI_type_node,
19228 TYPE_QUAL_CONST);
19229 const_float_node = build_qualified_type (neon_float_type_node,
19230 TYPE_QUAL_CONST);
19231
19232 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
19233 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
19234 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
19235 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
19236 const_float_pointer_node = build_pointer_type (const_float_node);
19237
19238 /* Now create vector types based on our NEON element types. */
19239 /* 64-bit vectors. */
19240 V8QI_type_node =
19241 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
19242 V4HI_type_node =
19243 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
19244 V2SI_type_node =
19245 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
19246 V2SF_type_node =
19247 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
19248 /* 128-bit vectors. */
19249 V16QI_type_node =
19250 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
19251 V8HI_type_node =
19252 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
19253 V4SI_type_node =
19254 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
19255 V4SF_type_node =
19256 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
19257 V2DI_type_node =
19258 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
19259
19260 /* Unsigned integer types for various mode sizes. */
19261 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
19262 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
19263 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
19264 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
19265
19266 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
19267 "__builtin_neon_uqi");
19268 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
19269 "__builtin_neon_uhi");
19270 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
19271 "__builtin_neon_usi");
19272 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
19273 "__builtin_neon_udi");
19274
19275 /* Opaque integer types for structures of vectors. */
19276 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
19277 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
19278 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
19279 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
19280
19281 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
19282 "__builtin_neon_ti");
19283 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
19284 "__builtin_neon_ei");
19285 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
19286 "__builtin_neon_oi");
19287 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
19288 "__builtin_neon_ci");
19289 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
19290 "__builtin_neon_xi");
19291
19292 /* Pointers to vector types. */
19293 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
19294 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
19295 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
19296 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
19297 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
19298 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
19299 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
19300 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
19301 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
19302
19303 /* Operations which return results as pairs. */
19304 void_ftype_pv8qi_v8qi_v8qi =
19305 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
19306 V8QI_type_node, NULL);
19307 void_ftype_pv4hi_v4hi_v4hi =
19308 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
19309 V4HI_type_node, NULL);
19310 void_ftype_pv2si_v2si_v2si =
19311 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
19312 V2SI_type_node, NULL);
19313 void_ftype_pv2sf_v2sf_v2sf =
19314 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
19315 V2SF_type_node, NULL);
19316 void_ftype_pdi_di_di =
19317 build_function_type_list (void_type_node, intDI_pointer_node,
19318 neon_intDI_type_node, neon_intDI_type_node, NULL);
19319 void_ftype_pv16qi_v16qi_v16qi =
19320 build_function_type_list (void_type_node, V16QI_pointer_node,
19321 V16QI_type_node, V16QI_type_node, NULL);
19322 void_ftype_pv8hi_v8hi_v8hi =
19323 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
19324 V8HI_type_node, NULL);
19325 void_ftype_pv4si_v4si_v4si =
19326 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
19327 V4SI_type_node, NULL);
19328 void_ftype_pv4sf_v4sf_v4sf =
19329 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
19330 V4SF_type_node, NULL);
19331 void_ftype_pv2di_v2di_v2di =
19332 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
19333 V2DI_type_node, NULL);
19334
19335 dreg_types[0] = V8QI_type_node;
19336 dreg_types[1] = V4HI_type_node;
19337 dreg_types[2] = V2SI_type_node;
19338 dreg_types[3] = V2SF_type_node;
19339 dreg_types[4] = neon_intDI_type_node;
19340
19341 qreg_types[0] = V16QI_type_node;
19342 qreg_types[1] = V8HI_type_node;
19343 qreg_types[2] = V4SI_type_node;
19344 qreg_types[3] = V4SF_type_node;
19345 qreg_types[4] = V2DI_type_node;
19346
19347 for (i = 0; i < 5; i++)
19348 {
19349 int j;
19350 for (j = 0; j < 5; j++)
19351 {
19352 reinterp_ftype_dreg[i][j]
19353 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
19354 reinterp_ftype_qreg[i][j]
19355 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
19356 }
19357 }
19358
19359 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
19360 i < ARRAY_SIZE (neon_builtin_data);
19361 i++, fcode++)
19362 {
19363 neon_builtin_datum *d = &neon_builtin_data[i];
19364
19365 const char* const modenames[] = {
19366 "v8qi", "v4hi", "v2si", "v2sf", "di",
19367 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
19368 "ti", "ei", "oi"
19369 };
19370 char namebuf[60];
19371 tree ftype = NULL;
19372 int is_load = 0, is_store = 0;
19373
19374 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
19375
19376 d->fcode = fcode;
19377
19378 switch (d->itype)
19379 {
19380 case NEON_LOAD1:
19381 case NEON_LOAD1LANE:
19382 case NEON_LOADSTRUCT:
19383 case NEON_LOADSTRUCTLANE:
19384 is_load = 1;
19385 /* Fall through. */
19386 case NEON_STORE1:
19387 case NEON_STORE1LANE:
19388 case NEON_STORESTRUCT:
19389 case NEON_STORESTRUCTLANE:
19390 if (!is_load)
19391 is_store = 1;
19392 /* Fall through. */
19393 case NEON_UNOP:
19394 case NEON_BINOP:
19395 case NEON_LOGICBINOP:
19396 case NEON_SHIFTINSERT:
19397 case NEON_TERNOP:
19398 case NEON_GETLANE:
19399 case NEON_SETLANE:
19400 case NEON_CREATE:
19401 case NEON_DUP:
19402 case NEON_DUPLANE:
19403 case NEON_SHIFTIMM:
19404 case NEON_SHIFTACC:
19405 case NEON_COMBINE:
19406 case NEON_SPLIT:
19407 case NEON_CONVERT:
19408 case NEON_FIXCONV:
19409 case NEON_LANEMUL:
19410 case NEON_LANEMULL:
19411 case NEON_LANEMULH:
19412 case NEON_LANEMAC:
19413 case NEON_SCALARMUL:
19414 case NEON_SCALARMULL:
19415 case NEON_SCALARMULH:
19416 case NEON_SCALARMAC:
19417 case NEON_SELECT:
19418 case NEON_VTBL:
19419 case NEON_VTBX:
19420 {
19421 int k;
19422 tree return_type = void_type_node, args = void_list_node;
19423
19424 /* Build a function type directly from the insn_data for
19425 this builtin. The build_function_type() function takes
19426 care of removing duplicates for us. */
19427 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
19428 {
19429 tree eltype;
19430
19431 if (is_load && k == 1)
19432 {
19433 /* Neon load patterns always have the memory
19434 operand in the operand 1 position. */
19435 gcc_assert (insn_data[d->code].operand[k].predicate
19436 == neon_struct_operand);
19437
19438 switch (d->mode)
19439 {
19440 case T_V8QI:
19441 case T_V16QI:
19442 eltype = const_intQI_pointer_node;
19443 break;
19444
19445 case T_V4HI:
19446 case T_V8HI:
19447 eltype = const_intHI_pointer_node;
19448 break;
19449
19450 case T_V2SI:
19451 case T_V4SI:
19452 eltype = const_intSI_pointer_node;
19453 break;
19454
19455 case T_V2SF:
19456 case T_V4SF:
19457 eltype = const_float_pointer_node;
19458 break;
19459
19460 case T_DI:
19461 case T_V2DI:
19462 eltype = const_intDI_pointer_node;
19463 break;
19464
19465 default: gcc_unreachable ();
19466 }
19467 }
19468 else if (is_store && k == 0)
19469 {
19470 /* Similarly, Neon store patterns use operand 0 as
19471 the memory location to store to. */
19472 gcc_assert (insn_data[d->code].operand[k].predicate
19473 == neon_struct_operand);
19474
19475 switch (d->mode)
19476 {
19477 case T_V8QI:
19478 case T_V16QI:
19479 eltype = intQI_pointer_node;
19480 break;
19481
19482 case T_V4HI:
19483 case T_V8HI:
19484 eltype = intHI_pointer_node;
19485 break;
19486
19487 case T_V2SI:
19488 case T_V4SI:
19489 eltype = intSI_pointer_node;
19490 break;
19491
19492 case T_V2SF:
19493 case T_V4SF:
19494 eltype = float_pointer_node;
19495 break;
19496
19497 case T_DI:
19498 case T_V2DI:
19499 eltype = intDI_pointer_node;
19500 break;
19501
19502 default: gcc_unreachable ();
19503 }
19504 }
19505 else
19506 {
19507 switch (insn_data[d->code].operand[k].mode)
19508 {
19509 case VOIDmode: eltype = void_type_node; break;
19510 /* Scalars. */
19511 case QImode: eltype = neon_intQI_type_node; break;
19512 case HImode: eltype = neon_intHI_type_node; break;
19513 case SImode: eltype = neon_intSI_type_node; break;
19514 case SFmode: eltype = neon_float_type_node; break;
19515 case DImode: eltype = neon_intDI_type_node; break;
19516 case TImode: eltype = intTI_type_node; break;
19517 case EImode: eltype = intEI_type_node; break;
19518 case OImode: eltype = intOI_type_node; break;
19519 case CImode: eltype = intCI_type_node; break;
19520 case XImode: eltype = intXI_type_node; break;
19521 /* 64-bit vectors. */
19522 case V8QImode: eltype = V8QI_type_node; break;
19523 case V4HImode: eltype = V4HI_type_node; break;
19524 case V2SImode: eltype = V2SI_type_node; break;
19525 case V2SFmode: eltype = V2SF_type_node; break;
19526 /* 128-bit vectors. */
19527 case V16QImode: eltype = V16QI_type_node; break;
19528 case V8HImode: eltype = V8HI_type_node; break;
19529 case V4SImode: eltype = V4SI_type_node; break;
19530 case V4SFmode: eltype = V4SF_type_node; break;
19531 case V2DImode: eltype = V2DI_type_node; break;
19532 default: gcc_unreachable ();
19533 }
19534 }
19535
19536 if (k == 0 && !is_store)
19537 return_type = eltype;
19538 else
19539 args = tree_cons (NULL_TREE, eltype, args);
19540 }
19541
19542 ftype = build_function_type (return_type, args);
19543 }
19544 break;
19545
19546 case NEON_RESULTPAIR:
19547 {
19548 switch (insn_data[d->code].operand[1].mode)
19549 {
19550 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
19551 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
19552 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
19553 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
19554 case DImode: ftype = void_ftype_pdi_di_di; break;
19555 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
19556 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
19557 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
19558 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
19559 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
19560 default: gcc_unreachable ();
19561 }
19562 }
19563 break;
19564
19565 case NEON_REINTERP:
19566 {
19567 /* We iterate over 5 doubleword types, then 5 quadword
19568 types. */
19569 int rhs = d->mode % 5;
19570 switch (insn_data[d->code].operand[0].mode)
19571 {
19572 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
19573 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
19574 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
19575 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
19576 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
19577 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
19578 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
19579 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
19580 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
19581 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
19582 default: gcc_unreachable ();
19583 }
19584 }
19585 break;
19586
19587 default:
19588 gcc_unreachable ();
19589 }
19590
19591 gcc_assert (ftype != NULL);
19592
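/* E.g. the vadd entry in its T_V8QI variant becomes
   "__builtin_neon_vaddv8qi" here.  */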
19593 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
19594
19595 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
19596 NULL_TREE);
19597 arm_builtin_decls[fcode] = decl;
19598 }
19599 }
19600
19601 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19602 do \
19603 { \
19604 if ((MASK) & insn_flags) \
19605 { \
19606 tree bdecl; \
19607 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19608 BUILT_IN_MD, NULL, NULL_TREE); \
19609 arm_builtin_decls[CODE] = bdecl; \
19610 } \
19611 } \
19612 while (0)
19613
19614 struct builtin_description
19615 {
19616 const unsigned int mask;
19617 const enum insn_code icode;
19618 const char * const name;
19619 const enum arm_builtins code;
19620 const enum rtx_code comparison;
19621 const unsigned int flag;
19622 };
19623
19624 static const struct builtin_description bdesc_2arg[] =
19625 {
19626 #define IWMMXT_BUILTIN(code, string, builtin) \
19627 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19628 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19629
19630 #define IWMMXT2_BUILTIN(code, string, builtin) \
19631 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
19632 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19633
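/* As an example of the expansion, the first entry below,
     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   becomes
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },
   so the builtin is only created when def_mbuiltin sees FL_IWMMXT in
   insn_flags.  */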
19634 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
19635 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
19636 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
19637 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
19638 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
19639 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
19640 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
19641 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
19642 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
19643 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
19644 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
19645 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
19646 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
19647 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
19648 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
19649 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
19650 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
19651 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
19652 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
19653 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
19654 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
19655 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
19656 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
19657 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
19658 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
19659 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
19660 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
19661 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
19662 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
19663 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
19664 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
19665 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
19666 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
19667 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
19668 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
19669 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
19670 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
19671 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
19672 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
19673 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
19674 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
19675 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
19676 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
19677 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
19678 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
19679 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
19680 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
19681 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
19682 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
19683 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
19684 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
19685 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
19686 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
19687 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
19688 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
19689 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
19690 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
19691 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
19692 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
19693 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
19694 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
19695 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
19696 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
19697 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
19698 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
19699 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
19700 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
19701 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
19702 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
19703 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
19704 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
19705 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
19706 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
19707 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
19708 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
19709 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
19710 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
19711 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
19712
19713 #define IWMMXT_BUILTIN2(code, builtin) \
19714 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19715
19716 #define IWMMXT2_BUILTIN2(code, builtin) \
19717 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19718
19719 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
19720 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
19721 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
19722 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
19723 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
19724 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
19725 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
19726 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
19727 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
19728 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
19729 };
19730
19731 static const struct builtin_description bdesc_1arg[] =
19732 {
19733 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
19734 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
19735 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
19736 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
19737 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
19738 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
19739 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
19740 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
19741 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
19742 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
19743 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
19744 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
19745 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
19746 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
19747 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
19748 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
19749 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
19750 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
19751 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
19752 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
19753 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
19754 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
19755 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
19756 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
19757 };
19758
19759 /* Set up all the iWMMXt builtins. This is not called if
19760 TARGET_IWMMXT is zero. */
19761
19762 static void
19763 arm_init_iwmmxt_builtins (void)
19764 {
19765 const struct builtin_description * d;
19766 size_t i;
19767
19768 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19769 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19770 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
19771
19772 tree v8qi_ftype_v8qi_v8qi_int
19773 = build_function_type_list (V8QI_type_node,
19774 V8QI_type_node, V8QI_type_node,
19775 integer_type_node, NULL_TREE);
19776 tree v4hi_ftype_v4hi_int
19777 = build_function_type_list (V4HI_type_node,
19778 V4HI_type_node, integer_type_node, NULL_TREE);
19779 tree v2si_ftype_v2si_int
19780 = build_function_type_list (V2SI_type_node,
19781 V2SI_type_node, integer_type_node, NULL_TREE);
19782 tree v2si_ftype_di_di
19783 = build_function_type_list (V2SI_type_node,
19784 long_long_integer_type_node,
19785 long_long_integer_type_node,
19786 NULL_TREE);
19787 tree di_ftype_di_int
19788 = build_function_type_list (long_long_integer_type_node,
19789 long_long_integer_type_node,
19790 integer_type_node, NULL_TREE);
19791 tree di_ftype_di_int_int
19792 = build_function_type_list (long_long_integer_type_node,
19793 long_long_integer_type_node,
19794 integer_type_node,
19795 integer_type_node, NULL_TREE);
19796 tree int_ftype_v8qi
19797 = build_function_type_list (integer_type_node,
19798 V8QI_type_node, NULL_TREE);
19799 tree int_ftype_v4hi
19800 = build_function_type_list (integer_type_node,
19801 V4HI_type_node, NULL_TREE);
19802 tree int_ftype_v2si
19803 = build_function_type_list (integer_type_node,
19804 V2SI_type_node, NULL_TREE);
19805 tree int_ftype_v8qi_int
19806 = build_function_type_list (integer_type_node,
19807 V8QI_type_node, integer_type_node, NULL_TREE);
19808 tree int_ftype_v4hi_int
19809 = build_function_type_list (integer_type_node,
19810 V4HI_type_node, integer_type_node, NULL_TREE);
19811 tree int_ftype_v2si_int
19812 = build_function_type_list (integer_type_node,
19813 V2SI_type_node, integer_type_node, NULL_TREE);
19814 tree v8qi_ftype_v8qi_int_int
19815 = build_function_type_list (V8QI_type_node,
19816 V8QI_type_node, integer_type_node,
19817 integer_type_node, NULL_TREE);
19818 tree v4hi_ftype_v4hi_int_int
19819 = build_function_type_list (V4HI_type_node,
19820 V4HI_type_node, integer_type_node,
19821 integer_type_node, NULL_TREE);
19822 tree v2si_ftype_v2si_int_int
19823 = build_function_type_list (V2SI_type_node,
19824 V2SI_type_node, integer_type_node,
19825 integer_type_node, NULL_TREE);
19826 /* Miscellaneous. */
19827 tree v8qi_ftype_v4hi_v4hi
19828 = build_function_type_list (V8QI_type_node,
19829 V4HI_type_node, V4HI_type_node, NULL_TREE);
19830 tree v4hi_ftype_v2si_v2si
19831 = build_function_type_list (V4HI_type_node,
19832 V2SI_type_node, V2SI_type_node, NULL_TREE);
19833 tree v8qi_ftype_v4hi_v8qi
19834 = build_function_type_list (V8QI_type_node,
19835 V4HI_type_node, V8QI_type_node, NULL_TREE);
19836 tree v2si_ftype_v4hi_v4hi
19837 = build_function_type_list (V2SI_type_node,
19838 V4HI_type_node, V4HI_type_node, NULL_TREE);
19839 tree v2si_ftype_v8qi_v8qi
19840 = build_function_type_list (V2SI_type_node,
19841 V8QI_type_node, V8QI_type_node, NULL_TREE);
19842 tree v4hi_ftype_v4hi_di
19843 = build_function_type_list (V4HI_type_node,
19844 V4HI_type_node, long_long_integer_type_node,
19845 NULL_TREE);
19846 tree v2si_ftype_v2si_di
19847 = build_function_type_list (V2SI_type_node,
19848 V2SI_type_node, long_long_integer_type_node,
19849 NULL_TREE);
19850 tree di_ftype_void
19851 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
19852 tree int_ftype_void
19853 = build_function_type_list (integer_type_node, NULL_TREE);
19854 tree di_ftype_v8qi
19855 = build_function_type_list (long_long_integer_type_node,
19856 V8QI_type_node, NULL_TREE);
19857 tree di_ftype_v4hi
19858 = build_function_type_list (long_long_integer_type_node,
19859 V4HI_type_node, NULL_TREE);
19860 tree di_ftype_v2si
19861 = build_function_type_list (long_long_integer_type_node,
19862 V2SI_type_node, NULL_TREE);
19863 tree v2si_ftype_v4hi
19864 = build_function_type_list (V2SI_type_node,
19865 V4HI_type_node, NULL_TREE);
19866 tree v4hi_ftype_v8qi
19867 = build_function_type_list (V4HI_type_node,
19868 V8QI_type_node, NULL_TREE);
19869 tree v8qi_ftype_v8qi
19870 = build_function_type_list (V8QI_type_node,
19871 V8QI_type_node, NULL_TREE);
19872 tree v4hi_ftype_v4hi
19873 = build_function_type_list (V4HI_type_node,
19874 V4HI_type_node, NULL_TREE);
19875 tree v2si_ftype_v2si
19876 = build_function_type_list (V2SI_type_node,
19877 V2SI_type_node, NULL_TREE);
19878
19879 tree di_ftype_di_v4hi_v4hi
19880 = build_function_type_list (long_long_unsigned_type_node,
19881 long_long_unsigned_type_node,
19882 V4HI_type_node, V4HI_type_node,
19883 NULL_TREE);
19884
19885 tree di_ftype_v4hi_v4hi
19886 = build_function_type_list (long_long_unsigned_type_node,
19887 V4HI_type_node,V4HI_type_node,
19888 NULL_TREE);
19889
19890 tree v2si_ftype_v2si_v4hi_v4hi
19891 = build_function_type_list (V2SI_type_node,
19892 V2SI_type_node, V4HI_type_node,
19893 V4HI_type_node, NULL_TREE);
19894
19895 tree v2si_ftype_v2si_v8qi_v8qi
19896 = build_function_type_list (V2SI_type_node,
19897 V2SI_type_node, V8QI_type_node,
19898 V8QI_type_node, NULL_TREE);
19899
19900 tree di_ftype_di_v2si_v2si
19901 = build_function_type_list (long_long_unsigned_type_node,
19902 long_long_unsigned_type_node,
19903 V2SI_type_node, V2SI_type_node,
19904 NULL_TREE);
19905
19906 tree di_ftype_di_di_int
19907 = build_function_type_list (long_long_unsigned_type_node,
19908 long_long_unsigned_type_node,
19909 long_long_unsigned_type_node,
19910 integer_type_node, NULL_TREE);
19911
19912 tree void_ftype_int
19913 = build_function_type_list (void_type_node,
19914 integer_type_node, NULL_TREE);
19915
19916 tree v8qi_ftype_char
19917 = build_function_type_list (V8QI_type_node,
19918 signed_char_type_node, NULL_TREE);
19919
19920 tree v4hi_ftype_short
19921 = build_function_type_list (V4HI_type_node,
19922 short_integer_type_node, NULL_TREE);
19923
19924 tree v2si_ftype_int
19925 = build_function_type_list (V2SI_type_node,
19926 integer_type_node, NULL_TREE);
19927
19928 /* Normal vector binops. */
19929 tree v8qi_ftype_v8qi_v8qi
19930 = build_function_type_list (V8QI_type_node,
19931 V8QI_type_node, V8QI_type_node, NULL_TREE);
19932 tree v4hi_ftype_v4hi_v4hi
19933 = build_function_type_list (V4HI_type_node,
19934 V4HI_type_node,V4HI_type_node, NULL_TREE);
19935 tree v2si_ftype_v2si_v2si
19936 = build_function_type_list (V2SI_type_node,
19937 V2SI_type_node, V2SI_type_node, NULL_TREE);
19938 tree di_ftype_di_di
19939 = build_function_type_list (long_long_unsigned_type_node,
19940 long_long_unsigned_type_node,
19941 long_long_unsigned_type_node,
19942 NULL_TREE);
19943
19944 /* Add all builtins that are more or less simple operations on two
19945 operands. */
19946 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19947 {
19948 /* Use one of the operands; the target can have a different mode for
19949 mask-generating compares. */
19950 enum machine_mode mode;
19951 tree type;
19952
19953 if (d->name == 0)
19954 continue;
19955
19956 mode = insn_data[d->icode].operand[1].mode;
19957
19958 switch (mode)
19959 {
19960 case V8QImode:
19961 type = v8qi_ftype_v8qi_v8qi;
19962 break;
19963 case V4HImode:
19964 type = v4hi_ftype_v4hi_v4hi;
19965 break;
19966 case V2SImode:
19967 type = v2si_ftype_v2si_v2si;
19968 break;
19969 case DImode:
19970 type = di_ftype_di_di;
19971 break;
19972
19973 default:
19974 gcc_unreachable ();
19975 }
19976
19977 def_mbuiltin (d->mask, d->name, type, d->code);
19978 }
19979
19980 /* Add the remaining MMX insns with somewhat more complicated types. */
19981 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
19982 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
19983 ARM_BUILTIN_ ## CODE)
19984
19985 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
19986 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
19987 ARM_BUILTIN_ ## CODE)
19988
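/* As a worked example of the macros above: the first registration below,
     iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
   expands to
     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);
   so code compiled for an iWMMXt target may write, for instance,
     unsigned long long acc = __builtin_arm_wzero ();
   The FL_IWMMXT / FL_IWMMXT2 masks gate each registration on the
   corresponding target feature.  */
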
19989 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
19990 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
19991 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
19992 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
19993 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
19994 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
19995 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
19996 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
19997 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
19998
19999 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
20000 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
20001 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
20002 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
20003 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
20004 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
20005
20006 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
20007 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
20008 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
20009 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
20010 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
20011 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
20012
20013 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
20014 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
20015 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
20016 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
20017 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
20018 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
20019
20020 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
20021 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
20022 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
20023 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
20024 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
20025 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
20026
20027 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
20028
20029 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
20030 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
20031 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
20032 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
20033 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
20034 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
20035 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
20036 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
20037 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
20038 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
20039
20040 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
20041 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
20042 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
20043 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
20044 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
20045 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
20046 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
20047 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
20048 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
20049
20050 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
20051 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
20052 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
20053
20054 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
20055 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
20056 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
20057
20058 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
20059 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
20060
20061 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
20062 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
20063 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
20064 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
20065 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
20066 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
20067
20068 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
20069 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
20070 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
20071 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
20072 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
20073 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
20074 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
20075 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
20076 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
20077 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
20078 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
20079 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
20080
20081 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
20082 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
20083 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
20084 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
20085
20086 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
20087 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
20088 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
20089 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
20090 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
20091 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
20092 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
20093
20094 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
20095 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
20096 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
20097
20098 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
20099 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
20100 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
20101 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
20102
20103 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
20104 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
20105 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
20106 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
20107
20108 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
20109 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
20110 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
20111 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
20112
20113 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
20114 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
20115 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
20116 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
20117
20118 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
20119 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
20120 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
20121 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
20122
20123 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
20124 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
20125 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
20126 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
20127
20128 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
20129
20130 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
20131 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
20132 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
20133
20134 #undef iwmmx_mbuiltin
20135 #undef iwmmx2_mbuiltin
20136 }
20137
20138 static void
20139 arm_init_tls_builtins (void)
20140 {
20141 tree ftype, decl;
20142
20143 ftype = build_function_type (ptr_type_node, void_list_node);
20144 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
20145 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
20146 NULL, NULL_TREE);
20147 TREE_NOTHROW (decl) = 1;
20148 TREE_READONLY (decl) = 1;
20149 arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
20150 }
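
/* With the declaration above in place, code on a TLS-capable target can
   fetch the thread pointer directly, for example:
     void *tp = __builtin_thread_pointer ();
   Because the decl is marked nothrow and readonly, repeated calls can be
   CSEd by the optimizers.  */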
20151
20152 static void
20153 arm_init_fp16_builtins (void)
20154 {
20155 tree fp16_type = make_node (REAL_TYPE);
20156 TYPE_PRECISION (fp16_type) = 16;
20157 layout_type (fp16_type);
20158 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
20159 }
20160
20161 static void
20162 arm_init_builtins (void)
20163 {
20164 arm_init_tls_builtins ();
20165
20166 if (TARGET_REALLY_IWMMXT)
20167 arm_init_iwmmxt_builtins ();
20168
20169 if (TARGET_NEON)
20170 arm_init_neon_builtins ();
20171
20172 if (arm_fp16_format)
20173 arm_init_fp16_builtins ();
20174 }
20175
20176 /* Return the ARM builtin for CODE. */
20177
20178 static tree
20179 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
20180 {
20181 if (code >= ARM_BUILTIN_MAX)
20182 return error_mark_node;
20183
20184 return arm_builtin_decls[code];
20185 }
20186
20187 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
20188
20189 static const char *
20190 arm_invalid_parameter_type (const_tree t)
20191 {
20192 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20193 return N_("function parameters cannot have __fp16 type");
20194 return NULL;
20195 }
20196
20197 /* Implement TARGET_INVALID_RETURN_TYPE. */
20198
20199 static const char *
20200 arm_invalid_return_type (const_tree t)
20201 {
20202 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20203 return N_("functions cannot return __fp16 type");
20204 return NULL;
20205 }
20206
20207 /* Implement TARGET_PROMOTED_TYPE. */
20208
20209 static tree
20210 arm_promoted_type (const_tree t)
20211 {
20212 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
20213 return float_type_node;
20214 return NULL_TREE;
20215 }
20216
20217 /* Implement TARGET_CONVERT_TO_TYPE.
20218 Specifically, this hook implements the peculiarity of the ARM
20219 half-precision floating-point C semantics that requires conversions
20220 between __fp16 and double to go via an intermediate conversion to float. */
20221
20222 static tree
20223 arm_convert_to_type (tree type, tree expr)
20224 {
20225 tree fromtype = TREE_TYPE (expr);
20226 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
20227 return NULL_TREE;
20228 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
20229 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
20230 return convert (type, convert (float_type_node, expr));
20231 return NULL_TREE;
20232 }
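
/* Taken together, the __fp16 hooks above mean that, for example,
     __fp16 h = 1.0;
     double d = h + h;
   promotes the operands to float for the addition (arm_promoted_type),
   while assignments between __fp16 and double are lowered with an
   intermediate conversion to float (arm_convert_to_type).  */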
20233
20234 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
20235 This simply adds HFmode as a supported mode; even though we don't
20236 implement arithmetic on this type directly, it's supported by
20237 optabs conversions, much the way the double-word arithmetic is
20238 special-cased in the default hook. */
20239
20240 static bool
20241 arm_scalar_mode_supported_p (enum machine_mode mode)
20242 {
20243 if (mode == HFmode)
20244 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
20245 else if (ALL_FIXED_POINT_MODE_P (mode))
20246 return true;
20247 else
20248 return default_scalar_mode_supported_p (mode);
20249 }
20250
20251 /* Errors in the source file can cause expand_expr to return const0_rtx
20252 where we expect a vector. To avoid crashing, use one of the vector
20253 clear instructions. */
20254
20255 static rtx
20256 safe_vector_operand (rtx x, enum machine_mode mode)
20257 {
20258 if (x != const0_rtx)
20259 return x;
20260 x = gen_reg_rtx (mode);
20261
20262 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
20263 : gen_rtx_SUBREG (DImode, x, 0)));
20264 return x;
20265 }
20266
20267 /* Subroutine of arm_expand_builtin to take care of binop insns. */
20268
20269 static rtx
20270 arm_expand_binop_builtin (enum insn_code icode,
20271 tree exp, rtx target)
20272 {
20273 rtx pat;
20274 tree arg0 = CALL_EXPR_ARG (exp, 0);
20275 tree arg1 = CALL_EXPR_ARG (exp, 1);
20276 rtx op0 = expand_normal (arg0);
20277 rtx op1 = expand_normal (arg1);
20278 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20279 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20280 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20281
20282 if (VECTOR_MODE_P (mode0))
20283 op0 = safe_vector_operand (op0, mode0);
20284 if (VECTOR_MODE_P (mode1))
20285 op1 = safe_vector_operand (op1, mode1);
20286
20287 if (! target
20288 || GET_MODE (target) != tmode
20289 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20290 target = gen_reg_rtx (tmode);
20291
20292 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
20293 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
20294
20295 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20296 op0 = copy_to_mode_reg (mode0, op0);
20297 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20298 op1 = copy_to_mode_reg (mode1, op1);
20299
20300 pat = GEN_FCN (icode) (target, op0, op1);
20301 if (! pat)
20302 return 0;
20303 emit_insn (pat);
20304 return target;
20305 }
20306
20307 /* Subroutine of arm_expand_builtin to take care of unop insns. */
20308
20309 static rtx
20310 arm_expand_unop_builtin (enum insn_code icode,
20311 tree exp, rtx target, int do_load)
20312 {
20313 rtx pat;
20314 tree arg0 = CALL_EXPR_ARG (exp, 0);
20315 rtx op0 = expand_normal (arg0);
20316 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20317 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20318
20319 if (! target
20320 || GET_MODE (target) != tmode
20321 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20322 target = gen_reg_rtx (tmode);
20323 if (do_load)
20324 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20325 else
20326 {
20327 if (VECTOR_MODE_P (mode0))
20328 op0 = safe_vector_operand (op0, mode0);
20329
20330 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20331 op0 = copy_to_mode_reg (mode0, op0);
20332 }
20333
20334 pat = GEN_FCN (icode) (target, op0);
20335 if (! pat)
20336 return 0;
20337 emit_insn (pat);
20338 return target;
20339 }
20340
20341 typedef enum {
20342 NEON_ARG_COPY_TO_REG,
20343 NEON_ARG_CONSTANT,
20344 NEON_ARG_MEMORY,
20345 NEON_ARG_STOP
20346 } builtin_arg;
20347
20348 #define NEON_MAX_BUILTIN_ARGS 5
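
/* For example, a plain binary operation (see the NEON_BINOP case in
   arm_expand_neon_builtin below) is described by the argument list
     NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
     NEON_ARG_STOP
   i.e. two register operands, one constant operand and the terminator.  */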
20349
20350 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
20351 and return an expression for the accessed memory.
20352
20353 The intrinsic function operates on a block of registers that has
20354 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
20355 function references the memory at EXP of type TYPE and in mode
20356 MEM_MODE; this mode may be BLKmode if no more suitable mode is
20357 available. */
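/* For example, for a load of two D registers of V8QI elements, REG_MODE is
   a 16-byte mode, so NVECTORS is 2; if MEM_MODE equals REG_MODE the access
   covers all 16 QImode elements and the array type built below has 16
   elements, whereas a lane access (MEM_MODE different from REG_MODE)
   covers only NVECTORS elements.  */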
20358
20359 static tree
20360 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
20361 enum machine_mode reg_mode,
20362 neon_builtin_type_mode type_mode)
20363 {
20364 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
20365 tree elem_type, upper_bound, array_type;
20366
20367 /* Work out the size of the register block in bytes. */
20368 reg_size = GET_MODE_SIZE (reg_mode);
20369
20370 /* Work out the size of each vector in bytes. */
20371 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
20372 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
20373
20374 /* Work out how many vectors there are. */
20375 gcc_assert (reg_size % vector_size == 0);
20376 nvectors = reg_size / vector_size;
20377
20378 /* Work out the type of each element. */
20379 gcc_assert (POINTER_TYPE_P (type));
20380 elem_type = TREE_TYPE (type);
20381
20382 /* Work out how many elements are being loaded or stored.
20383 MEM_MODE == REG_MODE implies a one-to-one mapping between register
20384 and memory elements; anything else implies a lane load or store. */
20385 if (mem_mode == reg_mode)
20386 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
20387 else
20388 nelems = nvectors;
20389
20390 /* Create a type that describes the full access. */
20391 upper_bound = build_int_cst (size_type_node, nelems - 1);
20392 array_type = build_array_type (elem_type, build_index_type (upper_bound));
20393
20394 /* Dereference EXP using that type. */
20395 return fold_build2 (MEM_REF, array_type, exp,
20396 build_int_cst (build_pointer_type (array_type), 0));
20397 }
20398
20399 /* Expand a Neon builtin. */
20400 static rtx
20401 arm_expand_neon_args (rtx target, int icode, int have_retval,
20402 neon_builtin_type_mode type_mode,
20403 tree exp, int fcode, ...)
20404 {
20405 va_list ap;
20406 rtx pat;
20407 tree arg[NEON_MAX_BUILTIN_ARGS];
20408 rtx op[NEON_MAX_BUILTIN_ARGS];
20409 tree arg_type;
20410 tree formals;
20411 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20412 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
20413 enum machine_mode other_mode;
20414 int argc = 0;
20415 int opno;
20416
20417 if (have_retval
20418 && (!target
20419 || GET_MODE (target) != tmode
20420 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
20421 target = gen_reg_rtx (tmode);
20422
20423 va_start (ap, fcode);
20424
20425 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
20426
20427 for (;;)
20428 {
20429 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
20430
20431 if (thisarg == NEON_ARG_STOP)
20432 break;
20433 else
20434 {
20435 opno = argc + have_retval;
20436 mode[argc] = insn_data[icode].operand[opno].mode;
20437 arg[argc] = CALL_EXPR_ARG (exp, argc);
20438 arg_type = TREE_VALUE (formals);
20439 if (thisarg == NEON_ARG_MEMORY)
20440 {
20441 other_mode = insn_data[icode].operand[1 - opno].mode;
20442 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
20443 mode[argc], other_mode,
20444 type_mode);
20445 }
20446
20447 op[argc] = expand_normal (arg[argc]);
20448
20449 switch (thisarg)
20450 {
20451 case NEON_ARG_COPY_TO_REG:
20452 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
20453 if (!(*insn_data[icode].operand[opno].predicate)
20454 (op[argc], mode[argc]))
20455 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
20456 break;
20457
20458 case NEON_ARG_CONSTANT:
20459 /* FIXME: This error message is somewhat unhelpful. */
20460 if (!(*insn_data[icode].operand[opno].predicate)
20461 (op[argc], mode[argc]))
20462 error ("argument must be a constant");
20463 break;
20464
20465 case NEON_ARG_MEMORY:
20466 gcc_assert (MEM_P (op[argc]));
20467 PUT_MODE (op[argc], mode[argc]);
20468 /* ??? arm_neon.h uses the same built-in functions for signed
20469 and unsigned accesses, casting where necessary. This isn't
20470 alias safe. */
20471 set_mem_alias_set (op[argc], 0);
20472 if (!(*insn_data[icode].operand[opno].predicate)
20473 (op[argc], mode[argc]))
20474 op[argc] = (replace_equiv_address
20475 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
20476 break;
20477
20478 case NEON_ARG_STOP:
20479 gcc_unreachable ();
20480 }
20481
20482 argc++;
20483 formals = TREE_CHAIN (formals);
20484 }
20485 }
20486
20487 va_end (ap);
20488
20489 if (have_retval)
20490 switch (argc)
20491 {
20492 case 1:
20493 pat = GEN_FCN (icode) (target, op[0]);
20494 break;
20495
20496 case 2:
20497 pat = GEN_FCN (icode) (target, op[0], op[1]);
20498 break;
20499
20500 case 3:
20501 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
20502 break;
20503
20504 case 4:
20505 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
20506 break;
20507
20508 case 5:
20509 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
20510 break;
20511
20512 default:
20513 gcc_unreachable ();
20514 }
20515 else
20516 switch (argc)
20517 {
20518 case 1:
20519 pat = GEN_FCN (icode) (op[0]);
20520 break;
20521
20522 case 2:
20523 pat = GEN_FCN (icode) (op[0], op[1]);
20524 break;
20525
20526 case 3:
20527 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
20528 break;
20529
20530 case 4:
20531 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
20532 break;
20533
20534 case 5:
20535 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
20536 break;
20537
20538 default:
20539 gcc_unreachable ();
20540 }
20541
20542 if (!pat)
20543 return 0;
20544
20545 emit_insn (pat);
20546
20547 return target;
20548 }
20549
20550 /* Expand a Neon builtin. These are "special" because they don't have symbolic
20551 constants defined per-instruction or per instruction-variant. Instead, the
20552 required info is looked up in the table neon_builtin_data. */
20553 static rtx
20554 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
20555 {
20556 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
20557 neon_itype itype = d->itype;
20558 enum insn_code icode = d->code;
20559 neon_builtin_type_mode type_mode = d->mode;
20560
20561 switch (itype)
20562 {
20563 case NEON_UNOP:
20564 case NEON_CONVERT:
20565 case NEON_DUPLANE:
20566 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20567 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20568
20569 case NEON_BINOP:
20570 case NEON_SETLANE:
20571 case NEON_SCALARMUL:
20572 case NEON_SCALARMULL:
20573 case NEON_SCALARMULH:
20574 case NEON_SHIFTINSERT:
20575 case NEON_LOGICBINOP:
20576 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20577 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20578 NEON_ARG_STOP);
20579
20580 case NEON_TERNOP:
20581 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20582 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20583 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20584
20585 case NEON_GETLANE:
20586 case NEON_FIXCONV:
20587 case NEON_SHIFTIMM:
20588 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20589 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
20590 NEON_ARG_STOP);
20591
20592 case NEON_CREATE:
20593 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20594 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20595
20596 case NEON_DUP:
20597 case NEON_SPLIT:
20598 case NEON_REINTERP:
20599 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20600 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20601
20602 case NEON_COMBINE:
20603 case NEON_VTBL:
20604 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20605 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20606
20607 case NEON_RESULTPAIR:
20608 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
20609 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20610 NEON_ARG_STOP);
20611
20612 case NEON_LANEMUL:
20613 case NEON_LANEMULL:
20614 case NEON_LANEMULH:
20615 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20616 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20617 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20618
20619 case NEON_LANEMAC:
20620 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20621 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20622 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
20623
20624 case NEON_SHIFTACC:
20625 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20626 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20627 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20628
20629 case NEON_SCALARMAC:
20630 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20631 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20632 NEON_ARG_CONSTANT, NEON_ARG_STOP);
20633
20634 case NEON_SELECT:
20635 case NEON_VTBX:
20636 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20637 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
20638 NEON_ARG_STOP);
20639
20640 case NEON_LOAD1:
20641 case NEON_LOADSTRUCT:
20642 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20643 NEON_ARG_MEMORY, NEON_ARG_STOP);
20644
20645 case NEON_LOAD1LANE:
20646 case NEON_LOADSTRUCTLANE:
20647 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
20648 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20649 NEON_ARG_STOP);
20650
20651 case NEON_STORE1:
20652 case NEON_STORESTRUCT:
20653 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
20654 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
20655
20656 case NEON_STORE1LANE:
20657 case NEON_STORESTRUCTLANE:
20658 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
20659 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
20660 NEON_ARG_STOP);
20661 }
20662
20663 gcc_unreachable ();
20664 }
20665
20666 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20667 void
20668 neon_reinterpret (rtx dest, rtx src)
20669 {
20670 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
20671 }
20672
20673 /* Emit code to place a Neon pair result in memory locations (with equal
20674 registers). */
20675 void
20676 neon_emit_pair_result_insn (enum machine_mode mode,
20677 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
20678 rtx op1, rtx op2)
20679 {
20680 rtx mem = gen_rtx_MEM (mode, destaddr);
20681 rtx tmp1 = gen_reg_rtx (mode);
20682 rtx tmp2 = gen_reg_rtx (mode);
20683
20684 emit_insn (intfn (tmp1, op1, op2, tmp2));
20685
20686 emit_move_insn (mem, tmp1);
20687 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
20688 emit_move_insn (mem, tmp2);
20689 }
20690
20691 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
20692 not to early-clobber SRC registers in the process.
20693
20694 We assume that the operands described by SRC and DEST represent a
20695 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
20696 number of components into which the copy has been decomposed. */
20697 void
20698 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
20699 {
20700 unsigned int i;
20701
20702 if (!reg_overlap_mentioned_p (operands[0], operands[1])
20703 || REGNO (operands[0]) < REGNO (operands[1]))
20704 {
20705 for (i = 0; i < count; i++)
20706 {
20707 operands[2 * i] = dest[i];
20708 operands[2 * i + 1] = src[i];
20709 }
20710 }
20711 else
20712 {
20713 for (i = 0; i < count; i++)
20714 {
20715 operands[2 * i] = dest[count - i - 1];
20716 operands[2 * i + 1] = src[count - i - 1];
20717 }
20718 }
20719 }
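
/* For example, a copy decomposed into two components where the destination
   pair starts one register above the source pair (so the low destination
   overlaps the high source) must be performed high part first; since the
   operands overlap and REGNO (operands[0]) > REGNO (operands[1]), the else
   branch above stores the components into OPERANDS in reverse order.  */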
20720
20721 /* Split operands into moves from op[1] + op[2] into op[0]. */
20722
20723 void
20724 neon_split_vcombine (rtx operands[3])
20725 {
20726 unsigned int dest = REGNO (operands[0]);
20727 unsigned int src1 = REGNO (operands[1]);
20728 unsigned int src2 = REGNO (operands[2]);
20729 enum machine_mode halfmode = GET_MODE (operands[1]);
20730 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
20731 rtx destlo, desthi;
20732
20733 if (src1 == dest && src2 == dest + halfregs)
20734 {
20735 /* No-op move. Can't split to nothing; emit something. */
20736 emit_note (NOTE_INSN_DELETED);
20737 return;
20738 }
20739
20740 /* Preserve register attributes for variable tracking. */
20741 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
20742 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
20743 GET_MODE_SIZE (halfmode));
20744
20745 /* Special case of reversed high/low parts. Use VSWP. */
20746 if (src2 == dest && src1 == dest + halfregs)
20747 {
20748 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
20749 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
20750 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
20751 return;
20752 }
20753
20754 if (!reg_overlap_mentioned_p (operands[2], destlo))
20755 {
20756 /* Try to avoid unnecessary moves if part of the result
20757 is in the right place already. */
20758 if (src1 != dest)
20759 emit_move_insn (destlo, operands[1]);
20760 if (src2 != dest + halfregs)
20761 emit_move_insn (desthi, operands[2]);
20762 }
20763 else
20764 {
20765 if (src2 != dest + halfregs)
20766 emit_move_insn (desthi, operands[2]);
20767 if (src1 != dest)
20768 emit_move_insn (destlo, operands[1]);
20769 }
20770 }
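
/* As examples of the cases above: combining d0 and d1 into q0 (where q0 is
   made up of exactly those two halves) needs no code beyond the deleted-insn
   note, while combining d1 and d0 into q0 has the halves swapped and is
   emitted as the two-set PARALLEL that matches VSWP.  */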
20771
20772 /* Expand an expression EXP that calls a built-in function,
20773 with result going to TARGET if that's convenient
20774 (and in mode MODE if that's convenient).
20775 SUBTARGET may be used as the target for computing one of EXP's operands.
20776 IGNORE is nonzero if the value is to be ignored. */
20777
20778 static rtx
20779 arm_expand_builtin (tree exp,
20780 rtx target,
20781 rtx subtarget ATTRIBUTE_UNUSED,
20782 enum machine_mode mode ATTRIBUTE_UNUSED,
20783 int ignore ATTRIBUTE_UNUSED)
20784 {
20785 const struct builtin_description * d;
20786 enum insn_code icode;
20787 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20788 tree arg0;
20789 tree arg1;
20790 tree arg2;
20791 rtx op0;
20792 rtx op1;
20793 rtx op2;
20794 rtx pat;
20795 int fcode = DECL_FUNCTION_CODE (fndecl);
20796 size_t i;
20797 enum machine_mode tmode;
20798 enum machine_mode mode0;
20799 enum machine_mode mode1;
20800 enum machine_mode mode2;
20801 int opint;
20802 int selector;
20803 int mask;
20804 int imm;
20805
20806 if (fcode >= ARM_BUILTIN_NEON_BASE)
20807 return arm_expand_neon_builtin (fcode, exp, target);
20808
20809 switch (fcode)
20810 {
20811 case ARM_BUILTIN_TEXTRMSB:
20812 case ARM_BUILTIN_TEXTRMUB:
20813 case ARM_BUILTIN_TEXTRMSH:
20814 case ARM_BUILTIN_TEXTRMUH:
20815 case ARM_BUILTIN_TEXTRMSW:
20816 case ARM_BUILTIN_TEXTRMUW:
20817 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
20818 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
20819 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
20820 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
20821 : CODE_FOR_iwmmxt_textrmw);
20822
20823 arg0 = CALL_EXPR_ARG (exp, 0);
20824 arg1 = CALL_EXPR_ARG (exp, 1);
20825 op0 = expand_normal (arg0);
20826 op1 = expand_normal (arg1);
20827 tmode = insn_data[icode].operand[0].mode;
20828 mode0 = insn_data[icode].operand[1].mode;
20829 mode1 = insn_data[icode].operand[2].mode;
20830
20831 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20832 op0 = copy_to_mode_reg (mode0, op0);
20833 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20834 {
20835 /* @@@ better error message */
20836 error ("selector must be an immediate");
20837 return gen_reg_rtx (tmode);
20838 }
20839
20840 opint = INTVAL (op1);
20841 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
20842 {
20843 if (opint > 7 || opint < 0)
20844 error ("the selector must be in the range 0 to 7");
20845 }
20846 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
20847 {
20848 if (opint > 3 || opint < 0)
20849 error ("the selector must be in the range 0 to 3");
20850 }
20851 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
20852 {
20853 if (opint > 1 || opint < 0)
20854 error ("the selector must be in the range 0 to 1");
20855 }
20856
20857 if (target == 0
20858 || GET_MODE (target) != tmode
20859 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20860 target = gen_reg_rtx (tmode);
20861 pat = GEN_FCN (icode) (target, op0, op1);
20862 if (! pat)
20863 return 0;
20864 emit_insn (pat);
20865 return target;
20866
20867 case ARM_BUILTIN_WALIGNI:
20868 /* If op2 is an immediate, call waligni, else call walignr. */
20869 arg0 = CALL_EXPR_ARG (exp, 0);
20870 arg1 = CALL_EXPR_ARG (exp, 1);
20871 arg2 = CALL_EXPR_ARG (exp, 2);
20872 op0 = expand_normal (arg0);
20873 op1 = expand_normal (arg1);
20874 op2 = expand_normal (arg2);
20875 if (GET_CODE (op2) == CONST_INT)
20876 {
20877 icode = CODE_FOR_iwmmxt_waligni;
20878 tmode = insn_data[icode].operand[0].mode;
20879 mode0 = insn_data[icode].operand[1].mode;
20880 mode1 = insn_data[icode].operand[2].mode;
20881 mode2 = insn_data[icode].operand[3].mode;
20882 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20883 op0 = copy_to_mode_reg (mode0, op0);
20884 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20885 op1 = copy_to_mode_reg (mode1, op1);
20886 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
20887 selector = INTVAL (op2);
20888 if (selector > 7 || selector < 0)
20889 error ("the selector must be in the range 0 to 7");
20890 }
20891 else
20892 {
20893 icode = CODE_FOR_iwmmxt_walignr;
20894 tmode = insn_data[icode].operand[0].mode;
20895 mode0 = insn_data[icode].operand[1].mode;
20896 mode1 = insn_data[icode].operand[2].mode;
20897 mode2 = insn_data[icode].operand[3].mode;
20898 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20899 op0 = copy_to_mode_reg (mode0, op0);
20900 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20901 op1 = copy_to_mode_reg (mode1, op1);
20902 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
20903 op2 = copy_to_mode_reg (mode2, op2);
20904 }
20905 if (target == 0
20906 || GET_MODE (target) != tmode
20907 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
20908 target = gen_reg_rtx (tmode);
20909 pat = GEN_FCN (icode) (target, op0, op1, op2);
20910 if (!pat)
20911 return 0;
20912 emit_insn (pat);
20913 return target;
20914
20915 case ARM_BUILTIN_TINSRB:
20916 case ARM_BUILTIN_TINSRH:
20917 case ARM_BUILTIN_TINSRW:
20918 case ARM_BUILTIN_WMERGE:
20919 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
20920 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
20921 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
20922 : CODE_FOR_iwmmxt_tinsrw);
20923 arg0 = CALL_EXPR_ARG (exp, 0);
20924 arg1 = CALL_EXPR_ARG (exp, 1);
20925 arg2 = CALL_EXPR_ARG (exp, 2);
20926 op0 = expand_normal (arg0);
20927 op1 = expand_normal (arg1);
20928 op2 = expand_normal (arg2);
20929 tmode = insn_data[icode].operand[0].mode;
20930 mode0 = insn_data[icode].operand[1].mode;
20931 mode1 = insn_data[icode].operand[2].mode;
20932 mode2 = insn_data[icode].operand[3].mode;
20933
20934 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20935 op0 = copy_to_mode_reg (mode0, op0);
20936 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20937 op1 = copy_to_mode_reg (mode1, op1);
20938 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
20939 {
20940 error ("selector must be an immediate");
20941 return const0_rtx;
20942 }
20943 if (icode == CODE_FOR_iwmmxt_wmerge)
20944 {
20945 selector = INTVAL (op2);
20946 if (selector > 7 || selector < 0)
20947 error ("the selector must be in the range 0 to 7");
20948 }
20949 if ((icode == CODE_FOR_iwmmxt_tinsrb)
20950 || (icode == CODE_FOR_iwmmxt_tinsrh)
20951 || (icode == CODE_FOR_iwmmxt_tinsrw))
20952 {
20953 mask = 0x01;
20954 selector = INTVAL (op2);
20955 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
20956 error ("the selector must be in the range 0 to 7");
20957 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
20958 error ("the selector must be in the range 0 to 3");
20959 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
20960 error ("the selector must be in the range 0 to 1");
20961 mask <<= selector;
20962 op2 = gen_rtx_CONST_INT (SImode, mask);
20963 }
20964 if (target == 0
20965 || GET_MODE (target) != tmode
20966 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20967 target = gen_reg_rtx (tmode);
20968 pat = GEN_FCN (icode) (target, op0, op1, op2);
20969 if (! pat)
20970 return 0;
20971 emit_insn (pat);
20972 return target;
20973
20974 case ARM_BUILTIN_SETWCGR0:
20975 case ARM_BUILTIN_SETWCGR1:
20976 case ARM_BUILTIN_SETWCGR2:
20977 case ARM_BUILTIN_SETWCGR3:
20978 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
20979 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
20980 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
20981 : CODE_FOR_iwmmxt_setwcgr3);
20982 arg0 = CALL_EXPR_ARG (exp, 0);
20983 op0 = expand_normal (arg0);
20984 mode0 = insn_data[icode].operand[0].mode;
20985 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
20986 op0 = copy_to_mode_reg (mode0, op0);
20987 pat = GEN_FCN (icode) (op0);
20988 if (!pat)
20989 return 0;
20990 emit_insn (pat);
20991 return 0;
20992
20993 case ARM_BUILTIN_GETWCGR0:
20994 case ARM_BUILTIN_GETWCGR1:
20995 case ARM_BUILTIN_GETWCGR2:
20996 case ARM_BUILTIN_GETWCGR3:
20997 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
20998 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
20999 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
21000 : CODE_FOR_iwmmxt_getwcgr3);
21001 tmode = insn_data[icode].operand[0].mode;
21002 if (target == 0
21003 || GET_MODE (target) != tmode
21004 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
21005 target = gen_reg_rtx (tmode);
21006 pat = GEN_FCN (icode) (target);
21007 if (!pat)
21008 return 0;
21009 emit_insn (pat);
21010 return target;
21011
21012 case ARM_BUILTIN_WSHUFH:
21013 icode = CODE_FOR_iwmmxt_wshufh;
21014 arg0 = CALL_EXPR_ARG (exp, 0);
21015 arg1 = CALL_EXPR_ARG (exp, 1);
21016 op0 = expand_normal (arg0);
21017 op1 = expand_normal (arg1);
21018 tmode = insn_data[icode].operand[0].mode;
21019 mode1 = insn_data[icode].operand[1].mode;
21020 mode2 = insn_data[icode].operand[2].mode;
21021
21022 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21023 op0 = copy_to_mode_reg (mode1, op0);
21024 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21025 {
21026 error ("mask must be an immediate");
21027 return const0_rtx;
21028 }
21029 selector = INTVAL (op1);
21030 if (selector < 0 || selector > 255)
21031 error ("the mask must be in the range 0 to 255");
21032 if (target == 0
21033 || GET_MODE (target) != tmode
21034 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21035 target = gen_reg_rtx (tmode);
21036 pat = GEN_FCN (icode) (target, op0, op1);
21037 if (! pat)
21038 return 0;
21039 emit_insn (pat);
21040 return target;
21041
21042 case ARM_BUILTIN_WMADDS:
21043 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
21044 case ARM_BUILTIN_WMADDSX:
21045 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
21046 case ARM_BUILTIN_WMADDSN:
21047 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
21048 case ARM_BUILTIN_WMADDU:
21049 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
21050 case ARM_BUILTIN_WMADDUX:
21051 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
21052 case ARM_BUILTIN_WMADDUN:
21053 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
21054 case ARM_BUILTIN_WSADBZ:
21055 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
21056 case ARM_BUILTIN_WSADHZ:
21057 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
21058
21059 /* Several three-argument builtins. */
21060 case ARM_BUILTIN_WMACS:
21061 case ARM_BUILTIN_WMACU:
21062 case ARM_BUILTIN_TMIA:
21063 case ARM_BUILTIN_TMIAPH:
21064 case ARM_BUILTIN_TMIATT:
21065 case ARM_BUILTIN_TMIATB:
21066 case ARM_BUILTIN_TMIABT:
21067 case ARM_BUILTIN_TMIABB:
21068 case ARM_BUILTIN_WQMIABB:
21069 case ARM_BUILTIN_WQMIABT:
21070 case ARM_BUILTIN_WQMIATB:
21071 case ARM_BUILTIN_WQMIATT:
21072 case ARM_BUILTIN_WQMIABBN:
21073 case ARM_BUILTIN_WQMIABTN:
21074 case ARM_BUILTIN_WQMIATBN:
21075 case ARM_BUILTIN_WQMIATTN:
21076 case ARM_BUILTIN_WMIABB:
21077 case ARM_BUILTIN_WMIABT:
21078 case ARM_BUILTIN_WMIATB:
21079 case ARM_BUILTIN_WMIATT:
21080 case ARM_BUILTIN_WMIABBN:
21081 case ARM_BUILTIN_WMIABTN:
21082 case ARM_BUILTIN_WMIATBN:
21083 case ARM_BUILTIN_WMIATTN:
21084 case ARM_BUILTIN_WMIAWBB:
21085 case ARM_BUILTIN_WMIAWBT:
21086 case ARM_BUILTIN_WMIAWTB:
21087 case ARM_BUILTIN_WMIAWTT:
21088 case ARM_BUILTIN_WMIAWBBN:
21089 case ARM_BUILTIN_WMIAWBTN:
21090 case ARM_BUILTIN_WMIAWTBN:
21091 case ARM_BUILTIN_WMIAWTTN:
21092 case ARM_BUILTIN_WSADB:
21093 case ARM_BUILTIN_WSADH:
21094 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
21095 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
21096 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
21097 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
21098 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
21099 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
21100 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
21101 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
21102 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
21103 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
21104 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
21105 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
21106 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
21107 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
21108 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
21109 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
21110 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
21111 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
21112 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
21113 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
21114 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
21115 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
21116 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
21117 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
21118 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
21119 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
21120 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
21121 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
21122 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
21123 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
21124 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
21125 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
21126 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
21127 : CODE_FOR_iwmmxt_wsadh);
21128 arg0 = CALL_EXPR_ARG (exp, 0);
21129 arg1 = CALL_EXPR_ARG (exp, 1);
21130 arg2 = CALL_EXPR_ARG (exp, 2);
21131 op0 = expand_normal (arg0);
21132 op1 = expand_normal (arg1);
21133 op2 = expand_normal (arg2);
21134 tmode = insn_data[icode].operand[0].mode;
21135 mode0 = insn_data[icode].operand[1].mode;
21136 mode1 = insn_data[icode].operand[2].mode;
21137 mode2 = insn_data[icode].operand[3].mode;
21138
21139 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
21140 op0 = copy_to_mode_reg (mode0, op0);
21141 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
21142 op1 = copy_to_mode_reg (mode1, op1);
21143 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
21144 op2 = copy_to_mode_reg (mode2, op2);
21145 if (target == 0
21146 || GET_MODE (target) != tmode
21147 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21148 target = gen_reg_rtx (tmode);
21149 pat = GEN_FCN (icode) (target, op0, op1, op2);
21150 if (! pat)
21151 return 0;
21152 emit_insn (pat);
21153 return target;
21154
21155 case ARM_BUILTIN_WZERO:
21156 target = gen_reg_rtx (DImode);
21157 emit_insn (gen_iwmmxt_clrdi (target));
21158 return target;
21159
21160 case ARM_BUILTIN_WSRLHI:
21161 case ARM_BUILTIN_WSRLWI:
21162 case ARM_BUILTIN_WSRLDI:
21163 case ARM_BUILTIN_WSLLHI:
21164 case ARM_BUILTIN_WSLLWI:
21165 case ARM_BUILTIN_WSLLDI:
21166 case ARM_BUILTIN_WSRAHI:
21167 case ARM_BUILTIN_WSRAWI:
21168 case ARM_BUILTIN_WSRADI:
21169 case ARM_BUILTIN_WRORHI:
21170 case ARM_BUILTIN_WRORWI:
21171 case ARM_BUILTIN_WRORDI:
21172 case ARM_BUILTIN_WSRLH:
21173 case ARM_BUILTIN_WSRLW:
21174 case ARM_BUILTIN_WSRLD:
21175 case ARM_BUILTIN_WSLLH:
21176 case ARM_BUILTIN_WSLLW:
21177 case ARM_BUILTIN_WSLLD:
21178 case ARM_BUILTIN_WSRAH:
21179 case ARM_BUILTIN_WSRAW:
21180 case ARM_BUILTIN_WSRAD:
21181 case ARM_BUILTIN_WRORH:
21182 case ARM_BUILTIN_WRORW:
21183 case ARM_BUILTIN_WRORD:
21184 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
21185 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
21186 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
21187 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
21188 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
21189 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
21190 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
21191 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
21192 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
21193 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
21194 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
21195 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
21196 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
21197 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
21198 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
21199 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
21200 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
21201 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
21202 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
21203 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
21204 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
21205 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
21206 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
21207 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
21208 : CODE_FOR_nothing);
21209 arg1 = CALL_EXPR_ARG (exp, 1);
21210 op1 = expand_normal (arg1);
21211 if (GET_MODE (op1) == VOIDmode)
21212 {
21213 imm = INTVAL (op1);
21214 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
21215 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
21216 && (imm < 0 || imm > 32))
21217 {
21218 if (fcode == ARM_BUILTIN_WRORHI)
21219 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi16");
21220 else if (fcode == ARM_BUILTIN_WRORWI)
21221 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_rori_pi32");
21222 else if (fcode == ARM_BUILTIN_WRORH)
21223 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi16");
21224 else
21225 error ("the count must be in the range 0 to 32; please check the intrinsic _mm_ror_pi32");
21226 }
21227 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
21228 && (imm < 0 || imm > 64))
21229 {
21230 if (fcode == ARM_BUILTIN_WRORDI)
21231 error ("the count must be in the range 0 to 64; please check the intrinsic _mm_rori_si64");
21232 else
21233 error ("the count must be in the range 0 to 64; please check the intrinsic _mm_ror_si64");
21234 }
21235 else if (imm < 0)
21236 {
21237 if (fcode == ARM_BUILTIN_WSRLHI)
21238 error ("the count must be no less than 0; please check the intrinsic _mm_srli_pi16");
21239 else if (fcode == ARM_BUILTIN_WSRLWI)
21240 error ("the count must be no less than 0; please check the intrinsic _mm_srli_pi32");
21241 else if (fcode == ARM_BUILTIN_WSRLDI)
21242 error ("the count must be no less than 0; please check the intrinsic _mm_srli_si64");
21243 else if (fcode == ARM_BUILTIN_WSLLHI)
21244 error ("the count must be no less than 0; please check the intrinsic _mm_slli_pi16");
21245 else if (fcode == ARM_BUILTIN_WSLLWI)
21246 error ("the count must be no less than 0; please check the intrinsic _mm_slli_pi32");
21247 else if (fcode == ARM_BUILTIN_WSLLDI)
21248 error ("the count must be no less than 0; please check the intrinsic _mm_slli_si64");
21249 else if (fcode == ARM_BUILTIN_WSRAHI)
21250 error ("the count must be no less than 0; please check the intrinsic _mm_srai_pi16");
21251 else if (fcode == ARM_BUILTIN_WSRAWI)
21252 error ("the count must be no less than 0; please check the intrinsic _mm_srai_pi32");
21253 else if (fcode == ARM_BUILTIN_WSRADI)
21254 error ("the count must be no less than 0; please check the intrinsic _mm_srai_si64");
21255 else if (fcode == ARM_BUILTIN_WSRLH)
21256 error ("the count must be no less than 0; please check the intrinsic _mm_srl_pi16");
21257 else if (fcode == ARM_BUILTIN_WSRLW)
21258 error ("the count must be no less than 0; please check the intrinsic _mm_srl_pi32");
21259 else if (fcode == ARM_BUILTIN_WSRLD)
21260 error ("the count must be no less than 0; please check the intrinsic _mm_srl_si64");
21261 else if (fcode == ARM_BUILTIN_WSLLH)
21262 error ("the count must be no less than 0; please check the intrinsic _mm_sll_pi16");
21263 else if (fcode == ARM_BUILTIN_WSLLW)
21264 error ("the count must be no less than 0; please check the intrinsic _mm_sll_pi32");
21265 else if (fcode == ARM_BUILTIN_WSLLD)
21266 error ("the count must be no less than 0; please check the intrinsic _mm_sll_si64");
21267 else if (fcode == ARM_BUILTIN_WSRAH)
21268 error ("the count must be no less than 0; please check the intrinsic _mm_sra_pi16");
21269 else if (fcode == ARM_BUILTIN_WSRAW)
21270 error ("the count must be no less than 0; please check the intrinsic _mm_sra_pi32");
21271 else
21272 error ("the count must be no less than 0; please check the intrinsic _mm_sra_si64");
21273 }
21274 }
21275 return arm_expand_binop_builtin (icode, exp, target);
21276
21277 case ARM_BUILTIN_THREAD_POINTER:
21278 return arm_load_tp (target);
21279
21280 default:
21281 break;
21282 }
21283
21284 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21285 if (d->code == (const enum arm_builtins) fcode)
21286 return arm_expand_binop_builtin (d->icode, exp, target);
21287
21288 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21289 if (d->code == (const enum arm_builtins) fcode)
21290 return arm_expand_unop_builtin (d->icode, exp, target, 0);
21291
21292 /* @@@ Should really do something sensible here. */
21293 return NULL_RTX;
21294 }
21295 \f
21296 /* Return the number (counting from 0) of
21297 the least significant set bit in MASK. */
21298
21299 inline static int
21300 number_of_first_bit_set (unsigned mask)
21301 {
21302 return ctz_hwi (mask);
21303 }
21304
21305 /* Like emit_multi_reg_push, but allowing for a different set of
21306 registers to be described as saved. MASK is the set of registers
21307 to be saved; REAL_REGS is the set of registers to be described as
21308 saved. If REAL_REGS is 0, only describe the stack adjustment. */
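
/* For example, a Thumb-1 prologue that needs to save a high register such
   as r8 can move it into a free low register first and push that low
   register: MASK then contains the low register actually pushed while
   REAL_REGS contains r8, so the unwind information still records r8 as the
   register saved in that stack slot.  */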
21309
21310 static rtx
21311 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
21312 {
21313 unsigned long regno;
21314 rtx par[10], tmp, reg, insn;
21315 int i, j;
21316
21317 /* Build the parallel of the registers actually being stored. */
21318 for (i = 0; mask; ++i, mask &= mask - 1)
21319 {
21320 regno = ctz_hwi (mask);
21321 reg = gen_rtx_REG (SImode, regno);
21322
21323 if (i == 0)
21324 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
21325 else
21326 tmp = gen_rtx_USE (VOIDmode, reg);
21327
21328 par[i] = tmp;
21329 }
21330
21331 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21332 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
21333 tmp = gen_frame_mem (BLKmode, tmp);
21334 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
21335 par[0] = tmp;
21336
21337 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
21338 insn = emit_insn (tmp);
21339
21340 /* Always build the stack adjustment note for unwind info. */
21341 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
21342 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
21343 par[0] = tmp;
21344
21345 /* Build the parallel of the registers recorded as saved for unwind. */
21346 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
21347 {
21348 regno = ctz_hwi (real_regs);
21349 reg = gen_rtx_REG (SImode, regno);
21350
21351 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
21352 tmp = gen_frame_mem (SImode, tmp);
21353 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
21354 RTX_FRAME_RELATED_P (tmp) = 1;
21355 par[j + 1] = tmp;
21356 }
21357
21358 if (j == 0)
21359 tmp = par[0];
21360 else
21361 {
21362 RTX_FRAME_RELATED_P (par[0]) = 1;
21363 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
21364 }
21365
21366 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
21367
21368 return insn;
21369 }
21370
21371 /* Emit code to push or pop registers to or from the stack. F is the
21372 assembly file. MASK is the registers to pop. */
21373 static void
21374 thumb_pop (FILE *f, unsigned long mask)
21375 {
21376 int regno;
21377 int lo_mask = mask & 0xFF;
21378 int pushed_words = 0;
21379
21380 gcc_assert (mask);
21381
21382 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
21383 {
21384 /* Special case. Do not generate a POP PC statement here; do it in
21385 thumb_exit ().  */
21386 thumb_exit (f, -1);
21387 return;
21388 }
21389
21390 fprintf (f, "\tpop\t{");
21391
21392 /* Look at the low registers first. */
21393 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
21394 {
21395 if (lo_mask & 1)
21396 {
21397 asm_fprintf (f, "%r", regno);
21398
21399 if ((lo_mask & ~1) != 0)
21400 fprintf (f, ", ");
21401
21402 pushed_words++;
21403 }
21404 }
21405
21406 if (mask & (1 << PC_REGNUM))
21407 {
21408 /* Catch popping the PC. */
21409 if (TARGET_INTERWORK || TARGET_BACKTRACE
21410 || crtl->calls_eh_return)
21411 {
21412 /* The PC is never popped directly; instead
21413 it is popped into r3 and then BX is used. */
21414 fprintf (f, "}\n");
21415
21416 thumb_exit (f, -1);
21417
21418 return;
21419 }
21420 else
21421 {
21422 if (mask & 0xFF)
21423 fprintf (f, ", ");
21424
21425 asm_fprintf (f, "%r", PC_REGNUM);
21426 }
21427 }
21428
21429 fprintf (f, "}\n");
21430 }
21431
21432 /* Generate code to return from a thumb function.
21433 If 'reg_containing_return_addr' is -1, then the return address is
21434 actually on the stack, at the stack pointer. */
21435 static void
21436 thumb_exit (FILE *f, int reg_containing_return_addr)
21437 {
21438 unsigned regs_available_for_popping;
21439 unsigned regs_to_pop;
21440 int pops_needed;
21441 unsigned available;
21442 unsigned required;
21443 int mode;
21444 int size;
21445 int restore_a4 = FALSE;
21446
21447 /* Compute the registers we need to pop. */
21448 regs_to_pop = 0;
21449 pops_needed = 0;
21450
21451 if (reg_containing_return_addr == -1)
21452 {
21453 regs_to_pop |= 1 << LR_REGNUM;
21454 ++pops_needed;
21455 }
21456
21457 if (TARGET_BACKTRACE)
21458 {
21459 /* Restore the (ARM) frame pointer and stack pointer. */
21460 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
21461 pops_needed += 2;
21462 }
21463
21464 /* If there is nothing to pop then just emit the BX instruction and
21465 return. */
21466 if (pops_needed == 0)
21467 {
21468 if (crtl->calls_eh_return)
21469 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21470
21471 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21472 return;
21473 }
21474 /* Otherwise if we are not supporting interworking and we have not created
21475 a backtrace structure and the function was not entered in ARM mode then
21476 just pop the return address straight into the PC. */
21477 else if (!TARGET_INTERWORK
21478 && !TARGET_BACKTRACE
21479 && !is_called_in_ARM_mode (current_function_decl)
21480 && !crtl->calls_eh_return)
21481 {
21482 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
21483 return;
21484 }
21485
21486 /* Find out how many of the (return) argument registers we can corrupt. */
21487 regs_available_for_popping = 0;
21488
21489 /* If returning via __builtin_eh_return, the bottom three registers
21490 all contain information needed for the return. */
21491 if (crtl->calls_eh_return)
21492 size = 12;
21493 else
21494 {
21495 /* We can deduce the registers used from the function's
21496 return value.  This is more reliable than examining
21497 df_regs_ever_live_p () because that will be set if the register is
21498 ever used in the function, not just if the register is used
21499 to hold a return value. */
21500
21501 if (crtl->return_rtx != 0)
21502 mode = GET_MODE (crtl->return_rtx);
21503 else
21504 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21505
21506 size = GET_MODE_SIZE (mode);
21507
21508 if (size == 0)
21509 {
21510 /* In a void function we can use any argument register.
21511 In a function that returns a structure on the stack
21512 we can use the second and third argument registers. */
21513 if (mode == VOIDmode)
21514 regs_available_for_popping =
21515 (1 << ARG_REGISTER (1))
21516 | (1 << ARG_REGISTER (2))
21517 | (1 << ARG_REGISTER (3));
21518 else
21519 regs_available_for_popping =
21520 (1 << ARG_REGISTER (2))
21521 | (1 << ARG_REGISTER (3));
21522 }
21523 else if (size <= 4)
21524 regs_available_for_popping =
21525 (1 << ARG_REGISTER (2))
21526 | (1 << ARG_REGISTER (3));
21527 else if (size <= 8)
21528 regs_available_for_popping =
21529 (1 << ARG_REGISTER (3));
21530 }
21531
21532 /* Match registers to be popped with registers into which we pop them. */
21533 for (available = regs_available_for_popping,
21534 required = regs_to_pop;
21535 required != 0 && available != 0;
21536 available &= ~(available & - available),
21537 required &= ~(required & - required))
21538 -- pops_needed;
21539
21540 /* If we have any popping registers left over, remove them. */
21541 if (available > 0)
21542 regs_available_for_popping &= ~available;
21543
21544 /* Otherwise if we need another popping register we can use
21545 the fourth argument register. */
21546 else if (pops_needed)
21547 {
21548 /* If we have not found any free argument registers and
21549 reg a4 contains the return address, we must move it. */
21550 if (regs_available_for_popping == 0
21551 && reg_containing_return_addr == LAST_ARG_REGNUM)
21552 {
21553 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21554 reg_containing_return_addr = LR_REGNUM;
21555 }
21556 else if (size > 12)
21557 {
21558 /* Register a4 is being used to hold part of the return value,
21559 but we have dire need of a free, low register. */
21560 restore_a4 = TRUE;
21561
21562 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
21563 }
21564
21565 if (reg_containing_return_addr != LAST_ARG_REGNUM)
21566 {
21567 /* The fourth argument register is available. */
21568 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
21569
21570 --pops_needed;
21571 }
21572 }
21573
21574 /* Pop as many registers as we can. */
21575 thumb_pop (f, regs_available_for_popping);
21576
21577 /* Process the registers we popped. */
21578 if (reg_containing_return_addr == -1)
21579 {
21580 /* The return address was popped into the lowest numbered register. */
21581 regs_to_pop &= ~(1 << LR_REGNUM);
21582
21583 reg_containing_return_addr =
21584 number_of_first_bit_set (regs_available_for_popping);
21585
21586 /* Remove this register from the mask of available registers, so that
21587 the return address will not be corrupted by further pops. */
21588 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
21589 }
21590
21591 /* If we popped other registers then handle them here. */
21592 if (regs_available_for_popping)
21593 {
21594 int frame_pointer;
21595
21596 /* Work out which register currently contains the frame pointer. */
21597 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
21598
21599 /* Move it into the correct place. */
21600 asm_fprintf (f, "\tmov\t%r, %r\n",
21601 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
21602
21603 /* (Temporarily) remove it from the mask of popped registers. */
21604 regs_available_for_popping &= ~(1 << frame_pointer);
21605 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
21606
21607 if (regs_available_for_popping)
21608 {
21609 int stack_pointer;
21610
21611 /* We popped the stack pointer as well,
21612 find the register that contains it. */
21613 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
21614
21615 /* Move it into the stack register. */
21616 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
21617
21618 /* At this point we have popped all necessary registers, so
21619 do not worry about restoring regs_available_for_popping
21620 to its correct value:
21621
21622 assert (pops_needed == 0)
21623 assert (regs_available_for_popping == (1 << frame_pointer))
21624 assert (regs_to_pop == (1 << STACK_POINTER)) */
21625 }
21626 else
21627 {
21628 /* Since we have just moved the popped value into the frame
21629 pointer, the popping register is available for reuse, and
21630 we know that we still have the stack pointer left to pop. */
21631 regs_available_for_popping |= (1 << frame_pointer);
21632 }
21633 }
21634
21635 /* If we still have registers left on the stack, but we no longer have
21636 any registers into which we can pop them, then we must move the return
21637 address into the link register and make available the register that
21638 contained it. */
21639 if (regs_available_for_popping == 0 && pops_needed > 0)
21640 {
21641 regs_available_for_popping |= 1 << reg_containing_return_addr;
21642
21643 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
21644 reg_containing_return_addr);
21645
21646 reg_containing_return_addr = LR_REGNUM;
21647 }
21648
21649 /* If we have registers left on the stack then pop some more.
21650 We know that at most we will want to pop FP and SP. */
21651 if (pops_needed > 0)
21652 {
21653 int popped_into;
21654 int move_to;
21655
21656 thumb_pop (f, regs_available_for_popping);
21657
21658 /* We have popped either FP or SP.
21659 Move whichever one it is into the correct register. */
21660 popped_into = number_of_first_bit_set (regs_available_for_popping);
21661 move_to = number_of_first_bit_set (regs_to_pop);
21662
21663 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
21664
21665 regs_to_pop &= ~(1 << move_to);
21666
21667 --pops_needed;
21668 }
21669
21670 /* If we still have not popped everything then we must have only
21671 had one register available to us and we are now popping the SP. */
21672 if (pops_needed > 0)
21673 {
21674 int popped_into;
21675
21676 thumb_pop (f, regs_available_for_popping);
21677
21678 popped_into = number_of_first_bit_set (regs_available_for_popping);
21679
21680 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
21681 /*
21682 assert (regs_to_pop == (1 << STACK_POINTER))
21683 assert (pops_needed == 1)
21684 */
21685 }
21686
21687 /* If necessary restore the a4 register. */
21688 if (restore_a4)
21689 {
21690 if (reg_containing_return_addr != LR_REGNUM)
21691 {
21692 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
21693 reg_containing_return_addr = LR_REGNUM;
21694 }
21695
21696 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
21697 }
21698
21699 if (crtl->calls_eh_return)
21700 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
21701
21702 /* Return to caller. */
21703 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
21704 }
21705 \f
21706 /* Scan INSN just before assembler is output for it.
21707 For Thumb-1, we track the status of the condition codes; this
21708 information is used in the cbranchsi4_insn pattern. */
21709 void
21710 thumb1_final_prescan_insn (rtx insn)
21711 {
21712 if (flag_print_asm_name)
21713 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
21714 INSN_ADDRESSES (INSN_UID (insn)));
21715 /* Don't overwrite the previous setter when we get to a cbranch. */
21716 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
21717 {
21718 enum attr_conds conds;
21719
21720 if (cfun->machine->thumb1_cc_insn)
21721 {
21722 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
21723 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
21724 CC_STATUS_INIT;
21725 }
21726 conds = get_attr_conds (insn);
21727 if (conds == CONDS_SET)
21728 {
21729 rtx set = single_set (insn);
21730 cfun->machine->thumb1_cc_insn = insn;
21731 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
21732 cfun->machine->thumb1_cc_op1 = const0_rtx;
21733 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
21734 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
21735 {
21736 rtx src1 = XEXP (SET_SRC (set), 1);
21737 if (src1 == const0_rtx)
21738 cfun->machine->thumb1_cc_mode = CCmode;
21739 }
21740 }
21741 else if (conds != CONDS_NOCOND)
21742 cfun->machine->thumb1_cc_insn = NULL_RTX;
21743 }
21744 }
21745
21746 int
21747 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
21748 {
21749 unsigned HOST_WIDE_INT mask = 0xff;
21750 int i;
21751
21752 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
21753 if (val == 0) /* XXX */
21754 return 0;
21755
21756 for (i = 0; i < 25; i++)
21757 if ((val & (mask << i)) == val)
21758 return 1;
21759
21760 return 0;
21761 }
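
/* For illustration: the test above accepts any constant whose set bits
   fit within one byte shifted left by 0..24 places; e.g. 0x000000ff,
   0x0000ff00 and 0x00ff0000 all return 1, while 0x00000101 (bits spread
   over nine positions) and 0xff000001 return 0.  */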
21762
21763 /* Returns nonzero if the current function contains,
21764 or might contain, a far jump. */
21765 static int
21766 thumb_far_jump_used_p (void)
21767 {
21768 rtx insn;
21769
21770 /* This test is only important for leaf functions. */
21771 /* assert (!leaf_function_p ()); */
21772
21773 /* If we have already decided that far jumps may be used,
21774 do not bother checking again, and always return true even if
21775 it turns out that they are not being used. Once we have made
21776 the decision that far jumps are present (and that hence the link
21777 register will be pushed onto the stack) we cannot go back on it. */
21778 if (cfun->machine->far_jump_used)
21779 return 1;
21780
21781 /* If this function is not being called from the prologue/epilogue
21782 generation code then it must be being called from the
21783 INITIAL_ELIMINATION_OFFSET macro. */
21784 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
21785 {
21786 /* In this case we know that we are being asked about the elimination
21787 of the arg pointer register. If that register is not being used,
21788 then there are no arguments on the stack, and we do not have to
21789 worry that a far jump might force the prologue to push the link
21790 register, changing the stack offsets. In this case we can just
21791 return false, since the presence of far jumps in the function will
21792 not affect stack offsets.
21793
21794 If the arg pointer is live (or if it was live, but has now been
21795 eliminated and so set to dead) then we do have to test to see if
21796 the function might contain a far jump.  This test can lead to some
21797 false negatives, since before reload is completed the length of
21798 branch instructions is not known, so gcc defaults to returning their
21799 longest length, which in turn sets the far jump attribute to true.
21800
21801 A false negative will not result in bad code being generated, but it
21802 will result in a needless push and pop of the link register. We
21803 hope that this does not occur too often.
21804
21805 If we need doubleword stack alignment this could affect the other
21806 elimination offsets so we can't risk getting it wrong. */
21807 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
21808 cfun->machine->arg_pointer_live = 1;
21809 else if (!cfun->machine->arg_pointer_live)
21810 return 0;
21811 }
21812
21813 /* Check to see if the function contains a branch
21814 insn with the far jump attribute set. */
21815 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21816 {
21817 if (GET_CODE (insn) == JUMP_INSN
21818 /* Ignore tablejump patterns. */
21819 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21820 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
21821 && get_attr_far_jump (insn) == FAR_JUMP_YES
21822 )
21823 {
21824 /* Record the fact that we have decided that
21825 the function does use far jumps. */
21826 cfun->machine->far_jump_used = 1;
21827 return 1;
21828 }
21829 }
21830
21831 return 0;
21832 }
21833
21834 /* Return nonzero if FUNC must be entered in ARM mode. */
21835 int
21836 is_called_in_ARM_mode (tree func)
21837 {
21838 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
21839
21840 /* Ignore the problem about functions whose address is taken. */
21841 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
21842 return TRUE;
21843
21844 #ifdef ARM_PE
21845 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
21846 #else
21847 return FALSE;
21848 #endif
21849 }
21850
21851 /* Given the stack offsets and register mask in OFFSETS, decide how
21852 many additional registers to push instead of subtracting a constant
21853 from SP. For epilogues the principle is the same except we use pop.
21854 FOR_PROLOGUE indicates which we're generating. */
21855 static int
21856 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
21857 {
21858 HOST_WIDE_INT amount;
21859 unsigned long live_regs_mask = offsets->saved_regs_mask;
21860 /* Extract a mask of the ones we can give to the Thumb's push/pop
21861 instruction. */
21862 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
21863 /* Then count how many other high registers will need to be pushed. */
21864 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21865 int n_free, reg_base;
21866
21867 if (!for_prologue && frame_pointer_needed)
21868 amount = offsets->locals_base - offsets->saved_regs;
21869 else
21870 amount = offsets->outgoing_args - offsets->saved_regs;
21871
21872 /* If the stack frame size is 512 exactly, we can save one load
21873 instruction, which should make this a win even when optimizing
21874 for speed. */
21875 if (!optimize_size && amount != 512)
21876 return 0;
21877
21878 /* Can't do this if there are high registers to push. */
21879 if (high_regs_pushed != 0)
21880 return 0;
21881
21882 /* Shouldn't do it in the prologue if no registers would normally
21883 be pushed at all. In the epilogue, also allow it if we'll have
21884 a pop insn for the PC. */
21885 if (l_mask == 0
21886 && (for_prologue
21887 || TARGET_BACKTRACE
21888 || (live_regs_mask & 1 << LR_REGNUM) == 0
21889 || TARGET_INTERWORK
21890 || crtl->args.pretend_args_size != 0))
21891 return 0;
21892
21893 /* Don't do this if thumb_expand_prologue wants to emit instructions
21894 between the push and the stack frame allocation. */
21895 if (for_prologue
21896 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
21897 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
21898 return 0;
21899
21900 reg_base = 0;
21901 n_free = 0;
21902 if (!for_prologue)
21903 {
21904 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
21905 live_regs_mask >>= reg_base;
21906 }
21907
21908 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
21909 && (for_prologue || call_used_regs[reg_base + n_free]))
21910 {
21911 live_regs_mask >>= 1;
21912 n_free++;
21913 }
21914
21915 if (n_free == 0)
21916 return 0;
21917 gcc_assert (amount / 4 * 4 == amount);
21918
21919 if (amount >= 512 && (amount - n_free * 4) < 512)
21920 return (amount - 508) / 4;
21921 if (amount <= n_free * 4)
21922 return amount / 4;
21923 return 0;
21924 }
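
/* Worked example (a sketch, using hypothetical values): when optimizing
   for size with amount == 516 and three free low registers (n_free == 3),
   516 - 12 == 504 < 512, so the function returns (516 - 508) / 4 == 2:
   pushing two extra registers shrinks the SP adjustment to 508, which
   fits in a single Thumb SP-adjust instruction (limit just under 512).  */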
21925
21926 /* The bits which aren't usefully expanded as rtl. */
21927 const char *
21928 thumb1_unexpanded_epilogue (void)
21929 {
21930 arm_stack_offsets *offsets;
21931 int regno;
21932 unsigned long live_regs_mask = 0;
21933 int high_regs_pushed = 0;
21934 int extra_pop;
21935 int had_to_push_lr;
21936 int size;
21937
21938 if (cfun->machine->return_used_this_function != 0)
21939 return "";
21940
21941 if (IS_NAKED (arm_current_func_type ()))
21942 return "";
21943
21944 offsets = arm_get_frame_offsets ();
21945 live_regs_mask = offsets->saved_regs_mask;
21946 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
21947
21948 /* We can deduce the registers used from the function's return value.
21949 This is more reliable than examining df_regs_ever_live_p () because that
21950 will be set if the register is ever used in the function, not just if
21951 the register is used to hold a return value. */
21952 size = arm_size_return_regs ();
21953
21954 extra_pop = thumb1_extra_regs_pushed (offsets, false);
21955 if (extra_pop > 0)
21956 {
21957 unsigned long extra_mask = (1 << extra_pop) - 1;
21958 live_regs_mask |= extra_mask << ((size + UNITS_PER_WORD - 1)
21959 / UNITS_PER_WORD);
21960 }
21961
21962 /* The prolog may have pushed some high registers to use as
21963 work registers. e.g. the testsuite file:
21964 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
21965 compiles to produce:
21966 push {r4, r5, r6, r7, lr}
21967 mov r7, r9
21968 mov r6, r8
21969 push {r6, r7}
21970 as part of the prolog. We have to undo that pushing here. */
21971
21972 if (high_regs_pushed)
21973 {
21974 unsigned long mask = live_regs_mask & 0xff;
21975 int next_hi_reg;
21976
21977 /* The available low registers depend on the size of the value we are
21978 returning. */
21979 if (size <= 12)
21980 mask |= 1 << 3;
21981 if (size <= 8)
21982 mask |= 1 << 2;
21983
21984 if (mask == 0)
21985 /* Oh dear! We have no low registers into which we can pop
21986 high registers! */
21987 internal_error
21988 ("no low registers available for popping high registers");
21989
21990 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
21991 if (live_regs_mask & (1 << next_hi_reg))
21992 break;
21993
21994 while (high_regs_pushed)
21995 {
21996 /* Find lo register(s) into which the high register(s) can
21997 be popped. */
21998 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
21999 {
22000 if (mask & (1 << regno))
22001 high_regs_pushed--;
22002 if (high_regs_pushed == 0)
22003 break;
22004 }
22005
22006 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
22007
22008 /* Pop the values into the low register(s). */
22009 thumb_pop (asm_out_file, mask);
22010
22011 /* Move the value(s) into the high registers. */
22012 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
22013 {
22014 if (mask & (1 << regno))
22015 {
22016 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
22017 regno);
22018
22019 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
22020 if (live_regs_mask & (1 << next_hi_reg))
22021 break;
22022 }
22023 }
22024 }
22025 live_regs_mask &= ~0x0f00;
22026 }
22027
22028 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
22029 live_regs_mask &= 0xff;
22030
22031 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
22032 {
22033 /* Pop the return address into the PC. */
22034 if (had_to_push_lr)
22035 live_regs_mask |= 1 << PC_REGNUM;
22036
22037 /* Either no argument registers were pushed or a backtrace
22038 structure was created which includes an adjusted stack
22039 pointer, so just pop everything. */
22040 if (live_regs_mask)
22041 thumb_pop (asm_out_file, live_regs_mask);
22042
22043 /* We have either just popped the return address into the
22044 PC or it was kept in LR for the entire function.
22045 Note that thumb_pop has already called thumb_exit if the
22046 PC was in the list. */
22047 if (!had_to_push_lr)
22048 thumb_exit (asm_out_file, LR_REGNUM);
22049 }
22050 else
22051 {
22052 /* Pop everything but the return address. */
22053 if (live_regs_mask)
22054 thumb_pop (asm_out_file, live_regs_mask);
22055
22056 if (had_to_push_lr)
22057 {
22058 if (size > 12)
22059 {
22060 /* We have no free low regs, so save one. */
22061 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
22062 LAST_ARG_REGNUM);
22063 }
22064
22065 /* Get the return address into a temporary register. */
22066 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
22067
22068 if (size > 12)
22069 {
22070 /* Move the return address to lr. */
22071 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
22072 LAST_ARG_REGNUM);
22073 /* Restore the low register. */
22074 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
22075 IP_REGNUM);
22076 regno = LR_REGNUM;
22077 }
22078 else
22079 regno = LAST_ARG_REGNUM;
22080 }
22081 else
22082 regno = LR_REGNUM;
22083
22084 /* Remove the argument registers that were pushed onto the stack. */
22085 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
22086 SP_REGNUM, SP_REGNUM,
22087 crtl->args.pretend_args_size);
22088
22089 thumb_exit (asm_out_file, regno);
22090 }
22091
22092 return "";
22093 }
22094
22095 /* Functions to save and restore machine-specific function data. */
22096 static struct machine_function *
22097 arm_init_machine_status (void)
22098 {
22099 struct machine_function *machine;
22100 machine = ggc_alloc_cleared_machine_function ();
22101
22102 #if ARM_FT_UNKNOWN != 0
22103 machine->func_type = ARM_FT_UNKNOWN;
22104 #endif
22105 return machine;
22106 }
22107
22108 /* Return an RTX indicating where the return address to the
22109 calling function can be found. */
22110 rtx
22111 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
22112 {
22113 if (count != 0)
22114 return NULL_RTX;
22115
22116 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
22117 }
22118
22119 /* Do anything needed before RTL is emitted for each function. */
22120 void
22121 arm_init_expanders (void)
22122 {
22123 /* Arrange to initialize and mark the machine per-function status. */
22124 init_machine_status = arm_init_machine_status;
22125
22126 /* This is to stop the combine pass optimizing away the alignment
22127 adjustment of va_arg. */
22128 /* ??? It is claimed that this should not be necessary. */
22129 if (cfun)
22130 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
22131 }
22132
22133
22134 /* Like arm_compute_initial_elimination_offset.  Simpler because there
22135 is no ABI-specified frame pointer for Thumb.  Instead, we set it
22136 to point at the base of the local variables after static stack
22137 space for a function has been allocated. */
22138
22139 HOST_WIDE_INT
22140 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
22141 {
22142 arm_stack_offsets *offsets;
22143
22144 offsets = arm_get_frame_offsets ();
22145
22146 switch (from)
22147 {
22148 case ARG_POINTER_REGNUM:
22149 switch (to)
22150 {
22151 case STACK_POINTER_REGNUM:
22152 return offsets->outgoing_args - offsets->saved_args;
22153
22154 case FRAME_POINTER_REGNUM:
22155 return offsets->soft_frame - offsets->saved_args;
22156
22157 case ARM_HARD_FRAME_POINTER_REGNUM:
22158 return offsets->saved_regs - offsets->saved_args;
22159
22160 case THUMB_HARD_FRAME_POINTER_REGNUM:
22161 return offsets->locals_base - offsets->saved_args;
22162
22163 default:
22164 gcc_unreachable ();
22165 }
22166 break;
22167
22168 case FRAME_POINTER_REGNUM:
22169 switch (to)
22170 {
22171 case STACK_POINTER_REGNUM:
22172 return offsets->outgoing_args - offsets->soft_frame;
22173
22174 case ARM_HARD_FRAME_POINTER_REGNUM:
22175 return offsets->saved_regs - offsets->soft_frame;
22176
22177 case THUMB_HARD_FRAME_POINTER_REGNUM:
22178 return offsets->locals_base - offsets->soft_frame;
22179
22180 default:
22181 gcc_unreachable ();
22182 }
22183 break;
22184
22185 default:
22186 gcc_unreachable ();
22187 }
22188 }
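
/* For illustration: eliminating ARG_POINTER_REGNUM in favour of
   STACK_POINTER_REGNUM yields offsets->outgoing_args - offsets->saved_args,
   i.e. the full distance from the incoming arguments down to the
   outgoing-argument area, while eliminating it in favour of
   THUMB_HARD_FRAME_POINTER_REGNUM yields only
   offsets->locals_base - offsets->saved_args.  */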
22189
22190 /* Generate the function's prologue. */
22191
22192 void
22193 thumb1_expand_prologue (void)
22194 {
22195 rtx insn;
22196
22197 HOST_WIDE_INT amount;
22198 arm_stack_offsets *offsets;
22199 unsigned long func_type;
22200 int regno;
22201 unsigned long live_regs_mask;
22202 unsigned long l_mask;
22203 unsigned high_regs_pushed = 0;
22204
22205 func_type = arm_current_func_type ();
22206
22207 /* Naked functions don't have prologues. */
22208 if (IS_NAKED (func_type))
22209 return;
22210
22211 if (IS_INTERRUPT (func_type))
22212 {
22213 error ("interrupt Service Routines cannot be coded in Thumb mode");
22214 return;
22215 }
22216
22217 if (is_called_in_ARM_mode (current_function_decl))
22218 emit_insn (gen_prologue_thumb1_interwork ());
22219
22220 offsets = arm_get_frame_offsets ();
22221 live_regs_mask = offsets->saved_regs_mask;
22222
22223 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
22224 l_mask = live_regs_mask & 0x40ff;
22225 /* Then count how many other high registers will need to be pushed. */
22226 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
22227
22228 if (crtl->args.pretend_args_size)
22229 {
22230 rtx x = GEN_INT (-crtl->args.pretend_args_size);
22231
22232 if (cfun->machine->uses_anonymous_args)
22233 {
22234 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
22235 unsigned long mask;
22236
22237 mask = 1ul << (LAST_ARG_REGNUM + 1);
22238 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
22239
22240 insn = thumb1_emit_multi_reg_push (mask, 0);
22241 }
22242 else
22243 {
22244 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22245 stack_pointer_rtx, x));
22246 }
22247 RTX_FRAME_RELATED_P (insn) = 1;
22248 }
22249
22250 if (TARGET_BACKTRACE)
22251 {
22252 HOST_WIDE_INT offset = 0;
22253 unsigned work_register;
22254 rtx work_reg, x, arm_hfp_rtx;
22255
22256 /* We have been asked to create a stack backtrace structure.
22257 The code looks like this:
22258
22259 0 .align 2
22260 0 func:
22261 0 sub SP, #16 Reserve space for 4 registers.
22262 2 push {R7} Push low registers.
22263 4 add R7, SP, #20 Get the stack pointer before the push.
22264 6 str R7, [SP, #8] Store the stack pointer
22265 (before reserving the space).
22266 8 mov R7, PC Get hold of the start of this code + 12.
22267 10 str R7, [SP, #16] Store it.
22268 12 mov R7, FP Get hold of the current frame pointer.
22269 14 str R7, [SP, #4] Store it.
22270 16 mov R7, LR Get hold of the current return address.
22271 18 str R7, [SP, #12] Store it.
22272 20 add R7, SP, #16 Point at the start of the
22273 backtrace structure.
22274 22 mov FP, R7 Put this value into the frame pointer. */
22275
22276 work_register = thumb_find_work_register (live_regs_mask);
22277 work_reg = gen_rtx_REG (SImode, work_register);
22278 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
22279
22280 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22281 stack_pointer_rtx, GEN_INT (-16)));
22282 RTX_FRAME_RELATED_P (insn) = 1;
22283
22284 if (l_mask)
22285 {
22286 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
22287 RTX_FRAME_RELATED_P (insn) = 1;
22288
22289 offset = bit_count (l_mask) * UNITS_PER_WORD;
22290 }
22291
22292 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
22293 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22294
22295 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
22296 x = gen_frame_mem (SImode, x);
22297 emit_move_insn (x, work_reg);
22298
22299 /* Make sure that the instruction fetching the PC is in the right place
22300 to calculate "start of backtrace creation code + 12". */
22301 /* ??? The stores using the common WORK_REG ought to be enough to
22302 prevent the scheduler from doing anything weird. Failing that
22303 we could always move all of the following into an UNSPEC_VOLATILE. */
22304 if (l_mask)
22305 {
22306 x = gen_rtx_REG (SImode, PC_REGNUM);
22307 emit_move_insn (work_reg, x);
22308
22309 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22310 x = gen_frame_mem (SImode, x);
22311 emit_move_insn (x, work_reg);
22312
22313 emit_move_insn (work_reg, arm_hfp_rtx);
22314
22315 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22316 x = gen_frame_mem (SImode, x);
22317 emit_move_insn (x, work_reg);
22318 }
22319 else
22320 {
22321 emit_move_insn (work_reg, arm_hfp_rtx);
22322
22323 x = plus_constant (Pmode, stack_pointer_rtx, offset);
22324 x = gen_frame_mem (SImode, x);
22325 emit_move_insn (x, work_reg);
22326
22327 x = gen_rtx_REG (SImode, PC_REGNUM);
22328 emit_move_insn (work_reg, x);
22329
22330 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
22331 x = gen_frame_mem (SImode, x);
22332 emit_move_insn (x, work_reg);
22333 }
22334
22335 x = gen_rtx_REG (SImode, LR_REGNUM);
22336 emit_move_insn (work_reg, x);
22337
22338 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
22339 x = gen_frame_mem (SImode, x);
22340 emit_move_insn (x, work_reg);
22341
22342 x = GEN_INT (offset + 12);
22343 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
22344
22345 emit_move_insn (arm_hfp_rtx, work_reg);
22346 }
22347 /* Optimization: If we are not pushing any low registers but we are going
22348 to push some high registers then delay our first push. This will just
22349 be a push of LR and we can combine it with the push of the first high
22350 register. */
22351 else if ((l_mask & 0xff) != 0
22352 || (high_regs_pushed == 0 && l_mask))
22353 {
22354 unsigned long mask = l_mask;
22355 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
22356 insn = thumb1_emit_multi_reg_push (mask, mask);
22357 RTX_FRAME_RELATED_P (insn) = 1;
22358 }
22359
22360 if (high_regs_pushed)
22361 {
22362 unsigned pushable_regs;
22363 unsigned next_hi_reg;
22364
22365 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
22366 if (live_regs_mask & (1 << next_hi_reg))
22367 break;
22368
22369 pushable_regs = l_mask & 0xff;
22370
22371 if (pushable_regs == 0)
22372 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
22373
22374 while (high_regs_pushed > 0)
22375 {
22376 unsigned long real_regs_mask = 0;
22377
22378 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
22379 {
22380 if (pushable_regs & (1 << regno))
22381 {
22382 emit_move_insn (gen_rtx_REG (SImode, regno),
22383 gen_rtx_REG (SImode, next_hi_reg));
22384
22385 high_regs_pushed --;
22386 real_regs_mask |= (1 << next_hi_reg);
22387
22388 if (high_regs_pushed)
22389 {
22390 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
22391 next_hi_reg --)
22392 if (live_regs_mask & (1 << next_hi_reg))
22393 break;
22394 }
22395 else
22396 {
22397 pushable_regs &= ~((1 << regno) - 1);
22398 break;
22399 }
22400 }
22401 }
22402
22403 /* If we had to find a work register and we have not yet
22404 saved the LR then add it to the list of regs to push. */
22405 if (l_mask == (1 << LR_REGNUM))
22406 {
22407 pushable_regs |= l_mask;
22408 real_regs_mask |= l_mask;
22409 l_mask = 0;
22410 }
22411
22412 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
22413 RTX_FRAME_RELATED_P (insn) = 1;
22414 }
22415 }
22416
22417 /* Load the pic register before setting the frame pointer,
22418 so we can use r7 as a temporary work register. */
22419 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22420 arm_load_pic_register (live_regs_mask);
22421
22422 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
22423 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
22424 stack_pointer_rtx);
22425
22426 if (flag_stack_usage_info)
22427 current_function_static_stack_size
22428 = offsets->outgoing_args - offsets->saved_args;
22429
22430 amount = offsets->outgoing_args - offsets->saved_regs;
22431 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
22432 if (amount)
22433 {
22434 if (amount < 512)
22435 {
22436 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22437 GEN_INT (- amount)));
22438 RTX_FRAME_RELATED_P (insn) = 1;
22439 }
22440 else
22441 {
22442 rtx reg, dwarf;
22443
22444 /* The stack decrement is too big for an immediate value in a single
22445 insn. In theory we could issue multiple subtracts, but after
22446 three of them it becomes more space efficient to place the full
22447 value in the constant pool and load into a register. (Also the
22448 ARM debugger really likes to see only one stack decrement per
22449 function). So instead we look for a scratch register into which
22450 we can load the decrement, and then we subtract this from the
22451 stack pointer. Unfortunately on the thumb the only available
22452 scratch registers are the argument registers, and we cannot use
22453 these as they may hold arguments to the function. Instead we
22454 attempt to locate a call preserved register which is used by this
22455 function. If we can find one, then we know that it will have
22456 been pushed at the start of the prologue and so we can corrupt
22457 it now. */
22458 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
22459 if (live_regs_mask & (1 << regno))
22460 break;
22461
22462 gcc_assert(regno <= LAST_LO_REGNUM);
22463
22464 reg = gen_rtx_REG (SImode, regno);
22465
22466 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
22467
22468 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
22469 stack_pointer_rtx, reg));
22470
22471 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22472 plus_constant (Pmode, stack_pointer_rtx,
22473 -amount));
22474 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
22475 RTX_FRAME_RELATED_P (insn) = 1;
22476 }
22477 }
22478
22479 if (frame_pointer_needed)
22480 thumb_set_frame_pointer (offsets);
22481
22482 /* If we are profiling, make sure no instructions are scheduled before
22483 the call to mcount. Similarly if the user has requested no
22484 scheduling in the prolog. Similarly if we want non-call exceptions
22485 using the EABI unwinder, to prevent faulting instructions from being
22486 swapped with a stack adjustment. */
22487 if (crtl->profile || !TARGET_SCHED_PROLOG
22488 || (arm_except_unwind_info (&global_options) == UI_TARGET
22489 && cfun->can_throw_non_call_exceptions))
22490 emit_insn (gen_blockage ());
22491
22492 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
22493 if (live_regs_mask & 0xff)
22494 cfun->machine->lr_save_eliminated = 0;
22495 }
22496
22497 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
22498 POP instruction can be generated.  LR should be replaced by PC.  All
22499 the checks required are already done by USE_RETURN_INSN ().  Hence,
22500 all we really need to check here is whether a single register or
22501 multiple registers are to be popped on return. */
22502 void
22503 thumb2_expand_return (void)
22504 {
22505 int i, num_regs;
22506 unsigned long saved_regs_mask;
22507 arm_stack_offsets *offsets;
22508
22509 offsets = arm_get_frame_offsets ();
22510 saved_regs_mask = offsets->saved_regs_mask;
22511
22512 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
22513 if (saved_regs_mask & (1 << i))
22514 num_regs++;
22515
22516 if (saved_regs_mask)
22517 {
22518 if (num_regs == 1)
22519 {
22520 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22521 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
22522 rtx addr = gen_rtx_MEM (SImode,
22523 gen_rtx_POST_INC (SImode,
22524 stack_pointer_rtx));
22525 set_mem_alias_set (addr, get_frame_alias_set ());
22526 XVECEXP (par, 0, 0) = ret_rtx;
22527 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
22528 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
22529 emit_jump_insn (par);
22530 }
22531 else
22532 {
22533 saved_regs_mask &= ~ (1 << LR_REGNUM);
22534 saved_regs_mask |= (1 << PC_REGNUM);
22535 arm_emit_multi_reg_pop (saved_regs_mask);
22536 }
22537 }
22538 else
22539 {
22540 emit_jump_insn (simple_return_rtx);
22541 }
22542 }
22543
22544 void
22545 thumb1_expand_epilogue (void)
22546 {
22547 HOST_WIDE_INT amount;
22548 arm_stack_offsets *offsets;
22549 int regno;
22550
22551 /* Naked functions don't have epilogues. */
22552 if (IS_NAKED (arm_current_func_type ()))
22553 return;
22554
22555 offsets = arm_get_frame_offsets ();
22556 amount = offsets->outgoing_args - offsets->saved_regs;
22557
22558 if (frame_pointer_needed)
22559 {
22560 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22561 amount = offsets->locals_base - offsets->saved_regs;
22562 }
22563 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
22564
22565 gcc_assert (amount >= 0);
22566 if (amount)
22567 {
22568 emit_insn (gen_blockage ());
22569
22570 if (amount < 512)
22571 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22572 GEN_INT (amount)));
22573 else
22574 {
22575 /* r3 is always free in the epilogue. */
22576 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
22577
22578 emit_insn (gen_movsi (reg, GEN_INT (amount)));
22579 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
22580 }
22581 }
22582
22583 /* Emit a USE (stack_pointer_rtx), so that
22584 the stack adjustment will not be deleted. */
22585 emit_insn (gen_prologue_use (stack_pointer_rtx));
22586
22587 if (crtl->profile || !TARGET_SCHED_PROLOG)
22588 emit_insn (gen_blockage ());
22589
22590 /* Emit a clobber for each register that will be restored in the epilogue,
22591 so that flow2 will get register lifetimes correct. */
22592 for (regno = 0; regno < 13; regno++)
22593 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
22594 emit_clobber (gen_rtx_REG (SImode, regno));
22595
22596 if (! df_regs_ever_live_p (LR_REGNUM))
22597 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
22598 }
22599
22600 /* Epilogue code for APCS frame. */
22601 static void
22602 arm_expand_epilogue_apcs_frame (bool really_return)
22603 {
22604 unsigned long func_type;
22605 unsigned long saved_regs_mask;
22606 int num_regs = 0;
22607 int i;
22608 int floats_from_frame = 0;
22609 arm_stack_offsets *offsets;
22610
22611 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
22612 func_type = arm_current_func_type ();
22613
22614 /* Get frame offsets for ARM. */
22615 offsets = arm_get_frame_offsets ();
22616 saved_regs_mask = offsets->saved_regs_mask;
22617
22618 /* Find the offset of the floating-point save area in the frame. */
22619 floats_from_frame = offsets->saved_args - offsets->frame;
22620
22621 /* Compute how many core registers are saved and how far away the floats are. */
22622 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22623 if (saved_regs_mask & (1 << i))
22624 {
22625 num_regs++;
22626 floats_from_frame += 4;
22627 }
22628
22629 if (TARGET_HARD_FLOAT && TARGET_VFP)
22630 {
22631 int start_reg;
22632
22633 /* The offset is from IP_REGNUM. */
22634 int saved_size = arm_get_vfp_saved_size ();
22635 if (saved_size > 0)
22636 {
22637 floats_from_frame += saved_size;
22638 emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
22639 hard_frame_pointer_rtx,
22640 GEN_INT (-floats_from_frame)));
22641 }
22642
22643 /* Generate VFP register multi-pop. */
22644 start_reg = FIRST_VFP_REGNUM;
22645
22646 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
22647 /* Look for a case where a reg does not need restoring. */
22648 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22649 && (!df_regs_ever_live_p (i + 1)
22650 || call_used_regs[i + 1]))
22651 {
22652 if (start_reg != i)
22653 arm_emit_vfp_multi_reg_pop (start_reg,
22654 (i - start_reg) / 2,
22655 gen_rtx_REG (SImode,
22656 IP_REGNUM));
22657 start_reg = i + 2;
22658 }
22659
22660 /* Restore the remaining regs that we have discovered (or possibly
22661 even all of them, if the conditional in the for loop never
22662 fired). */
22663 if (start_reg != i)
22664 arm_emit_vfp_multi_reg_pop (start_reg,
22665 (i - start_reg) / 2,
22666 gen_rtx_REG (SImode, IP_REGNUM));
22667 }
22668
22669 if (TARGET_IWMMXT)
22670 {
22671 /* The frame pointer is guaranteed to be non-double-word aligned, as
22672 it is set to double-word-aligned old_stack_pointer - 4. */
22673 rtx insn;
22674 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
22675
22676 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
22677 if (df_regs_ever_live_p (i) && !call_used_regs[i])
22678 {
22679 rtx addr = gen_frame_mem (V2SImode,
22680 plus_constant (Pmode, hard_frame_pointer_rtx,
22681 - lrm_count * 4));
22682 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
22683 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22684 gen_rtx_REG (V2SImode, i),
22685 NULL_RTX);
22686 lrm_count += 2;
22687 }
22688 }
22689
22690 /* saved_regs_mask should contain IP, which holds the old stack pointer
22691 at the time of activation creation.  Since SP and IP are adjacent registers,
22692 we can restore the value directly into SP. */
22693 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
22694 saved_regs_mask &= ~(1 << IP_REGNUM);
22695 saved_regs_mask |= (1 << SP_REGNUM);
22696
22697 /* There are two registers left in saved_regs_mask - LR and PC. We
22698 only need to restore LR (the return address), but to
22699 save time we can load it directly into PC, unless we need a
22700 special function exit sequence, or we are not really returning. */
22701 if (really_return
22702 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
22703 && !crtl->calls_eh_return)
22704 /* Delete LR from the register mask, so that LR on
22705 the stack is loaded into the PC in the register mask. */
22706 saved_regs_mask &= ~(1 << LR_REGNUM);
22707 else
22708 saved_regs_mask &= ~(1 << PC_REGNUM);
22709
22710 num_regs = bit_count (saved_regs_mask);
22711 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
22712 {
22713 /* Unwind the stack to just below the saved registers. */
22714 emit_insn (gen_addsi3 (stack_pointer_rtx,
22715 hard_frame_pointer_rtx,
22716 GEN_INT (- 4 * num_regs)));
22717 }
22718
22719 arm_emit_multi_reg_pop (saved_regs_mask);
22720
22721 if (IS_INTERRUPT (func_type))
22722 {
22723 /* Interrupt handlers will have pushed the
22724 IP onto the stack, so restore it now. */
22725 rtx insn;
22726 rtx addr = gen_rtx_MEM (SImode,
22727 gen_rtx_POST_INC (SImode,
22728 stack_pointer_rtx));
22729 set_mem_alias_set (addr, get_frame_alias_set ());
22730 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
22731 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22732 gen_rtx_REG (SImode, IP_REGNUM),
22733 NULL_RTX);
22734 }
22735
22736 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
22737 return;
22738
22739 if (crtl->calls_eh_return)
22740 emit_insn (gen_addsi3 (stack_pointer_rtx,
22741 stack_pointer_rtx,
22742 GEN_INT (ARM_EH_STACKADJ_REGNUM)));
22743
22744 if (IS_STACKALIGN (func_type))
22745 /* Restore the original stack pointer. Before prologue, the stack was
22746 realigned and the original stack pointer saved in r0. For details,
22747 see comment in arm_expand_prologue. */
22748 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
22749
22750 emit_jump_insn (simple_return_rtx);
22751 }
22752
22753 /* Generate RTL to represent ARM epilogue. Really_return is true if the
22754 function is not a sibcall. */
22755 void
22756 arm_expand_epilogue (bool really_return)
22757 {
22758 unsigned long func_type;
22759 unsigned long saved_regs_mask;
22760 int num_regs = 0;
22761 int i;
22762 int amount;
22763 int floats_from_frame = 0;
22764 arm_stack_offsets *offsets;
22765
22766 func_type = arm_current_func_type ();
22767
22768 /* Naked functions don't have epilogues.  Hence, generate the return pattern
22769 and let output_return_instruction take care of any instruction emission. */
22770 if (IS_NAKED (func_type)
22771 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
22772 {
22773 emit_jump_insn (simple_return_rtx);
22774 return;
22775 }
22776
22777 /* If we are throwing an exception, then we really must be doing a
22778 return, so we can't tail-call. */
22779 gcc_assert (!crtl->calls_eh_return || really_return);
22780
22781 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
22782 {
22783 arm_expand_epilogue_apcs_frame (really_return);
22784 return;
22785 }
22786
22787 /* Get frame offsets for ARM. */
22788 offsets = arm_get_frame_offsets ();
22789 saved_regs_mask = offsets->saved_regs_mask;
22790
22791 /* Find offset of floating point register from frame pointer.
22792 The initialization is done in this way to take care of frame pointer
22793 and static-chain register, if stored. */
22794 floats_from_frame = offsets->saved_args - offsets->frame;
22795 /* Compute how many registers are saved and how far away the floats will be. */
22796 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22797 if (saved_regs_mask & (1 << i))
22798 {
22799 num_regs++;
22800 floats_from_frame += 4;
22801 }
22802
22803 if (frame_pointer_needed)
22804 {
22805 /* Restore stack pointer if necessary. */
22806 if (TARGET_ARM)
22807 {
22808 /* In ARM mode, frame pointer points to first saved register.
22809 Restore stack pointer to last saved register. */
22810 amount = offsets->frame - offsets->saved_regs;
22811
22812 /* Force out any pending memory operations that reference stacked data
22813 before stack de-allocation occurs. */
22814 emit_insn (gen_blockage ());
22815 emit_insn (gen_addsi3 (stack_pointer_rtx,
22816 hard_frame_pointer_rtx,
22817 GEN_INT (amount)));
22818
22819 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22820 deleted. */
22821 emit_insn (gen_prologue_use (stack_pointer_rtx));
22822 }
22823 else
22824 {
22825 /* In Thumb-2 mode, the frame pointer points to the last saved
22826 register. */
22827 amount = offsets->locals_base - offsets->saved_regs;
22828 if (amount)
22829 emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22830 hard_frame_pointer_rtx,
22831 GEN_INT (amount)));
22832
22833 /* Force out any pending memory operations that reference stacked data
22834 before stack de-allocation occurs. */
22835 emit_insn (gen_blockage ());
22836 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
22837 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
22838 deleted. */
22839 emit_insn (gen_prologue_use (stack_pointer_rtx));
22840 }
22841 }
22842 else
22843 {
22844 /* Pop off outgoing args and local frame to adjust stack pointer to
22845 last saved register. */
22846 amount = offsets->outgoing_args - offsets->saved_regs;
22847 if (amount)
22848 {
22849 /* Force out any pending memory operations that reference stacked data
22850 before stack de-allocation occurs. */
22851 emit_insn (gen_blockage ());
22852 emit_insn (gen_addsi3 (stack_pointer_rtx,
22853 stack_pointer_rtx,
22854 GEN_INT (amount)));
22855 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
22856 not deleted. */
22857 emit_insn (gen_prologue_use (stack_pointer_rtx));
22858 }
22859 }
22860
22861 if (TARGET_HARD_FLOAT && TARGET_VFP)
22862 {
22863 /* Generate VFP register multi-pop. */
22864 int end_reg = LAST_VFP_REGNUM + 1;
22865
22866 /* Scan the registers in reverse order. We need to match
22867 any groupings made in the prologue and generate matching
22868 vldm operations. The need to match groups is because,
22869 unlike pop, vldm can only do consecutive regs. */
22870 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
22871 /* Look for a case where a reg does not need restoring. */
22872 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
22873 && (!df_regs_ever_live_p (i + 1)
22874 || call_used_regs[i + 1]))
22875 {
22876 /* Restore the regs discovered so far (from reg+2 to
22877 end_reg). */
22878 if (end_reg > i + 2)
22879 arm_emit_vfp_multi_reg_pop (i + 2,
22880 (end_reg - (i + 2)) / 2,
22881 stack_pointer_rtx);
22882 end_reg = i;
22883 }
22884
22885 /* Restore the remaining regs that we have discovered (or possibly
22886 even all of them, if the conditional in the for loop never
22887 fired). */
22888 if (end_reg > i + 2)
22889 arm_emit_vfp_multi_reg_pop (i + 2,
22890 (end_reg - (i + 2)) / 2,
22891 stack_pointer_rtx);
22892 }
22893
22894 if (TARGET_IWMMXT)
22895 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
22896 if (df_regs_ever_live_p (i) && !call_used_regs[i])
22897 {
22898 rtx insn;
22899 rtx addr = gen_rtx_MEM (V2SImode,
22900 gen_rtx_POST_INC (SImode,
22901 stack_pointer_rtx));
22902 set_mem_alias_set (addr, get_frame_alias_set ());
22903 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
22904 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22905 gen_rtx_REG (V2SImode, i),
22906 NULL_RTX);
22907 }
22908
22909 if (saved_regs_mask)
22910 {
22911 rtx insn;
22912 bool return_in_pc = false;
22913
22914 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
22915 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
22916 && !IS_STACKALIGN (func_type)
22917 && really_return
22918 && crtl->args.pretend_args_size == 0
22919 && saved_regs_mask & (1 << LR_REGNUM)
22920 && !crtl->calls_eh_return)
22921 {
22922 saved_regs_mask &= ~(1 << LR_REGNUM);
22923 saved_regs_mask |= (1 << PC_REGNUM);
22924 return_in_pc = true;
22925 }
22926
22927 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
22928 {
22929 for (i = 0; i <= LAST_ARM_REGNUM; i++)
22930 if (saved_regs_mask & (1 << i))
22931 {
22932 rtx addr = gen_rtx_MEM (SImode,
22933 gen_rtx_POST_INC (SImode,
22934 stack_pointer_rtx));
22935 set_mem_alias_set (addr, get_frame_alias_set ());
22936
22937 if (i == PC_REGNUM)
22938 {
22939 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
22940 XVECEXP (insn, 0, 0) = ret_rtx;
22941 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
22942 gen_rtx_REG (SImode, i),
22943 addr);
22944 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
22945 insn = emit_jump_insn (insn);
22946 }
22947 else
22948 {
22949 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
22950 addr));
22951 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
22952 gen_rtx_REG (SImode, i),
22953 NULL_RTX);
22954 }
22955 }
22956 }
22957 else
22958 {
22959 arm_emit_multi_reg_pop (saved_regs_mask);
22960 }
22961
22962 if (return_in_pc == true)
22963 return;
22964 }
22965
22966 if (crtl->args.pretend_args_size)
22967 emit_insn (gen_addsi3 (stack_pointer_rtx,
22968 stack_pointer_rtx,
22969 GEN_INT (crtl->args.pretend_args_size)));
22970
22971 if (!really_return)
22972 return;
22973
22974 if (crtl->calls_eh_return)
22975 emit_insn (gen_addsi3 (stack_pointer_rtx,
22976 stack_pointer_rtx,
22977 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
22978
22979 if (IS_STACKALIGN (func_type))
22980 /* Restore the original stack pointer. Before prologue, the stack was
22981 realigned and the original stack pointer saved in r0. For details,
22982 see comment in arm_expand_prologue. */
22983 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
22984
22985 emit_jump_insn (simple_return_rtx);
22986 }
22987
22988 /* Implementation of insn prologue_thumb1_interwork. This is the first
22989 "instruction" of a function called in ARM mode. Swap to thumb mode. */
22990
22991 const char *
22992 thumb1_output_interwork (void)
22993 {
22994 const char * name;
22995 FILE *f = asm_out_file;
22996
22997 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
22998 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
22999 == SYMBOL_REF);
23000 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
23001
23002 /* Generate code sequence to switch us into Thumb mode. */
23003 /* The .code 32 directive has already been emitted by
23004 ASM_DECLARE_FUNCTION_NAME. */
23005 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
23006 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
23007
23008 /* Generate a label, so that the debugger will notice the
23009 change in instruction sets. This label is also used by
23010 the assembler to bypass the ARM code when this function
23011 is called from a Thumb encoded function elsewhere in the
23012 same file. Hence the definition of STUB_NAME here must
23013 agree with the definition in gas/config/tc-arm.c. */
23014
23015 #define STUB_NAME ".real_start_of"
23016
23017 fprintf (f, "\t.code\t16\n");
23018 #ifdef ARM_PE
23019 if (arm_dllexport_name_p (name))
23020 name = arm_strip_name_encoding (name);
23021 #endif
23022 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
23023 fprintf (f, "\t.thumb_func\n");
23024 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
23025
23026 return "";
23027 }
23028
23029 /* Handle the case of a double word load into a low register from
23030 a computed memory address. The computed address may involve a
23031 register which is overwritten by the load. */
23032 const char *
23033 thumb_load_double_from_address (rtx *operands)
23034 {
23035 rtx addr;
23036 rtx base;
23037 rtx offset;
23038 rtx arg1;
23039 rtx arg2;
23040
23041 gcc_assert (GET_CODE (operands[0]) == REG);
23042 gcc_assert (GET_CODE (operands[1]) == MEM);
23043
23044 /* Get the memory address. */
23045 addr = XEXP (operands[1], 0);
23046
23047 /* Work out how the memory address is computed. */
23048 switch (GET_CODE (addr))
23049 {
23050 case REG:
23051 operands[2] = adjust_address (operands[1], SImode, 4);
23052
23053 if (REGNO (operands[0]) == REGNO (addr))
23054 {
23055 output_asm_insn ("ldr\t%H0, %2", operands);
23056 output_asm_insn ("ldr\t%0, %1", operands);
23057 }
23058 else
23059 {
23060 output_asm_insn ("ldr\t%0, %1", operands);
23061 output_asm_insn ("ldr\t%H0, %2", operands);
23062 }
23063 break;
23064
23065 case CONST:
23066 /* Compute <address> + 4 for the high order load. */
23067 operands[2] = adjust_address (operands[1], SImode, 4);
23068
23069 output_asm_insn ("ldr\t%0, %1", operands);
23070 output_asm_insn ("ldr\t%H0, %2", operands);
23071 break;
23072
23073 case PLUS:
23074 arg1 = XEXP (addr, 0);
23075 arg2 = XEXP (addr, 1);
23076
23077 if (CONSTANT_P (arg1))
23078 base = arg2, offset = arg1;
23079 else
23080 base = arg1, offset = arg2;
23081
23082 gcc_assert (GET_CODE (base) == REG);
23083
23084 /* Catch the case of <address> = <reg> + <reg> */
23085 if (GET_CODE (offset) == REG)
23086 {
23087 int reg_offset = REGNO (offset);
23088 int reg_base = REGNO (base);
23089 int reg_dest = REGNO (operands[0]);
23090
23091 /* Add the base and offset registers together into the
23092 higher destination register. */
23093 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
23094 reg_dest + 1, reg_base, reg_offset);
23095
23096 /* Load the lower destination register from the address in
23097 the higher destination register. */
23098 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
23099 reg_dest, reg_dest + 1);
23100
23101 /* Load the higher destination register from its own address
23102 plus 4. */
23103 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
23104 reg_dest + 1, reg_dest + 1);
23105 }
23106 else
23107 {
23108 /* Compute <address> + 4 for the high order load. */
23109 operands[2] = adjust_address (operands[1], SImode, 4);
23110
23111 /* If the computed address is held in the low order register
23112 then load the high order register first, otherwise always
23113 load the low order register first. */
23114 if (REGNO (operands[0]) == REGNO (base))
23115 {
23116 output_asm_insn ("ldr\t%H0, %2", operands);
23117 output_asm_insn ("ldr\t%0, %1", operands);
23118 }
23119 else
23120 {
23121 output_asm_insn ("ldr\t%0, %1", operands);
23122 output_asm_insn ("ldr\t%H0, %2", operands);
23123 }
23124 }
23125 break;
23126
23127 case LABEL_REF:
23128 /* With no registers to worry about we can just load the value
23129 directly. */
23130 operands[2] = adjust_address (operands[1], SImode, 4);
23131
23132 output_asm_insn ("ldr\t%H0, %2", operands);
23133 output_asm_insn ("ldr\t%0, %1", operands);
23134 break;
23135
23136 default:
23137 gcc_unreachable ();
23138 }
23139
23140 return "";
23141 }
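
/* For illustration (hypothetical operands): for a doubleword load into
   r2 from the address [r1 + r3], the PLUS/REG case above emits

	add	r3, r1, r3
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]

   i.e. the address is formed in the high destination register so the low
   word can be loaded first without clobbering the address, and the final
   load overwrites the address register with the high word.  */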
23142
23143 const char *
23144 thumb_output_move_mem_multiple (int n, rtx *operands)
23145 {
23146 rtx tmp;
23147
23148 switch (n)
23149 {
23150 case 2:
23151 if (REGNO (operands[4]) > REGNO (operands[5]))
23152 {
23153 tmp = operands[4];
23154 operands[4] = operands[5];
23155 operands[5] = tmp;
23156 }
23157 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
23158 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
23159 break;
23160
23161 case 3:
23162 if (REGNO (operands[4]) > REGNO (operands[5]))
23163 {
23164 tmp = operands[4];
23165 operands[4] = operands[5];
23166 operands[5] = tmp;
23167 }
23168 if (REGNO (operands[5]) > REGNO (operands[6]))
23169 {
23170 tmp = operands[5];
23171 operands[5] = operands[6];
23172 operands[6] = tmp;
23173 }
23174 if (REGNO (operands[4]) > REGNO (operands[5]))
23175 {
23176 tmp = operands[4];
23177 operands[4] = operands[5];
23178 operands[5] = tmp;
23179 }
23180
23181 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
23182 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
23183 break;
23184
23185 default:
23186 gcc_unreachable ();
23187 }
23188
23189 return "";
23190 }
23191
23192 /* Output a call-via instruction for thumb state. */
23193 const char *
23194 thumb_call_via_reg (rtx reg)
23195 {
23196 int regno = REGNO (reg);
23197 rtx *labelp;
23198
23199 gcc_assert (regno < LR_REGNUM);
23200
23201 /* If we are in the normal text section we can use a single instance
23202 per compilation unit. If we are doing function sections, then we need
23203 an entry per section, since we can't rely on reachability. */
23204 if (in_section == text_section)
23205 {
23206 thumb_call_reg_needed = 1;
23207
23208 if (thumb_call_via_label[regno] == NULL)
23209 thumb_call_via_label[regno] = gen_label_rtx ();
23210 labelp = thumb_call_via_label + regno;
23211 }
23212 else
23213 {
23214 if (cfun->machine->call_via[regno] == NULL)
23215 cfun->machine->call_via[regno] = gen_label_rtx ();
23216 labelp = cfun->machine->call_via + regno;
23217 }
23218
23219 output_asm_insn ("bl\t%a0", labelp);
23220 return "";
23221 }
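
/* Sketch of the expected output (the label number is illustrative): a call
   through r3 becomes something like

	bl	.L42

   with the matching stub

   .L42:
	bx	r3

   emitted later, by arm_file_end when the normal text section is in use,
   so only one instance is needed per compilation unit.  */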
23222
23223 /* Routines for generating rtl. */
23224 void
23225 thumb_expand_movmemqi (rtx *operands)
23226 {
23227 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
23228 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
23229 HOST_WIDE_INT len = INTVAL (operands[2]);
23230 HOST_WIDE_INT offset = 0;
23231
23232 while (len >= 12)
23233 {
23234 emit_insn (gen_movmem12b (out, in, out, in));
23235 len -= 12;
23236 }
23237
23238 if (len >= 8)
23239 {
23240 emit_insn (gen_movmem8b (out, in, out, in));
23241 len -= 8;
23242 }
23243
23244 if (len >= 4)
23245 {
23246 rtx reg = gen_reg_rtx (SImode);
23247 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
23248 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
23249 len -= 4;
23250 offset += 4;
23251 }
23252
23253 if (len >= 2)
23254 {
23255 rtx reg = gen_reg_rtx (HImode);
23256 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
23257 plus_constant (Pmode, in,
23258 offset))));
23259 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
23260 offset)),
23261 reg));
23262 len -= 2;
23263 offset += 2;
23264 }
23265
23266 if (len)
23267 {
23268 rtx reg = gen_reg_rtx (QImode);
23269 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
23270 plus_constant (Pmode, in,
23271 offset))));
23272 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
23273 offset)),
23274 reg));
23275 }
23276 }
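
/* A worked example of the expansion above: a 23-byte copy is expected to
   become one 12-byte block move, one 8-byte block move, then a halfword
   copy and a final byte copy (12 + 8 + 2 + 1 = 23).  The block moves
   post-increment the pointers, while the trailing scalar copies use
   OFFSET to address the remaining tail.  */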
23277
23278 void
23279 thumb_reload_out_hi (rtx *operands)
23280 {
23281 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
23282 }
23283
23284 /* Handle reading a half-word from memory during reload. */
23285 void
23286 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
23287 {
23288 gcc_unreachable ();
23289 }
23290
23291 /* Return the length of a function name prefix
23292 that starts with the character C. */
23293 static int
23294 arm_get_strip_length (int c)
23295 {
23296 switch (c)
23297 {
23298 ARM_NAME_ENCODING_LENGTHS
23299 default: return 0;
23300 }
23301 }
23302
23303 /* Return a pointer to a function's name with any
23304 and all prefix encodings stripped from it. */
23305 const char *
23306 arm_strip_name_encoding (const char *name)
23307 {
23308 int skip;
23309
23310 while ((skip = arm_get_strip_length (* name)))
23311 name += skip;
23312
23313 return name;
23314 }
23315
23316 /* If there is a '*' anywhere in the name's prefix, then
23317 emit the stripped name verbatim, otherwise prepend an
23318 underscore if leading underscores are being used. */
23319 void
23320 arm_asm_output_labelref (FILE *stream, const char *name)
23321 {
23322 int skip;
23323 int verbatim = 0;
23324
23325 while ((skip = arm_get_strip_length (* name)))
23326 {
23327 verbatim |= (*name == '*');
23328 name += skip;
23329 }
23330
23331 if (verbatim)
23332 fputs (name, stream);
23333 else
23334 asm_fprintf (stream, "%U%s", name);
23335 }
23336
23337 /* This function is used to emit an EABI tag and its associated value.
23338 We emit the numerical value of the tag in case the assembler does not
23339 support textual tags. (E.g. gas prior to 2.20). If requested we include
23340 the tag name in a comment so that anyone reading the assembler output
23341 will know which tag is being set.
23342
23343 This function is not static because arm-c.c needs it too. */
23344
23345 void
23346 arm_emit_eabi_attribute (const char *name, int num, int val)
23347 {
23348 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
23349 if (flag_verbose_asm || flag_debug_asm)
23350 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
23351 asm_fprintf (asm_out_file, "\n");
23352 }
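
/* For illustration, a call such as

     arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 1);

   is expected to emit a line along the lines of

	.eabi_attribute 26, 1	@ Tag_ABI_enum_size

   where the trailing comment only appears under -fverbose-asm or -dA and
   '@' stands in for whatever ASM_COMMENT_START is for the target.  */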
23353
23354 static void
23355 arm_file_start (void)
23356 {
23357 int val;
23358
23359 if (TARGET_UNIFIED_ASM)
23360 asm_fprintf (asm_out_file, "\t.syntax unified\n");
23361
23362 if (TARGET_BPABI)
23363 {
23364 const char *fpu_name;
23365 if (arm_selected_arch)
23366 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
23367 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
23368 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
23369 else
23370 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
23371
23372 if (TARGET_SOFT_FLOAT)
23373 {
23374 fpu_name = "softvfp";
23375 }
23376 else
23377 {
23378 fpu_name = arm_fpu_desc->name;
23379 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
23380 {
23381 if (TARGET_HARD_FLOAT)
23382 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
23383 if (TARGET_HARD_FLOAT_ABI)
23384 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
23385 }
23386 }
23387 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
23388
23389 /* Some of these attributes only apply when the corresponding features
23390 are used. However we don't have any easy way of figuring this out.
23391 Conservatively record the setting that would have been used. */
23392
23393 if (flag_rounding_math)
23394 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
23395
23396 if (!flag_unsafe_math_optimizations)
23397 {
23398 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
23399 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
23400 }
23401 if (flag_signaling_nans)
23402 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
23403
23404 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
23405 flag_finite_math_only ? 1 : 3);
23406
23407 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
23408 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
23409 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
23410 flag_short_enums ? 1 : 2);
23411
23412 /* Tag_ABI_optimization_goals. */
23413 if (optimize_size)
23414 val = 4;
23415 else if (optimize >= 2)
23416 val = 2;
23417 else if (optimize)
23418 val = 1;
23419 else
23420 val = 6;
23421 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
23422
23423 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
23424 unaligned_access);
23425
23426 if (arm_fp16_format)
23427 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
23428 (int) arm_fp16_format);
23429
23430 if (arm_lang_output_object_attributes_hook)
23431 arm_lang_output_object_attributes_hook();
23432 }
23433
23434 default_file_start ();
23435 }
23436
23437 static void
23438 arm_file_end (void)
23439 {
23440 int regno;
23441
23442 if (NEED_INDICATE_EXEC_STACK)
23443 /* Add .note.GNU-stack. */
23444 file_end_indicate_exec_stack ();
23445
23446 if (! thumb_call_reg_needed)
23447 return;
23448
23449 switch_to_section (text_section);
23450 asm_fprintf (asm_out_file, "\t.code 16\n");
23451 ASM_OUTPUT_ALIGN (asm_out_file, 1);
23452
23453 for (regno = 0; regno < LR_REGNUM; regno++)
23454 {
23455 rtx label = thumb_call_via_label[regno];
23456
23457 if (label != 0)
23458 {
23459 targetm.asm_out.internal_label (asm_out_file, "L",
23460 CODE_LABEL_NUMBER (label));
23461 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
23462 }
23463 }
23464 }
23465
23466 #ifndef ARM_PE
23467 /* Symbols in the text segment can be accessed without indirecting via the
23468 constant pool; it may take an extra binary operation, but this is still
23469 faster than indirecting via memory. Don't do this when not optimizing,
23470 since we won't be calculating all of the offsets necessary to do this
23471 simplification. */
23472
23473 static void
23474 arm_encode_section_info (tree decl, rtx rtl, int first)
23475 {
23476 if (optimize > 0 && TREE_CONSTANT (decl))
23477 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
23478
23479 default_encode_section_info (decl, rtl, first);
23480 }
23481 #endif /* !ARM_PE */
23482
23483 static void
23484 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
23485 {
23486 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
23487 && !strcmp (prefix, "L"))
23488 {
23489 arm_ccfsm_state = 0;
23490 arm_target_insn = NULL;
23491 }
23492 default_internal_label (stream, prefix, labelno);
23493 }
23494
23495 /* Output code to add DELTA to the first argument, and then jump
23496 to FUNCTION. Used for C++ multiple inheritance. */
23497 static void
23498 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
23499 HOST_WIDE_INT delta,
23500 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
23501 tree function)
23502 {
23503 static int thunk_label = 0;
23504 char label[256];
23505 char labelpc[256];
23506 int mi_delta = delta;
23507 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
23508 int shift = 0;
23509 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
23510 ? 1 : 0);
23511 if (mi_delta < 0)
23512 mi_delta = - mi_delta;
23513
23514 if (TARGET_THUMB1)
23515 {
23516 int labelno = thunk_label++;
23517 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
23518 /* Thunks are entered in ARM mode when available. */
23519 if (TARGET_THUMB1_ONLY)
23520 {
23521 /* push r3 so we can use it as a temporary. */
23522 /* TODO: Omit this save if r3 is not used. */
23523 fputs ("\tpush {r3}\n", file);
23524 fputs ("\tldr\tr3, ", file);
23525 }
23526 else
23527 {
23528 fputs ("\tldr\tr12, ", file);
23529 }
23530 assemble_name (file, label);
23531 fputc ('\n', file);
23532 if (flag_pic)
23533 {
23534 /* If we are generating PIC, the ldr instruction below loads
23535 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
23536 the address of the add + 8, so we have:
23537
23538 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
23539 = target + 1.
23540
23541 Note that we have "+ 1" because some versions of GNU ld
23542 don't set the low bit of the result for R_ARM_REL32
23543 relocations against thumb function symbols.
23544 On ARMv6M this is +4, not +8. */
23545 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
23546 assemble_name (file, labelpc);
23547 fputs (":\n", file);
23548 if (TARGET_THUMB1_ONLY)
23549 {
23550 /* This is 2 insns after the start of the thunk, so we know it
23551 is 4-byte aligned. */
23552 fputs ("\tadd\tr3, pc, r3\n", file);
23553 fputs ("\tmov r12, r3\n", file);
23554 }
23555 else
23556 fputs ("\tadd\tr12, pc, r12\n", file);
23557 }
23558 else if (TARGET_THUMB1_ONLY)
23559 fputs ("\tmov r12, r3\n", file);
23560 }
23561 if (TARGET_THUMB1_ONLY)
23562 {
23563 if (mi_delta > 255)
23564 {
23565 fputs ("\tldr\tr3, ", file);
23566 assemble_name (file, label);
23567 fputs ("+4\n", file);
23568 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
23569 mi_op, this_regno, this_regno);
23570 }
23571 else if (mi_delta != 0)
23572 {
23573 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23574 mi_op, this_regno, this_regno,
23575 mi_delta);
23576 }
23577 }
23578 else
23579 {
23580 /* TODO: Use movw/movt for large constants when available. */
23581 while (mi_delta != 0)
23582 {
23583 if ((mi_delta & (3 << shift)) == 0)
23584 shift += 2;
23585 else
23586 {
23587 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
23588 mi_op, this_regno, this_regno,
23589 mi_delta & (0xff << shift));
23590 mi_delta &= ~(0xff << shift);
23591 shift += 8;
23592 }
23593 }
23594 }
23595 if (TARGET_THUMB1)
23596 {
23597 if (TARGET_THUMB1_ONLY)
23598 fputs ("\tpop\t{r3}\n", file);
23599
23600 fprintf (file, "\tbx\tr12\n");
23601 ASM_OUTPUT_ALIGN (file, 2);
23602 assemble_name (file, label);
23603 fputs (":\n", file);
23604 if (flag_pic)
23605 {
23606 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
23607 rtx tem = XEXP (DECL_RTL (function), 0);
23608 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
23609 tem = gen_rtx_MINUS (GET_MODE (tem),
23610 tem,
23611 gen_rtx_SYMBOL_REF (Pmode,
23612 ggc_strdup (labelpc)));
23613 assemble_integer (tem, 4, BITS_PER_WORD, 1);
23614 }
23615 else
23616 /* Output ".word .LTHUNKn". */
23617 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
23618
23619 if (TARGET_THUMB1_ONLY && mi_delta > 255)
23620 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
23621 }
23622 else
23623 {
23624 fputs ("\tb\t", file);
23625 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
23626 if (NEED_PLT_RELOC)
23627 fputs ("(PLT)", file);
23628 fputc ('\n', file);
23629 }
23630 }
23631
23632 int
23633 arm_emit_vector_const (FILE *file, rtx x)
23634 {
23635 int i;
23636 const char * pattern;
23637
23638 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23639
23640 switch (GET_MODE (x))
23641 {
23642 case V2SImode: pattern = "%08x"; break;
23643 case V4HImode: pattern = "%04x"; break;
23644 case V8QImode: pattern = "%02x"; break;
23645 default: gcc_unreachable ();
23646 }
23647
23648 fprintf (file, "0x");
23649 for (i = CONST_VECTOR_NUNITS (x); i--;)
23650 {
23651 rtx element;
23652
23653 element = CONST_VECTOR_ELT (x, i);
23654 fprintf (file, pattern, INTVAL (element));
23655 }
23656
23657 return 1;
23658 }
23659
23660 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
23661 HFmode constant pool entries are actually loaded with ldr. */
23662 void
23663 arm_emit_fp16_const (rtx c)
23664 {
23665 REAL_VALUE_TYPE r;
23666 long bits;
23667
23668 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
23669 bits = real_to_target (NULL, &r, HFmode);
23670 if (WORDS_BIG_ENDIAN)
23671 assemble_zeros (2);
23672 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
23673 if (!WORDS_BIG_ENDIAN)
23674 assemble_zeros (2);
23675 }
23676
23677 const char *
23678 arm_output_load_gr (rtx *operands)
23679 {
23680 rtx reg;
23681 rtx offset;
23682 rtx wcgr;
23683 rtx sum;
23684
23685 if (GET_CODE (operands [1]) != MEM
23686 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
23687 || GET_CODE (reg = XEXP (sum, 0)) != REG
23688 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
23689 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
23690 return "wldrw%?\t%0, %1";
23691
23692 /* Fix up an out-of-range load of a GR register. */
23693 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
23694 wcgr = operands[0];
23695 operands[0] = reg;
23696 output_asm_insn ("ldr%?\t%0, %1", operands);
23697
23698 operands[0] = wcgr;
23699 operands[1] = reg;
23700 output_asm_insn ("tmcr%?\t%0, %1", operands);
23701 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
23702
23703 return "";
23704 }
23705
23706 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
23707
23708 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
23709 named arg and all anonymous args onto the stack.
23710 XXX I know the prologue shouldn't be pushing registers, but it is faster
23711 that way. */
23712
23713 static void
23714 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
23715 enum machine_mode mode,
23716 tree type,
23717 int *pretend_size,
23718 int second_time ATTRIBUTE_UNUSED)
23719 {
23720 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
23721 int nregs;
23722
23723 cfun->machine->uses_anonymous_args = 1;
23724 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
23725 {
23726 nregs = pcum->aapcs_ncrn;
23727 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
23728 nregs++;
23729 }
23730 else
23731 nregs = pcum->nregs;
23732
23733 if (nregs < NUM_ARG_REGS)
23734 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
23735 }
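
/* A worked example, assuming NUM_ARG_REGS is 4 and UNITS_PER_WORD is 4:
   if the named arguments of a variadic function consume two argument
   registers, *pretend_size becomes (4 - 2) * 4 = 8, so the prologue
   pushes the remaining two argument registers and the anonymous
   arguments end up contiguous with any stack-passed ones.  */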
23736
23737 /* Return nonzero if the CONSUMER instruction (a store) does not need
23738 PRODUCER's value to calculate the address. */
23739
23740 int
23741 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
23742 {
23743 rtx value = PATTERN (producer);
23744 rtx addr = PATTERN (consumer);
23745
23746 if (GET_CODE (value) == COND_EXEC)
23747 value = COND_EXEC_CODE (value);
23748 if (GET_CODE (value) == PARALLEL)
23749 value = XVECEXP (value, 0, 0);
23750 value = XEXP (value, 0);
23751 if (GET_CODE (addr) == COND_EXEC)
23752 addr = COND_EXEC_CODE (addr);
23753 if (GET_CODE (addr) == PARALLEL)
23754 addr = XVECEXP (addr, 0, 0);
23755 addr = XEXP (addr, 0);
23756
23757 return !reg_overlap_mentioned_p (value, addr);
23758 }
23759
23760 /* Return nonzero if the CONSUMER instruction (a store) does need
23761 PRODUCER's value to calculate the address. */
23762
23763 int
23764 arm_early_store_addr_dep (rtx producer, rtx consumer)
23765 {
23766 return !arm_no_early_store_addr_dep (producer, consumer);
23767 }
23768
23769 /* Return nonzero if the CONSUMER instruction (a load) does need
23770 PRODUCER's value to calculate the address. */
23771
23772 int
23773 arm_early_load_addr_dep (rtx producer, rtx consumer)
23774 {
23775 rtx value = PATTERN (producer);
23776 rtx addr = PATTERN (consumer);
23777
23778 if (GET_CODE (value) == COND_EXEC)
23779 value = COND_EXEC_CODE (value);
23780 if (GET_CODE (value) == PARALLEL)
23781 value = XVECEXP (value, 0, 0);
23782 value = XEXP (value, 0);
23783 if (GET_CODE (addr) == COND_EXEC)
23784 addr = COND_EXEC_CODE (addr);
23785 if (GET_CODE (addr) == PARALLEL)
23786 {
23787 if (GET_CODE (XVECEXP (addr, 0, 0)) == RETURN)
23788 addr = XVECEXP (addr, 0, 1);
23789 else
23790 addr = XVECEXP (addr, 0, 0);
23791 }
23792 addr = XEXP (addr, 1);
23793
23794 return reg_overlap_mentioned_p (value, addr);
23795 }
23796
23797 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23798 have an early register shift value or amount dependency on the
23799 result of PRODUCER. */
23800
23801 int
23802 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
23803 {
23804 rtx value = PATTERN (producer);
23805 rtx op = PATTERN (consumer);
23806 rtx early_op;
23807
23808 if (GET_CODE (value) == COND_EXEC)
23809 value = COND_EXEC_CODE (value);
23810 if (GET_CODE (value) == PARALLEL)
23811 value = XVECEXP (value, 0, 0);
23812 value = XEXP (value, 0);
23813 if (GET_CODE (op) == COND_EXEC)
23814 op = COND_EXEC_CODE (op);
23815 if (GET_CODE (op) == PARALLEL)
23816 op = XVECEXP (op, 0, 0);
23817 op = XEXP (op, 1);
23818
23819 early_op = XEXP (op, 0);
23820 /* This is either an actual independent shift, or a shift applied to
23821 the first operand of another operation. We want the whole shift
23822 operation. */
23823 if (GET_CODE (early_op) == REG)
23824 early_op = op;
23825
23826 return !reg_overlap_mentioned_p (value, early_op);
23827 }
23828
23829 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
23830 have an early register shift value dependency on the result of
23831 PRODUCER. */
23832
23833 int
23834 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
23835 {
23836 rtx value = PATTERN (producer);
23837 rtx op = PATTERN (consumer);
23838 rtx early_op;
23839
23840 if (GET_CODE (value) == COND_EXEC)
23841 value = COND_EXEC_CODE (value);
23842 if (GET_CODE (value) == PARALLEL)
23843 value = XVECEXP (value, 0, 0);
23844 value = XEXP (value, 0);
23845 if (GET_CODE (op) == COND_EXEC)
23846 op = COND_EXEC_CODE (op);
23847 if (GET_CODE (op) == PARALLEL)
23848 op = XVECEXP (op, 0, 0);
23849 op = XEXP (op, 1);
23850
23851 early_op = XEXP (op, 0);
23852
23853 /* This is either an actual independent shift, or a shift applied to
23854 the first operand of another operation. We want the value being
23855 shifted, in either case. */
23856 if (GET_CODE (early_op) != REG)
23857 early_op = XEXP (early_op, 0);
23858
23859 return !reg_overlap_mentioned_p (value, early_op);
23860 }
23861
23862 /* Return nonzero if the CONSUMER (a mul or mac op) does not
23863 have an early register mult dependency on the result of
23864 PRODUCER. */
23865
23866 int
23867 arm_no_early_mul_dep (rtx producer, rtx consumer)
23868 {
23869 rtx value = PATTERN (producer);
23870 rtx op = PATTERN (consumer);
23871
23872 if (GET_CODE (value) == COND_EXEC)
23873 value = COND_EXEC_CODE (value);
23874 if (GET_CODE (value) == PARALLEL)
23875 value = XVECEXP (value, 0, 0);
23876 value = XEXP (value, 0);
23877 if (GET_CODE (op) == COND_EXEC)
23878 op = COND_EXEC_CODE (op);
23879 if (GET_CODE (op) == PARALLEL)
23880 op = XVECEXP (op, 0, 0);
23881 op = XEXP (op, 1);
23882
23883 if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
23884 {
23885 if (GET_CODE (XEXP (op, 0)) == MULT)
23886 return !reg_overlap_mentioned_p (value, XEXP (op, 0));
23887 else
23888 return !reg_overlap_mentioned_p (value, XEXP (op, 1));
23889 }
23890
23891 return 0;
23892 }
23893
23894 /* We can't rely on the caller doing the proper promotion when
23895 using APCS or ATPCS. */
23896
23897 static bool
23898 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
23899 {
23900 return !TARGET_AAPCS_BASED;
23901 }
23902
23903 static enum machine_mode
23904 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
23905 enum machine_mode mode,
23906 int *punsignedp ATTRIBUTE_UNUSED,
23907 const_tree fntype ATTRIBUTE_UNUSED,
23908 int for_return ATTRIBUTE_UNUSED)
23909 {
23910 if (GET_MODE_CLASS (mode) == MODE_INT
23911 && GET_MODE_SIZE (mode) < 4)
23912 return SImode;
23913
23914 return mode;
23915 }
23916
23917 /* AAPCS based ABIs use short enums by default. */
23918
23919 static bool
23920 arm_default_short_enums (void)
23921 {
23922 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
23923 }
23924
23925
23926 /* AAPCS requires that anonymous bitfields affect structure alignment. */
23927
23928 static bool
23929 arm_align_anon_bitfield (void)
23930 {
23931 return TARGET_AAPCS_BASED;
23932 }
23933
23934
23935 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
23936
23937 static tree
23938 arm_cxx_guard_type (void)
23939 {
23940 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
23941 }
23942
23943 /* Return non-zero if the consumer (a multiply-accumulate instruction)
23944 has an accumulator dependency on the result of the producer (a
23945 multiplication instruction) and no other dependency on that result. */
23946 int
23947 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
23948 {
23949 rtx mul = PATTERN (producer);
23950 rtx mac = PATTERN (consumer);
23951 rtx mul_result;
23952 rtx mac_op0, mac_op1, mac_acc;
23953
23954 if (GET_CODE (mul) == COND_EXEC)
23955 mul = COND_EXEC_CODE (mul);
23956 if (GET_CODE (mac) == COND_EXEC)
23957 mac = COND_EXEC_CODE (mac);
23958
23959 /* Check that mul is of the form (set (...) (mult ...))
23960 and mla is of the form (set (...) (plus (mult ...) (...))). */
23961 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
23962 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
23963 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
23964 return 0;
23965
23966 mul_result = XEXP (mul, 0);
23967 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
23968 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
23969 mac_acc = XEXP (XEXP (mac, 1), 1);
23970
23971 return (reg_overlap_mentioned_p (mul_result, mac_acc)
23972 && !reg_overlap_mentioned_p (mul_result, mac_op0)
23973 && !reg_overlap_mentioned_p (mul_result, mac_op1));
23974 }
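
/* For illustration (register numbers are arbitrary): in the sequence

	mul	r1, r2, r3
	mla	r4, r5, r6, r1

   the accumulator of the mla is exactly the mul result and neither
   multiplication operand overlaps it, so the check above is expected to
   return nonzero.  It returns zero if r1 also fed r5 or r6.  */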
23975
23976
23977 /* The EABI says test the least significant bit of a guard variable. */
23978
23979 static bool
23980 arm_cxx_guard_mask_bit (void)
23981 {
23982 return TARGET_AAPCS_BASED;
23983 }
23984
23985
23986 /* The EABI specifies that all array cookies are 8 bytes long. */
23987
23988 static tree
23989 arm_get_cookie_size (tree type)
23990 {
23991 tree size;
23992
23993 if (!TARGET_AAPCS_BASED)
23994 return default_cxx_get_cookie_size (type);
23995
23996 size = build_int_cst (sizetype, 8);
23997 return size;
23998 }
23999
24000
24001 /* The EABI says that array cookies should also contain the element size. */
24002
24003 static bool
24004 arm_cookie_has_size (void)
24005 {
24006 return TARGET_AAPCS_BASED;
24007 }
24008
24009
24010 /* The EABI says constructors and destructors should return a pointer to
24011 the object constructed/destroyed. */
24012
24013 static bool
24014 arm_cxx_cdtor_returns_this (void)
24015 {
24016 return TARGET_AAPCS_BASED;
24017 }
24018
24019 /* The EABI says that an inline function may never be the key
24020 method. */
24021
24022 static bool
24023 arm_cxx_key_method_may_be_inline (void)
24024 {
24025 return !TARGET_AAPCS_BASED;
24026 }
24027
24028 static void
24029 arm_cxx_determine_class_data_visibility (tree decl)
24030 {
24031 if (!TARGET_AAPCS_BASED
24032 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
24033 return;
24034
24035 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
24036 is exported. However, on systems without dynamic vague linkage,
24037 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
24038 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
24039 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
24040 else
24041 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
24042 DECL_VISIBILITY_SPECIFIED (decl) = 1;
24043 }
24044
24045 static bool
24046 arm_cxx_class_data_always_comdat (void)
24047 {
24048 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
24049 vague linkage if the class has no key function. */
24050 return !TARGET_AAPCS_BASED;
24051 }
24052
24053
24054 /* The EABI says __aeabi_atexit should be used to register static
24055 destructors. */
24056
24057 static bool
24058 arm_cxx_use_aeabi_atexit (void)
24059 {
24060 return TARGET_AAPCS_BASED;
24061 }
24062
24063
24064 void
24065 arm_set_return_address (rtx source, rtx scratch)
24066 {
24067 arm_stack_offsets *offsets;
24068 HOST_WIDE_INT delta;
24069 rtx addr;
24070 unsigned long saved_regs;
24071
24072 offsets = arm_get_frame_offsets ();
24073 saved_regs = offsets->saved_regs_mask;
24074
24075 if ((saved_regs & (1 << LR_REGNUM)) == 0)
24076 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24077 else
24078 {
24079 if (frame_pointer_needed)
24080 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
24081 else
24082 {
24083 /* LR will be the first saved register. */
24084 delta = offsets->outgoing_args - (offsets->frame + 4);
24085
24086
24087 if (delta >= 4096)
24088 {
24089 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
24090 GEN_INT (delta & ~4095)));
24091 addr = scratch;
24092 delta &= 4095;
24093 }
24094 else
24095 addr = stack_pointer_rtx;
24096
24097 addr = plus_constant (Pmode, addr, delta);
24098 }
24099 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24100 }
24101 }
24102
24103
24104 void
24105 thumb_set_return_address (rtx source, rtx scratch)
24106 {
24107 arm_stack_offsets *offsets;
24108 HOST_WIDE_INT delta;
24109 HOST_WIDE_INT limit;
24110 int reg;
24111 rtx addr;
24112 unsigned long mask;
24113
24114 emit_use (source);
24115
24116 offsets = arm_get_frame_offsets ();
24117 mask = offsets->saved_regs_mask;
24118 if (mask & (1 << LR_REGNUM))
24119 {
24120 limit = 1024;
24121 /* Find the saved regs. */
24122 if (frame_pointer_needed)
24123 {
24124 delta = offsets->soft_frame - offsets->saved_args;
24125 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
24126 if (TARGET_THUMB1)
24127 limit = 128;
24128 }
24129 else
24130 {
24131 delta = offsets->outgoing_args - offsets->saved_args;
24132 reg = SP_REGNUM;
24133 }
24134 /* Allow for the stack frame. */
24135 if (TARGET_THUMB1 && TARGET_BACKTRACE)
24136 delta -= 16;
24137 /* The link register is always the first saved register. */
24138 delta -= 4;
24139
24140 /* Construct the address. */
24141 addr = gen_rtx_REG (SImode, reg);
24142 if (delta > limit)
24143 {
24144 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
24145 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
24146 addr = scratch;
24147 }
24148 else
24149 addr = plus_constant (Pmode, addr, delta);
24150
24151 emit_move_insn (gen_frame_mem (Pmode, addr), source);
24152 }
24153 else
24154 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
24155 }
24156
24157 /* Implements target hook vector_mode_supported_p. */
24158 bool
24159 arm_vector_mode_supported_p (enum machine_mode mode)
24160 {
24161 /* Neon also supports V2SImode, etc. listed in the clause below. */
24162 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
24163 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
24164 return true;
24165
24166 if ((TARGET_NEON || TARGET_IWMMXT)
24167 && ((mode == V2SImode)
24168 || (mode == V4HImode)
24169 || (mode == V8QImode)))
24170 return true;
24171
24172 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
24173 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
24174 || mode == V2HAmode))
24175 return true;
24176
24177 return false;
24178 }
24179
24180 /* Implements target hook array_mode_supported_p. */
24181
24182 static bool
24183 arm_array_mode_supported_p (enum machine_mode mode,
24184 unsigned HOST_WIDE_INT nelems)
24185 {
24186 if (TARGET_NEON
24187 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
24188 && (nelems >= 2 && nelems <= 4))
24189 return true;
24190
24191 return false;
24192 }
24193
24194 /* Use the option -mvectorize-with-neon-double to override the use of quadword
24195 registers when autovectorizing for Neon, at least until multiple vector
24196 widths are supported properly by the middle-end. */
24197
24198 static enum machine_mode
24199 arm_preferred_simd_mode (enum machine_mode mode)
24200 {
24201 if (TARGET_NEON)
24202 switch (mode)
24203 {
24204 case SFmode:
24205 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
24206 case SImode:
24207 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
24208 case HImode:
24209 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
24210 case QImode:
24211 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
24212 case DImode:
24213 if (!TARGET_NEON_VECTORIZE_DOUBLE)
24214 return V2DImode;
24215 break;
24216
24217 default:;
24218 }
24219
24220 if (TARGET_REALLY_IWMMXT)
24221 switch (mode)
24222 {
24223 case SImode:
24224 return V2SImode;
24225 case HImode:
24226 return V4HImode;
24227 case QImode:
24228 return V8QImode;
24229
24230 default:;
24231 }
24232
24233 return word_mode;
24234 }
24235
24236 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
24237
24238 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
24239 using r0-r4 for function arguments and r7 for the stack frame, leaving
24240 too few low registers for doubleword arithmetic. For Thumb-2 all the
24241 potentially problematic instructions accept high registers so this is not
24242 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
24243 that require many low registers. */
24244 static bool
24245 arm_class_likely_spilled_p (reg_class_t rclass)
24246 {
24247 if ((TARGET_THUMB1 && rclass == LO_REGS)
24248 || rclass == CC_REG)
24249 return true;
24250
24251 return false;
24252 }
24253
24254 /* Implements target hook small_register_classes_for_mode_p. */
24255 bool
24256 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
24257 {
24258 return TARGET_THUMB1;
24259 }
24260
24261 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
24262 ARM insns and therefore guarantee that the shift count is modulo 256.
24263 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
24264 guarantee no particular behavior for out-of-range counts. */
24265
24266 static unsigned HOST_WIDE_INT
24267 arm_shift_truncation_mask (enum machine_mode mode)
24268 {
24269 return mode == SImode ? 255 : 0;
24270 }
24271
24272
24273 /* Map internal gcc register numbers to DWARF2 register numbers. */
24274
24275 unsigned int
24276 arm_dbx_register_number (unsigned int regno)
24277 {
24278 if (regno < 16)
24279 return regno;
24280
24281 if (IS_VFP_REGNUM (regno))
24282 {
24283 /* See comment in arm_dwarf_register_span. */
24284 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24285 return 64 + regno - FIRST_VFP_REGNUM;
24286 else
24287 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
24288 }
24289
24290 if (IS_IWMMXT_GR_REGNUM (regno))
24291 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
24292
24293 if (IS_IWMMXT_REGNUM (regno))
24294 return 112 + regno - FIRST_IWMMXT_REGNUM;
24295
24296 gcc_unreachable ();
24297 }
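
/* Worked examples of the mapping above: core registers r0-r15 keep their
   own numbers; the first single-precision VFP register maps to 64; and,
   assuming d16 is numbered FIRST_VFP_REGNUM + 32, it maps to
   256 + 32 / 2 = 272 in the 256-287 range reserved for D registers.  */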
24298
24299 /* Dwarf models VFPv3 registers as 32 64-bit registers.
24300 GCC models them as 64 32-bit registers, so we need to describe this to
24301 the DWARF generation code. Other registers can use the default. */
24302 static rtx
24303 arm_dwarf_register_span (rtx rtl)
24304 {
24305 unsigned regno;
24306 int nregs;
24307 int i;
24308 rtx p;
24309
24310 regno = REGNO (rtl);
24311 if (!IS_VFP_REGNUM (regno))
24312 return NULL_RTX;
24313
24314 /* XXX FIXME: The EABI defines two VFP register ranges:
24315 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
24316 256-287: D0-D31
24317 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
24318 corresponding D register. Until GDB supports this, we shall use the
24319 legacy encodings. We also use these encodings for D0-D15 for
24320 compatibility with older debuggers. */
24321 if (VFP_REGNO_OK_FOR_SINGLE (regno))
24322 return NULL_RTX;
24323
24324 nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
24325 p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
24326 regno = (regno - FIRST_VFP_REGNUM) / 2;
24327 for (i = 0; i < nregs; i++)
24328 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
24329
24330 return p;
24331 }
24332
24333 #if ARM_UNWIND_INFO
24334 /* Emit unwind directives for a store-multiple instruction or stack pointer
24335 push during alignment.
24336 These should only ever be generated by the function prologue code, so
24337 expect them to have a particular form. */
24338
24339 static void
24340 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
24341 {
24342 int i;
24343 HOST_WIDE_INT offset;
24344 HOST_WIDE_INT nregs;
24345 int reg_size;
24346 unsigned reg;
24347 unsigned lastreg;
24348 rtx e;
24349
24350 e = XVECEXP (p, 0, 0);
24351 if (GET_CODE (e) != SET)
24352 abort ();
24353
24354 /* First insn will adjust the stack pointer. */
24355 if (GET_CODE (e) != SET
24356 || GET_CODE (XEXP (e, 0)) != REG
24357 || REGNO (XEXP (e, 0)) != SP_REGNUM
24358 || GET_CODE (XEXP (e, 1)) != PLUS)
24359 abort ();
24360
24361 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
24362 nregs = XVECLEN (p, 0) - 1;
24363
24364 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
24365 if (reg < 16)
24366 {
24367 /* The function prologue may also push pc, but does not annotate it, as it
24368 is never restored. We turn this into a stack pointer adjustment. */
24369 if (nregs * 4 == offset - 4)
24370 {
24371 fprintf (asm_out_file, "\t.pad #4\n");
24372 offset -= 4;
24373 }
24374 reg_size = 4;
24375 fprintf (asm_out_file, "\t.save {");
24376 }
24377 else if (IS_VFP_REGNUM (reg))
24378 {
24379 reg_size = 8;
24380 fprintf (asm_out_file, "\t.vsave {");
24381 }
24382 else
24383 /* Unknown register type. */
24384 abort ();
24385
24386 /* If the stack increment doesn't match the size of the saved registers,
24387 something has gone horribly wrong. */
24388 if (offset != nregs * reg_size)
24389 abort ();
24390
24391 offset = 0;
24392 lastreg = 0;
24393 /* The remaining insns will describe the stores. */
24394 for (i = 1; i <= nregs; i++)
24395 {
24396 /* Expect (set (mem <addr>) (reg)).
24397 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
24398 e = XVECEXP (p, 0, i);
24399 if (GET_CODE (e) != SET
24400 || GET_CODE (XEXP (e, 0)) != MEM
24401 || GET_CODE (XEXP (e, 1)) != REG)
24402 abort ();
24403
24404 reg = REGNO (XEXP (e, 1));
24405 if (reg < lastreg)
24406 abort ();
24407
24408 if (i != 1)
24409 fprintf (asm_out_file, ", ");
24410 /* We can't use %r for vfp because we need to use the
24411 double precision register names. */
24412 if (IS_VFP_REGNUM (reg))
24413 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
24414 else
24415 asm_fprintf (asm_out_file, "%r", reg);
24416
24417 #ifdef ENABLE_CHECKING
24418 /* Check that the addresses are consecutive. */
24419 e = XEXP (XEXP (e, 0), 0);
24420 if (GET_CODE (e) == PLUS)
24421 {
24422 offset += reg_size;
24423 if (GET_CODE (XEXP (e, 0)) != REG
24424 || REGNO (XEXP (e, 0)) != SP_REGNUM
24425 || GET_CODE (XEXP (e, 1)) != CONST_INT
24426 || offset != INTVAL (XEXP (e, 1)))
24427 abort ();
24428 }
24429 else if (i != 1
24430 || GET_CODE (e) != REG
24431 || REGNO (e) != SP_REGNUM)
24432 abort ();
24433 #endif
24434 }
24435 fprintf (asm_out_file, "}\n");
24436 }
24437
24438 /* Emit unwind directives for a SET. */
24439
24440 static void
24441 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
24442 {
24443 rtx e0;
24444 rtx e1;
24445 unsigned reg;
24446
24447 e0 = XEXP (p, 0);
24448 e1 = XEXP (p, 1);
24449 switch (GET_CODE (e0))
24450 {
24451 case MEM:
24452 /* Pushing a single register. */
24453 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
24454 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
24455 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
24456 abort ();
24457
24458 asm_fprintf (asm_out_file, "\t.save ");
24459 if (IS_VFP_REGNUM (REGNO (e1)))
24460 asm_fprintf(asm_out_file, "{d%d}\n",
24461 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
24462 else
24463 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
24464 break;
24465
24466 case REG:
24467 if (REGNO (e0) == SP_REGNUM)
24468 {
24469 /* A stack increment. */
24470 if (GET_CODE (e1) != PLUS
24471 || GET_CODE (XEXP (e1, 0)) != REG
24472 || REGNO (XEXP (e1, 0)) != SP_REGNUM
24473 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
24474 abort ();
24475
24476 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
24477 -INTVAL (XEXP (e1, 1)));
24478 }
24479 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
24480 {
24481 HOST_WIDE_INT offset;
24482
24483 if (GET_CODE (e1) == PLUS)
24484 {
24485 if (GET_CODE (XEXP (e1, 0)) != REG
24486 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
24487 abort ();
24488 reg = REGNO (XEXP (e1, 0));
24489 offset = INTVAL (XEXP (e1, 1));
24490 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
24491 HARD_FRAME_POINTER_REGNUM, reg,
24492 offset);
24493 }
24494 else if (GET_CODE (e1) == REG)
24495 {
24496 reg = REGNO (e1);
24497 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
24498 HARD_FRAME_POINTER_REGNUM, reg);
24499 }
24500 else
24501 abort ();
24502 }
24503 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
24504 {
24505 /* Move from sp to reg. */
24506 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
24507 }
24508 else if (GET_CODE (e1) == PLUS
24509 && GET_CODE (XEXP (e1, 0)) == REG
24510 && REGNO (XEXP (e1, 0)) == SP_REGNUM
24511 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
24512 {
24513 /* Set reg to offset from sp. */
24514 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
24515 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
24516 }
24517 else
24518 abort ();
24519 break;
24520
24521 default:
24522 abort ();
24523 }
24524 }
24525
24526
24527 /* Emit unwind directives for the given insn. */
24528
24529 static void
24530 arm_unwind_emit (FILE * asm_out_file, rtx insn)
24531 {
24532 rtx note, pat;
24533 bool handled_one = false;
24534
24535 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24536 return;
24537
24538 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24539 && (TREE_NOTHROW (current_function_decl)
24540 || crtl->all_throwers_are_sibcalls))
24541 return;
24542
24543 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
24544 return;
24545
24546 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
24547 {
24548 pat = XEXP (note, 0);
24549 switch (REG_NOTE_KIND (note))
24550 {
24551 case REG_FRAME_RELATED_EXPR:
24552 goto found;
24553
24554 case REG_CFA_REGISTER:
24555 if (pat == NULL)
24556 {
24557 pat = PATTERN (insn);
24558 if (GET_CODE (pat) == PARALLEL)
24559 pat = XVECEXP (pat, 0, 0);
24560 }
24561
24562 /* Only emitted for IS_STACKALIGN re-alignment. */
24563 {
24564 rtx dest, src;
24565 unsigned reg;
24566
24567 src = SET_SRC (pat);
24568 dest = SET_DEST (pat);
24569
24570 gcc_assert (src == stack_pointer_rtx);
24571 reg = REGNO (dest);
24572 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
24573 reg + 0x90, reg);
24574 }
24575 handled_one = true;
24576 break;
24577
24578 case REG_CFA_DEF_CFA:
24579 case REG_CFA_EXPRESSION:
24580 case REG_CFA_ADJUST_CFA:
24581 case REG_CFA_OFFSET:
24582 /* ??? Only handling here what we actually emit. */
24583 gcc_unreachable ();
24584
24585 default:
24586 break;
24587 }
24588 }
24589 if (handled_one)
24590 return;
24591 pat = PATTERN (insn);
24592 found:
24593
24594 switch (GET_CODE (pat))
24595 {
24596 case SET:
24597 arm_unwind_emit_set (asm_out_file, pat);
24598 break;
24599
24600 case SEQUENCE:
24601 /* Store multiple. */
24602 arm_unwind_emit_sequence (asm_out_file, pat);
24603 break;
24604
24605 default:
24606 abort();
24607 }
24608 }
24609
24610
24611 /* Output a reference from a function exception table to the type_info
24612 object X. The EABI specifies that the symbol should be relocated by
24613 an R_ARM_TARGET2 relocation. */
24614
24615 static bool
24616 arm_output_ttype (rtx x)
24617 {
24618 fputs ("\t.word\t", asm_out_file);
24619 output_addr_const (asm_out_file, x);
24620 /* Use special relocations for symbol references. */
24621 if (GET_CODE (x) != CONST_INT)
24622 fputs ("(TARGET2)", asm_out_file);
24623 fputc ('\n', asm_out_file);
24624
24625 return TRUE;
24626 }
24627
24628 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
24629
24630 static void
24631 arm_asm_emit_except_personality (rtx personality)
24632 {
24633 fputs ("\t.personality\t", asm_out_file);
24634 output_addr_const (asm_out_file, personality);
24635 fputc ('\n', asm_out_file);
24636 }
24637
24638 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
24639
24640 static void
24641 arm_asm_init_sections (void)
24642 {
24643 exception_section = get_unnamed_section (0, output_section_asm_op,
24644 "\t.handlerdata");
24645 }
24646 #endif /* ARM_UNWIND_INFO */
24647
24648 /* Output unwind directives for the start/end of a function. */
24649
24650 void
24651 arm_output_fn_unwind (FILE * f, bool prologue)
24652 {
24653 if (arm_except_unwind_info (&global_options) != UI_TARGET)
24654 return;
24655
24656 if (prologue)
24657 fputs ("\t.fnstart\n", f);
24658 else
24659 {
24660 /* If this function will never be unwound, then mark it as such.
24661 The same condition is used in arm_unwind_emit to suppress
24662 the frame annotations. */
24663 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
24664 && (TREE_NOTHROW (current_function_decl)
24665 || crtl->all_throwers_are_sibcalls))
24666 fputs("\t.cantunwind\n", f);
24667
24668 fputs ("\t.fnend\n", f);
24669 }
24670 }
24671
24672 static bool
24673 arm_emit_tls_decoration (FILE *fp, rtx x)
24674 {
24675 enum tls_reloc reloc;
24676 rtx val;
24677
24678 val = XVECEXP (x, 0, 0);
24679 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
24680
24681 output_addr_const (fp, val);
24682
24683 switch (reloc)
24684 {
24685 case TLS_GD32:
24686 fputs ("(tlsgd)", fp);
24687 break;
24688 case TLS_LDM32:
24689 fputs ("(tlsldm)", fp);
24690 break;
24691 case TLS_LDO32:
24692 fputs ("(tlsldo)", fp);
24693 break;
24694 case TLS_IE32:
24695 fputs ("(gottpoff)", fp);
24696 break;
24697 case TLS_LE32:
24698 fputs ("(tpoff)", fp);
24699 break;
24700 case TLS_DESCSEQ:
24701 fputs ("(tlsdesc)", fp);
24702 break;
24703 default:
24704 gcc_unreachable ();
24705 }
24706
24707 switch (reloc)
24708 {
24709 case TLS_GD32:
24710 case TLS_LDM32:
24711 case TLS_IE32:
24712 case TLS_DESCSEQ:
24713 fputs (" + (. - ", fp);
24714 output_addr_const (fp, XVECEXP (x, 0, 2));
24715 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
24716 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
24717 output_addr_const (fp, XVECEXP (x, 0, 3));
24718 fputc (')', fp);
24719 break;
24720 default:
24721 break;
24722 }
24723
24724 return TRUE;
24725 }
24726
24727 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
24728
24729 static void
24730 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
24731 {
24732 gcc_assert (size == 4);
24733 fputs ("\t.word\t", file);
24734 output_addr_const (file, x);
24735 fputs ("(tlsldo)", file);
24736 }
24737
24738 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
24739
24740 static bool
24741 arm_output_addr_const_extra (FILE *fp, rtx x)
24742 {
24743 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
24744 return arm_emit_tls_decoration (fp, x);
24745 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
24746 {
24747 char label[256];
24748 int labelno = INTVAL (XVECEXP (x, 0, 0));
24749
24750 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
24751 assemble_name_raw (fp, label);
24752
24753 return TRUE;
24754 }
24755 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
24756 {
24757 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
24758 if (GOT_PCREL)
24759 fputs ("+.", fp);
24760 fputs ("-(", fp);
24761 output_addr_const (fp, XVECEXP (x, 0, 0));
24762 fputc (')', fp);
24763 return TRUE;
24764 }
24765 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
24766 {
24767 output_addr_const (fp, XVECEXP (x, 0, 0));
24768 if (GOT_PCREL)
24769 fputs ("+.", fp);
24770 fputs ("-(", fp);
24771 output_addr_const (fp, XVECEXP (x, 0, 1));
24772 fputc (')', fp);
24773 return TRUE;
24774 }
24775 else if (GET_CODE (x) == CONST_VECTOR)
24776 return arm_emit_vector_const (fp, x);
24777
24778 return FALSE;
24779 }
24780
24781 /* Output assembly for a shift instruction.
24782 SET_FLAGS determines how the instruction modifies the condition codes.
24783 0 - Do not set condition codes.
24784 1 - Set condition codes.
24785 2 - Use smallest instruction. */
24786 const char *
24787 arm_output_shift(rtx * operands, int set_flags)
24788 {
24789 char pattern[100];
24790 static const char flag_chars[3] = {'?', '.', '!'};
24791 const char *shift;
24792 HOST_WIDE_INT val;
24793 char c;
24794
24795 c = flag_chars[set_flags];
24796 if (TARGET_UNIFIED_ASM)
24797 {
24798 shift = shift_op(operands[3], &val);
24799 if (shift)
24800 {
24801 if (val != -1)
24802 operands[2] = GEN_INT(val);
24803 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
24804 }
24805 else
24806 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
24807 }
24808 else
24809 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
24810 output_asm_insn (pattern, operands);
24811 return "";
24812 }
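
/* For illustration: when operands[3] is a constant left shift and unified
   syntax is in use, the code above builds a template such as
   "lsl%?\t%0, %1, %2", with the flag character '?', '.' or '!' selected
   by SET_FLAGS, whereas a shift that shift_op reduces away degenerates
   into a plain register move.  */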
24813
24814 /* Output assembly for a WMMX immediate shift instruction. */
24815 const char *
24816 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
24817 {
24818 int shift = INTVAL (operands[2]);
24819 char templ[50];
24820 enum machine_mode opmode = GET_MODE (operands[0]);
24821
24822 gcc_assert (shift >= 0);
24823
24824 /* Handle a shift value greater than 63 (for the D qualifier), 31 (for the W
24825 qualifier) or 15 (for the H qualifier), as the register versions would. */
24826 if (((opmode == V4HImode) && (shift > 15))
24827 || ((opmode == V2SImode) && (shift > 31))
24828 || ((opmode == DImode) && (shift > 63)))
24829 {
24830 if (wror_or_wsra)
24831 {
24832 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24833 output_asm_insn (templ, operands);
24834 if (opmode == DImode)
24835 {
24836 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
24837 output_asm_insn (templ, operands);
24838 }
24839 }
24840 else
24841 {
24842 /* The destination register will contain all zeros. */
24843 sprintf (templ, "wzero\t%%0");
24844 output_asm_insn (templ, operands);
24845 }
24846 return "";
24847 }
24848
24849 if ((opmode == DImode) && (shift > 32))
24850 {
24851 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
24852 output_asm_insn (templ, operands);
24853 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
24854 output_asm_insn (templ, operands);
24855 }
24856 else
24857 {
24858 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
24859 output_asm_insn (templ, operands);
24860 }
24861 return "";
24862 }
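
/* For illustration, with an assumed insn_name of "wsrld": a DImode shift
   by 40 falls in the (32, 63] range and is expected to be split into

	wsrld	wR0, wR1, #32
	wsrld	wR0, wR0, #8

   while an over-large count on anything other than wror/wsra simply
   zeroes the destination with wzero.  */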
24863
24864 /* Output assembly for a WMMX tinsr instruction. */
24865 const char *
24866 arm_output_iwmmxt_tinsr (rtx *operands)
24867 {
24868 int mask = INTVAL (operands[3]);
24869 int i;
24870 char templ[50];
24871 int units = mode_nunits[GET_MODE (operands[0])];
24872 gcc_assert ((mask & (mask - 1)) == 0);
24873 for (i = 0; i < units; ++i)
24874 {
24875 if ((mask & 0x01) == 1)
24876 {
24877 break;
24878 }
24879 mask >>= 1;
24880 }
24881 gcc_assert (i < units);
24882 {
24883 switch (GET_MODE (operands[0]))
24884 {
24885 case V8QImode:
24886 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
24887 break;
24888 case V4HImode:
24889 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
24890 break;
24891 case V2SImode:
24892 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
24893 break;
24894 default:
24895 gcc_unreachable ();
24896 break;
24897 }
24898 output_asm_insn (templ, operands);
24899 }
24900 return "";
24901 }
24902
24903 /* Output a Thumb-1 casesi dispatch sequence. */
24904 const char *
24905 thumb1_output_casesi (rtx *operands)
24906 {
24907 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
24908
24909 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24910
24911 switch (GET_MODE(diff_vec))
24912 {
24913 case QImode:
24914 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24915 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
24916 case HImode:
24917 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
24918 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
24919 case SImode:
24920 return "bl\t%___gnu_thumb1_case_si";
24921 default:
24922 gcc_unreachable ();
24923 }
24924 }
24925
24926 /* Output a Thumb-2 casesi instruction. */
24927 const char *
24928 thumb2_output_casesi (rtx *operands)
24929 {
24930 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
24931
24932 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
24933
24934 output_asm_insn ("cmp\t%0, %1", operands);
24935 output_asm_insn ("bhi\t%l3", operands);
24936 switch (GET_MODE(diff_vec))
24937 {
24938 case QImode:
24939 return "tbb\t[%|pc, %0]";
24940 case HImode:
24941 return "tbh\t[%|pc, %0, lsl #1]";
24942 case SImode:
24943 if (flag_pic)
24944 {
24945 output_asm_insn ("adr\t%4, %l2", operands);
24946 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
24947 output_asm_insn ("add\t%4, %4, %5", operands);
24948 return "bx\t%4";
24949 }
24950 else
24951 {
24952 output_asm_insn ("adr\t%4, %l2", operands);
24953 return "ldr\t%|pc, [%4, %0, lsl #2]";
24954 }
24955 default:
24956 gcc_unreachable ();
24957 }
24958 }
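
/* Sketch of the expected Thumb-2 dispatch for a QImode table (register
   names are illustrative):

	cmp	r0, r1		@ index against bound
	bhi	.Ldefault
	tbb	[pc, r0]

   with the table of byte offsets placed immediately after the tbb.  */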
24959
24960 /* Most ARM cores are single issue, but some newer ones can issue several
24961 instructions per cycle. The scheduler descriptions rely on this being correct. */
24962 static int
24963 arm_issue_rate (void)
24964 {
24965 switch (arm_tune)
24966 {
24967 case cortexa15:
24968 return 3;
24969
24970 case cortexr4:
24971 case cortexr4f:
24972 case cortexr5:
24973 case genericv7a:
24974 case cortexa5:
24975 case cortexa8:
24976 case cortexa9:
24977 case fa726te:
24978 return 2;
24979
24980 default:
24981 return 1;
24982 }
24983 }
24984
24985 /* A table and a function to perform ARM-specific name mangling for
24986 NEON vector types in order to conform to the AAPCS (see "Procedure
24987 Call Standard for the ARM Architecture", Appendix A). To qualify
24988 for emission with the mangled names defined in that document, a
24989 vector type must not only be of the correct mode but also be
24990 composed of NEON vector element types (e.g. __builtin_neon_qi). */
24991 typedef struct
24992 {
24993 enum machine_mode mode;
24994 const char *element_type_name;
24995 const char *aapcs_name;
24996 } arm_mangle_map_entry;
24997
24998 static arm_mangle_map_entry arm_mangle_map[] = {
24999 /* 64-bit containerized types. */
25000 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
25001 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
25002 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
25003 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
25004 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
25005 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
25006 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
25007 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
25008 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
25009 /* 128-bit containerized types. */
25010 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
25011 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
25012 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
25013 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
25014 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
25015 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
25016 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
25017 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
25018 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
25019 { VOIDmode, NULL, NULL }
25020 };
25021
25022 const char *
25023 arm_mangle_type (const_tree type)
25024 {
25025 arm_mangle_map_entry *pos = arm_mangle_map;
25026
25027 /* The ARM ABI documents (10th October 2008) say that "__va_list"
25028 has to be mangled as if it is in the "std" namespace. */
25029 if (TARGET_AAPCS_BASED
25030 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
25031 {
25032 static bool warned;
25033 if (!warned && warn_psabi && !in_system_header)
25034 {
25035 warned = true;
25036 inform (input_location,
25037 "the mangling of %<va_list%> has changed in GCC 4.4");
25038 }
25039 return "St9__va_list";
25040 }
25041
25042 /* Half-precision float. */
25043 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
25044 return "Dh";
25045
25046 if (TREE_CODE (type) != VECTOR_TYPE)
25047 return NULL;
25048
25049 /* Check the mode of the vector type, and the name of the vector
25050 element type, against the table. */
25051 while (pos->mode != VOIDmode)
25052 {
25053 tree elt_type = TREE_TYPE (type);
25054
25055 if (pos->mode == TYPE_MODE (type)
25056 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
25057 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
25058 pos->element_type_name))
25059 return pos->aapcs_name;
25060
25061 pos++;
25062 }
25063
25064 /* Use the default mangling for unrecognized (possibly user-defined)
25065 vector types. */
25066 return NULL;
25067 }
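
/* For illustration: under the table above, the arm_neon.h type int32x4_t
   (mode V4SImode, element type __builtin_neon_si) is expected to mangle
   as "17__simd128_int32_t", while a user-defined GNU vector of the same
   mode falls through to the default mangling.  */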
25068
25069 /* Order of allocation of core registers for Thumb: this allocation is
25070 written over the corresponding initial entries of the array
25071 initialized with REG_ALLOC_ORDER. We allocate all low registers
25072 first. Saving and restoring a low register is usually cheaper than
25073 using a call-clobbered high register. */
25074
25075 static const int thumb_core_reg_alloc_order[] =
25076 {
25077 3, 2, 1, 0, 4, 5, 6, 7,
25078 14, 12, 8, 9, 10, 11
25079 };
25080
25081 /* Adjust register allocation order when compiling for Thumb. */
25082
25083 void
25084 arm_order_regs_for_local_alloc (void)
25085 {
25086 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
25087 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
25088 if (TARGET_THUMB)
25089 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
25090 sizeof (thumb_core_reg_alloc_order));
25091 }
25092
25093 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
25094
25095 bool
25096 arm_frame_pointer_required (void)
25097 {
25098 return (cfun->has_nonlocal_label
25099 || SUBTARGET_FRAME_POINTER_REQUIRED
25100 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
25101 }
25102
25103 /* Only Thumb-1 lacks conditional execution, so return true if
25104 the target is not Thumb-1. */
25105 static bool
25106 arm_have_conditional_execution (void)
25107 {
25108 return !TARGET_THUMB1;
25109 }
25110
25111 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
25112 static HOST_WIDE_INT
25113 arm_vector_alignment (const_tree type)
25114 {
25115 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
25116
25117 if (TARGET_AAPCS_BASED)
25118 align = MIN (align, 64);
25119
25120 return align;
25121 }
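
/* A small worked example: for a 128-bit NEON vector type, TYPE_SIZE
   yields 128, which the AAPCS clamp above reduces to an alignment of
   64 bits; a 64-bit vector is left at 64.  */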
25122
25123 static unsigned int
25124 arm_autovectorize_vector_sizes (void)
25125 {
25126 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
25127 }
25128
25129 static bool
25130 arm_vector_alignment_reachable (const_tree type, bool is_packed)
25131 {
25132 /* Vectors which aren't in packed structures will not be less aligned than
25133 the natural alignment of their element type, so this is safe. */
25134 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25135 return !is_packed;
25136
25137 return default_builtin_vector_alignment_reachable (type, is_packed);
25138 }
25139
25140 static bool
25141 arm_builtin_support_vector_misalignment (enum machine_mode mode,
25142 const_tree type, int misalignment,
25143 bool is_packed)
25144 {
25145 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
25146 {
25147 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
25148
25149 if (is_packed)
25150 return align == 1;
25151
25152 /* If the misalignment is unknown, we should be able to handle the access
25153 so long as it is not to a member of a packed data structure. */
25154 if (misalignment == -1)
25155 return true;
25156
25157 /* Return true if the misalignment is a multiple of the natural alignment
25158 of the vector's element type. This is probably always going to be
25159 true in practice, since we've already established that this isn't a
25160 packed access. */
25161 return ((misalignment % align) == 0);
25162 }
25163
25164 return default_builtin_support_vector_misalignment (mode, type, misalignment,
25165 is_packed);
25166 }
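
/* Worked example (illustrative only): if TYPE_ALIGN_UNIT of the vector
   type is 8 bytes, then a known misalignment of 16 bytes is accepted
   above (16 % 8 == 0), a misalignment of 4 bytes is rejected, and an
   unknown misalignment (-1) is accepted provided the access is not to a
   packed structure member.  */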
25167
25168 static void
25169 arm_conditional_register_usage (void)
25170 {
25171 int regno;
25172
25173 if (TARGET_THUMB1 && optimize_size)
25174 {
25175 /* When optimizing for size on Thumb-1, it's better not
25176 to use the HI regs, because of the overhead of
25177 stacking them. */
25178 for (regno = FIRST_HI_REGNUM;
25179 regno <= LAST_HI_REGNUM; ++regno)
25180 fixed_regs[regno] = call_used_regs[regno] = 1;
25181 }
25182
25183 /* The link register can be clobbered by any branch insn,
25184 but we have no way to track that at present, so mark
25185 it as unavailable. */
25186 if (TARGET_THUMB1)
25187 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
25188
25189 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
25190 {
25191 /* VFPv3 registers are disabled when earlier VFP
25192 versions are selected due to the definition of
25193 LAST_VFP_REGNUM. */
25194 for (regno = FIRST_VFP_REGNUM;
25195 regno <= LAST_VFP_REGNUM; ++ regno)
25196 {
25197 fixed_regs[regno] = 0;
25198 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
25199 || regno >= FIRST_VFP_REGNUM + 32;
25200 }
25201 }
25202
25203 if (TARGET_REALLY_IWMMXT)
25204 {
25205 regno = FIRST_IWMMXT_GR_REGNUM;
25206 /* The 2002/10/09 revision of the XScale ABI has wCG0
25207 and wCG1 as call-preserved registers. The 2002/11/21
25208 revision changed this so that all wCG registers are
25209 scratch registers. */
25210 for (regno = FIRST_IWMMXT_GR_REGNUM;
25211 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
25212 fixed_regs[regno] = 0;
25213 /* The XScale ABI has wR0 - wR9 as scratch registers,
25214 the rest as call-preserved registers. */
25215 for (regno = FIRST_IWMMXT_REGNUM;
25216 regno <= LAST_IWMMXT_REGNUM; ++ regno)
25217 {
25218 fixed_regs[regno] = 0;
25219 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
25220 }
25221 }
25222
25223 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
25224 {
25225 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25226 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
25227 }
25228 else if (TARGET_APCS_STACK)
25229 {
25230 fixed_regs[10] = 1;
25231 call_used_regs[10] = 1;
25232 }
25233 /* -mcaller-super-interworking reserves r11 for calls to
25234 _interwork_r11_call_via_rN(). Making the register global
25235 is an easy way of ensuring that it remains valid for all
25236 calls. */
25237 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
25238 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
25239 {
25240 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25241 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25242 if (TARGET_CALLER_INTERWORKING)
25243 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
25244 }
25245 SUBTARGET_CONDITIONAL_REGISTER_USAGE
25246 }
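
/* Note (illustrative): the VFP loop above encodes the AAPCS convention
   that s0-s15 (d0-d7) and, where present, d16-d31 are call-clobbered,
   while s16-s31 (d8-d15) are callee-saved.  */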
25247
25248 static reg_class_t
25249 arm_preferred_rename_class (reg_class_t rclass)
25250 {
25251 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
25252 using GENERAL_REGS.  During the register rename pass we therefore prefer
25253 LO_REGS, which can reduce code size.  */
25254 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
25255 return LO_REGS;
25256 else
25257 return NO_REGS;
25258 }
25259
25260 /* Compute the attribute "length" of insn "*push_multi".
25261 So this function MUST be kept in sync with that insn pattern. */
25262 int
25263 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
25264 {
25265 int i, regno, hi_reg;
25266 int num_saves = XVECLEN (parallel_op, 0);
25267
25268 /* ARM mode. */
25269 if (TARGET_ARM)
25270 return 4;
25271 /* Thumb1 mode. */
25272 if (TARGET_THUMB1)
25273 return 2;
25274
25275 /* Thumb2 mode. */
25276 regno = REGNO (first_op);
25277 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25278 for (i = 1; i < num_saves && !hi_reg; i++)
25279 {
25280 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
25281 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
25282 }
25283
25284 if (!hi_reg)
25285 return 2;
25286 return 4;
25287 }
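
/* Example (illustrative only): in Thumb-2, "push {r4, r5, lr}" names only
   low registers and LR, so the 16-bit encoding applies and the function
   above returns 2; "push {r4, r8}" uses a high register other than LR,
   needs the 32-bit encoding, and yields 4.  */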
25288
25289 /* Compute the number of instructions emitted by output_move_double. */
25290 int
25291 arm_count_output_move_double_insns (rtx *operands)
25292 {
25293 int count;
25294 rtx ops[2];
25295 /* output_move_double may modify the operands array, so call it
25296 here on a copy of the array. */
25297 ops[0] = operands[0];
25298 ops[1] = operands[1];
25299 output_move_double (ops, false, &count);
25300 return count;
25301 }
25302
25303 int
25304 vfp3_const_double_for_fract_bits (rtx operand)
25305 {
25306 REAL_VALUE_TYPE r0;
25307
25308 if (GET_CODE (operand) != CONST_DOUBLE)
25309 return 0;
25310
25311 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
25312 if (exact_real_inverse (DFmode, &r0))
25313 {
25314 if (exact_real_truncate (DFmode, &r0))
25315 {
25316 HOST_WIDE_INT value = real_to_integer (&r0);
25317 value = value & 0xffffffff;
25318 if ((value != 0) && ( (value & (value - 1)) == 0))
25319 return int_log2 (value);
25320 }
25321 }
25322 return 0;
25323 }
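
/* Worked example (illustrative only): for the constant 0.125 the exact
   reciprocal is 8.0, which truncates exactly to the power of two 8, so
   the function above returns int_log2 (8) == 3, i.e. a fixed-point value
   with 3 fractional bits.  */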
25324 \f
25325 /* Emit a memory barrier around an atomic sequence according to MODEL. */
25326
25327 static void
25328 arm_pre_atomic_barrier (enum memmodel model)
25329 {
25330 if (need_atomic_barrier_p (model, true))
25331 emit_insn (gen_memory_barrier ());
25332 }
25333
25334 static void
25335 arm_post_atomic_barrier (enum memmodel model)
25336 {
25337 if (need_atomic_barrier_p (model, false))
25338 emit_insn (gen_memory_barrier ());
25339 }
25340
25341 /* Emit the load-exclusive and store-exclusive instructions. */
25342
25343 static void
25344 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem)
25345 {
25346 rtx (*gen) (rtx, rtx);
25347
25348 switch (mode)
25349 {
25350 case QImode: gen = gen_arm_load_exclusiveqi; break;
25351 case HImode: gen = gen_arm_load_exclusivehi; break;
25352 case SImode: gen = gen_arm_load_exclusivesi; break;
25353 case DImode: gen = gen_arm_load_exclusivedi; break;
25354 default:
25355 gcc_unreachable ();
25356 }
25357
25358 emit_insn (gen (rval, mem));
25359 }
25360
25361 static void
25362 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval, rtx mem)
25363 {
25364 rtx (*gen) (rtx, rtx, rtx);
25365
25366 switch (mode)
25367 {
25368 case QImode: gen = gen_arm_store_exclusiveqi; break;
25369 case HImode: gen = gen_arm_store_exclusivehi; break;
25370 case SImode: gen = gen_arm_store_exclusivesi; break;
25371 case DImode: gen = gen_arm_store_exclusivedi; break;
25372 default:
25373 gcc_unreachable ();
25374 }
25375
25376 emit_insn (gen (bval, rval, mem));
25377 }
25378
25379 /* Mark the previous jump instruction as unlikely. */
25380
25381 static void
25382 emit_unlikely_jump (rtx insn)
25383 {
25384 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
25385
25386 insn = emit_jump_insn (insn);
25387 add_reg_note (insn, REG_BR_PROB, very_unlikely);
25388 }
25389
25390 /* Expand a compare and swap pattern. */
25391
25392 void
25393 arm_expand_compare_and_swap (rtx operands[])
25394 {
25395 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
25396 enum machine_mode mode;
25397 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
25398
25399 bval = operands[0];
25400 rval = operands[1];
25401 mem = operands[2];
25402 oldval = operands[3];
25403 newval = operands[4];
25404 is_weak = operands[5];
25405 mod_s = operands[6];
25406 mod_f = operands[7];
25407 mode = GET_MODE (mem);
25408
25409 switch (mode)
25410 {
25411 case QImode:
25412 case HImode:
25413 /* For narrow modes, we're going to perform the comparison in SImode,
25414 so do the zero-extension now. */
25415 rval = gen_reg_rtx (SImode);
25416 oldval = convert_modes (SImode, mode, oldval, true);
25417 /* FALLTHRU */
25418
25419 case SImode:
25420 /* Force the value into a register if needed. We waited until after
25421 the zero-extension above to do this properly. */
25422 if (!arm_add_operand (oldval, SImode))
25423 oldval = force_reg (SImode, oldval);
25424 break;
25425
25426 case DImode:
25427 if (!cmpdi_operand (oldval, mode))
25428 oldval = force_reg (mode, oldval);
25429 break;
25430
25431 default:
25432 gcc_unreachable ();
25433 }
25434
25435 switch (mode)
25436 {
25437 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
25438 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
25439 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
25440 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
25441 default:
25442 gcc_unreachable ();
25443 }
25444
25445 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
25446
25447 if (mode == QImode || mode == HImode)
25448 emit_move_insn (operands[1], gen_lowpart (mode, rval));
25449
25450 /* In all cases, we arrange for success to be signaled by Z set.
25451 This arrangement allows for the boolean result to be used directly
25452 in a subsequent branch, post optimization. */
25453 x = gen_rtx_REG (CCmode, CC_REGNUM);
25454 x = gen_rtx_EQ (SImode, x, const0_rtx);
25455 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
25456 }
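
/* Usage sketch (illustrative only): a source-level call such as

     ok = __atomic_compare_exchange_n (&x, &expected, desired, 0,
                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);

   (fourth argument 0 requesting a strong compare-and-swap) is routed
   through the atomic_compare_and_swap optab to the expander above, which
   receives the success and failure memory models as MOD_S and MOD_F.  */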
25457
25458 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
25459 another memory store between the load-exclusive and store-exclusive can
25460 reset the monitor from Exclusive to Open state. This means we must wait
25461 until after reload to split the pattern, lest we get a register spill in
25462 the middle of the atomic sequence. */
25463
25464 void
25465 arm_split_compare_and_swap (rtx operands[])
25466 {
25467 rtx rval, mem, oldval, newval, scratch;
25468 enum machine_mode mode;
25469 enum memmodel mod_s, mod_f;
25470 bool is_weak;
25471 rtx label1, label2, x, cond;
25472
25473 rval = operands[0];
25474 mem = operands[1];
25475 oldval = operands[2];
25476 newval = operands[3];
25477 is_weak = (operands[4] != const0_rtx);
25478 mod_s = (enum memmodel) INTVAL (operands[5]);
25479 mod_f = (enum memmodel) INTVAL (operands[6]);
25480 scratch = operands[7];
25481 mode = GET_MODE (mem);
25482
25483 arm_pre_atomic_barrier (mod_s);
25484
25485 label1 = NULL_RTX;
25486 if (!is_weak)
25487 {
25488 label1 = gen_label_rtx ();
25489 emit_label (label1);
25490 }
25491 label2 = gen_label_rtx ();
25492
25493 arm_emit_load_exclusive (mode, rval, mem);
25494
25495 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
25496 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25497 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25498 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
25499 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25500
25501 arm_emit_store_exclusive (mode, scratch, mem, newval);
25502
25503 /* Weak or strong, we want EQ to be true for success, so that we
25504 match the flags that we got from the compare above. */
25505 cond = gen_rtx_REG (CCmode, CC_REGNUM);
25506 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
25507 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
25508
25509 if (!is_weak)
25510 {
25511 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25512 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
25513 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
25514 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
25515 }
25516
25517 if (mod_f != MEMMODEL_RELAXED)
25518 emit_label (label2);
25519
25520 arm_post_atomic_barrier (mod_s);
25521
25522 if (mod_f == MEMMODEL_RELAXED)
25523 emit_label (label2);
25524 }
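
/* Schematic of the sequence split out above for a strong SImode
   compare-and-swap (illustrative only; register choice and barriers
   depend on the memory model):

     1: ldrex   rval, [mem]
        cmp     rval, oldval
        bne     2f
        strex   scratch, newval, [mem]
        cmp     scratch, #0
        bne     1b
     2:

   For a weak compare-and-swap the final backward branch is omitted.  */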
25525
25526 void
25527 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
25528 rtx value, rtx model_rtx, rtx cond)
25529 {
25530 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
25531 enum machine_mode mode = GET_MODE (mem);
25532 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
25533 rtx label, x;
25534
25535 arm_pre_atomic_barrier (model);
25536
25537 label = gen_label_rtx ();
25538 emit_label (label);
25539
25540 if (new_out)
25541 new_out = gen_lowpart (wmode, new_out);
25542 if (old_out)
25543 old_out = gen_lowpart (wmode, old_out);
25544 else
25545 old_out = new_out;
25546 value = simplify_gen_subreg (wmode, value, mode, 0);
25547
25548 arm_emit_load_exclusive (mode, old_out, mem);
25549
25550 switch (code)
25551 {
25552 case SET:
25553 new_out = value;
25554 break;
25555
25556 case NOT:
25557 x = gen_rtx_AND (wmode, old_out, value);
25558 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25559 x = gen_rtx_NOT (wmode, new_out);
25560 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25561 break;
25562
25563 case MINUS:
25564 if (CONST_INT_P (value))
25565 {
25566 value = GEN_INT (-INTVAL (value));
25567 code = PLUS;
25568 }
25569 /* FALLTHRU */
25570
25571 case PLUS:
25572 if (mode == DImode)
25573 {
25574 /* DImode plus/minus need to clobber flags. */
25575 /* The adddi3 and subdi3 patterns are incorrectly written so that
25576 they require matching operands, even when we could easily support
25577 three operands. Thankfully, this can be fixed up post-splitting,
25578 as the individual add+adc patterns do accept three operands and
25579 post-reload cprop can make these moves go away. */
25580 emit_move_insn (new_out, old_out);
25581 if (code == PLUS)
25582 x = gen_adddi3 (new_out, new_out, value);
25583 else
25584 x = gen_subdi3 (new_out, new_out, value);
25585 emit_insn (x);
25586 break;
25587 }
25588 /* FALLTHRU */
25589
25590 default:
25591 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
25592 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
25593 break;
25594 }
25595
25596 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out));
25597
25598 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
25599 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
25600
25601 arm_post_atomic_barrier (model);
25602 }
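
/* Schematic of the loop split out above for, e.g., an SImode atomic add
   (illustrative only):

     1: ldrex   old, [mem]
        add     new, old, value
        strex   tmp, new, [mem]
        cmp     tmp, #0
        bne     1b

   OLD_OUT receives the value loaded by LDREX and NEW_OUT the value that
   was stored, which is what the fetch-and-op and op-and-fetch expansions
   need.  */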
25603 \f
25604 #define MAX_VECT_LEN 16
25605
25606 struct expand_vec_perm_d
25607 {
25608 rtx target, op0, op1;
25609 unsigned char perm[MAX_VECT_LEN];
25610 enum machine_mode vmode;
25611 unsigned char nelt;
25612 bool one_vector_p;
25613 bool testing_p;
25614 };
25615
25616 /* Generate a variable permutation. */
25617
25618 static void
25619 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
25620 {
25621 enum machine_mode vmode = GET_MODE (target);
25622 bool one_vector_p = rtx_equal_p (op0, op1);
25623
25624 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
25625 gcc_checking_assert (GET_MODE (op0) == vmode);
25626 gcc_checking_assert (GET_MODE (op1) == vmode);
25627 gcc_checking_assert (GET_MODE (sel) == vmode);
25628 gcc_checking_assert (TARGET_NEON);
25629
25630 if (one_vector_p)
25631 {
25632 if (vmode == V8QImode)
25633 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
25634 else
25635 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
25636 }
25637 else
25638 {
25639 rtx pair;
25640
25641 if (vmode == V8QImode)
25642 {
25643 pair = gen_reg_rtx (V16QImode);
25644 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
25645 pair = gen_lowpart (TImode, pair);
25646 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
25647 }
25648 else
25649 {
25650 pair = gen_reg_rtx (OImode);
25651 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
25652 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
25653 }
25654 }
25655 }
25656
25657 void
25658 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
25659 {
25660 enum machine_mode vmode = GET_MODE (target);
25661 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
25662 bool one_vector_p = rtx_equal_p (op0, op1);
25663 rtx rmask[MAX_VECT_LEN], mask;
25664
25665 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25666 numbering of elements for big-endian, we must reverse the order. */
25667 gcc_checking_assert (!BYTES_BIG_ENDIAN);
25668
25669 /* The VTBL instruction does not use a modulo index, so we must take care
25670 of that ourselves. */
25671 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
25672 for (i = 0; i < nelt; ++i)
25673 rmask[i] = mask;
25674 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
25675 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
25676
25677 arm_expand_vec_perm_1 (target, op0, op1, sel);
25678 }
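
/* Worked example (illustrative only): for a V8QImode permutation of two
   distinct vectors, NELT is 8 and the mask built above is 15, so a
   variable selector element of, say, 19 is reduced to 19 & 15 == 3,
   giving the modulo behaviour that VEC_PERM_EXPR requires but VTBL does
   not provide.  */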
25679
25680 /* Generate or test for an insn that supports a constant permutation. */
25681
25682 /* Recognize patterns for the VUZP insns. */
25683
25684 static bool
25685 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
25686 {
25687 unsigned int i, odd, mask, nelt = d->nelt;
25688 rtx out0, out1, in0, in1, x;
25689 rtx (*gen)(rtx, rtx, rtx, rtx);
25690
25691 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25692 return false;
25693
25694 /* Note that these are little-endian tests. Adjust for big-endian later. */
25695 if (d->perm[0] == 0)
25696 odd = 0;
25697 else if (d->perm[0] == 1)
25698 odd = 1;
25699 else
25700 return false;
25701 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25702
25703 for (i = 0; i < nelt; i++)
25704 {
25705 unsigned elt = (i * 2 + odd) & mask;
25706 if (d->perm[i] != elt)
25707 return false;
25708 }
25709
25710 /* Success! */
25711 if (d->testing_p)
25712 return true;
25713
25714 switch (d->vmode)
25715 {
25716 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
25717 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
25718 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
25719 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
25720 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
25721 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
25722 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
25723 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
25724 default:
25725 gcc_unreachable ();
25726 }
25727
25728 in0 = d->op0;
25729 in1 = d->op1;
25730 if (BYTES_BIG_ENDIAN)
25731 {
25732 x = in0, in0 = in1, in1 = x;
25733 odd = !odd;
25734 }
25735
25736 out0 = d->target;
25737 out1 = gen_reg_rtx (d->vmode);
25738 if (odd)
25739 x = out0, out0 = out1, out1 = x;
25740
25741 emit_insn (gen (out0, in0, in1, out1));
25742 return true;
25743 }
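
/* Example (illustrative only): for V8QImode with two distinct inputs, the
   permutation { 0, 2, 4, 6, 8, 10, 12, 14 } selects the even-indexed
   elements of the concatenated input; it passes the check above with
   ODD == 0 and is emitted as a single VUZP.  */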
25744
25745 /* Recognize patterns for the VZIP insns. */
25746
25747 static bool
25748 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
25749 {
25750 unsigned int i, high, mask, nelt = d->nelt;
25751 rtx out0, out1, in0, in1, x;
25752 rtx (*gen)(rtx, rtx, rtx, rtx);
25753
25754 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25755 return false;
25756
25757 /* Note that these are little-endian tests. Adjust for big-endian later. */
25758 high = nelt / 2;
25759 if (d->perm[0] == high)
25760 ;
25761 else if (d->perm[0] == 0)
25762 high = 0;
25763 else
25764 return false;
25765 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25766
25767 for (i = 0; i < nelt / 2; i++)
25768 {
25769 unsigned elt = (i + high) & mask;
25770 if (d->perm[i * 2] != elt)
25771 return false;
25772 elt = (elt + nelt) & mask;
25773 if (d->perm[i * 2 + 1] != elt)
25774 return false;
25775 }
25776
25777 /* Success! */
25778 if (d->testing_p)
25779 return true;
25780
25781 switch (d->vmode)
25782 {
25783 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
25784 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
25785 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
25786 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
25787 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
25788 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
25789 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
25790 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
25791 default:
25792 gcc_unreachable ();
25793 }
25794
25795 in0 = d->op0;
25796 in1 = d->op1;
25797 if (BYTES_BIG_ENDIAN)
25798 {
25799 x = in0, in0 = in1, in1 = x;
25800 high = !high;
25801 }
25802
25803 out0 = d->target;
25804 out1 = gen_reg_rtx (d->vmode);
25805 if (high)
25806 x = out0, out0 = out1, out1 = x;
25807
25808 emit_insn (gen (out0, in0, in1, out1));
25809 return true;
25810 }
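
/* Example (illustrative only): for V8QImode with two distinct inputs, the
   permutation { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low halves of
   the two inputs; it passes the check above with HIGH == 0 and is emitted
   as a single VZIP.  */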
25811
25812 /* Recognize patterns for the VREV insns. */
25813
25814 static bool
25815 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
25816 {
25817 unsigned int i, j, diff, nelt = d->nelt;
25818 rtx (*gen)(rtx, rtx, rtx);
25819
25820 if (!d->one_vector_p)
25821 return false;
25822
25823 diff = d->perm[0];
25824 switch (diff)
25825 {
25826 case 7:
25827 switch (d->vmode)
25828 {
25829 case V16QImode: gen = gen_neon_vrev64v16qi; break;
25830 case V8QImode: gen = gen_neon_vrev64v8qi; break;
25831 default:
25832 return false;
25833 }
25834 break;
25835 case 3:
25836 switch (d->vmode)
25837 {
25838 case V16QImode: gen = gen_neon_vrev32v16qi; break;
25839 case V8QImode: gen = gen_neon_vrev32v8qi; break;
25840 case V8HImode: gen = gen_neon_vrev64v8hi; break;
25841 case V4HImode: gen = gen_neon_vrev64v4hi; break;
25842 default:
25843 return false;
25844 }
25845 break;
25846 case 1:
25847 switch (d->vmode)
25848 {
25849 case V16QImode: gen = gen_neon_vrev16v16qi; break;
25850 case V8QImode: gen = gen_neon_vrev16v8qi; break;
25851 case V8HImode: gen = gen_neon_vrev32v8hi; break;
25852 case V4HImode: gen = gen_neon_vrev32v4hi; break;
25853 case V4SImode: gen = gen_neon_vrev64v4si; break;
25854 case V2SImode: gen = gen_neon_vrev64v2si; break;
25855 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
25856 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
25857 default:
25858 return false;
25859 }
25860 break;
25861 default:
25862 return false;
25863 }
25864
25865 for (i = 0; i < nelt; i += diff + 1)
25866 for (j = 0; j <= diff; j += 1)
25867 {
25868 /* This is guaranteed to be true, as the value of diff
25869 is 7, 3 or 1 and we should have enough elements in the
25870 queue to generate this.  Getting a vector mask with a
25871 value of diff other than one of these values implies
25872 that something is wrong by the time we get here.  */
25873 gcc_assert (i + j < nelt);
25874 if (d->perm[i + j] != i + diff - j)
25875 return false;
25876 }
25877
25878 /* Success! */
25879 if (d->testing_p)
25880 return true;
25881
25882 /* ??? The third operand is an artifact of the builtin infrastructure
25883 and is ignored by the actual instruction. */
25884 emit_insn (gen (d->target, d->op0, const0_rtx));
25885 return true;
25886 }
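
/* Example (illustrative only): for V8QImode the single-vector permutation
   { 3, 2, 1, 0, 7, 6, 5, 4 } has DIFF == 3 and reverses the bytes within
   each 32-bit group, so it is matched above and emitted as VREV32.8.  */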
25887
25888 /* Recognize patterns for the VTRN insns. */
25889
25890 static bool
25891 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
25892 {
25893 unsigned int i, odd, mask, nelt = d->nelt;
25894 rtx out0, out1, in0, in1, x;
25895 rtx (*gen)(rtx, rtx, rtx, rtx);
25896
25897 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
25898 return false;
25899
25900 /* Note that these are little-endian tests. Adjust for big-endian later. */
25901 if (d->perm[0] == 0)
25902 odd = 0;
25903 else if (d->perm[0] == 1)
25904 odd = 1;
25905 else
25906 return false;
25907 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
25908
25909 for (i = 0; i < nelt; i += 2)
25910 {
25911 if (d->perm[i] != i + odd)
25912 return false;
25913 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
25914 return false;
25915 }
25916
25917 /* Success! */
25918 if (d->testing_p)
25919 return true;
25920
25921 switch (d->vmode)
25922 {
25923 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
25924 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
25925 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
25926 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
25927 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
25928 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
25929 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
25930 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
25931 default:
25932 gcc_unreachable ();
25933 }
25934
25935 in0 = d->op0;
25936 in1 = d->op1;
25937 if (BYTES_BIG_ENDIAN)
25938 {
25939 x = in0, in0 = in1, in1 = x;
25940 odd = !odd;
25941 }
25942
25943 out0 = d->target;
25944 out1 = gen_reg_rtx (d->vmode);
25945 if (odd)
25946 x = out0, out0 = out1, out1 = x;
25947
25948 emit_insn (gen (out0, in0, in1, out1));
25949 return true;
25950 }
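
/* Example (illustrative only): for V4SImode with two distinct inputs, the
   permutation { 0, 4, 2, 6 } passes the check above with ODD == 0 and is
   emitted as a single VTRN, transposing corresponding element pairs of
   the two inputs.  */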
25951
25952 /* The NEON VTBL instruction is a fully variable permutation that's even
25953 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
25954 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
25955 can do slightly better by expanding this as a constant where we don't
25956 have to apply a mask. */
25957
25958 static bool
25959 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
25960 {
25961 rtx rperm[MAX_VECT_LEN], sel;
25962 enum machine_mode vmode = d->vmode;
25963 unsigned int i, nelt = d->nelt;
25964
25965 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
25966 numbering of elements for big-endian, we must reverse the order. */
25967 if (BYTES_BIG_ENDIAN)
25968 return false;
25969
25970 if (d->testing_p)
25971 return true;
25972
25973 /* Generic code will try constant permutation twice: once with the
25974 original mode and again with the elements lowered to QImode.
25975 So wait, and don't do the selector expansion ourselves.  */
25976 if (vmode != V8QImode && vmode != V16QImode)
25977 return false;
25978
25979 for (i = 0; i < nelt; ++i)
25980 rperm[i] = GEN_INT (d->perm[i]);
25981 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
25982 sel = force_reg (vmode, sel);
25983
25984 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
25985 return true;
25986 }
25987
25988 static bool
25989 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
25990 {
25991 /* The pattern matching functions above are written to look for a small
25992 number to begin the sequence (0, 1, N/2). If we begin with an index
25993 from the second operand, we can swap the operands. */
25994 if (d->perm[0] >= d->nelt)
25995 {
25996 unsigned i, nelt = d->nelt;
25997 rtx x;
25998
25999 for (i = 0; i < nelt; ++i)
26000 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
26001
26002 x = d->op0;
26003 d->op0 = d->op1;
26004 d->op1 = x;
26005 }
26006
26007 if (TARGET_NEON)
26008 {
26009 if (arm_evpc_neon_vuzp (d))
26010 return true;
26011 if (arm_evpc_neon_vzip (d))
26012 return true;
26013 if (arm_evpc_neon_vrev (d))
26014 return true;
26015 if (arm_evpc_neon_vtrn (d))
26016 return true;
26017 return arm_evpc_neon_vtbl (d);
26018 }
26019 return false;
26020 }
26021
26022 /* Expand a vec_perm_const pattern. */
26023
26024 bool
26025 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
26026 {
26027 struct expand_vec_perm_d d;
26028 int i, nelt, which;
26029
26030 d.target = target;
26031 d.op0 = op0;
26032 d.op1 = op1;
26033
26034 d.vmode = GET_MODE (target);
26035 gcc_assert (VECTOR_MODE_P (d.vmode));
26036 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
26037 d.testing_p = false;
26038
26039 for (i = which = 0; i < nelt; ++i)
26040 {
26041 rtx e = XVECEXP (sel, 0, i);
26042 int ei = INTVAL (e) & (2 * nelt - 1);
26043 which |= (ei < nelt ? 1 : 2);
26044 d.perm[i] = ei;
26045 }
26046
26047 switch (which)
26048 {
26049 default:
26050 gcc_unreachable ();
26051
26052 case 3:
26053 d.one_vector_p = false;
26054 if (!rtx_equal_p (op0, op1))
26055 break;
26056
26057 /* The elements of PERM do not suggest that only the first operand
26058 is used, but both operands are identical. Allow easier matching
26059 of the permutation by folding the permutation into the single
26060 input vector. */
26061 /* FALLTHRU */
26062 case 2:
26063 for (i = 0; i < nelt; ++i)
26064 d.perm[i] &= nelt - 1;
26065 d.op0 = op1;
26066 d.one_vector_p = true;
26067 break;
26068
26069 case 1:
26070 d.op1 = op0;
26071 d.one_vector_p = true;
26072 break;
26073 }
26074
26075 return arm_expand_vec_perm_const_1 (&d);
26076 }
26077
26078 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
26079
26080 static bool
26081 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
26082 const unsigned char *sel)
26083 {
26084 struct expand_vec_perm_d d;
26085 unsigned int i, nelt, which;
26086 bool ret;
26087
26088 d.vmode = vmode;
26089 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
26090 d.testing_p = true;
26091 memcpy (d.perm, sel, nelt);
26092
26093 /* Categorize the set of elements in the selector. */
26094 for (i = which = 0; i < nelt; ++i)
26095 {
26096 unsigned char e = d.perm[i];
26097 gcc_assert (e < 2 * nelt);
26098 which |= (e < nelt ? 1 : 2);
26099 }
26100
26101 /* If all elements are from the second vector, fold them onto the first.  */
26102 if (which == 2)
26103 for (i = 0; i < nelt; ++i)
26104 d.perm[i] -= nelt;
26105
26106 /* Check whether the mask can be applied to the vector type. */
26107 d.one_vector_p = (which != 3);
26108
26109 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
26110 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
26111 if (!d.one_vector_p)
26112 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
26113
26114 start_sequence ();
26115 ret = arm_expand_vec_perm_const_1 (&d);
26116 end_sequence ();
26117
26118 return ret;
26119 }
26120
26121 bool
26122 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
26123 {
26124 /* If we are soft float and we either have LDRD or the mode fits in a
26125 single word, then all auto increment forms are ok.  */
26126 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
26127 return true;
26128
26129 switch (code)
26130 {
26131 /* Post-increment and pre-decrement are supported for all
26132 instruction forms except for vector forms.  */
26133 case ARM_POST_INC:
26134 case ARM_PRE_DEC:
26135 if (VECTOR_MODE_P (mode))
26136 {
26137 if (code != ARM_PRE_DEC)
26138 return true;
26139 else
26140 return false;
26141 }
26142
26143 return true;
26144
26145 case ARM_POST_DEC:
26146 case ARM_PRE_INC:
26147 /* Without LDRD, and with a mode size greater than the
26148 word size, there is no point in auto-incrementing
26149 because ldm and stm will not have these forms.  */
26150 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
26151 return false;
26152
26153 /* Vector and floating point modes do not support
26154 these auto increment forms. */
26155 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
26156 return false;
26157
26158 return true;
26159
26160 default:
26161 return false;
26162
26163 }
26164
26165 return false;
26166 }
26167
26168 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
26169 on ARM, since we know that shifts by negative amounts are no-ops.
26170 Additionally, the default expansion code is not available or suitable
26171 for post-reload insn splits (this can occur when the register allocator
26172 chooses not to do a shift in NEON).
26173
26174 This function is used in both initial expand and post-reload splits, and
26175 handles all kinds of 64-bit shifts.
26176
26177 Input requirements:
26178 - It is safe for the input and output to be the same register, but
26179 early-clobber rules apply for the shift amount and scratch registers.
26180 - Shift by register requires both scratch registers. Shift by a constant
26181 less than 32 in Thumb2 mode requires SCRATCH1 only. In all other cases
26182 the scratch registers may be NULL.
26183 - Ashiftrt by a register also clobbers the CC register. */
26184 void
26185 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
26186 rtx amount, rtx scratch1, rtx scratch2)
26187 {
26188 rtx out_high = gen_highpart (SImode, out);
26189 rtx out_low = gen_lowpart (SImode, out);
26190 rtx in_high = gen_highpart (SImode, in);
26191 rtx in_low = gen_lowpart (SImode, in);
26192
26193 /* Terminology:
26194 in = the register pair containing the input value.
26195 out = the destination register pair.
26196 up = the high- or low-part of each pair.
26197 down = the opposite part to "up".
26198 In a shift, we can consider bits to shift from "up"-stream to
26199 "down"-stream, so in a left-shift "up" is the low-part and "down"
26200 is the high-part of each register pair. */
26201
26202 rtx out_up = code == ASHIFT ? out_low : out_high;
26203 rtx out_down = code == ASHIFT ? out_high : out_low;
26204 rtx in_up = code == ASHIFT ? in_low : in_high;
26205 rtx in_down = code == ASHIFT ? in_high : in_low;
26206
26207 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
26208 gcc_assert (out
26209 && (REG_P (out) || GET_CODE (out) == SUBREG)
26210 && GET_MODE (out) == DImode);
26211 gcc_assert (in
26212 && (REG_P (in) || GET_CODE (in) == SUBREG)
26213 && GET_MODE (in) == DImode);
26214 gcc_assert (amount
26215 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
26216 && GET_MODE (amount) == SImode)
26217 || CONST_INT_P (amount)));
26218 gcc_assert (scratch1 == NULL
26219 || (GET_CODE (scratch1) == SCRATCH)
26220 || (GET_MODE (scratch1) == SImode
26221 && REG_P (scratch1)));
26222 gcc_assert (scratch2 == NULL
26223 || (GET_CODE (scratch2) == SCRATCH)
26224 || (GET_MODE (scratch2) == SImode
26225 && REG_P (scratch2)));
26226 gcc_assert (!REG_P (out) || !REG_P (amount)
26227 || !HARD_REGISTER_P (out)
26228 || (REGNO (out) != REGNO (amount)
26229 && REGNO (out) + 1 != REGNO (amount)));
26230
26231 /* Macros to make following code more readable. */
26232 #define SUB_32(DEST,SRC) \
26233 gen_addsi3 ((DEST), (SRC), gen_rtx_CONST_INT (VOIDmode, -32))
26234 #define RSB_32(DEST,SRC) \
26235 gen_subsi3 ((DEST), gen_rtx_CONST_INT (VOIDmode, 32), (SRC))
26236 #define SUB_S_32(DEST,SRC) \
26237 gen_addsi3_compare0 ((DEST), (SRC), \
26238 gen_rtx_CONST_INT (VOIDmode, -32))
26239 #define SET(DEST,SRC) \
26240 gen_rtx_SET (SImode, (DEST), (SRC))
26241 #define SHIFT(CODE,SRC,AMOUNT) \
26242 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
26243 #define LSHIFT(CODE,SRC,AMOUNT) \
26244 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
26245 SImode, (SRC), (AMOUNT))
26246 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
26247 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
26248 SImode, (SRC), (AMOUNT))
26249 #define ORR(A,B) \
26250 gen_rtx_IOR (SImode, (A), (B))
26251 #define BRANCH(COND,LABEL) \
26252 gen_arm_cond_branch ((LABEL), \
26253 gen_rtx_ ## COND (CCmode, cc_reg, \
26254 const0_rtx), \
26255 cc_reg)
26256
26257 /* Shifts by register and shifts by constant are handled separately. */
26258 if (CONST_INT_P (amount))
26259 {
26260 /* We have a shift-by-constant. */
26261
26262 /* First, handle out-of-range shift amounts.
26263 In both cases we try to match the result that an ARM shift-by-register
26264 instruction would give.  This helps reduce execution differences
26265 between optimization levels, but it won't stop other parts of the
26266 compiler doing different things.  This is "undefined behaviour",
26267 in any case.  */
26268 if (INTVAL (amount) <= 0)
26269 emit_insn (gen_movdi (out, in));
26270 else if (INTVAL (amount) >= 64)
26271 {
26272 if (code == ASHIFTRT)
26273 {
26274 rtx const31_rtx = gen_rtx_CONST_INT (VOIDmode, 31);
26275 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
26276 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
26277 }
26278 else
26279 emit_insn (gen_movdi (out, const0_rtx));
26280 }
26281
26282 /* Now handle valid shifts. */
26283 else if (INTVAL (amount) < 32)
26284 {
26285 /* Shifts by a constant less than 32. */
26286 rtx reverse_amount = gen_rtx_CONST_INT (VOIDmode,
26287 32 - INTVAL (amount));
26288
26289 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26290 emit_insn (SET (out_down,
26291 ORR (REV_LSHIFT (code, in_up, reverse_amount),
26292 out_down)));
26293 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26294 }
26295 else
26296 {
26297 /* Shifts by a constant greater than 31. */
26298 rtx adj_amount = gen_rtx_CONST_INT (VOIDmode, INTVAL (amount) - 32);
26299
26300 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
26301 if (code == ASHIFTRT)
26302 emit_insn (gen_ashrsi3 (out_up, in_up,
26303 gen_rtx_CONST_INT (VOIDmode, 31)));
26304 else
26305 emit_insn (SET (out_up, const0_rtx));
26306 }
26307 }
26308 else
26309 {
26310 /* We have a shift-by-register. */
26311 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
26312
26313 /* This alternative requires the scratch registers. */
26314 gcc_assert (scratch1 && REG_P (scratch1));
26315 gcc_assert (scratch2 && REG_P (scratch2));
26316
26317 /* We will need the values "amount-32" and "32-amount" later.
26318 Swapping them around now allows the later code to be more general. */
26319 switch (code)
26320 {
26321 case ASHIFT:
26322 emit_insn (SUB_32 (scratch1, amount));
26323 emit_insn (RSB_32 (scratch2, amount));
26324 break;
26325 case ASHIFTRT:
26326 emit_insn (RSB_32 (scratch1, amount));
26327 /* Also set CC = amount > 32. */
26328 emit_insn (SUB_S_32 (scratch2, amount));
26329 break;
26330 case LSHIFTRT:
26331 emit_insn (RSB_32 (scratch1, amount));
26332 emit_insn (SUB_32 (scratch2, amount));
26333 break;
26334 default:
26335 gcc_unreachable ();
26336 }
26337
26338 /* Emit code like this:
26339
26340 arithmetic-left:
26341 out_down = in_down << amount;
26342 out_down = (in_up << (amount - 32)) | out_down;
26343 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
26344 out_up = in_up << amount;
26345
26346 arithmetic-right:
26347 out_down = in_down >> amount;
26348 out_down = (in_up << (32 - amount)) | out_down;
26349 if (amount < 32)
26350 out_down = ((signed)in_up >> (amount - 32)) | out_down;
26351 out_up = in_up >> amount;
26352
26353 logical-right:
26354 out_down = in_down >> amount;
26355 out_down = (in_up << (32 - amount)) | out_down;
26356 if (amount < 32)
26357 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
26358 out_up = in_up >> amount;
26359
26360 The ARM and Thumb2 variants are the same but implemented slightly
26361 differently. If this were only called during expand we could just
26362 use the Thumb2 case and let combine do the right thing, but this
26363 can also be called from post-reload splitters. */
26364
26365 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
26366
26367 if (!TARGET_THUMB2)
26368 {
26369 /* Emit code for ARM mode. */
26370 emit_insn (SET (out_down,
26371 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
26372 if (code == ASHIFTRT)
26373 {
26374 rtx done_label = gen_label_rtx ();
26375 emit_jump_insn (BRANCH (LT, done_label));
26376 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
26377 out_down)));
26378 emit_label (done_label);
26379 }
26380 else
26381 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
26382 out_down)));
26383 }
26384 else
26385 {
26386 /* Emit code for Thumb2 mode.
26387 Thumb2 can't do shift and or in one insn. */
26388 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
26389 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
26390
26391 if (code == ASHIFTRT)
26392 {
26393 rtx done_label = gen_label_rtx ();
26394 emit_jump_insn (BRANCH (LT, done_label));
26395 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
26396 emit_insn (SET (out_down, ORR (out_down, scratch2)));
26397 emit_label (done_label);
26398 }
26399 else
26400 {
26401 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
26402 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
26403 }
26404 }
26405
26406 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
26407 }
26408
26409 #undef SUB_32
26410 #undef RSB_32
26411 #undef SUB_S_32
26412 #undef SET
26413 #undef SHIFT
26414 #undef LSHIFT
26415 #undef REV_LSHIFT
26416 #undef ORR
26417 #undef BRANCH
26418 }
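
/* Example of the shift-by-constant path above (illustrative only): a
   64-bit left shift by 5, with the value in IN_LOW/IN_HIGH and the
   result in OUT_LOW/OUT_HIGH, is emitted as roughly

        lsl     out_high, in_high, #5
        orr     out_high, out_high, in_low, lsr #27
        lsl     out_low, in_low, #5

   i.e. the three SETs of the "shifts by a constant less than 32" case.  */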
26419
26420
26421 /* Return true if COMPARISON is a valid comparison operation, and
26422 force the operands into a form that is valid for it.  */
26423 bool
26424 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
26425 {
26426 enum rtx_code code = GET_CODE (*comparison);
26427 enum rtx_code canonical_code;
26428 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
26429 ? GET_MODE (*op2) : GET_MODE (*op1);
26430
26431 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
26432
26433 if (code == UNEQ || code == LTGT)
26434 return false;
26435
26436 canonical_code = arm_canonicalize_comparison (code, op1, op2);
26437 PUT_CODE (*comparison, canonical_code);
26438
26439 switch (mode)
26440 {
26441 case SImode:
26442 if (!arm_add_operand (*op1, mode))
26443 *op1 = force_reg (mode, *op1);
26444 if (!arm_add_operand (*op2, mode))
26445 *op2 = force_reg (mode, *op2);
26446 return true;
26447
26448 case DImode:
26449 if (!cmpdi_operand (*op1, mode))
26450 *op1 = force_reg (mode, *op1);
26451 if (!cmpdi_operand (*op2, mode))
26452 *op2 = force_reg (mode, *op2);
26453 return true;
26454
26455 case SFmode:
26456 case DFmode:
26457 if (!arm_float_compare_operand (*op1, mode))
26458 *op1 = force_reg (mode, *op1);
26459 if (!arm_float_compare_operand (*op2, mode))
26460 *op2 = force_reg (mode, *op2);
26461 return true;
26462 default:
26463 break;
26464 }
26465
26466 return false;
26467
26468 }
26469
26470 #include "gt-arm.h"