1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62
63 /* Forward definitions of types. */
64 typedef struct minipool_node Mnode;
65 typedef struct minipool_fixup Mfix;
66
67 void (*arm_lang_output_object_attributes_hook)(void);
68
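/* A descriptive note, inferred from the forward declarations below rather
   than taken from the original file: optimal_immediate_sequence uses this
   struct to hand back the up-to-four immediate values chosen when
   synthesizing a constant.  */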
69 struct four_ints
70 {
71 int i[4];
72 };
73
74 /* Forward function declarations. */
75 static bool arm_const_not_ok_for_debug_p (rtx);
76 static bool arm_lra_p (void);
77 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
78 static int arm_compute_static_chain_stack_bytes (void);
79 static arm_stack_offsets *arm_get_frame_offsets (void);
80 static void arm_add_gc_roots (void);
81 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
82 HOST_WIDE_INT, rtx, rtx, int, int);
83 static unsigned bit_count (unsigned long);
84 static int arm_address_register_rtx_p (rtx, int);
85 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
86 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
87 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
88 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
89 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
90 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
91 inline static int thumb1_index_register_rtx_p (rtx, int);
92 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
93 static int thumb_far_jump_used_p (void);
94 static bool thumb_force_lr_save (void);
95 static unsigned arm_size_return_regs (void);
96 static bool arm_assemble_integer (rtx, unsigned int, int);
97 static void arm_print_operand (FILE *, rtx, int);
98 static void arm_print_operand_address (FILE *, rtx);
99 static bool arm_print_operand_punct_valid_p (unsigned char code);
100 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
101 static arm_cc get_arm_condition_code (rtx);
102 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
103 static const char *output_multi_immediate (rtx *, const char *, const char *,
104 int, HOST_WIDE_INT);
105 static const char *shift_op (rtx, HOST_WIDE_INT *);
106 static struct machine_function *arm_init_machine_status (void);
107 static void thumb_exit (FILE *, int);
108 static HOST_WIDE_INT get_jump_table_size (rtx);
109 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
110 static Mnode *add_minipool_forward_ref (Mfix *);
111 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
112 static Mnode *add_minipool_backward_ref (Mfix *);
113 static void assign_minipool_offsets (Mfix *);
114 static void arm_print_value (FILE *, rtx);
115 static void dump_minipool (rtx);
116 static int arm_barrier_cost (rtx);
117 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
118 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
119 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
120 rtx);
121 static void arm_reorg (void);
122 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
123 static unsigned long arm_compute_save_reg0_reg12_mask (void);
124 static unsigned long arm_compute_save_reg_mask (void);
125 static unsigned long arm_isr_value (tree);
126 static unsigned long arm_compute_func_type (void);
127 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
128 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
129 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
130 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
131 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
132 #endif
133 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
134 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
135 static int arm_comp_type_attributes (const_tree, const_tree);
136 static void arm_set_default_type_attributes (tree);
137 static int arm_adjust_cost (rtx, rtx, rtx, int);
138 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
139 static int optimal_immediate_sequence (enum rtx_code code,
140 unsigned HOST_WIDE_INT val,
141 struct four_ints *return_sequence);
142 static int optimal_immediate_sequence_1 (enum rtx_code code,
143 unsigned HOST_WIDE_INT val,
144 struct four_ints *return_sequence,
145 int i);
146 static int arm_get_strip_length (int);
147 static bool arm_function_ok_for_sibcall (tree, tree);
148 static enum machine_mode arm_promote_function_mode (const_tree,
149 enum machine_mode, int *,
150 const_tree, int);
151 static bool arm_return_in_memory (const_tree, const_tree);
152 static rtx arm_function_value (const_tree, const_tree, bool);
153 static rtx arm_libcall_value_1 (enum machine_mode);
154 static rtx arm_libcall_value (enum machine_mode, const_rtx);
155 static bool arm_function_value_regno_p (const unsigned int);
156 static void arm_internal_label (FILE *, const char *, unsigned long);
157 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
158 tree);
159 static bool arm_have_conditional_execution (void);
160 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
161 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
162 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
163 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
164 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
165 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
166 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
167 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
168 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
169 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
170 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
171 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
172 static void arm_init_builtins (void);
173 static void arm_init_iwmmxt_builtins (void);
174 static rtx safe_vector_operand (rtx, enum machine_mode);
175 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
176 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
177 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
178 static tree arm_builtin_decl (unsigned, bool);
179 static void emit_constant_insn (rtx cond, rtx pattern);
180 static rtx emit_set_insn (rtx, rtx);
181 static rtx emit_multi_reg_push (unsigned long, unsigned long);
182 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
183 tree, bool);
184 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
185 const_tree, bool);
186 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
187 const_tree, bool);
188 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
189 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
190 const_tree);
191 static rtx aapcs_libcall_value (enum machine_mode);
192 static int aapcs_select_return_coproc (const_tree, const_tree);
193
194 #ifdef OBJECT_FORMAT_ELF
195 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
196 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
197 #endif
198 #ifndef ARM_PE
199 static void arm_encode_section_info (tree, rtx, int);
200 #endif
201
202 static void arm_file_end (void);
203 static void arm_file_start (void);
204
205 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
206 tree, int *, int);
207 static bool arm_pass_by_reference (cumulative_args_t,
208 enum machine_mode, const_tree, bool);
209 static bool arm_promote_prototypes (const_tree);
210 static bool arm_default_short_enums (void);
211 static bool arm_align_anon_bitfield (void);
212 static bool arm_return_in_msb (const_tree);
213 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
214 static bool arm_return_in_memory (const_tree, const_tree);
215 #if ARM_UNWIND_INFO
216 static void arm_unwind_emit (FILE *, rtx);
217 static bool arm_output_ttype (rtx);
218 static void arm_asm_emit_except_personality (rtx);
219 static void arm_asm_init_sections (void);
220 #endif
221 static rtx arm_dwarf_register_span (rtx);
222
223 static tree arm_cxx_guard_type (void);
224 static bool arm_cxx_guard_mask_bit (void);
225 static tree arm_get_cookie_size (tree);
226 static bool arm_cookie_has_size (void);
227 static bool arm_cxx_cdtor_returns_this (void);
228 static bool arm_cxx_key_method_may_be_inline (void);
229 static void arm_cxx_determine_class_data_visibility (tree);
230 static bool arm_cxx_class_data_always_comdat (void);
231 static bool arm_cxx_use_aeabi_atexit (void);
232 static void arm_init_libfuncs (void);
233 static tree arm_build_builtin_va_list (void);
234 static void arm_expand_builtin_va_start (tree, rtx);
235 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
236 static void arm_option_override (void);
237 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
238 static bool arm_cannot_copy_insn_p (rtx);
239 static int arm_issue_rate (void);
240 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
241 static bool arm_output_addr_const_extra (FILE *, rtx);
242 static bool arm_allocate_stack_slots_for_args (void);
243 static bool arm_warn_func_return (tree);
244 static const char *arm_invalid_parameter_type (const_tree t);
245 static const char *arm_invalid_return_type (const_tree t);
246 static tree arm_promoted_type (const_tree t);
247 static tree arm_convert_to_type (tree type, tree expr);
248 static bool arm_scalar_mode_supported_p (enum machine_mode);
249 static bool arm_frame_pointer_required (void);
250 static bool arm_can_eliminate (const int, const int);
251 static void arm_asm_trampoline_template (FILE *);
252 static void arm_trampoline_init (rtx, tree, rtx);
253 static rtx arm_trampoline_adjust_address (rtx);
254 static rtx arm_pic_static_addr (rtx orig, rtx reg);
255 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
256 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
257 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
258 static bool arm_array_mode_supported_p (enum machine_mode,
259 unsigned HOST_WIDE_INT);
260 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
261 static bool arm_class_likely_spilled_p (reg_class_t);
262 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
263 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
264 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
265 const_tree type,
266 int misalignment,
267 bool is_packed);
268 static void arm_conditional_register_usage (void);
269 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
270 static unsigned int arm_autovectorize_vector_sizes (void);
271 static int arm_default_branch_cost (bool, bool);
272 static int arm_cortex_a5_branch_cost (bool, bool);
273 static int arm_cortex_m_branch_cost (bool, bool);
274
275 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
276 const unsigned char *sel);
277
278 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
279 tree vectype,
280 int misalign ATTRIBUTE_UNUSED);
281 static unsigned arm_add_stmt_cost (void *data, int count,
282 enum vect_cost_for_stmt kind,
283 struct _stmt_vec_info *stmt_info,
284 int misalign,
285 enum vect_cost_model_location where);
286
287 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
288 bool op0_preserve_value);
289 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
290 \f
291 /* Table of machine attributes. */
292 static const struct attribute_spec arm_attribute_table[] =
293 {
294 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
295 affects_type_identity } */
296 /* Function calls made to this symbol must be done indirectly, because
297 it may lie outside of the 26-bit addressing range of a normal function
298 call. */
299 { "long_call", 0, 0, false, true, true, NULL, false },
300 /* Whereas these functions are always known to reside within the 26-bit
301 addressing range. */
302 { "short_call", 0, 0, false, true, true, NULL, false },
303 /* Specify the procedure call conventions for a function. */
304 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
305 false },
306 /* Interrupt Service Routines have special prologue and epilogue requirements. */
307 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
308 false },
309 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
310 false },
311 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
312 false },
313 #ifdef ARM_PE
314 /* ARM/PE has three new attributes:
315 interfacearm - ?
316 dllexport - for exporting a function/variable that will live in a dll
317 dllimport - for importing a function/variable from a dll
318
319 Microsoft allows multiple declspecs in one __declspec, separating
320 them with spaces. We do NOT support this. Instead, use __declspec
321 multiple times.
322 */
323 { "dllimport", 0, 0, true, false, false, NULL, false },
324 { "dllexport", 0, 0, true, false, false, NULL, false },
325 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
326 false },
327 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
328 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
329 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
330 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
331 false },
332 #endif
333 { NULL, 0, 0, false, false, false, NULL, false }
334 };
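/* Purely illustrative sketch, not part of the original file: user code
   requests the attributes above on declarations, e.g.

     extern void far_helper (void) __attribute__ ((long_call));
     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_cb (double) __attribute__ ((pcs ("aapcs-vfp")));

   far_helper, uart_handler and vfp_cb are hypothetical names used only to
   show how the handlers registered in the table get invoked.  */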
335 \f
336 /* Initialize the GCC target structure. */
337 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
338 #undef TARGET_MERGE_DECL_ATTRIBUTES
339 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
340 #endif
341
342 #undef TARGET_LEGITIMIZE_ADDRESS
343 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
344
345 #undef TARGET_LRA_P
346 #define TARGET_LRA_P arm_lra_p
347
348 #undef TARGET_ATTRIBUTE_TABLE
349 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
350
351 #undef TARGET_ASM_FILE_START
352 #define TARGET_ASM_FILE_START arm_file_start
353 #undef TARGET_ASM_FILE_END
354 #define TARGET_ASM_FILE_END arm_file_end
355
356 #undef TARGET_ASM_ALIGNED_SI_OP
357 #define TARGET_ASM_ALIGNED_SI_OP NULL
358 #undef TARGET_ASM_INTEGER
359 #define TARGET_ASM_INTEGER arm_assemble_integer
360
361 #undef TARGET_PRINT_OPERAND
362 #define TARGET_PRINT_OPERAND arm_print_operand
363 #undef TARGET_PRINT_OPERAND_ADDRESS
364 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
365 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
366 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
367
368 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
369 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
370
371 #undef TARGET_ASM_FUNCTION_PROLOGUE
372 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
373
374 #undef TARGET_ASM_FUNCTION_EPILOGUE
375 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
376
377 #undef TARGET_OPTION_OVERRIDE
378 #define TARGET_OPTION_OVERRIDE arm_option_override
379
380 #undef TARGET_COMP_TYPE_ATTRIBUTES
381 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
382
383 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
384 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
385
386 #undef TARGET_SCHED_ADJUST_COST
387 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
388
389 #undef TARGET_SCHED_REORDER
390 #define TARGET_SCHED_REORDER arm_sched_reorder
391
392 #undef TARGET_REGISTER_MOVE_COST
393 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
394
395 #undef TARGET_MEMORY_MOVE_COST
396 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
397
398 #undef TARGET_ENCODE_SECTION_INFO
399 #ifdef ARM_PE
400 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
401 #else
402 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
403 #endif
404
405 #undef TARGET_STRIP_NAME_ENCODING
406 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
407
408 #undef TARGET_ASM_INTERNAL_LABEL
409 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
410
411 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
412 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
413
414 #undef TARGET_FUNCTION_VALUE
415 #define TARGET_FUNCTION_VALUE arm_function_value
416
417 #undef TARGET_LIBCALL_VALUE
418 #define TARGET_LIBCALL_VALUE arm_libcall_value
419
420 #undef TARGET_FUNCTION_VALUE_REGNO_P
421 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
422
423 #undef TARGET_ASM_OUTPUT_MI_THUNK
424 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
425 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
426 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
427
428 #undef TARGET_RTX_COSTS
429 #define TARGET_RTX_COSTS arm_rtx_costs
430 #undef TARGET_ADDRESS_COST
431 #define TARGET_ADDRESS_COST arm_address_cost
432
433 #undef TARGET_SHIFT_TRUNCATION_MASK
434 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
435 #undef TARGET_VECTOR_MODE_SUPPORTED_P
436 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
437 #undef TARGET_ARRAY_MODE_SUPPORTED_P
438 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
439 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
440 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
441 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
442 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
443 arm_autovectorize_vector_sizes
444
445 #undef TARGET_MACHINE_DEPENDENT_REORG
446 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
447
448 #undef TARGET_INIT_BUILTINS
449 #define TARGET_INIT_BUILTINS arm_init_builtins
450 #undef TARGET_EXPAND_BUILTIN
451 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
452 #undef TARGET_BUILTIN_DECL
453 #define TARGET_BUILTIN_DECL arm_builtin_decl
454
455 #undef TARGET_INIT_LIBFUNCS
456 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
457
458 #undef TARGET_PROMOTE_FUNCTION_MODE
459 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
460 #undef TARGET_PROMOTE_PROTOTYPES
461 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
462 #undef TARGET_PASS_BY_REFERENCE
463 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
466 #undef TARGET_FUNCTION_ARG
467 #define TARGET_FUNCTION_ARG arm_function_arg
468 #undef TARGET_FUNCTION_ARG_ADVANCE
469 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
470 #undef TARGET_FUNCTION_ARG_BOUNDARY
471 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
472
473 #undef TARGET_SETUP_INCOMING_VARARGS
474 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
475
476 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
477 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
478
479 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
480 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
481 #undef TARGET_TRAMPOLINE_INIT
482 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
483 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
484 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
485
486 #undef TARGET_WARN_FUNC_RETURN
487 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
488
489 #undef TARGET_DEFAULT_SHORT_ENUMS
490 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
491
492 #undef TARGET_ALIGN_ANON_BITFIELD
493 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
494
495 #undef TARGET_NARROW_VOLATILE_BITFIELD
496 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
497
498 #undef TARGET_CXX_GUARD_TYPE
499 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
500
501 #undef TARGET_CXX_GUARD_MASK_BIT
502 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
503
504 #undef TARGET_CXX_GET_COOKIE_SIZE
505 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
506
507 #undef TARGET_CXX_COOKIE_HAS_SIZE
508 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
509
510 #undef TARGET_CXX_CDTOR_RETURNS_THIS
511 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
512
513 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
514 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
515
516 #undef TARGET_CXX_USE_AEABI_ATEXIT
517 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
518
519 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
520 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
521 arm_cxx_determine_class_data_visibility
522
523 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
524 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
525
526 #undef TARGET_RETURN_IN_MSB
527 #define TARGET_RETURN_IN_MSB arm_return_in_msb
528
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
531
532 #undef TARGET_MUST_PASS_IN_STACK
533 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
534
535 #if ARM_UNWIND_INFO
536 #undef TARGET_ASM_UNWIND_EMIT
537 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
538
539 /* EABI unwinding tables use a different format for the typeinfo tables. */
540 #undef TARGET_ASM_TTYPE
541 #define TARGET_ASM_TTYPE arm_output_ttype
542
543 #undef TARGET_ARM_EABI_UNWINDER
544 #define TARGET_ARM_EABI_UNWINDER true
545
546 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
547 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
548
549 #undef TARGET_ASM_INIT_SECTIONS
550 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
551 #endif /* ARM_UNWIND_INFO */
552
553 #undef TARGET_DWARF_REGISTER_SPAN
554 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
555
556 #undef TARGET_CANNOT_COPY_INSN_P
557 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
558
559 #ifdef HAVE_AS_TLS
560 #undef TARGET_HAVE_TLS
561 #define TARGET_HAVE_TLS true
562 #endif
563
564 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
565 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
566
567 #undef TARGET_LEGITIMATE_CONSTANT_P
568 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
569
570 #undef TARGET_CANNOT_FORCE_CONST_MEM
571 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
572
573 #undef TARGET_MAX_ANCHOR_OFFSET
574 #define TARGET_MAX_ANCHOR_OFFSET 4095
575
576 /* The minimum is set such that the total size of the block
577 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
578 divisible by eight, ensuring natural spacing of anchors. */
579 #undef TARGET_MIN_ANCHOR_OFFSET
580 #define TARGET_MIN_ANCHOR_OFFSET -4088
581
582 #undef TARGET_SCHED_ISSUE_RATE
583 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
584
585 #undef TARGET_MANGLE_TYPE
586 #define TARGET_MANGLE_TYPE arm_mangle_type
587
588 #undef TARGET_BUILD_BUILTIN_VA_LIST
589 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
590 #undef TARGET_EXPAND_BUILTIN_VA_START
591 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
592 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
593 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
594
595 #ifdef HAVE_AS_TLS
596 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
597 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
598 #endif
599
600 #undef TARGET_LEGITIMATE_ADDRESS_P
601 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
602
603 #undef TARGET_PREFERRED_RELOAD_CLASS
604 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
605
606 #undef TARGET_INVALID_PARAMETER_TYPE
607 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
608
609 #undef TARGET_INVALID_RETURN_TYPE
610 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
611
612 #undef TARGET_PROMOTED_TYPE
613 #define TARGET_PROMOTED_TYPE arm_promoted_type
614
615 #undef TARGET_CONVERT_TO_TYPE
616 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
617
618 #undef TARGET_SCALAR_MODE_SUPPORTED_P
619 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
620
621 #undef TARGET_FRAME_POINTER_REQUIRED
622 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
623
624 #undef TARGET_CAN_ELIMINATE
625 #define TARGET_CAN_ELIMINATE arm_can_eliminate
626
627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
628 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
629
630 #undef TARGET_CLASS_LIKELY_SPILLED_P
631 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
632
633 #undef TARGET_VECTORIZE_BUILTINS
634 #define TARGET_VECTORIZE_BUILTINS
635
636 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
637 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
638 arm_builtin_vectorized_function
639
640 #undef TARGET_VECTOR_ALIGNMENT
641 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
642
643 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
644 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
645 arm_vector_alignment_reachable
646
647 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
648 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
649 arm_builtin_support_vector_misalignment
650
651 #undef TARGET_PREFERRED_RENAME_CLASS
652 #define TARGET_PREFERRED_RENAME_CLASS \
653 arm_preferred_rename_class
654
655 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
656 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
657 arm_vectorize_vec_perm_const_ok
658
659 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
660 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
661 arm_builtin_vectorization_cost
662 #undef TARGET_VECTORIZE_ADD_STMT_COST
663 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
664
665 #undef TARGET_CANONICALIZE_COMPARISON
666 #define TARGET_CANONICALIZE_COMPARISON \
667 arm_canonicalize_comparison
668
669 #undef TARGET_ASAN_SHADOW_OFFSET
670 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
671
672 #undef MAX_INSN_PER_IT_BLOCK
673 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
674
675 #undef TARGET_CAN_USE_DOLOOP_P
676 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
677
678 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
679 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
680
681 struct gcc_target targetm = TARGET_INITIALIZER;
682 \f
683 /* Obstack for minipool constant handling. */
684 static struct obstack minipool_obstack;
685 static char * minipool_startobj;
686
687 /* The maximum number of skipped insns that will be
688 conditionalised if possible. */
689 static int max_insns_skipped = 5;
690
691 extern FILE * asm_out_file;
692
693 /* True if we are currently building a constant table. */
694 int making_const_table;
695
696 /* The processor for which instructions should be scheduled. */
697 enum processor_type arm_tune = arm_none;
698
699 /* The current tuning set. */
700 const struct tune_params *current_tune;
701
702 /* Which floating point hardware to schedule for. */
703 int arm_fpu_attr;
704
705 /* Which floating point hardware to use. */
706 const struct arm_fpu_desc *arm_fpu_desc;
707
708 /* Used for Thumb call_via trampolines. */
709 rtx thumb_call_via_label[14];
710 static int thumb_call_reg_needed;
711
712 /* Bit values used to identify processor capabilities. */
713 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
714 #define FL_ARCH3M (1 << 1) /* Extended multiply */
715 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
716 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
717 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
718 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
719 #define FL_THUMB (1 << 6) /* Thumb aware */
720 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
721 #define FL_STRONG (1 << 8) /* StrongARM */
722 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
723 #define FL_XSCALE (1 << 10) /* XScale */
724 /* spare (1 << 11) */
725 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
726 media instructions. */
727 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
728 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
729 Note: ARM6 & 7 derivatives only. */
730 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
731 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
732 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
733 profile. */
734 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
735 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
736 #define FL_NEON (1 << 20) /* Neon instructions. */
737 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
738 architecture. */
739 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
740 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
741 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
742 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
743
744 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
745 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
746
747 /* Flags that only affect tuning, not available instructions. */
748 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
749 | FL_CO_PROC)
750
751 #define FL_FOR_ARCH2 FL_NOTM
752 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
753 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
754 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
755 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
756 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
757 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
758 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
759 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
760 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
761 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
762 #define FL_FOR_ARCH6J FL_FOR_ARCH6
763 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
764 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
765 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
766 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
767 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
768 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
769 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
770 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
771 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
772 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
773 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
774 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
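/* Worked example, derived purely from the definitions above: FL_FOR_ARCH7A
   unfolds to

     FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E | FL_THUMB
     | FL_ARCH6 | FL_THUMB2 | FL_ARCH7 | FL_NOTM | FL_ARCH6K

   i.e. everything accumulated through ARMv6T2 plus the v7, 6K and
   non-M-profile bits; the hardware-divide bits only appear from
   FL_FOR_ARCH7VE/7R/7M onwards.  */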
775
776 /* The bits in this mask specify which
777 instructions we are allowed to generate. */
778 static unsigned long insn_flags = 0;
779
780 /* The bits in this mask specify which instruction scheduling options should
781 be used. */
782 static unsigned long tune_flags = 0;
783
784 /* The highest ARM architecture version supported by the
785 target. */
786 enum base_architecture arm_base_arch = BASE_ARCH_0;
787
788 /* The following are used in the arm.md file as equivalents to bits
789 in the above two flag variables. */
790
791 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
792 int arm_arch3m = 0;
793
794 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
795 int arm_arch4 = 0;
796
797 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
798 int arm_arch4t = 0;
799
800 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
801 int arm_arch5 = 0;
802
803 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
804 int arm_arch5e = 0;
805
806 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
807 int arm_arch6 = 0;
808
809 /* Nonzero if this chip supports the ARM 6K extensions. */
810 int arm_arch6k = 0;
811
812 /* Nonzero if instructions present in ARMv6-M can be used. */
813 int arm_arch6m = 0;
814
815 /* Nonzero if this chip supports the ARM 7 extensions. */
816 int arm_arch7 = 0;
817
818 /* Nonzero if instructions not present in the 'M' profile can be used. */
819 int arm_arch_notm = 0;
820
821 /* Nonzero if instructions present in ARMv7E-M can be used. */
822 int arm_arch7em = 0;
823
824 /* Nonzero if instructions present in ARMv8 can be used. */
825 int arm_arch8 = 0;
826
827 /* Nonzero if this chip can benefit from load scheduling. */
828 int arm_ld_sched = 0;
829
830 /* Nonzero if this chip is a StrongARM. */
831 int arm_tune_strongarm = 0;
832
833 /* Nonzero if this chip supports Intel Wireless MMX technology. */
834 int arm_arch_iwmmxt = 0;
835
836 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
837 int arm_arch_iwmmxt2 = 0;
838
839 /* Nonzero if this chip is an XScale. */
840 int arm_arch_xscale = 0;
841
842 /* Nonzero if tuning for XScale. */
843 int arm_tune_xscale = 0;
844
845 /* Nonzero if we want to tune for stores that access the write-buffer.
846 This typically means an ARM6 or ARM7 with MMU or MPU. */
847 int arm_tune_wbuf = 0;
848
849 /* Nonzero if tuning for Cortex-A9. */
850 int arm_tune_cortex_a9 = 0;
851
852 /* Nonzero if generating Thumb instructions. */
853 int thumb_code = 0;
854
855 /* Nonzero if generating Thumb-1 instructions. */
856 int thumb1_code = 0;
857
858 /* Nonzero if we should define __THUMB_INTERWORK__ in the
859 preprocessor.
860 XXX This is a bit of a hack; it's intended to help work around
861 problems in GLD, which doesn't understand that armv5t code is
862 interworking clean. */
863 int arm_cpp_interwork = 0;
864
865 /* Nonzero if chip supports Thumb 2. */
866 int arm_arch_thumb2;
867
868 /* Nonzero if chip supports integer division instruction. */
869 int arm_arch_arm_hwdiv;
870 int arm_arch_thumb_hwdiv;
871
872 /* Nonzero if we should use Neon to handle 64-bit operations rather
873 than core registers. */
874 int prefer_neon_for_64bits = 0;
875
876 /* Nonzero if we shouldn't use literal pools. */
877 bool arm_disable_literal_pool = false;
878
879 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
880 we must report the mode of the memory reference from
881 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
882 enum machine_mode output_memory_reference_mode;
883
884 /* The register number to be used for the PIC offset register. */
885 unsigned arm_pic_register = INVALID_REGNUM;
886
887 /* Set to 1 after arm_reorg has started. Reset at the start of
888 the next function. */
889 static int after_arm_reorg = 0;
890
891 enum arm_pcs arm_pcs_default;
892
893 /* For an explanation of these variables, see final_prescan_insn below. */
894 int arm_ccfsm_state;
895 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
896 enum arm_cond_code arm_current_cc;
897
898 rtx arm_target_insn;
899 int arm_target_label;
900 /* The number of conditionally executed insns, including the current insn. */
901 int arm_condexec_count = 0;
902 /* A bitmask specifying the patterns for the IT block.
903 Zero means do not output an IT block before this insn. */
904 int arm_condexec_mask = 0;
905 /* The number of bits used in arm_condexec_mask. */
906 int arm_condexec_masklen = 0;
907
908 /* Nonzero if chip supports the ARMv8 CRC instructions. */
909 int arm_arch_crc = 0;
910
911 /* The condition codes of the ARM, and the inverse function. */
912 static const char * const arm_condition_codes[] =
913 {
914 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
915 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
916 };
917
918 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
919 int arm_regs_in_sequence[] =
920 {
921 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
922 };
923
924 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
925 #define streq(string1, string2) (strcmp (string1, string2) == 0)
926
927 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
928 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
929 | (1 << PIC_OFFSET_TABLE_REGNUM)))
930 \f
931 /* Initialization code. */
932
933 struct processors
934 {
935 const char *const name;
936 enum processor_type core;
937 const char *arch;
938 enum base_architecture base_arch;
939 const unsigned long flags;
940 const struct tune_params *const tune;
941 };
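/* A hypothetical entry, shown only to illustrate the field order above; the
   specific identifiers (the processor_type value, "7A", BASE_ARCH_7A) are
   assumptions for illustration, not copied from the real table, which is
   generated elsewhere from the CPU descriptions:

     { "cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
       FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune },
 */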
942
943
944 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
945 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
946 prefetch_slots, \
947 l1_size, \
948 l1_line_size
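/* Illustration only: ARM_PREFETCH_BENEFICIAL (4,32,32), as used for the
   Cortex-A9 and Cortex-A12 tunings below, expands to the three initializers
   "4, 32, 32" (prefetch_slots, l1_size, l1_line_size), while
   ARM_PREFETCH_NOT_BENEFICIAL supplies "0, -1, -1" for the same slots.  */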
949
950 /* arm generic vectorizer costs. */
951 static const
952 struct cpu_vec_costs arm_default_vec_cost = {
953 1, /* scalar_stmt_cost. */
954 1, /* scalar_load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 1, /* vec_unalign_load_cost. */
961 1, /* vec_unalign_store_cost. */
962 1, /* vec_store_cost. */
963 3, /* cond_taken_branch_cost. */
964 1, /* cond_not_taken_branch_cost. */
965 };
966
967 /* Cost tables for AArch32 and AArch64 cores should go in aarch-cost-tables.h. */
968 #include "aarch-cost-tables.h"
969
970
971
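/* A note on reading the *_extra_costs tables below: each entry is an extra
   cost on top of a single simple instruction, expressed with COSTS_N_INSNS
   (in rtl.h COSTS_N_INSNS (N) is simply (N) * 4).  So a 0 entry means "no
   cost beyond a plain ALU op", and e.g. COSTS_N_INSNS (30) for SImode idiv
   on the Cortex-A9 models division as roughly thirty instructions' worth of
   work.  */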
972 const struct cpu_cost_table cortexa9_extra_costs =
973 {
974 /* ALU */
975 {
976 0, /* arith. */
977 0, /* logical. */
978 0, /* shift. */
979 COSTS_N_INSNS (1), /* shift_reg. */
980 COSTS_N_INSNS (1), /* arith_shift. */
981 COSTS_N_INSNS (2), /* arith_shift_reg. */
982 0, /* log_shift. */
983 COSTS_N_INSNS (1), /* log_shift_reg. */
984 COSTS_N_INSNS (1), /* extend. */
985 COSTS_N_INSNS (2), /* extend_arith. */
986 COSTS_N_INSNS (1), /* bfi. */
987 COSTS_N_INSNS (1), /* bfx. */
988 0, /* clz. */
989 0, /* non_exec. */
990 true /* non_exec_costs_exec. */
991 },
992 {
993 /* MULT SImode */
994 {
995 COSTS_N_INSNS (3), /* simple. */
996 COSTS_N_INSNS (3), /* flag_setting. */
997 COSTS_N_INSNS (2), /* extend. */
998 COSTS_N_INSNS (3), /* add. */
999 COSTS_N_INSNS (2), /* extend_add. */
1000 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1001 },
1002 /* MULT DImode */
1003 {
1004 0, /* simple (N/A). */
1005 0, /* flag_setting (N/A). */
1006 COSTS_N_INSNS (4), /* extend. */
1007 0, /* add (N/A). */
1008 COSTS_N_INSNS (4), /* extend_add. */
1009 0 /* idiv (N/A). */
1010 }
1011 },
1012 /* LD/ST */
1013 {
1014 COSTS_N_INSNS (2), /* load. */
1015 COSTS_N_INSNS (2), /* load_sign_extend. */
1016 COSTS_N_INSNS (2), /* ldrd. */
1017 COSTS_N_INSNS (2), /* ldm_1st. */
1018 1, /* ldm_regs_per_insn_1st. */
1019 2, /* ldm_regs_per_insn_subsequent. */
1020 COSTS_N_INSNS (5), /* loadf. */
1021 COSTS_N_INSNS (5), /* loadd. */
1022 COSTS_N_INSNS (1), /* load_unaligned. */
1023 COSTS_N_INSNS (2), /* store. */
1024 COSTS_N_INSNS (2), /* strd. */
1025 COSTS_N_INSNS (2), /* stm_1st. */
1026 1, /* stm_regs_per_insn_1st. */
1027 2, /* stm_regs_per_insn_subsequent. */
1028 COSTS_N_INSNS (1), /* storef. */
1029 COSTS_N_INSNS (1), /* stored. */
1030 COSTS_N_INSNS (1) /* store_unaligned. */
1031 },
1032 {
1033 /* FP SFmode */
1034 {
1035 COSTS_N_INSNS (14), /* div. */
1036 COSTS_N_INSNS (4), /* mult. */
1037 COSTS_N_INSNS (7), /* mult_addsub. */
1038 COSTS_N_INSNS (30), /* fma. */
1039 COSTS_N_INSNS (3), /* addsub. */
1040 COSTS_N_INSNS (1), /* fpconst. */
1041 COSTS_N_INSNS (1), /* neg. */
1042 COSTS_N_INSNS (3), /* compare. */
1043 COSTS_N_INSNS (3), /* widen. */
1044 COSTS_N_INSNS (3), /* narrow. */
1045 COSTS_N_INSNS (3), /* toint. */
1046 COSTS_N_INSNS (3), /* fromint. */
1047 COSTS_N_INSNS (3) /* roundint. */
1048 },
1049 /* FP DFmode */
1050 {
1051 COSTS_N_INSNS (24), /* div. */
1052 COSTS_N_INSNS (5), /* mult. */
1053 COSTS_N_INSNS (8), /* mult_addsub. */
1054 COSTS_N_INSNS (30), /* fma. */
1055 COSTS_N_INSNS (3), /* addsub. */
1056 COSTS_N_INSNS (1), /* fpconst. */
1057 COSTS_N_INSNS (1), /* neg. */
1058 COSTS_N_INSNS (3), /* compare. */
1059 COSTS_N_INSNS (3), /* widen. */
1060 COSTS_N_INSNS (3), /* narrow. */
1061 COSTS_N_INSNS (3), /* toint. */
1062 COSTS_N_INSNS (3), /* fromint. */
1063 COSTS_N_INSNS (3) /* roundint. */
1064 }
1065 },
1066 /* Vector */
1067 {
1068 COSTS_N_INSNS (1) /* alu. */
1069 }
1070 };
1071
1072
1073 const struct cpu_cost_table cortexa7_extra_costs =
1074 {
1075 /* ALU */
1076 {
1077 0, /* arith. */
1078 0, /* logical. */
1079 COSTS_N_INSNS (1), /* shift. */
1080 COSTS_N_INSNS (1), /* shift_reg. */
1081 COSTS_N_INSNS (1), /* arith_shift. */
1082 COSTS_N_INSNS (1), /* arith_shift_reg. */
1083 COSTS_N_INSNS (1), /* log_shift. */
1084 COSTS_N_INSNS (1), /* log_shift_reg. */
1085 COSTS_N_INSNS (1), /* extend. */
1086 COSTS_N_INSNS (1), /* extend_arith. */
1087 COSTS_N_INSNS (1), /* bfi. */
1088 COSTS_N_INSNS (1), /* bfx. */
1089 COSTS_N_INSNS (1), /* clz. */
1090 0, /* non_exec. */
1091 true /* non_exec_costs_exec. */
1092 },
1093
1094 {
1095 /* MULT SImode */
1096 {
1097 0, /* simple. */
1098 COSTS_N_INSNS (1), /* flag_setting. */
1099 COSTS_N_INSNS (1), /* extend. */
1100 COSTS_N_INSNS (1), /* add. */
1101 COSTS_N_INSNS (1), /* extend_add. */
1102 COSTS_N_INSNS (7) /* idiv. */
1103 },
1104 /* MULT DImode */
1105 {
1106 0, /* simple (N/A). */
1107 0, /* flag_setting (N/A). */
1108 COSTS_N_INSNS (1), /* extend. */
1109 0, /* add. */
1110 COSTS_N_INSNS (2), /* extend_add. */
1111 0 /* idiv (N/A). */
1112 }
1113 },
1114 /* LD/ST */
1115 {
1116 COSTS_N_INSNS (1), /* load. */
1117 COSTS_N_INSNS (1), /* load_sign_extend. */
1118 COSTS_N_INSNS (3), /* ldrd. */
1119 COSTS_N_INSNS (1), /* ldm_1st. */
1120 1, /* ldm_regs_per_insn_1st. */
1121 2, /* ldm_regs_per_insn_subsequent. */
1122 COSTS_N_INSNS (2), /* loadf. */
1123 COSTS_N_INSNS (2), /* loadd. */
1124 COSTS_N_INSNS (1), /* load_unaligned. */
1125 COSTS_N_INSNS (1), /* store. */
1126 COSTS_N_INSNS (3), /* strd. */
1127 COSTS_N_INSNS (1), /* stm_1st. */
1128 1, /* stm_regs_per_insn_1st. */
1129 2, /* stm_regs_per_insn_subsequent. */
1130 COSTS_N_INSNS (2), /* storef. */
1131 COSTS_N_INSNS (2), /* stored. */
1132 COSTS_N_INSNS (1) /* store_unaligned. */
1133 },
1134 {
1135 /* FP SFmode */
1136 {
1137 COSTS_N_INSNS (15), /* div. */
1138 COSTS_N_INSNS (3), /* mult. */
1139 COSTS_N_INSNS (7), /* mult_addsub. */
1140 COSTS_N_INSNS (7), /* fma. */
1141 COSTS_N_INSNS (3), /* addsub. */
1142 COSTS_N_INSNS (3), /* fpconst. */
1143 COSTS_N_INSNS (3), /* neg. */
1144 COSTS_N_INSNS (3), /* compare. */
1145 COSTS_N_INSNS (3), /* widen. */
1146 COSTS_N_INSNS (3), /* narrow. */
1147 COSTS_N_INSNS (3), /* toint. */
1148 COSTS_N_INSNS (3), /* fromint. */
1149 COSTS_N_INSNS (3) /* roundint. */
1150 },
1151 /* FP DFmode */
1152 {
1153 COSTS_N_INSNS (30), /* div. */
1154 COSTS_N_INSNS (6), /* mult. */
1155 COSTS_N_INSNS (10), /* mult_addsub. */
1156 COSTS_N_INSNS (7), /* fma. */
1157 COSTS_N_INSNS (3), /* addsub. */
1158 COSTS_N_INSNS (3), /* fpconst. */
1159 COSTS_N_INSNS (3), /* neg. */
1160 COSTS_N_INSNS (3), /* compare. */
1161 COSTS_N_INSNS (3), /* widen. */
1162 COSTS_N_INSNS (3), /* narrow. */
1163 COSTS_N_INSNS (3), /* toint. */
1164 COSTS_N_INSNS (3), /* fromint. */
1165 COSTS_N_INSNS (3) /* roundint. */
1166 }
1167 },
1168 /* Vector */
1169 {
1170 COSTS_N_INSNS (1) /* alu. */
1171 }
1172 };
1173
1174 const struct cpu_cost_table cortexa12_extra_costs =
1175 {
1176 /* ALU */
1177 {
1178 0, /* arith. */
1179 0, /* logical. */
1180 0, /* shift. */
1181 COSTS_N_INSNS (1), /* shift_reg. */
1182 COSTS_N_INSNS (1), /* arith_shift. */
1183 COSTS_N_INSNS (1), /* arith_shift_reg. */
1184 COSTS_N_INSNS (1), /* log_shift. */
1185 COSTS_N_INSNS (1), /* log_shift_reg. */
1186 0, /* extend. */
1187 COSTS_N_INSNS (1), /* extend_arith. */
1188 0, /* bfi. */
1189 COSTS_N_INSNS (1), /* bfx. */
1190 COSTS_N_INSNS (1), /* clz. */
1191 0, /* non_exec. */
1192 true /* non_exec_costs_exec. */
1193 },
1194 /* MULT SImode */
1195 {
1196 {
1197 COSTS_N_INSNS (2), /* simple. */
1198 COSTS_N_INSNS (3), /* flag_setting. */
1199 COSTS_N_INSNS (2), /* extend. */
1200 COSTS_N_INSNS (3), /* add. */
1201 COSTS_N_INSNS (2), /* extend_add. */
1202 COSTS_N_INSNS (18) /* idiv. */
1203 },
1204 /* MULT DImode */
1205 {
1206 0, /* simple (N/A). */
1207 0, /* flag_setting (N/A). */
1208 COSTS_N_INSNS (3), /* extend. */
1209 0, /* add (N/A). */
1210 COSTS_N_INSNS (3), /* extend_add. */
1211 0 /* idiv (N/A). */
1212 }
1213 },
1214 /* LD/ST */
1215 {
1216 COSTS_N_INSNS (3), /* load. */
1217 COSTS_N_INSNS (3), /* load_sign_extend. */
1218 COSTS_N_INSNS (3), /* ldrd. */
1219 COSTS_N_INSNS (3), /* ldm_1st. */
1220 1, /* ldm_regs_per_insn_1st. */
1221 2, /* ldm_regs_per_insn_subsequent. */
1222 COSTS_N_INSNS (3), /* loadf. */
1223 COSTS_N_INSNS (3), /* loadd. */
1224 0, /* load_unaligned. */
1225 0, /* store. */
1226 0, /* strd. */
1227 0, /* stm_1st. */
1228 1, /* stm_regs_per_insn_1st. */
1229 2, /* stm_regs_per_insn_subsequent. */
1230 COSTS_N_INSNS (2), /* storef. */
1231 COSTS_N_INSNS (2), /* stored. */
1232 0 /* store_unaligned. */
1233 },
1234 {
1235 /* FP SFmode */
1236 {
1237 COSTS_N_INSNS (17), /* div. */
1238 COSTS_N_INSNS (4), /* mult. */
1239 COSTS_N_INSNS (8), /* mult_addsub. */
1240 COSTS_N_INSNS (8), /* fma. */
1241 COSTS_N_INSNS (4), /* addsub. */
1242 COSTS_N_INSNS (2), /* fpconst. */
1243 COSTS_N_INSNS (2), /* neg. */
1244 COSTS_N_INSNS (2), /* compare. */
1245 COSTS_N_INSNS (4), /* widen. */
1246 COSTS_N_INSNS (4), /* narrow. */
1247 COSTS_N_INSNS (4), /* toint. */
1248 COSTS_N_INSNS (4), /* fromint. */
1249 COSTS_N_INSNS (4) /* roundint. */
1250 },
1251 /* FP DFmode */
1252 {
1253 COSTS_N_INSNS (31), /* div. */
1254 COSTS_N_INSNS (4), /* mult. */
1255 COSTS_N_INSNS (8), /* mult_addsub. */
1256 COSTS_N_INSNS (8), /* fma. */
1257 COSTS_N_INSNS (4), /* addsub. */
1258 COSTS_N_INSNS (2), /* fpconst. */
1259 COSTS_N_INSNS (2), /* neg. */
1260 COSTS_N_INSNS (2), /* compare. */
1261 COSTS_N_INSNS (4), /* widen. */
1262 COSTS_N_INSNS (4), /* narrow. */
1263 COSTS_N_INSNS (4), /* toint. */
1264 COSTS_N_INSNS (4), /* fromint. */
1265 COSTS_N_INSNS (4) /* roundint. */
1266 }
1267 },
1268 /* Vector */
1269 {
1270 COSTS_N_INSNS (1) /* alu. */
1271 }
1272 };
1273
1274 const struct cpu_cost_table cortexa15_extra_costs =
1275 {
1276 /* ALU */
1277 {
1278 0, /* arith. */
1279 0, /* logical. */
1280 0, /* shift. */
1281 0, /* shift_reg. */
1282 COSTS_N_INSNS (1), /* arith_shift. */
1283 COSTS_N_INSNS (1), /* arith_shift_reg. */
1284 COSTS_N_INSNS (1), /* log_shift. */
1285 COSTS_N_INSNS (1), /* log_shift_reg. */
1286 0, /* extend. */
1287 COSTS_N_INSNS (1), /* extend_arith. */
1288 COSTS_N_INSNS (1), /* bfi. */
1289 0, /* bfx. */
1290 0, /* clz. */
1291 0, /* non_exec. */
1292 true /* non_exec_costs_exec. */
1293 },
1294 /* MULT SImode */
1295 {
1296 {
1297 COSTS_N_INSNS (2), /* simple. */
1298 COSTS_N_INSNS (3), /* flag_setting. */
1299 COSTS_N_INSNS (2), /* extend. */
1300 COSTS_N_INSNS (2), /* add. */
1301 COSTS_N_INSNS (2), /* extend_add. */
1302 COSTS_N_INSNS (18) /* idiv. */
1303 },
1304 /* MULT DImode */
1305 {
1306 0, /* simple (N/A). */
1307 0, /* flag_setting (N/A). */
1308 COSTS_N_INSNS (3), /* extend. */
1309 0, /* add (N/A). */
1310 COSTS_N_INSNS (3), /* extend_add. */
1311 0 /* idiv (N/A). */
1312 }
1313 },
1314 /* LD/ST */
1315 {
1316 COSTS_N_INSNS (3), /* load. */
1317 COSTS_N_INSNS (3), /* load_sign_extend. */
1318 COSTS_N_INSNS (3), /* ldrd. */
1319 COSTS_N_INSNS (4), /* ldm_1st. */
1320 1, /* ldm_regs_per_insn_1st. */
1321 2, /* ldm_regs_per_insn_subsequent. */
1322 COSTS_N_INSNS (4), /* loadf. */
1323 COSTS_N_INSNS (4), /* loadd. */
1324 0, /* load_unaligned. */
1325 0, /* store. */
1326 0, /* strd. */
1327 COSTS_N_INSNS (1), /* stm_1st. */
1328 1, /* stm_regs_per_insn_1st. */
1329 2, /* stm_regs_per_insn_subsequent. */
1330 0, /* storef. */
1331 0, /* stored. */
1332 0 /* store_unaligned. */
1333 },
1334 {
1335 /* FP SFmode */
1336 {
1337 COSTS_N_INSNS (17), /* div. */
1338 COSTS_N_INSNS (4), /* mult. */
1339 COSTS_N_INSNS (8), /* mult_addsub. */
1340 COSTS_N_INSNS (8), /* fma. */
1341 COSTS_N_INSNS (4), /* addsub. */
1342 COSTS_N_INSNS (2), /* fpconst. */
1343 COSTS_N_INSNS (2), /* neg. */
1344 COSTS_N_INSNS (5), /* compare. */
1345 COSTS_N_INSNS (4), /* widen. */
1346 COSTS_N_INSNS (4), /* narrow. */
1347 COSTS_N_INSNS (4), /* toint. */
1348 COSTS_N_INSNS (4), /* fromint. */
1349 COSTS_N_INSNS (4) /* roundint. */
1350 },
1351 /* FP DFmode */
1352 {
1353 COSTS_N_INSNS (31), /* div. */
1354 COSTS_N_INSNS (4), /* mult. */
1355 COSTS_N_INSNS (8), /* mult_addsub. */
1356 COSTS_N_INSNS (8), /* fma. */
1357 COSTS_N_INSNS (4), /* addsub. */
1358 COSTS_N_INSNS (2), /* fpconst. */
1359 COSTS_N_INSNS (2), /* neg. */
1360 COSTS_N_INSNS (2), /* compare. */
1361 COSTS_N_INSNS (4), /* widen. */
1362 COSTS_N_INSNS (4), /* narrow. */
1363 COSTS_N_INSNS (4), /* toint. */
1364 COSTS_N_INSNS (4), /* fromint. */
1365 COSTS_N_INSNS (4) /* roundint. */
1366 }
1367 },
1368 /* Vector */
1369 {
1370 COSTS_N_INSNS (1) /* alu. */
1371 }
1372 };
1373
1374 const struct cpu_cost_table v7m_extra_costs =
1375 {
1376 /* ALU */
1377 {
1378 0, /* arith. */
1379 0, /* logical. */
1380 0, /* shift. */
1381 0, /* shift_reg. */
1382 0, /* arith_shift. */
1383 COSTS_N_INSNS (1), /* arith_shift_reg. */
1384 0, /* log_shift. */
1385 COSTS_N_INSNS (1), /* log_shift_reg. */
1386 0, /* extend. */
1387 COSTS_N_INSNS (1), /* extend_arith. */
1388 0, /* bfi. */
1389 0, /* bfx. */
1390 0, /* clz. */
1391 COSTS_N_INSNS (1), /* non_exec. */
1392 false /* non_exec_costs_exec. */
1393 },
1394 {
1395 /* MULT SImode */
1396 {
1397 COSTS_N_INSNS (1), /* simple. */
1398 COSTS_N_INSNS (1), /* flag_setting. */
1399 COSTS_N_INSNS (2), /* extend. */
1400 COSTS_N_INSNS (1), /* add. */
1401 COSTS_N_INSNS (3), /* extend_add. */
1402 COSTS_N_INSNS (8) /* idiv. */
1403 },
1404 /* MULT DImode */
1405 {
1406 0, /* simple (N/A). */
1407 0, /* flag_setting (N/A). */
1408 COSTS_N_INSNS (2), /* extend. */
1409 0, /* add (N/A). */
1410 COSTS_N_INSNS (3), /* extend_add. */
1411 0 /* idiv (N/A). */
1412 }
1413 },
1414 /* LD/ST */
1415 {
1416 COSTS_N_INSNS (2), /* load. */
1417 0, /* load_sign_extend. */
1418 COSTS_N_INSNS (3), /* ldrd. */
1419 COSTS_N_INSNS (2), /* ldm_1st. */
1420 1, /* ldm_regs_per_insn_1st. */
1421 1, /* ldm_regs_per_insn_subsequent. */
1422 COSTS_N_INSNS (2), /* loadf. */
1423 COSTS_N_INSNS (3), /* loadd. */
1424 COSTS_N_INSNS (1), /* load_unaligned. */
1425 COSTS_N_INSNS (2), /* store. */
1426 COSTS_N_INSNS (3), /* strd. */
1427 COSTS_N_INSNS (2), /* stm_1st. */
1428 1, /* stm_regs_per_insn_1st. */
1429 1, /* stm_regs_per_insn_subsequent. */
1430 COSTS_N_INSNS (2), /* storef. */
1431 COSTS_N_INSNS (3), /* stored. */
1432 COSTS_N_INSNS (1) /* store_unaligned. */
1433 },
1434 {
1435 /* FP SFmode */
1436 {
1437 COSTS_N_INSNS (7), /* div. */
1438 COSTS_N_INSNS (2), /* mult. */
1439 COSTS_N_INSNS (5), /* mult_addsub. */
1440 COSTS_N_INSNS (3), /* fma. */
1441 COSTS_N_INSNS (1), /* addsub. */
1442 0, /* fpconst. */
1443 0, /* neg. */
1444 0, /* compare. */
1445 0, /* widen. */
1446 0, /* narrow. */
1447 0, /* toint. */
1448 0, /* fromint. */
1449 0 /* roundint. */
1450 },
1451 /* FP DFmode */
1452 {
1453 COSTS_N_INSNS (15), /* div. */
1454 COSTS_N_INSNS (5), /* mult. */
1455 COSTS_N_INSNS (7), /* mult_addsub. */
1456 COSTS_N_INSNS (7), /* fma. */
1457 COSTS_N_INSNS (3), /* addsub. */
1458 0, /* fpconst. */
1459 0, /* neg. */
1460 0, /* compare. */
1461 0, /* widen. */
1462 0, /* narrow. */
1463 0, /* toint. */
1464 0, /* fromint. */
1465 0 /* roundint. */
1466 }
1467 },
1468 /* Vector */
1469 {
1470 COSTS_N_INSNS (1) /* alu. */
1471 }
1472 };
1473
1474 const struct tune_params arm_slowmul_tune =
1475 {
1476 arm_slowmul_rtx_costs,
1477 NULL,
1478 NULL, /* Sched adj cost. */
1479 3, /* Constant limit. */
1480 5, /* Max cond insns. */
1481 ARM_PREFETCH_NOT_BENEFICIAL,
1482 true, /* Prefer constant pool. */
1483 arm_default_branch_cost,
1484 false, /* Prefer LDRD/STRD. */
1485 {true, true}, /* Prefer non short circuit. */
1486 &arm_default_vec_cost, /* Vectorizer costs. */
1487 false /* Prefer Neon for 64-bits bitops. */
1488 };
1489
1490 const struct tune_params arm_fastmul_tune =
1491 {
1492 arm_fastmul_rtx_costs,
1493 NULL,
1494 NULL, /* Sched adj cost. */
1495 1, /* Constant limit. */
1496 5, /* Max cond insns. */
1497 ARM_PREFETCH_NOT_BENEFICIAL,
1498 true, /* Prefer constant pool. */
1499 arm_default_branch_cost,
1500 false, /* Prefer LDRD/STRD. */
1501 {true, true}, /* Prefer non short circuit. */
1502 &arm_default_vec_cost, /* Vectorizer costs. */
1503 false /* Prefer Neon for 64-bits bitops. */
1504 };
1505
1506 /* StrongARM has early execution of branches, so a sequence that is worth
1507 skipping is shorter. Set max_insns_skipped to a lower value. */
1508
1509 const struct tune_params arm_strongarm_tune =
1510 {
1511 arm_fastmul_rtx_costs,
1512 NULL,
1513 NULL, /* Sched adj cost. */
1514 1, /* Constant limit. */
1515 3, /* Max cond insns. */
1516 ARM_PREFETCH_NOT_BENEFICIAL,
1517 true, /* Prefer constant pool. */
1518 arm_default_branch_cost,
1519 false, /* Prefer LDRD/STRD. */
1520 {true, true}, /* Prefer non short circuit. */
1521 &arm_default_vec_cost, /* Vectorizer costs. */
1522 false /* Prefer Neon for 64-bits bitops. */
1523 };
1524
1525 const struct tune_params arm_xscale_tune =
1526 {
1527 arm_xscale_rtx_costs,
1528 NULL,
1529 xscale_sched_adjust_cost,
1530 2, /* Constant limit. */
1531 3, /* Max cond insns. */
1532 ARM_PREFETCH_NOT_BENEFICIAL,
1533 true, /* Prefer constant pool. */
1534 arm_default_branch_cost,
1535 false, /* Prefer LDRD/STRD. */
1536 {true, true}, /* Prefer non short circuit. */
1537 &arm_default_vec_cost, /* Vectorizer costs. */
1538 false /* Prefer Neon for 64-bits bitops. */
1539 };
1540
1541 const struct tune_params arm_9e_tune =
1542 {
1543 arm_9e_rtx_costs,
1544 NULL,
1545 NULL, /* Sched adj cost. */
1546 1, /* Constant limit. */
1547 5, /* Max cond insns. */
1548 ARM_PREFETCH_NOT_BENEFICIAL,
1549 true, /* Prefer constant pool. */
1550 arm_default_branch_cost,
1551 false, /* Prefer LDRD/STRD. */
1552 {true, true}, /* Prefer non short circuit. */
1553 &arm_default_vec_cost, /* Vectorizer costs. */
1554 false /* Prefer Neon for 64-bits bitops. */
1555 };
1556
1557 const struct tune_params arm_v6t2_tune =
1558 {
1559 arm_9e_rtx_costs,
1560 NULL,
1561 NULL, /* Sched adj cost. */
1562 1, /* Constant limit. */
1563 5, /* Max cond insns. */
1564 ARM_PREFETCH_NOT_BENEFICIAL,
1565 false, /* Prefer constant pool. */
1566 arm_default_branch_cost,
1567 false, /* Prefer LDRD/STRD. */
1568 {true, true}, /* Prefer non short circuit. */
1569 &arm_default_vec_cost, /* Vectorizer costs. */
1570 false /* Prefer Neon for 64-bits bitops. */
1571 };
1572
1573 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1574 const struct tune_params arm_cortex_tune =
1575 {
1576 arm_9e_rtx_costs,
1577 &generic_extra_costs,
1578 NULL, /* Sched adj cost. */
1579 1, /* Constant limit. */
1580 5, /* Max cond insns. */
1581 ARM_PREFETCH_NOT_BENEFICIAL,
1582 false, /* Prefer constant pool. */
1583 arm_default_branch_cost,
1584 false, /* Prefer LDRD/STRD. */
1585 {true, true}, /* Prefer non short circuit. */
1586 &arm_default_vec_cost, /* Vectorizer costs. */
1587 false /* Prefer Neon for 64-bits bitops. */
1588 };
1589
1590 const struct tune_params arm_cortex_a7_tune =
1591 {
1592 arm_9e_rtx_costs,
1593 &cortexa7_extra_costs,
1594 NULL,
1595 1, /* Constant limit. */
1596 5, /* Max cond insns. */
1597 ARM_PREFETCH_NOT_BENEFICIAL,
1598 false, /* Prefer constant pool. */
1599 arm_default_branch_cost,
1600 false, /* Prefer LDRD/STRD. */
1601 {true, true}, /* Prefer non short circuit. */
1602 &arm_default_vec_cost, /* Vectorizer costs. */
1603 false /* Prefer Neon for 64-bits bitops. */
1604 };
1605
1606 const struct tune_params arm_cortex_a15_tune =
1607 {
1608 arm_9e_rtx_costs,
1609 &cortexa15_extra_costs,
1610 NULL, /* Sched adj cost. */
1611 1, /* Constant limit. */
1612 2, /* Max cond insns. */
1613 ARM_PREFETCH_NOT_BENEFICIAL,
1614 false, /* Prefer constant pool. */
1615 arm_default_branch_cost,
1616 true, /* Prefer LDRD/STRD. */
1617 {true, true}, /* Prefer non short circuit. */
1618 &arm_default_vec_cost, /* Vectorizer costs. */
1619 false /* Prefer Neon for 64-bits bitops. */
1620 };
1621
1622 const struct tune_params arm_cortex_a53_tune =
1623 {
1624 arm_9e_rtx_costs,
1625 &cortexa53_extra_costs,
1626 NULL, /* Scheduler cost adjustment. */
1627 1, /* Constant limit. */
1628 5, /* Max cond insns. */
1629 ARM_PREFETCH_NOT_BENEFICIAL,
1630 false, /* Prefer constant pool. */
1631 arm_default_branch_cost,
1632 false, /* Prefer LDRD/STRD. */
1633 {true, true}, /* Prefer non short circuit. */
1634 &arm_default_vec_cost, /* Vectorizer costs. */
1635 false /* Prefer Neon for 64-bits bitops. */
1636 };
1637
1638 const struct tune_params arm_cortex_a57_tune =
1639 {
1640 arm_9e_rtx_costs,
1641 &cortexa57_extra_costs,
1642 NULL, /* Scheduler cost adjustment. */
1643 1, /* Constant limit. */
1644 2, /* Max cond insns. */
1645 ARM_PREFETCH_NOT_BENEFICIAL,
1646 false, /* Prefer constant pool. */
1647 arm_default_branch_cost,
1648 true, /* Prefer LDRD/STRD. */
1649 {true, true}, /* Prefer non short circuit. */
1650 &arm_default_vec_cost, /* Vectorizer costs. */
1651 false /* Prefer Neon for 64-bits bitops. */
1652 };
1653
1654 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1655 less appealing. Set max_insns_skipped to a low value. */
1656
1657 const struct tune_params arm_cortex_a5_tune =
1658 {
1659 arm_9e_rtx_costs,
1660 NULL,
1661 NULL, /* Sched adj cost. */
1662 1, /* Constant limit. */
1663 1, /* Max cond insns. */
1664 ARM_PREFETCH_NOT_BENEFICIAL,
1665 false, /* Prefer constant pool. */
1666 arm_cortex_a5_branch_cost,
1667 false, /* Prefer LDRD/STRD. */
1668 {false, false}, /* Prefer non short circuit. */
1669 &arm_default_vec_cost, /* Vectorizer costs. */
1670 false /* Prefer Neon for 64-bits bitops. */
1671 };
1672
1673 const struct tune_params arm_cortex_a9_tune =
1674 {
1675 arm_9e_rtx_costs,
1676 &cortexa9_extra_costs,
1677 cortex_a9_sched_adjust_cost,
1678 1, /* Constant limit. */
1679 5, /* Max cond insns. */
1680 ARM_PREFETCH_BENEFICIAL(4,32,32),
1681 false, /* Prefer constant pool. */
1682 arm_default_branch_cost,
1683 false, /* Prefer LDRD/STRD. */
1684 {true, true}, /* Prefer non short circuit. */
1685 &arm_default_vec_cost, /* Vectorizer costs. */
1686 false /* Prefer Neon for 64-bits bitops. */
1687 };
1688
1689 const struct tune_params arm_cortex_a12_tune =
1690 {
1691 arm_9e_rtx_costs,
1692 &cortexa12_extra_costs,
1693 NULL,
1694 1, /* Constant limit. */
1695 5, /* Max cond insns. */
1696 ARM_PREFETCH_BENEFICIAL(4,32,32),
1697 false, /* Prefer constant pool. */
1698 arm_default_branch_cost,
1699 true, /* Prefer LDRD/STRD. */
1700 {true, true}, /* Prefer non short circuit. */
1701 &arm_default_vec_cost, /* Vectorizer costs. */
1702 false /* Prefer Neon for 64-bits bitops. */
1703 };
1704
1705 /* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT each take a
1706 single cycle, so a MOVW/MOVT pair takes two cycles.  An LDR from the
1707 constant pool also takes two cycles to execute, but mildly increases
1708 pipelining opportunity (consecutive loads/stores can be pipelined together,
1709 saving one cycle), and may also improve icache utilisation.  Hence we prefer
1710 the constant pool for such processors. */
1711
1712 const struct tune_params arm_v7m_tune =
1713 {
1714 arm_9e_rtx_costs,
1715 &v7m_extra_costs,
1716 NULL, /* Sched adj cost. */
1717 1, /* Constant limit. */
1718 2, /* Max cond insns. */
1719 ARM_PREFETCH_NOT_BENEFICIAL,
1720 true, /* Prefer constant pool. */
1721 arm_cortex_m_branch_cost,
1722 false, /* Prefer LDRD/STRD. */
1723 {false, false}, /* Prefer non short circuit. */
1724 &arm_default_vec_cost, /* Vectorizer costs. */
1725 false /* Prefer Neon for 64-bits bitops. */
1726 };
1727
1728 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1729 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1730 const struct tune_params arm_v6m_tune =
1731 {
1732 arm_9e_rtx_costs,
1733 NULL,
1734 NULL, /* Sched adj cost. */
1735 1, /* Constant limit. */
1736 5, /* Max cond insns. */
1737 ARM_PREFETCH_NOT_BENEFICIAL,
1738 false, /* Prefer constant pool. */
1739 arm_default_branch_cost,
1740 false, /* Prefer LDRD/STRD. */
1741 {false, false}, /* Prefer non short circuit. */
1742 &arm_default_vec_cost, /* Vectorizer costs. */
1743 false /* Prefer Neon for 64-bits bitops. */
1744 };
1745
1746 const struct tune_params arm_fa726te_tune =
1747 {
1748 arm_9e_rtx_costs,
1749 NULL,
1750 fa726te_sched_adjust_cost,
1751 1, /* Constant limit. */
1752 5, /* Max cond insns. */
1753 ARM_PREFETCH_NOT_BENEFICIAL,
1754 true, /* Prefer constant pool. */
1755 arm_default_branch_cost,
1756 false, /* Prefer LDRD/STRD. */
1757 {true, true}, /* Prefer non short circuit. */
1758 &arm_default_vec_cost, /* Vectorizer costs. */
1759 false /* Prefer Neon for 64-bits bitops. */
1760 };
1761
1762
1763 /* Not all of these give usefully different compilation alternatives,
1764 but there is no simple way of generalizing them. */
1765 static const struct processors all_cores[] =
1766 {
1767 /* ARM Cores */
1768 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1769 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1770 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1771 #include "arm-cores.def"
1772 #undef ARM_CORE
1773 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1774 };
1775
1776 static const struct processors all_architectures[] =
1777 {
1778 /* ARM Architectures */
1779 /* We don't specify tuning costs here as they will be figured out
1780 from the core. */
1781
1782 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1783 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1784 #include "arm-arches.def"
1785 #undef ARM_ARCH
1786 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1787 };
1788
1789
1790 /* These are populated as command-line arguments are processed, or NULL
1791 if not specified. */
1792 static const struct processors *arm_selected_arch;
1793 static const struct processors *arm_selected_cpu;
1794 static const struct processors *arm_selected_tune;
1795
1796 /* The name of the preprocessor macro to define for this architecture. */
1797
1798 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1799
1800 /* Available values for -mfpu=. */
1801
1802 static const struct arm_fpu_desc all_fpus[] =
1803 {
1804 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1805 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1806 #include "arm-fpus.def"
1807 #undef ARM_FPU
1808 };
1809
1810
1811 /* Supported TLS relocations. */
1812
1813 enum tls_reloc {
1814 TLS_GD32,
1815 TLS_LDM32,
1816 TLS_LDO32,
1817 TLS_IE32,
1818 TLS_LE32,
1819 TLS_DESCSEQ /* GNU scheme */
1820 };
1821
1822 /* The maximum number of insns to be used when loading a constant. */
1823 inline static int
1824 arm_constant_limit (bool size_p)
1825 {
1826 return size_p ? 1 : current_tune->constant_limit;
1827 }
1828
1829 /* Emit an insn that's a simple single-set. Both the operands must be known
1830 to be valid. */
1831 inline static rtx
1832 emit_set_insn (rtx x, rtx y)
1833 {
1834 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1835 }
1836
1837 /* Return the number of bits set in VALUE. */
1838 static unsigned
1839 bit_count (unsigned long value)
1840 {
1841 unsigned long count = 0;
1842
1843 while (value)
1844 {
1845 count++;
1846 value &= value - 1; /* Clear the least-significant set bit. */
1847 }
1848
1849 return count;
1850 }
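
/* For illustration: bit_count (0x2a) returns 3, since 0x2a is binary 101010
   and has bits 1, 3 and 5 set.  */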
1851
1852 typedef struct
1853 {
1854 enum machine_mode mode;
1855 const char *name;
1856 } arm_fixed_mode_set;
1857
1858 /* A small helper for setting fixed-point libfuncs. */
1859
1860 static void
1861 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1862 const char *funcname, const char *modename,
1863 int num_suffix)
1864 {
1865 char buffer[50];
1866
1867 if (num_suffix == 0)
1868 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1869 else
1870 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1871
1872 set_optab_libfunc (optable, mode, buffer);
1873 }
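
/* A worked example of the name construction above (illustrative only): with
   OPTABLE == add_optab, MODE == SAmode, FUNCNAME == "add", MODENAME == "sa"
   and NUM_SUFFIX == 3, the helper registers "__gnu_addsa3" as the addition
   libcall for SAmode.  */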
1874
1875 static void
1876 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1877 enum machine_mode from, const char *funcname,
1878 const char *toname, const char *fromname)
1879 {
1880 char buffer[50];
1881 const char *maybe_suffix_2 = "";
1882
1883 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1884 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1885 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1886 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1887 maybe_suffix_2 = "2";
1888
1889 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1890 maybe_suffix_2);
1891
1892 set_conv_libfunc (optable, to, from, buffer);
1893 }
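
/* Illustrative examples of the names built above: converting from SQmode
   ("sq") to DQmode ("dq") keeps both the signedness and the fract-ness, so
   the "2" suffix is used and the name is "__gnu_fractsqdq2"; converting from
   SImode ("si") to SAmode ("sa") involves a non-fixed-point mode, so the name
   is plain "__gnu_fractsisa".  */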
1894
1895 /* Set up library functions unique to ARM. */
1896
1897 static void
1898 arm_init_libfuncs (void)
1899 {
1900 /* For Linux, we have access to kernel support for atomic operations. */
1901 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1902 init_sync_libfuncs (2 * UNITS_PER_WORD);
1903
1904 /* There are no special library functions unless we are using the
1905 ARM BPABI. */
1906 if (!TARGET_BPABI)
1907 return;
1908
1909 /* The functions below are described in Section 4 of the "Run-Time
1910 ABI for the ARM architecture", Version 1.0. */
1911
1912 /* Double-precision floating-point arithmetic. Table 2. */
1913 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1914 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1915 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1916 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1917 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1918
1919 /* Double-precision comparisons. Table 3. */
1920 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1921 set_optab_libfunc (ne_optab, DFmode, NULL);
1922 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1923 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1924 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1925 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1926 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1927
1928 /* Single-precision floating-point arithmetic. Table 4. */
1929 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1930 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1931 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1932 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1933 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1934
1935 /* Single-precision comparisons. Table 5. */
1936 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1937 set_optab_libfunc (ne_optab, SFmode, NULL);
1938 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1939 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1940 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1941 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1942 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1943
1944 /* Floating-point to integer conversions. Table 6. */
1945 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1946 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1947 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1948 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1949 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1950 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1951 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1952 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1953
1954 /* Conversions between floating types. Table 7. */
1955 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1956 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1957
1958 /* Integer to floating-point conversions. Table 8. */
1959 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1960 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1961 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1962 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1963 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1964 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1965 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1966 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1967
1968 /* Long long. Table 9. */
1969 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1970 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1971 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1972 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1973 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1974 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1975 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1976 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1977
1978 /* Integer (32/32->32) division. \S 4.3.1. */
1979 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1980 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1981
1982 /* The divmod functions are designed so that they can be used for
1983 plain division, even though they return both the quotient and the
1984 remainder. The quotient is returned in the usual location (i.e.,
1985 r0 for SImode, {r0, r1} for DImode), just as would be expected
1986 for an ordinary division routine. Because the AAPCS calling
1987 conventions specify that all of { r0, r1, r2, r3 } are
1988 call-clobbered registers, there is no need to tell the compiler
1989 explicitly that those registers are clobbered by these
1990 routines. */
1991 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1992 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
1993
1994 /* For SImode division the ABI provides div-without-mod routines,
1995 which are faster. */
1996 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1997 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1998
1999 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2000 divmod libcalls instead. */
2001 set_optab_libfunc (smod_optab, DImode, NULL);
2002 set_optab_libfunc (umod_optab, DImode, NULL);
2003 set_optab_libfunc (smod_optab, SImode, NULL);
2004 set_optab_libfunc (umod_optab, SImode, NULL);
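
/* Illustration (user-level view of the libcalls registered above): for

     long long q = a / b;    /* signed DImode division */

   the compiler calls __aeabi_ldivmod and uses only the quotient returned in
   {r0, r1}; the remainder left in {r2, r3} is simply ignored.  Likewise an
   SImode "a % b" is expanded through __aeabi_idivmod, taking the remainder
   from r1.  */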
2005
2006 /* Half-precision float operations. The compiler handles all operations
2007 with NULL libfuncs by converting to SFmode. */
2008 switch (arm_fp16_format)
2009 {
2010 case ARM_FP16_FORMAT_IEEE:
2011 case ARM_FP16_FORMAT_ALTERNATIVE:
2012
2013 /* Conversions. */
2014 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2015 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2016 ? "__gnu_f2h_ieee"
2017 : "__gnu_f2h_alternative"));
2018 set_conv_libfunc (sext_optab, SFmode, HFmode,
2019 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2020 ? "__gnu_h2f_ieee"
2021 : "__gnu_h2f_alternative"));
2022
2023 /* Arithmetic. */
2024 set_optab_libfunc (add_optab, HFmode, NULL);
2025 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2026 set_optab_libfunc (smul_optab, HFmode, NULL);
2027 set_optab_libfunc (neg_optab, HFmode, NULL);
2028 set_optab_libfunc (sub_optab, HFmode, NULL);
2029
2030 /* Comparisons. */
2031 set_optab_libfunc (eq_optab, HFmode, NULL);
2032 set_optab_libfunc (ne_optab, HFmode, NULL);
2033 set_optab_libfunc (lt_optab, HFmode, NULL);
2034 set_optab_libfunc (le_optab, HFmode, NULL);
2035 set_optab_libfunc (ge_optab, HFmode, NULL);
2036 set_optab_libfunc (gt_optab, HFmode, NULL);
2037 set_optab_libfunc (unord_optab, HFmode, NULL);
2038 break;
2039
2040 default:
2041 break;
2042 }
2043
2044 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2045 {
2046 const arm_fixed_mode_set fixed_arith_modes[] =
2047 {
2048 { QQmode, "qq" },
2049 { UQQmode, "uqq" },
2050 { HQmode, "hq" },
2051 { UHQmode, "uhq" },
2052 { SQmode, "sq" },
2053 { USQmode, "usq" },
2054 { DQmode, "dq" },
2055 { UDQmode, "udq" },
2056 { TQmode, "tq" },
2057 { UTQmode, "utq" },
2058 { HAmode, "ha" },
2059 { UHAmode, "uha" },
2060 { SAmode, "sa" },
2061 { USAmode, "usa" },
2062 { DAmode, "da" },
2063 { UDAmode, "uda" },
2064 { TAmode, "ta" },
2065 { UTAmode, "uta" }
2066 };
2067 const arm_fixed_mode_set fixed_conv_modes[] =
2068 {
2069 { QQmode, "qq" },
2070 { UQQmode, "uqq" },
2071 { HQmode, "hq" },
2072 { UHQmode, "uhq" },
2073 { SQmode, "sq" },
2074 { USQmode, "usq" },
2075 { DQmode, "dq" },
2076 { UDQmode, "udq" },
2077 { TQmode, "tq" },
2078 { UTQmode, "utq" },
2079 { HAmode, "ha" },
2080 { UHAmode, "uha" },
2081 { SAmode, "sa" },
2082 { USAmode, "usa" },
2083 { DAmode, "da" },
2084 { UDAmode, "uda" },
2085 { TAmode, "ta" },
2086 { UTAmode, "uta" },
2087 { QImode, "qi" },
2088 { HImode, "hi" },
2089 { SImode, "si" },
2090 { DImode, "di" },
2091 { TImode, "ti" },
2092 { SFmode, "sf" },
2093 { DFmode, "df" }
2094 };
2095 unsigned int i, j;
2096
2097 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2098 {
2099 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2100 "add", fixed_arith_modes[i].name, 3);
2101 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2102 "ssadd", fixed_arith_modes[i].name, 3);
2103 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2104 "usadd", fixed_arith_modes[i].name, 3);
2105 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2106 "sub", fixed_arith_modes[i].name, 3);
2107 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2108 "sssub", fixed_arith_modes[i].name, 3);
2109 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2110 "ussub", fixed_arith_modes[i].name, 3);
2111 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2112 "mul", fixed_arith_modes[i].name, 3);
2113 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2114 "ssmul", fixed_arith_modes[i].name, 3);
2115 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2116 "usmul", fixed_arith_modes[i].name, 3);
2117 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2118 "div", fixed_arith_modes[i].name, 3);
2119 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2120 "udiv", fixed_arith_modes[i].name, 3);
2121 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2122 "ssdiv", fixed_arith_modes[i].name, 3);
2123 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2124 "usdiv", fixed_arith_modes[i].name, 3);
2125 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2126 "neg", fixed_arith_modes[i].name, 2);
2127 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2128 "ssneg", fixed_arith_modes[i].name, 2);
2129 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2130 "usneg", fixed_arith_modes[i].name, 2);
2131 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2132 "ashl", fixed_arith_modes[i].name, 3);
2133 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2134 "ashr", fixed_arith_modes[i].name, 3);
2135 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2136 "lshr", fixed_arith_modes[i].name, 3);
2137 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2138 "ssashl", fixed_arith_modes[i].name, 3);
2139 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2140 "usashl", fixed_arith_modes[i].name, 3);
2141 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2142 "cmp", fixed_arith_modes[i].name, 2);
2143 }
2144
2145 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2146 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2147 {
2148 if (i == j
2149 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2150 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2151 continue;
2152
2153 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2154 fixed_conv_modes[j].mode, "fract",
2155 fixed_conv_modes[i].name,
2156 fixed_conv_modes[j].name);
2157 arm_set_fixed_conv_libfunc (satfract_optab,
2158 fixed_conv_modes[i].mode,
2159 fixed_conv_modes[j].mode, "satfract",
2160 fixed_conv_modes[i].name,
2161 fixed_conv_modes[j].name);
2162 arm_set_fixed_conv_libfunc (fractuns_optab,
2163 fixed_conv_modes[i].mode,
2164 fixed_conv_modes[j].mode, "fractuns",
2165 fixed_conv_modes[i].name,
2166 fixed_conv_modes[j].name);
2167 arm_set_fixed_conv_libfunc (satfractuns_optab,
2168 fixed_conv_modes[i].mode,
2169 fixed_conv_modes[j].mode, "satfractuns",
2170 fixed_conv_modes[i].name,
2171 fixed_conv_modes[j].name);
2172 }
2173 }
2174
2175 if (TARGET_AAPCS_BASED)
2176 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2177 }
2178
2179 /* On AAPCS systems, this is the "struct __va_list". */
2180 static GTY(()) tree va_list_type;
2181
2182 /* Return the type to use as __builtin_va_list. */
2183 static tree
2184 arm_build_builtin_va_list (void)
2185 {
2186 tree va_list_name;
2187 tree ap_field;
2188
2189 if (!TARGET_AAPCS_BASED)
2190 return std_build_builtin_va_list ();
2191
2192 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2193 defined as:
2194
2195 struct __va_list
2196 {
2197 void *__ap;
2198 };
2199
2200 The C Library ABI further reinforces this definition in \S
2201 4.1.
2202
2203 We must follow this definition exactly. The structure tag
2204 name is visible in C++ mangled names, and thus forms a part
2205 of the ABI. The field name may be used by people who
2206 #include <stdarg.h>. */
2207 /* Create the type. */
2208 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2209 /* Give it the required name. */
2210 va_list_name = build_decl (BUILTINS_LOCATION,
2211 TYPE_DECL,
2212 get_identifier ("__va_list"),
2213 va_list_type);
2214 DECL_ARTIFICIAL (va_list_name) = 1;
2215 TYPE_NAME (va_list_type) = va_list_name;
2216 TYPE_STUB_DECL (va_list_type) = va_list_name;
2217 /* Create the __ap field. */
2218 ap_field = build_decl (BUILTINS_LOCATION,
2219 FIELD_DECL,
2220 get_identifier ("__ap"),
2221 ptr_type_node);
2222 DECL_ARTIFICIAL (ap_field) = 1;
2223 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2224 TYPE_FIELDS (va_list_type) = ap_field;
2225 /* Compute its layout. */
2226 layout_type (va_list_type);
2227
2228 return va_list_type;
2229 }
2230
2231 /* Return an expression of type "void *" pointing to the next
2232 available argument in a variable-argument list. VALIST is the
2233 user-level va_list object, of type __builtin_va_list. */
2234 static tree
2235 arm_extract_valist_ptr (tree valist)
2236 {
2237 if (TREE_TYPE (valist) == error_mark_node)
2238 return error_mark_node;
2239
2240 /* On an AAPCS target, the pointer is stored within "struct
2241 va_list". */
2242 if (TARGET_AAPCS_BASED)
2243 {
2244 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2245 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2246 valist, ap_field, NULL_TREE);
2247 }
2248
2249 return valist;
2250 }
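
/* For example, given an AAPCS "va_list ap", this yields the tree for
   "ap.__ap"; on non-AAPCS targets the valist is already the pointer and is
   returned unchanged.  */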
2251
2252 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2253 static void
2254 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2255 {
2256 valist = arm_extract_valist_ptr (valist);
2257 std_expand_builtin_va_start (valist, nextarg);
2258 }
2259
2260 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2261 static tree
2262 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2263 gimple_seq *post_p)
2264 {
2265 valist = arm_extract_valist_ptr (valist);
2266 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2267 }
2268
2269 /* Fix up any incompatible options that the user has specified. */
2270 static void
2271 arm_option_override (void)
2272 {
2273 if (global_options_set.x_arm_arch_option)
2274 arm_selected_arch = &all_architectures[arm_arch_option];
2275
2276 if (global_options_set.x_arm_cpu_option)
2277 {
2278 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2279 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2280 }
2281
2282 if (global_options_set.x_arm_tune_option)
2283 arm_selected_tune = &all_cores[(int) arm_tune_option];
2284
2285 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2286 SUBTARGET_OVERRIDE_OPTIONS;
2287 #endif
2288
2289 if (arm_selected_arch)
2290 {
2291 if (arm_selected_cpu)
2292 {
2293 /* Check for conflict between mcpu and march. */
2294 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2295 {
2296 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2297 arm_selected_cpu->name, arm_selected_arch->name);
2298 /* -march wins for code generation.
2299 -mcpu wins for default tuning. */
2300 if (!arm_selected_tune)
2301 arm_selected_tune = arm_selected_cpu;
2302
2303 arm_selected_cpu = arm_selected_arch;
2304 }
2305 else
2306 /* -mcpu wins. */
2307 arm_selected_arch = NULL;
2308 }
2309 else
2310 /* Pick a CPU based on the architecture. */
2311 arm_selected_cpu = arm_selected_arch;
2312 }
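
/* For instance, "-march=armv5t -mcpu=arm7tdmi" (an ARMv4T core) takes the
   warning path above: armv5t is then used for code generation while
   arm7tdmi still provides the default tuning.  */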
2313
2314 /* If the user did not specify a processor, choose one for them. */
2315 if (!arm_selected_cpu)
2316 {
2317 const struct processors * sel;
2318 unsigned int sought;
2319
2320 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2321 if (!arm_selected_cpu->name)
2322 {
2323 #ifdef SUBTARGET_CPU_DEFAULT
2324 /* Use the subtarget default CPU if none was specified by
2325 configure. */
2326 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2327 #endif
2328 /* Default to ARM6. */
2329 if (!arm_selected_cpu->name)
2330 arm_selected_cpu = &all_cores[arm6];
2331 }
2332
2333 sel = arm_selected_cpu;
2334 insn_flags = sel->flags;
2335
2336 /* Now check to see if the user has specified some command line
2337 switches that require certain abilities from the CPU. */
2338 sought = 0;
2339
2340 if (TARGET_INTERWORK || TARGET_THUMB)
2341 {
2342 sought |= (FL_THUMB | FL_MODE32);
2343
2344 /* There are no ARM processors that support both APCS-26 and
2345 interworking. Therefore we force FL_MODE26 to be removed
2346 from insn_flags here (if it was set), so that the search
2347 below will always be able to find a compatible processor. */
2348 insn_flags &= ~FL_MODE26;
2349 }
2350
2351 if (sought != 0 && ((sought & insn_flags) != sought))
2352 {
2353 /* Try to locate a CPU type that supports all of the abilities
2354 of the default CPU, plus the extra abilities requested by
2355 the user. */
2356 for (sel = all_cores; sel->name != NULL; sel++)
2357 if ((sel->flags & sought) == (sought | insn_flags))
2358 break;
2359
2360 if (sel->name == NULL)
2361 {
2362 unsigned current_bit_count = 0;
2363 const struct processors * best_fit = NULL;
2364
2365 /* Ideally we would like to issue an error message here
2366 saying that it was not possible to find a CPU compatible
2367 with the default CPU, but which also supports the command
2368 line options specified by the programmer, and so they
2369 ought to use the -mcpu=<name> command line option to
2370 override the default CPU type.
2371
2372 If we cannot find a cpu that has both the
2373 characteristics of the default cpu and the given
2374 command line options we scan the array again looking
2375 for a best match. */
2376 for (sel = all_cores; sel->name != NULL; sel++)
2377 if ((sel->flags & sought) == sought)
2378 {
2379 unsigned count;
2380
2381 count = bit_count (sel->flags & insn_flags);
2382
2383 if (count >= current_bit_count)
2384 {
2385 best_fit = sel;
2386 current_bit_count = count;
2387 }
2388 }
2389
2390 gcc_assert (best_fit);
2391 sel = best_fit;
2392 }
2393
2394 arm_selected_cpu = sel;
2395 }
2396 }
2397
2398 gcc_assert (arm_selected_cpu);
2399 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2400 if (!arm_selected_tune)
2401 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2402
2403 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2404 insn_flags = arm_selected_cpu->flags;
2405 arm_base_arch = arm_selected_cpu->base_arch;
2406
2407 arm_tune = arm_selected_tune->core;
2408 tune_flags = arm_selected_tune->flags;
2409 current_tune = arm_selected_tune->tune;
2410
2411 /* Make sure that the processor choice does not conflict with any of the
2412 other command line choices. */
2413 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2414 error ("target CPU does not support ARM mode");
2415
2416 /* BPABI targets use linker tricks to allow interworking on cores
2417 without thumb support. */
2418 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2419 {
2420 warning (0, "target CPU does not support interworking" );
2421 target_flags &= ~MASK_INTERWORK;
2422 }
2423
2424 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2425 {
2426 warning (0, "target CPU does not support THUMB instructions");
2427 target_flags &= ~MASK_THUMB;
2428 }
2429
2430 if (TARGET_APCS_FRAME && TARGET_THUMB)
2431 {
2432 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2433 target_flags &= ~MASK_APCS_FRAME;
2434 }
2435
2436 /* Callee super interworking implies thumb interworking. Adding
2437 this to the flags here simplifies the logic elsewhere. */
2438 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2439 target_flags |= MASK_INTERWORK;
2440
2441 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if called
2442 from here, where no function is currently being compiled. */
2443 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2444 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2445
2446 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2447 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2448
2449 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2450 {
2451 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2452 target_flags |= MASK_APCS_FRAME;
2453 }
2454
2455 if (TARGET_POKE_FUNCTION_NAME)
2456 target_flags |= MASK_APCS_FRAME;
2457
2458 if (TARGET_APCS_REENT && flag_pic)
2459 error ("-fpic and -mapcs-reent are incompatible");
2460
2461 if (TARGET_APCS_REENT)
2462 warning (0, "APCS reentrant code not supported. Ignored");
2463
2464 /* If this target is normally configured to use APCS frames, warn if they
2465 are turned off and debugging is turned on. */
2466 if (TARGET_ARM
2467 && write_symbols != NO_DEBUG
2468 && !TARGET_APCS_FRAME
2469 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2470 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2471
2472 if (TARGET_APCS_FLOAT)
2473 warning (0, "passing floating point arguments in fp regs not yet supported");
2474
2475 if (TARGET_LITTLE_WORDS)
2476 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2477 "will be removed in a future release");
2478
2479 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2480 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2481 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2482 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2483 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2484 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2485 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2486 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2487 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2488 arm_arch6m = arm_arch6 && !arm_arch_notm;
2489 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2490 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2491 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2492 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2493 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2494
2495 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2496 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2497 thumb_code = TARGET_ARM == 0;
2498 thumb1_code = TARGET_THUMB1 != 0;
2499 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2500 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2501 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2502 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2503 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2504 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2505 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2506 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2507 if (arm_restrict_it == 2)
2508 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2509
2510 if (!TARGET_THUMB2)
2511 arm_restrict_it = 0;
2512
2513 /* If we are not using the default (ARM mode) section anchor offset
2514 ranges, then set the correct ranges now. */
2515 if (TARGET_THUMB1)
2516 {
2517 /* Thumb-1 LDR instructions cannot have negative offsets.
2518 Permissible positive offset ranges are 5-bit (for byte loads),
2519 6-bit (for halfword loads), or 7-bit (for word loads).
2520 Empirical results suggest a 7-bit anchor range gives the best
2521 overall code size. */
2522 targetm.min_anchor_offset = 0;
2523 targetm.max_anchor_offset = 127;
2524 }
2525 else if (TARGET_THUMB2)
2526 {
2527 /* The minimum is set such that the total size of the block
2528 for a particular anchor is 248 + 1 + 4095 bytes, which is
2529 divisible by eight, ensuring natural spacing of anchors. */
2530 targetm.min_anchor_offset = -248;
2531 targetm.max_anchor_offset = 4095;
2532 }
2533
2534 /* V5 code we generate is completely interworking capable, so we turn off
2535 TARGET_INTERWORK here to avoid many tests later on. */
2536
2537 /* XXX However, we must pass the right pre-processor defines to CPP
2538 or GLD can get confused. This is a hack. */
2539 if (TARGET_INTERWORK)
2540 arm_cpp_interwork = 1;
2541
2542 if (arm_arch5)
2543 target_flags &= ~MASK_INTERWORK;
2544
2545 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2546 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2547
2548 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2549 error ("iwmmxt abi requires an iwmmxt capable cpu");
2550
2551 if (!global_options_set.x_arm_fpu_index)
2552 {
2553 const char *target_fpu_name;
2554 bool ok;
2555
2556 #ifdef FPUTYPE_DEFAULT
2557 target_fpu_name = FPUTYPE_DEFAULT;
2558 #else
2559 target_fpu_name = "vfp";
2560 #endif
2561
2562 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2563 CL_TARGET);
2564 gcc_assert (ok);
2565 }
2566
2567 arm_fpu_desc = &all_fpus[arm_fpu_index];
2568
2569 switch (arm_fpu_desc->model)
2570 {
2571 case ARM_FP_MODEL_VFP:
2572 arm_fpu_attr = FPU_VFP;
2573 break;
2574
2575 default:
2576 gcc_unreachable();
2577 }
2578
2579 if (TARGET_AAPCS_BASED)
2580 {
2581 if (TARGET_CALLER_INTERWORKING)
2582 error ("AAPCS does not support -mcaller-super-interworking");
2583 else
2584 if (TARGET_CALLEE_INTERWORKING)
2585 error ("AAPCS does not support -mcallee-super-interworking");
2586 }
2587
2588 /* iWMMXt and NEON are incompatible. */
2589 if (TARGET_IWMMXT && TARGET_NEON)
2590 error ("iWMMXt and NEON are incompatible");
2591
2592 /* iWMMXt unsupported under Thumb mode. */
2593 if (TARGET_THUMB && TARGET_IWMMXT)
2594 error ("iWMMXt unsupported under Thumb mode");
2595
2596 /* __fp16 support currently assumes the core has ldrh. */
2597 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2598 sorry ("__fp16 and no ldrh");
2599
2600 /* If soft-float is specified then don't use FPU. */
2601 if (TARGET_SOFT_FLOAT)
2602 arm_fpu_attr = FPU_NONE;
2603
2604 if (TARGET_AAPCS_BASED)
2605 {
2606 if (arm_abi == ARM_ABI_IWMMXT)
2607 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2608 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2609 && TARGET_HARD_FLOAT
2610 && TARGET_VFP)
2611 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2612 else
2613 arm_pcs_default = ARM_PCS_AAPCS;
2614 }
2615 else
2616 {
2617 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2618 sorry ("-mfloat-abi=hard and VFP");
2619
2620 if (arm_abi == ARM_ABI_APCS)
2621 arm_pcs_default = ARM_PCS_APCS;
2622 else
2623 arm_pcs_default = ARM_PCS_ATPCS;
2624 }
2625
2626 /* For arm2/3 there is no need to do any scheduling if we are doing
2627 software floating-point. */
2628 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2629 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2630
2631 /* Use the cp15 method if it is available. */
2632 if (target_thread_pointer == TP_AUTO)
2633 {
2634 if (arm_arch6k && !TARGET_THUMB1)
2635 target_thread_pointer = TP_CP15;
2636 else
2637 target_thread_pointer = TP_SOFT;
2638 }
2639
2640 if (TARGET_HARD_TP && TARGET_THUMB1)
2641 error ("can not use -mtp=cp15 with 16-bit Thumb");
2642
2643 /* Override the default structure alignment for AAPCS ABI. */
2644 if (!global_options_set.x_arm_structure_size_boundary)
2645 {
2646 if (TARGET_AAPCS_BASED)
2647 arm_structure_size_boundary = 8;
2648 }
2649 else
2650 {
2651 if (arm_structure_size_boundary != 8
2652 && arm_structure_size_boundary != 32
2653 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2654 {
2655 if (ARM_DOUBLEWORD_ALIGN)
2656 warning (0,
2657 "structure size boundary can only be set to 8, 32 or 64");
2658 else
2659 warning (0, "structure size boundary can only be set to 8 or 32");
2660 arm_structure_size_boundary
2661 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2662 }
2663 }
2664
2665 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2666 {
2667 error ("RTP PIC is incompatible with Thumb");
2668 flag_pic = 0;
2669 }
2670
2671 /* If stack checking is disabled, we can use r10 as the PIC register,
2672 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2673 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2674 {
2675 if (TARGET_VXWORKS_RTP)
2676 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2677 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2678 }
2679
2680 if (flag_pic && TARGET_VXWORKS_RTP)
2681 arm_pic_register = 9;
2682
2683 if (arm_pic_register_string != NULL)
2684 {
2685 int pic_register = decode_reg_name (arm_pic_register_string);
2686
2687 if (!flag_pic)
2688 warning (0, "-mpic-register= is useless without -fpic");
2689
2690 /* Prevent the user from choosing an obviously stupid PIC register. */
2691 else if (pic_register < 0 || call_used_regs[pic_register]
2692 || pic_register == HARD_FRAME_POINTER_REGNUM
2693 || pic_register == STACK_POINTER_REGNUM
2694 || pic_register >= PC_REGNUM
2695 || (TARGET_VXWORKS_RTP
2696 && (unsigned int) pic_register != arm_pic_register))
2697 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2698 else
2699 arm_pic_register = pic_register;
2700 }
2701
2702 if (TARGET_VXWORKS_RTP
2703 && !global_options_set.x_arm_pic_data_is_text_relative)
2704 arm_pic_data_is_text_relative = 0;
2705
2706 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2707 if (fix_cm3_ldrd == 2)
2708 {
2709 if (arm_selected_cpu->core == cortexm3)
2710 fix_cm3_ldrd = 1;
2711 else
2712 fix_cm3_ldrd = 0;
2713 }
2714
2715 /* Enable -munaligned-access by default for
2716 - all ARMv6 architecture-based processors
2717 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2718 - ARMv8 architecture-based processors.
2719
2720 Disable -munaligned-access by default for
2721 - all pre-ARMv6 architecture-based processors
2722 - ARMv6-M architecture-based processors. */
2723
2724 if (unaligned_access == 2)
2725 {
2726 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2727 unaligned_access = 1;
2728 else
2729 unaligned_access = 0;
2730 }
2731 else if (unaligned_access == 1
2732 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2733 {
2734 warning (0, "target CPU does not support unaligned accesses");
2735 unaligned_access = 0;
2736 }
2737
2738 if (TARGET_THUMB1 && flag_schedule_insns)
2739 {
2740 /* Don't warn since it's on by default in -O2. */
2741 flag_schedule_insns = 0;
2742 }
2743
2744 if (optimize_size)
2745 {
2746 /* If optimizing for size, bump the number of instructions that we
2747 are prepared to conditionally execute (even on a StrongARM). */
2748 max_insns_skipped = 6;
2749 }
2750 else
2751 max_insns_skipped = current_tune->max_insns_skipped;
2752
2753 /* Hot/Cold partitioning is not currently supported, since we can't
2754 handle literal pool placement in that case. */
2755 if (flag_reorder_blocks_and_partition)
2756 {
2757 inform (input_location,
2758 "-freorder-blocks-and-partition not supported on this architecture");
2759 flag_reorder_blocks_and_partition = 0;
2760 flag_reorder_blocks = 1;
2761 }
2762
2763 if (flag_pic)
2764 /* Hoisting PIC address calculations more aggressively provides a small,
2765 but measurable, size reduction for PIC code. Therefore, we decrease
2766 the bar for unrestricted expression hoisting to the cost of PIC address
2767 calculation, which is 2 instructions. */
2768 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2769 global_options.x_param_values,
2770 global_options_set.x_param_values);
2771
2772 /* ARM EABI defaults to strict volatile bitfields. */
2773 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2774 && abi_version_at_least(2))
2775 flag_strict_volatile_bitfields = 1;
2776
2777 /* Enable software prefetching at -O3 for CPUs that have prefetch, and for
2778 which we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2779 if (flag_prefetch_loop_arrays < 0
2780 && HAVE_prefetch
2781 && optimize >= 3
2782 && current_tune->num_prefetch_slots > 0)
2783 flag_prefetch_loop_arrays = 1;
2784
2785 /* Set up parameters to be used in the prefetching algorithm.  Do not override the
2786 defaults unless we are tuning for a core we have researched values for. */
2787 if (current_tune->num_prefetch_slots > 0)
2788 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2789 current_tune->num_prefetch_slots,
2790 global_options.x_param_values,
2791 global_options_set.x_param_values);
2792 if (current_tune->l1_cache_line_size >= 0)
2793 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2794 current_tune->l1_cache_line_size,
2795 global_options.x_param_values,
2796 global_options_set.x_param_values);
2797 if (current_tune->l1_cache_size >= 0)
2798 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2799 current_tune->l1_cache_size,
2800 global_options.x_param_values,
2801 global_options_set.x_param_values);
2802
2803 /* Use Neon rather than core registers to perform 64-bit
2804 operations. */
2805 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2806 if (use_neon_for_64bits == 1)
2807 prefer_neon_for_64bits = true;
2808
2809 /* Use the alternative scheduling-pressure algorithm by default. */
2810 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2811 global_options.x_param_values,
2812 global_options_set.x_param_values);
2813
2814 /* Disable shrink-wrap when optimizing function for size, since it tends to
2815 generate additional returns. */
2816 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2817 flag_shrink_wrap = false;
2818 /* TBD: Dwarf info for apcs frame is not handled yet. */
2819 if (TARGET_APCS_FRAME)
2820 flag_shrink_wrap = false;
2821
2822 /* We only support -mslow-flash-data on armv7-m targets. */
2823 if (target_slow_flash_data
2824 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2825 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
2826 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2827
2828 /* Currently, for slow flash data, we just disable literal pools. */
2829 if (target_slow_flash_data)
2830 arm_disable_literal_pool = true;
2831
2832 /* Register global variables with the garbage collector. */
2833 arm_add_gc_roots ();
2834 }
2835
2836 static void
2837 arm_add_gc_roots (void)
2838 {
2839 gcc_obstack_init(&minipool_obstack);
2840 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2841 }
2842 \f
2843 /* A table of known ARM exception types.
2844 For use with the interrupt function attribute. */
2845
2846 typedef struct
2847 {
2848 const char *const arg;
2849 const unsigned long return_value;
2850 }
2851 isr_attribute_arg;
2852
2853 static const isr_attribute_arg isr_attribute_args [] =
2854 {
2855 { "IRQ", ARM_FT_ISR },
2856 { "irq", ARM_FT_ISR },
2857 { "FIQ", ARM_FT_FIQ },
2858 { "fiq", ARM_FT_FIQ },
2859 { "ABORT", ARM_FT_ISR },
2860 { "abort", ARM_FT_ISR },
2861 { "ABORT", ARM_FT_ISR },
2862 { "abort", ARM_FT_ISR },
2863 { "UNDEF", ARM_FT_EXCEPTION },
2864 { "undef", ARM_FT_EXCEPTION },
2865 { "SWI", ARM_FT_EXCEPTION },
2866 { "swi", ARM_FT_EXCEPTION },
2867 { NULL, ARM_FT_NORMAL }
2868 };
2869
2870 /* Returns the (interrupt) function type of the current
2871 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2872
2873 static unsigned long
2874 arm_isr_value (tree argument)
2875 {
2876 const isr_attribute_arg * ptr;
2877 const char * arg;
2878
2879 if (!arm_arch_notm)
2880 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2881
2882 /* No argument - default to IRQ. */
2883 if (argument == NULL_TREE)
2884 return ARM_FT_ISR;
2885
2886 /* Get the value of the argument. */
2887 if (TREE_VALUE (argument) == NULL_TREE
2888 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2889 return ARM_FT_UNKNOWN;
2890
2891 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2892
2893 /* Check it against the list of known arguments. */
2894 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2895 if (streq (arg, ptr->arg))
2896 return ptr->return_value;
2897
2898 /* An unrecognized interrupt type. */
2899 return ARM_FT_UNKNOWN;
2900 }
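
/* A user-level illustration (hypothetical handler name):

     void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);

   The string "FIQ" is matched in isr_attribute_args above and yields
   ARM_FT_FIQ; an unrecognized string yields ARM_FT_UNKNOWN.  */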
2901
2902 /* Computes the type of the current function. */
2903
2904 static unsigned long
2905 arm_compute_func_type (void)
2906 {
2907 unsigned long type = ARM_FT_UNKNOWN;
2908 tree a;
2909 tree attr;
2910
2911 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2912
2913 /* Decide if the current function is volatile. Such functions
2914 never return, and many memory cycles can be saved by not storing
2915 register values that will never be needed again. This optimization
2916 was added to speed up context switching in a kernel application. */
2917 if (optimize > 0
2918 && (TREE_NOTHROW (current_function_decl)
2919 || !(flag_unwind_tables
2920 || (flag_exceptions
2921 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2922 && TREE_THIS_VOLATILE (current_function_decl))
2923 type |= ARM_FT_VOLATILE;
2924
2925 if (cfun->static_chain_decl != NULL)
2926 type |= ARM_FT_NESTED;
2927
2928 attr = DECL_ATTRIBUTES (current_function_decl);
2929
2930 a = lookup_attribute ("naked", attr);
2931 if (a != NULL_TREE)
2932 type |= ARM_FT_NAKED;
2933
2934 a = lookup_attribute ("isr", attr);
2935 if (a == NULL_TREE)
2936 a = lookup_attribute ("interrupt", attr);
2937
2938 if (a == NULL_TREE)
2939 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2940 else
2941 type |= arm_isr_value (TREE_VALUE (a));
2942
2943 return type;
2944 }
2945
2946 /* Returns the type of the current function. */
2947
2948 unsigned long
2949 arm_current_func_type (void)
2950 {
2951 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2952 cfun->machine->func_type = arm_compute_func_type ();
2953
2954 return cfun->machine->func_type;
2955 }
2956
2957 bool
2958 arm_allocate_stack_slots_for_args (void)
2959 {
2960 /* Naked functions should not allocate stack slots for arguments. */
2961 return !IS_NAKED (arm_current_func_type ());
2962 }
2963
2964 static bool
2965 arm_warn_func_return (tree decl)
2966 {
2967 /* Naked functions are implemented entirely in assembly, including the
2968 return sequence, so suppress warnings about this. */
2969 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2970 }
2971
2972 \f
2973 /* Output assembler code for a block containing the constant parts
2974 of a trampoline, leaving space for the variable parts.
2975
2976 On the ARM, (if r8 is the static chain regnum, and remembering that
2977 referencing pc adds an offset of 8) the trampoline looks like:
2978 ldr r8, [pc, #0]
2979 ldr pc, [pc]
2980 .word static chain value
2981 .word function's address
2982 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2983
2984 static void
2985 arm_asm_trampoline_template (FILE *f)
2986 {
2987 if (TARGET_ARM)
2988 {
2989 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2990 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2991 }
2992 else if (TARGET_THUMB2)
2993 {
2994 /* The Thumb-2 trampoline is similar to the arm implementation.
2995 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2996 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2997 STATIC_CHAIN_REGNUM, PC_REGNUM);
2998 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2999 }
3000 else
3001 {
3002 ASM_OUTPUT_ALIGN (f, 2);
3003 fprintf (f, "\t.code\t16\n");
3004 fprintf (f, ".Ltrampoline_start:\n");
3005 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3006 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3007 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3008 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3009 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3010 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3011 }
3012 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3013 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3014 }
3015
3016 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3017
3018 static void
3019 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3020 {
3021 rtx fnaddr, mem, a_tramp;
3022
3023 emit_block_move (m_tramp, assemble_trampoline_template (),
3024 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3025
3026 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3027 emit_move_insn (mem, chain_value);
3028
3029 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3030 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3031 emit_move_insn (mem, fnaddr);
3032
3033 a_tramp = XEXP (m_tramp, 0);
3034 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3035 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3036 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3037 }
3038
3039 /* Thumb trampolines should be entered in thumb mode, so set
3040 the bottom bit of the address. */
3041
3042 static rtx
3043 arm_trampoline_adjust_address (rtx addr)
3044 {
3045 if (TARGET_THUMB)
3046 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3047 NULL, 0, OPTAB_LIB_WIDEN);
3048 return addr;
3049 }
3050 \f
3051 /* Return 1 if it is possible to return using a single instruction.
3052 If SIBLING is non-null, this is a test for a return before a sibling
3053 call. SIBLING is the call insn, so we can examine its register usage. */
3054
3055 int
3056 use_return_insn (int iscond, rtx sibling)
3057 {
3058 int regno;
3059 unsigned int func_type;
3060 unsigned long saved_int_regs;
3061 unsigned HOST_WIDE_INT stack_adjust;
3062 arm_stack_offsets *offsets;
3063
3064 /* Never use a return instruction before reload has run. */
3065 if (!reload_completed)
3066 return 0;
3067
3068 func_type = arm_current_func_type ();
3069
3070 /* Naked, volatile and stack alignment functions need special
3071 consideration. */
3072 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3073 return 0;
3074
3075 /* So do interrupt functions that use the frame pointer and Thumb
3076 interrupt functions. */
3077 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3078 return 0;
3079
3080 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3081 && !optimize_function_for_size_p (cfun))
3082 return 0;
3083
3084 offsets = arm_get_frame_offsets ();
3085 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3086
3087 /* As do variadic functions. */
3088 if (crtl->args.pretend_args_size
3089 || cfun->machine->uses_anonymous_args
3090 /* Or if the function calls __builtin_eh_return () */
3091 || crtl->calls_eh_return
3092 /* Or if the function calls alloca */
3093 || cfun->calls_alloca
3094 /* Or if there is a stack adjustment. However, if the stack pointer
3095 is saved on the stack, we can use a pre-incrementing stack load. */
3096 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3097 && stack_adjust == 4)))
3098 return 0;
3099
3100 saved_int_regs = offsets->saved_regs_mask;
3101
3102 /* Unfortunately, the insn
3103
3104 ldmib sp, {..., sp, ...}
3105
3106 triggers a bug on most SA-110 based devices, such that the stack
3107 pointer won't be correctly restored if the instruction takes a
3108 page fault. We work around this problem by popping r3 along with
3109 the other registers, since that is never slower than executing
3110 another instruction.
3111
3112 We test for !arm_arch5 here, because code for any architecture
3113 less than this could potentially be run on one of the buggy
3114 chips. */
3115 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3116 {
3117 /* Validate that r3 is a call-clobbered register (always true in
3118 the default abi) ... */
3119 if (!call_used_regs[3])
3120 return 0;
3121
3122 /* ... that it isn't being used for a return value ... */
3123 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3124 return 0;
3125
3126 /* ... or for a tail-call argument ... */
3127 if (sibling)
3128 {
3129 gcc_assert (CALL_P (sibling));
3130
3131 if (find_regno_fusage (sibling, USE, 3))
3132 return 0;
3133 }
3134
3135 /* ... and that there are no call-saved registers in r0-r2
3136 (always true in the default ABI). */
3137 if (saved_int_regs & 0x7)
3138 return 0;
3139 }
3140
3141 /* Can't be done if interworking with Thumb, and any registers have been
3142 stacked. */
3143 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3144 return 0;
3145
3146 /* On StrongARM, conditional returns are expensive if they aren't
3147 taken and multiple registers have been stacked. */
3148 if (iscond && arm_tune_strongarm)
3149 {
3150 /* Conditional return when just the LR is stored is a simple
3151 conditional-load instruction, that's not expensive. */
3152 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3153 return 0;
3154
3155 if (flag_pic
3156 && arm_pic_register != INVALID_REGNUM
3157 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3158 return 0;
3159 }
3160
3161 /* If there are saved registers but the LR isn't saved, then we need
3162 two instructions for the return. */
3163 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3164 return 0;
3165
3166 /* Can't be done if any of the VFP regs are pushed,
3167 since this also requires an insn. */
3168 if (TARGET_HARD_FLOAT && TARGET_VFP)
3169 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3170 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3171 return 0;
3172
3173 if (TARGET_REALLY_IWMMXT)
3174 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3175 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3176 return 0;
3177
3178 return 1;
3179 }
3180
3181 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3182 shrink-wrapping if possible. This is the case if we need to emit a
3183 prologue, which we can test by looking at the offsets. */
3184 bool
3185 use_simple_return_p (void)
3186 {
3187 arm_stack_offsets *offsets;
3188
3189 offsets = arm_get_frame_offsets ();
3190 return offsets->outgoing_args != 0;
3191 }
3192
3193 /* Return TRUE if int I is a valid immediate ARM constant. */
3194
3195 int
3196 const_ok_for_arm (HOST_WIDE_INT i)
3197 {
3198 int lowbit;
3199
3200 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3201 be all zero, or all one. */
3202 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3203 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3204 != ((~(unsigned HOST_WIDE_INT) 0)
3205 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3206 return FALSE;
3207
3208 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3209
3210 /* Fast return for 0 and small values. We must do this for zero, since
3211 the code below can't handle that one case. */
3212 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3213 return TRUE;
3214
3215 /* Get the number of trailing zeros. */
3216 lowbit = ffs((int) i) - 1;
3217
3218 /* Only even shifts are allowed in ARM mode so round down to the
3219 nearest even number. */
3220 if (TARGET_ARM)
3221 lowbit &= ~1;
3222
3223 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3224 return TRUE;
3225
3226 if (TARGET_ARM)
3227 {
3228 /* Allow rotated constants in ARM mode. */
3229 if (lowbit <= 4
3230 && ((i & ~0xc000003f) == 0
3231 || (i & ~0xf000000f) == 0
3232 || (i & ~0xfc000003) == 0))
3233 return TRUE;
3234 }
3235 else
3236 {
3237 HOST_WIDE_INT v;
3238
3239 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3240 v = i & 0xff;
3241 v |= v << 16;
3242 if (i == v || i == (v | (v << 8)))
3243 return TRUE;
3244
3245 /* Allow repeated pattern 0xXY00XY00. */
3246 v = i & 0xff00;
3247 v |= v << 16;
3248 if (i == v)
3249 return TRUE;
3250 }
3251
3252 return FALSE;
3253 }
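/* A few illustrative values for the rules above (a non-exhaustive sketch):
     0x000000ff   valid everywhere: fits in 8 bits.
     0x0000ff00   valid: 0xff shifted by 8, an even (hence legal) rotation.
     0x000001fe   0xff shifted by 1: valid for Thumb-2, which allows
                  arbitrary shifts, but not for ARM mode, which only
                  allows even rotations.
     0x00ff00ff, 0xab00ab00, 0xabababab
                  valid for Thumb-2 only, via the replicated patterns
                  handled in the else-branch above.  */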
3254
3255 /* Return true if I is a valid constant for the operation CODE. */
3256 int
3257 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3258 {
3259 if (const_ok_for_arm (i))
3260 return 1;
3261
3262 switch (code)
3263 {
3264 case SET:
3265 /* See if we can use movw. */
3266 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3267 return 1;
3268 else
3269 /* Otherwise, try mvn. */
3270 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3271
3272 case PLUS:
3273 /* See if we can use addw or subw. */
3274 if (TARGET_THUMB2
3275 && ((i & 0xfffff000) == 0
3276 || ((-i) & 0xfffff000) == 0))
3277 return 1;
3278 /* else fall through. */
3279
3280 case COMPARE:
3281 case EQ:
3282 case NE:
3283 case GT:
3284 case LE:
3285 case LT:
3286 case GE:
3287 case GEU:
3288 case LTU:
3289 case GTU:
3290 case LEU:
3291 case UNORDERED:
3292 case ORDERED:
3293 case UNEQ:
3294 case UNGE:
3295 case UNLT:
3296 case UNGT:
3297 case UNLE:
3298 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3299
3300 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3301 case XOR:
3302 return 0;
3303
3304 case IOR:
3305 if (TARGET_THUMB2)
3306 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3307 return 0;
3308
3309 case AND:
3310 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3311
3312 default:
3313 gcc_unreachable ();
3314 }
3315 }
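/* Two illustrative cases (not exhaustive): in ARM mode, I == -1 is not
   itself a valid immediate, but for CODE == PLUS, -I == 1 is, so the
   addition can be emitted as a subtraction of 1.  Likewise, for
   CODE == SET and I == 0xffffff00, ~I == 0xff is valid, so the value
   can be loaded with a single MVN.  */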
3316
3317 /* Return true if I is a valid di mode constant for the operation CODE. */
3318 int
3319 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3320 {
3321 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3322 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3323 rtx hi = GEN_INT (hi_val);
3324 rtx lo = GEN_INT (lo_val);
3325
3326 if (TARGET_THUMB1)
3327 return 0;
3328
3329 switch (code)
3330 {
3331 case AND:
3332 case IOR:
3333 case XOR:
3334 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3335 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3336 case PLUS:
3337 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3338
3339 default:
3340 return 0;
3341 }
3342 }
3343
3344 /* Emit a sequence of insns to handle a large constant.
3345 CODE is the code of the operation required, it can be any of SET, PLUS,
3346 IOR, AND, XOR, MINUS;
3347 MODE is the mode in which the operation is being performed;
3348 VAL is the integer to operate on;
3349 SOURCE is the other operand (a register, or a null-pointer for SET);
3350 SUBTARGETS means it is safe to create scratch registers if that will
3351 either produce a simpler sequence, or we will want to cse the values.
3352 Return value is the number of insns emitted. */
3353
3354 /* ??? Tweak this for thumb2. */
3355 int
3356 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3357 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3358 {
3359 rtx cond;
3360
3361 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3362 cond = COND_EXEC_TEST (PATTERN (insn));
3363 else
3364 cond = NULL_RTX;
3365
3366 if (subtargets || code == SET
3367 || (REG_P (target) && REG_P (source)
3368 && REGNO (target) != REGNO (source)))
3369 {
3370 /* After arm_reorg has been called, we can't fix up expensive
3371 constants by pushing them into memory so we must synthesize
3372 them in-line, regardless of the cost. This is only likely to
3373 be more costly on chips that have load delay slots and we are
3374 compiling without running the scheduler (so no splitting
3375 occurred before the final instruction emission).
3376
3377 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3378 */
3379 if (!after_arm_reorg
3380 && !cond
3381 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3382 1, 0)
3383 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3384 + (code != SET))))
3385 {
3386 if (code == SET)
3387 {
3388 /* Currently SET is the only monadic value for CODE, all
3389 the rest are dyadic. */
3390 if (TARGET_USE_MOVT)
3391 arm_emit_movpair (target, GEN_INT (val));
3392 else
3393 emit_set_insn (target, GEN_INT (val));
3394
3395 return 1;
3396 }
3397 else
3398 {
3399 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3400
3401 if (TARGET_USE_MOVT)
3402 arm_emit_movpair (temp, GEN_INT (val));
3403 else
3404 emit_set_insn (temp, GEN_INT (val));
3405
3406 /* For MINUS, the value is subtracted from, since we never
3407 have subtraction of a constant. */
3408 if (code == MINUS)
3409 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3410 else
3411 emit_set_insn (target,
3412 gen_rtx_fmt_ee (code, mode, source, temp));
3413 return 2;
3414 }
3415 }
3416 }
3417
3418 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3419 1);
3420 }
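/* A rough illustration of the MINUS handling above: for "x = 0x12345678 - y"
   the constant cannot be encoded directly, so (when the cost test fires) it
   is first placed in a temporary -- using a MOVW/MOVT pair if the target
   supports it -- and the subtraction is then emitted as temp - y, for a
   return value of 2.  */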
3421
3422 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
3423 ARM/THUMB2 immediates, and add up to VAL.
3424 The function return value gives the number of insns required. */
3425 static int
3426 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3427 struct four_ints *return_sequence)
3428 {
3429 int best_consecutive_zeros = 0;
3430 int i;
3431 int best_start = 0;
3432 int insns1, insns2;
3433 struct four_ints tmp_sequence;
3434
3435 /* If we aren't targeting ARM, the best place to start is always at
3436 the bottom, otherwise look more closely. */
3437 if (TARGET_ARM)
3438 {
3439 for (i = 0; i < 32; i += 2)
3440 {
3441 int consecutive_zeros = 0;
3442
3443 if (!(val & (3 << i)))
3444 {
3445 while ((i < 32) && !(val & (3 << i)))
3446 {
3447 consecutive_zeros += 2;
3448 i += 2;
3449 }
3450 if (consecutive_zeros > best_consecutive_zeros)
3451 {
3452 best_consecutive_zeros = consecutive_zeros;
3453 best_start = i - consecutive_zeros;
3454 }
3455 i -= 2;
3456 }
3457 }
3458 }
3459
3460 /* So long as it won't require any more insns to do so, it's
3461 desirable to emit a small constant (in bits 0...9) in the last
3462 insn. This way there is more chance that it can be combined with
3463 a later addressing insn to form a pre-indexed load or store
3464 operation. Consider:
3465
3466 *((volatile int *)0xe0000100) = 1;
3467 *((volatile int *)0xe0000110) = 2;
3468
3469 We want this to wind up as:
3470
3471 mov rA, #0xe0000000
3472 mov rB, #1
3473 str rB, [rA, #0x100]
3474 mov rB, #2
3475 str rB, [rA, #0x110]
3476
3477 rather than having to synthesize both large constants from scratch.
3478
3479 Therefore, we calculate how many insns would be required to emit
3480 the constant starting from `best_start', and also starting from
3481 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3482 yield a shorter sequence, we may as well use zero. */
3483 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3484 if (best_start != 0
3485 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3486 {
3487 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3488 if (insns2 <= insns1)
3489 {
3490 *return_sequence = tmp_sequence;
3491 insns1 = insns2;
3492 }
3493 }
3494
3495 return insns1;
3496 }
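/* Illustrative sketch of the heuristic above: for VAL == 0xe0000100 the
   chosen split is { 0xe0000000, 0x00000100 }, two valid immediates with
   the small constant last, so that a following memory access can fold it
   into a pre-indexed address such as [rA, #0x100].  */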
3497
3498 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3499 static int
3500 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3501 struct four_ints *return_sequence, int i)
3502 {
3503 int remainder = val & 0xffffffff;
3504 int insns = 0;
3505
3506 /* Try and find a way of doing the job in either two or three
3507 instructions.
3508
3509 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3510 location. We start at position I. This may be the MSB, or
3511 optimal_immediate_sequence may have positioned it at the largest block
3512 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3513 wrapping around to the top of the word when we drop off the bottom.
3514 In the worst case this code should produce no more than four insns.
3515
3516 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3517 constants, shifted to any arbitrary location. We should always start
3518 at the MSB. */
3519 do
3520 {
3521 int end;
3522 unsigned int b1, b2, b3, b4;
3523 unsigned HOST_WIDE_INT result;
3524 int loc;
3525
3526 gcc_assert (insns < 4);
3527
3528 if (i <= 0)
3529 i += 32;
3530
3531 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3532 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3533 {
3534 loc = i;
3535 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3536 /* We can use addw/subw for the last 12 bits. */
3537 result = remainder;
3538 else
3539 {
3540 /* Use an 8-bit shifted/rotated immediate. */
3541 end = i - 8;
3542 if (end < 0)
3543 end += 32;
3544 result = remainder & ((0x0ff << end)
3545 | ((i < end) ? (0xff >> (32 - end))
3546 : 0));
3547 i -= 8;
3548 }
3549 }
3550 else
3551 {
3552 /* ARM allows rotates by a multiple of two. Thumb-2 allows
3553 arbitrary shifts. */
3554 i -= TARGET_ARM ? 2 : 1;
3555 continue;
3556 }
3557
3558 /* Next, see if we can do a better job with a thumb2 replicated
3559 constant.
3560
3561 We do it this way around to catch the cases like 0x01F001E0 where
3562 two 8-bit immediates would work, but a replicated constant would
3563 make it worse.
3564
3565 TODO: 16-bit constants that don't clear all the bits, but still win.
3566 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3567 if (TARGET_THUMB2)
3568 {
3569 b1 = (remainder & 0xff000000) >> 24;
3570 b2 = (remainder & 0x00ff0000) >> 16;
3571 b3 = (remainder & 0x0000ff00) >> 8;
3572 b4 = remainder & 0xff;
3573
3574 if (loc > 24)
3575 {
3576 /* The 8-bit immediate already found clears b1 (and maybe b2),
3577 but must leave b3 and b4 alone. */
3578
3579 /* First try to find a 32-bit replicated constant that clears
3580 almost everything. We can assume that we can't do it in one,
3581 or else we wouldn't be here. */
3582 unsigned int tmp = b1 & b2 & b3 & b4;
3583 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3584 + (tmp << 24);
3585 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3586 + (tmp == b3) + (tmp == b4);
3587 if (tmp
3588 && (matching_bytes >= 3
3589 || (matching_bytes == 2
3590 && const_ok_for_op (remainder & ~tmp2, code))))
3591 {
3592 /* At least 3 of the bytes match, and the fourth has at
3593 least as many bits set, or two of the bytes match
3594 and it will only require one more insn to finish. */
3595 result = tmp2;
3596 i = tmp != b1 ? 32
3597 : tmp != b2 ? 24
3598 : tmp != b3 ? 16
3599 : 8;
3600 }
3601
3602 /* Second, try to find a 16-bit replicated constant that can
3603 leave three of the bytes clear. If b2 or b4 is already
3604 zero, then we can. If the 8-bit from above would not
3605 clear b2 anyway, then we still win. */
3606 else if (b1 == b3 && (!b2 || !b4
3607 || (remainder & 0x00ff0000 & ~result)))
3608 {
3609 result = remainder & 0xff00ff00;
3610 i = 24;
3611 }
3612 }
3613 else if (loc > 16)
3614 {
3615 /* The 8-bit immediate already found clears b2 (and maybe b3)
3616 and we don't get here unless b1 is already clear, but it will
3617 leave b4 unchanged. */
3618
3619 /* If we can clear b2 and b4 at once, then we win, since the
3620 8-bits couldn't possibly reach that far. */
3621 if (b2 == b4)
3622 {
3623 result = remainder & 0x00ff00ff;
3624 i = 16;
3625 }
3626 }
3627 }
3628
3629 return_sequence->i[insns++] = result;
3630 remainder &= ~result;
3631
3632 if (code == SET || code == MINUS)
3633 code = PLUS;
3634 }
3635 while (remainder);
3636
3637 return insns;
3638 }
3639
3640 /* Emit an instruction with the indicated PATTERN. If COND is
3641 non-NULL, conditionalize the execution of the instruction on COND
3642 being true. */
3643
3644 static void
3645 emit_constant_insn (rtx cond, rtx pattern)
3646 {
3647 if (cond)
3648 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3649 emit_insn (pattern);
3650 }
3651
3652 /* As above, but extra parameter GENERATE which, if clear, suppresses
3653 RTL generation. */
3654
3655 static int
3656 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3657 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3658 int generate)
3659 {
3660 int can_invert = 0;
3661 int can_negate = 0;
3662 int final_invert = 0;
3663 int i;
3664 int set_sign_bit_copies = 0;
3665 int clear_sign_bit_copies = 0;
3666 int clear_zero_bit_copies = 0;
3667 int set_zero_bit_copies = 0;
3668 int insns = 0, neg_insns, inv_insns;
3669 unsigned HOST_WIDE_INT temp1, temp2;
3670 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3671 struct four_ints *immediates;
3672 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3673
3674 /* Find out which operations are safe for a given CODE. Also do a quick
3675 check for degenerate cases; these can occur when DImode operations
3676 are split. */
3677 switch (code)
3678 {
3679 case SET:
3680 can_invert = 1;
3681 break;
3682
3683 case PLUS:
3684 can_negate = 1;
3685 break;
3686
3687 case IOR:
3688 if (remainder == 0xffffffff)
3689 {
3690 if (generate)
3691 emit_constant_insn (cond,
3692 gen_rtx_SET (VOIDmode, target,
3693 GEN_INT (ARM_SIGN_EXTEND (val))));
3694 return 1;
3695 }
3696
3697 if (remainder == 0)
3698 {
3699 if (reload_completed && rtx_equal_p (target, source))
3700 return 0;
3701
3702 if (generate)
3703 emit_constant_insn (cond,
3704 gen_rtx_SET (VOIDmode, target, source));
3705 return 1;
3706 }
3707 break;
3708
3709 case AND:
3710 if (remainder == 0)
3711 {
3712 if (generate)
3713 emit_constant_insn (cond,
3714 gen_rtx_SET (VOIDmode, target, const0_rtx));
3715 return 1;
3716 }
3717 if (remainder == 0xffffffff)
3718 {
3719 if (reload_completed && rtx_equal_p (target, source))
3720 return 0;
3721 if (generate)
3722 emit_constant_insn (cond,
3723 gen_rtx_SET (VOIDmode, target, source));
3724 return 1;
3725 }
3726 can_invert = 1;
3727 break;
3728
3729 case XOR:
3730 if (remainder == 0)
3731 {
3732 if (reload_completed && rtx_equal_p (target, source))
3733 return 0;
3734 if (generate)
3735 emit_constant_insn (cond,
3736 gen_rtx_SET (VOIDmode, target, source));
3737 return 1;
3738 }
3739
3740 if (remainder == 0xffffffff)
3741 {
3742 if (generate)
3743 emit_constant_insn (cond,
3744 gen_rtx_SET (VOIDmode, target,
3745 gen_rtx_NOT (mode, source)));
3746 return 1;
3747 }
3748 final_invert = 1;
3749 break;
3750
3751 case MINUS:
3752 /* We treat MINUS as (val - source), since (source - val) is always
3753 passed as (source + (-val)). */
3754 if (remainder == 0)
3755 {
3756 if (generate)
3757 emit_constant_insn (cond,
3758 gen_rtx_SET (VOIDmode, target,
3759 gen_rtx_NEG (mode, source)));
3760 return 1;
3761 }
3762 if (const_ok_for_arm (val))
3763 {
3764 if (generate)
3765 emit_constant_insn (cond,
3766 gen_rtx_SET (VOIDmode, target,
3767 gen_rtx_MINUS (mode, GEN_INT (val),
3768 source)));
3769 return 1;
3770 }
3771
3772 break;
3773
3774 default:
3775 gcc_unreachable ();
3776 }
3777
3778 /* If we can do it in one insn get out quickly. */
3779 if (const_ok_for_op (val, code))
3780 {
3781 if (generate)
3782 emit_constant_insn (cond,
3783 gen_rtx_SET (VOIDmode, target,
3784 (source
3785 ? gen_rtx_fmt_ee (code, mode, source,
3786 GEN_INT (val))
3787 : GEN_INT (val))));
3788 return 1;
3789 }
3790
3791 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3792 insn. */
3793 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3794 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3795 {
3796 if (generate)
3797 {
3798 if (mode == SImode && i == 16)
3799 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3800 smaller insn. */
3801 emit_constant_insn (cond,
3802 gen_zero_extendhisi2
3803 (target, gen_lowpart (HImode, source)));
3804 else
3805 /* Extz only supports SImode, but we can coerce the operands
3806 into that mode. */
3807 emit_constant_insn (cond,
3808 gen_extzv_t2 (gen_lowpart (SImode, target),
3809 gen_lowpart (SImode, source),
3810 GEN_INT (i), const0_rtx));
3811 }
3812
3813 return 1;
3814 }
3815
3816 /* Calculate a few attributes that may be useful for specific
3817 optimizations. */
3818 /* Count number of leading zeros. */
3819 for (i = 31; i >= 0; i--)
3820 {
3821 if ((remainder & (1 << i)) == 0)
3822 clear_sign_bit_copies++;
3823 else
3824 break;
3825 }
3826
3827 /* Count number of leading 1's. */
3828 for (i = 31; i >= 0; i--)
3829 {
3830 if ((remainder & (1 << i)) != 0)
3831 set_sign_bit_copies++;
3832 else
3833 break;
3834 }
3835
3836 /* Count number of trailing zeros. */
3837 for (i = 0; i <= 31; i++)
3838 {
3839 if ((remainder & (1 << i)) == 0)
3840 clear_zero_bit_copies++;
3841 else
3842 break;
3843 }
3844
3845 /* Count number of trailing 1's. */
3846 for (i = 0; i <= 31; i++)
3847 {
3848 if ((remainder & (1 << i)) != 0)
3849 set_zero_bit_copies++;
3850 else
3851 break;
3852 }
3853
3854 switch (code)
3855 {
3856 case SET:
3857 /* See if we can do this by sign_extending a constant that is known
3858 to be negative. This is a good way of doing it, since the shift
3859 may well merge into a subsequent insn. */
3860 if (set_sign_bit_copies > 1)
3861 {
3862 if (const_ok_for_arm
3863 (temp1 = ARM_SIGN_EXTEND (remainder
3864 << (set_sign_bit_copies - 1))))
3865 {
3866 if (generate)
3867 {
3868 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3869 emit_constant_insn (cond,
3870 gen_rtx_SET (VOIDmode, new_src,
3871 GEN_INT (temp1)));
3872 emit_constant_insn (cond,
3873 gen_ashrsi3 (target, new_src,
3874 GEN_INT (set_sign_bit_copies - 1)));
3875 }
3876 return 2;
3877 }
3878 /* For an inverted constant, we will need to set the low bits,
3879 these will be shifted out of harm's way. */
3880 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3881 if (const_ok_for_arm (~temp1))
3882 {
3883 if (generate)
3884 {
3885 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3886 emit_constant_insn (cond,
3887 gen_rtx_SET (VOIDmode, new_src,
3888 GEN_INT (temp1)));
3889 emit_constant_insn (cond,
3890 gen_ashrsi3 (target, new_src,
3891 GEN_INT (set_sign_bit_copies - 1)));
3892 }
3893 return 2;
3894 }
3895 }
3896
3897 /* See if we can calculate the value as the difference between two
3898 valid immediates. */
3899 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3900 {
3901 int topshift = clear_sign_bit_copies & ~1;
3902
3903 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3904 & (0xff000000 >> topshift));
3905
3906 /* If temp1 is zero, then that means the 9 most significant
3907 bits of remainder were 1 and we've caused it to overflow.
3908 When topshift is 0 we don't need to do anything since we
3909 can borrow from 'bit 32'. */
3910 if (temp1 == 0 && topshift != 0)
3911 temp1 = 0x80000000 >> (topshift - 1);
3912
3913 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3914
3915 if (const_ok_for_arm (temp2))
3916 {
3917 if (generate)
3918 {
3919 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3920 emit_constant_insn (cond,
3921 gen_rtx_SET (VOIDmode, new_src,
3922 GEN_INT (temp1)));
3923 emit_constant_insn (cond,
3924 gen_addsi3 (target, new_src,
3925 GEN_INT (-temp2)));
3926 }
3927
3928 return 2;
3929 }
3930 }
3931
3932 /* See if we can generate this by setting the bottom (or the top)
3933 16 bits, and then shifting these into the other half of the
3934 word. We only look for the simplest cases, to do more would cost
3935 too much. Be careful, however, not to generate this when the
3936 alternative would take fewer insns. */
3937 if (val & 0xffff0000)
3938 {
3939 temp1 = remainder & 0xffff0000;
3940 temp2 = remainder & 0x0000ffff;
3941
3942 /* Overlaps outside this range are best done using other methods. */
3943 for (i = 9; i < 24; i++)
3944 {
3945 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3946 && !const_ok_for_arm (temp2))
3947 {
3948 rtx new_src = (subtargets
3949 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3950 : target);
3951 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3952 source, subtargets, generate);
3953 source = new_src;
3954 if (generate)
3955 emit_constant_insn
3956 (cond,
3957 gen_rtx_SET
3958 (VOIDmode, target,
3959 gen_rtx_IOR (mode,
3960 gen_rtx_ASHIFT (mode, source,
3961 GEN_INT (i)),
3962 source)));
3963 return insns + 1;
3964 }
3965 }
3966
3967 /* Don't duplicate cases already considered. */
3968 for (i = 17; i < 24; i++)
3969 {
3970 if (((temp1 | (temp1 >> i)) == remainder)
3971 && !const_ok_for_arm (temp1))
3972 {
3973 rtx new_src = (subtargets
3974 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3975 : target);
3976 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3977 source, subtargets, generate);
3978 source = new_src;
3979 if (generate)
3980 emit_constant_insn
3981 (cond,
3982 gen_rtx_SET (VOIDmode, target,
3983 gen_rtx_IOR
3984 (mode,
3985 gen_rtx_LSHIFTRT (mode, source,
3986 GEN_INT (i)),
3987 source)));
3988 return insns + 1;
3989 }
3990 }
3991 }
3992 break;
3993
3994 case IOR:
3995 case XOR:
3996 /* If we have IOR or XOR, and the constant can be loaded in a
3997 single instruction, and we can find a temporary to put it in,
3998 then this can be done in two instructions instead of 3-4. */
3999 if (subtargets
4000 /* TARGET can't be NULL if SUBTARGETS is 0. */
4001 || (reload_completed && !reg_mentioned_p (target, source)))
4002 {
4003 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4004 {
4005 if (generate)
4006 {
4007 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4008
4009 emit_constant_insn (cond,
4010 gen_rtx_SET (VOIDmode, sub,
4011 GEN_INT (val)));
4012 emit_constant_insn (cond,
4013 gen_rtx_SET (VOIDmode, target,
4014 gen_rtx_fmt_ee (code, mode,
4015 source, sub)));
4016 }
4017 return 2;
4018 }
4019 }
4020
4021 if (code == XOR)
4022 break;
4023
4024 /* Convert.
4025 x = y | constant (where the constant consists of set_sign_bit_copies
4026 leading 1s followed by 0s, e.g. 0xfff00000)
4027 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4028
4029 This can be done in 2 instructions by using shifts with mov or mvn.
4030 e.g. for
4031 x = x | 0xfff00000;
4032 we generate.
4033 mvn r0, r0, asl #12
4034 mvn r0, r0, lsr #12 */
4035 if (set_sign_bit_copies > 8
4036 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4037 {
4038 if (generate)
4039 {
4040 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4041 rtx shift = GEN_INT (set_sign_bit_copies);
4042
4043 emit_constant_insn
4044 (cond,
4045 gen_rtx_SET (VOIDmode, sub,
4046 gen_rtx_NOT (mode,
4047 gen_rtx_ASHIFT (mode,
4048 source,
4049 shift))));
4050 emit_constant_insn
4051 (cond,
4052 gen_rtx_SET (VOIDmode, target,
4053 gen_rtx_NOT (mode,
4054 gen_rtx_LSHIFTRT (mode, sub,
4055 shift))));
4056 }
4057 return 2;
4058 }
4059
4060 /* Convert
4061 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4062 to
4063 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4064
4065 E.g. for r0 = r0 | 0xfff
4066 mvn r0, r0, lsr #12
4067 mvn r0, r0, asl #12
4068
4069 */
4070 if (set_zero_bit_copies > 8
4071 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4072 {
4073 if (generate)
4074 {
4075 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4076 rtx shift = GEN_INT (set_zero_bit_copies);
4077
4078 emit_constant_insn
4079 (cond,
4080 gen_rtx_SET (VOIDmode, sub,
4081 gen_rtx_NOT (mode,
4082 gen_rtx_LSHIFTRT (mode,
4083 source,
4084 shift))));
4085 emit_constant_insn
4086 (cond,
4087 gen_rtx_SET (VOIDmode, target,
4088 gen_rtx_NOT (mode,
4089 gen_rtx_ASHIFT (mode, sub,
4090 shift))));
4091 }
4092 return 2;
4093 }
4094
4095 /* This will never be reached for Thumb2 because orn is a valid
4096 instruction. This is for Thumb1 and the ARM 32 bit cases.
4097
4098 x = y | constant (such that ~constant is a valid constant)
4099 Transform this to
4100 x = ~(~y & ~constant).
4101 */
4102 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4103 {
4104 if (generate)
4105 {
4106 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4107 emit_constant_insn (cond,
4108 gen_rtx_SET (VOIDmode, sub,
4109 gen_rtx_NOT (mode, source)));
4110 source = sub;
4111 if (subtargets)
4112 sub = gen_reg_rtx (mode);
4113 emit_constant_insn (cond,
4114 gen_rtx_SET (VOIDmode, sub,
4115 gen_rtx_AND (mode, source,
4116 GEN_INT (temp1))));
4117 emit_constant_insn (cond,
4118 gen_rtx_SET (VOIDmode, target,
4119 gen_rtx_NOT (mode, sub)));
4120 }
4121 return 3;
4122 }
4123 break;
4124
4125 case AND:
4126 /* See if two shifts will do 2 or more insn's worth of work. */
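/* For instance (illustrative, and assuming the UBFX/UXTH case above did
   not apply): x &= 0x000001ff has 23 clear sign-bit copies, so it can
   be done with two shifts,
       mov  rT, rS, asl #23
       mov  rD, rT, lsr #23
   clearing the top 23 bits in two insns.  */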
4127 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4128 {
4129 HOST_WIDE_INT shift_mask = ((0xffffffff
4130 << (32 - clear_sign_bit_copies))
4131 & 0xffffffff);
4132
4133 if ((remainder | shift_mask) != 0xffffffff)
4134 {
4135 if (generate)
4136 {
4137 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4138 insns = arm_gen_constant (AND, mode, cond,
4139 remainder | shift_mask,
4140 new_src, source, subtargets, 1);
4141 source = new_src;
4142 }
4143 else
4144 {
4145 rtx targ = subtargets ? NULL_RTX : target;
4146 insns = arm_gen_constant (AND, mode, cond,
4147 remainder | shift_mask,
4148 targ, source, subtargets, 0);
4149 }
4150 }
4151
4152 if (generate)
4153 {
4154 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4155 rtx shift = GEN_INT (clear_sign_bit_copies);
4156
4157 emit_insn (gen_ashlsi3 (new_src, source, shift));
4158 emit_insn (gen_lshrsi3 (target, new_src, shift));
4159 }
4160
4161 return insns + 2;
4162 }
4163
4164 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4165 {
4166 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4167
4168 if ((remainder | shift_mask) != 0xffffffff)
4169 {
4170 if (generate)
4171 {
4172 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4173
4174 insns = arm_gen_constant (AND, mode, cond,
4175 remainder | shift_mask,
4176 new_src, source, subtargets, 1);
4177 source = new_src;
4178 }
4179 else
4180 {
4181 rtx targ = subtargets ? NULL_RTX : target;
4182
4183 insns = arm_gen_constant (AND, mode, cond,
4184 remainder | shift_mask,
4185 targ, source, subtargets, 0);
4186 }
4187 }
4188
4189 if (generate)
4190 {
4191 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4192 rtx shift = GEN_INT (clear_zero_bit_copies);
4193
4194 emit_insn (gen_lshrsi3 (new_src, source, shift));
4195 emit_insn (gen_ashlsi3 (target, new_src, shift));
4196 }
4197
4198 return insns + 2;
4199 }
4200
4201 break;
4202
4203 default:
4204 break;
4205 }
4206
4207 /* Calculate what the instruction sequences would be if we generated it
4208 normally, negated, or inverted. */
4209 if (code == AND)
4210 /* AND cannot be split into multiple insns, so invert and use BIC. */
4211 insns = 99;
4212 else
4213 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4214
4215 if (can_negate)
4216 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4217 &neg_immediates);
4218 else
4219 neg_insns = 99;
4220
4221 if (can_invert || final_invert)
4222 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4223 &inv_immediates);
4224 else
4225 inv_insns = 99;
4226
4227 immediates = &pos_immediates;
4228
4229 /* Is the negated immediate sequence more efficient? */
4230 if (neg_insns < insns && neg_insns <= inv_insns)
4231 {
4232 insns = neg_insns;
4233 immediates = &neg_immediates;
4234 }
4235 else
4236 can_negate = 0;
4237
4238 /* Is the inverted immediate sequence more efficient?
4239 We must allow for an extra NOT instruction for XOR operations, although
4240 there is some chance that the final 'mvn' will get optimized later. */
4241 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4242 {
4243 insns = inv_insns;
4244 immediates = &inv_immediates;
4245 }
4246 else
4247 {
4248 can_invert = 0;
4249 final_invert = 0;
4250 }
4251
4252 /* Now output the chosen sequence as instructions. */
4253 if (generate)
4254 {
4255 for (i = 0; i < insns; i++)
4256 {
4257 rtx new_src, temp1_rtx;
4258
4259 temp1 = immediates->i[i];
4260
4261 if (code == SET || code == MINUS)
4262 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4263 else if ((final_invert || i < (insns - 1)) && subtargets)
4264 new_src = gen_reg_rtx (mode);
4265 else
4266 new_src = target;
4267
4268 if (can_invert)
4269 temp1 = ~temp1;
4270 else if (can_negate)
4271 temp1 = -temp1;
4272
4273 temp1 = trunc_int_for_mode (temp1, mode);
4274 temp1_rtx = GEN_INT (temp1);
4275
4276 if (code == SET)
4277 ;
4278 else if (code == MINUS)
4279 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4280 else
4281 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4282
4283 emit_constant_insn (cond,
4284 gen_rtx_SET (VOIDmode, new_src,
4285 temp1_rtx));
4286 source = new_src;
4287
4288 if (code == SET)
4289 {
4290 can_negate = can_invert;
4291 can_invert = 0;
4292 code = PLUS;
4293 }
4294 else if (code == MINUS)
4295 code = PLUS;
4296 }
4297 }
4298
4299 if (final_invert)
4300 {
4301 if (generate)
4302 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4303 gen_rtx_NOT (mode, source)));
4304 insns++;
4305 }
4306
4307 return insns;
4308 }
4309
4310 /* Canonicalize a comparison so that we are more likely to recognize it.
4311 This can be done for a few constant compares, where we can make the
4312 immediate value easier to load. */
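/* For example (illustrative): (GT x #0xfff) uses a constant that cannot be
   encoded as an ARM immediate, but 0x1000 can be, so the comparison is
   rewritten as (GE x #0x1000), which tests the same condition on integers.  */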
4313
4314 static void
4315 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4316 bool op0_preserve_value)
4317 {
4318 enum machine_mode mode;
4319 unsigned HOST_WIDE_INT i, maxval;
4320
4321 mode = GET_MODE (*op0);
4322 if (mode == VOIDmode)
4323 mode = GET_MODE (*op1);
4324
4325 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4326
4327 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4328 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4329 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4330 for GTU/LEU in Thumb mode. */
4331 if (mode == DImode)
4332 {
4333 rtx tem;
4334
4335 if (*code == GT || *code == LE
4336 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4337 {
4338 /* Missing comparison. First try to use an available
4339 comparison. */
4340 if (CONST_INT_P (*op1))
4341 {
4342 i = INTVAL (*op1);
4343 switch (*code)
4344 {
4345 case GT:
4346 case LE:
4347 if (i != maxval
4348 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4349 {
4350 *op1 = GEN_INT (i + 1);
4351 *code = *code == GT ? GE : LT;
4352 return;
4353 }
4354 break;
4355 case GTU:
4356 case LEU:
4357 if (i != ~((unsigned HOST_WIDE_INT) 0)
4358 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4359 {
4360 *op1 = GEN_INT (i + 1);
4361 *code = *code == GTU ? GEU : LTU;
4362 return;
4363 }
4364 break;
4365 default:
4366 gcc_unreachable ();
4367 }
4368 }
4369
4370 /* If that did not work, reverse the condition. */
4371 if (!op0_preserve_value)
4372 {
4373 tem = *op0;
4374 *op0 = *op1;
4375 *op1 = tem;
4376 *code = (int)swap_condition ((enum rtx_code)*code);
4377 }
4378 }
4379 return;
4380 }
4381
4382 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4383 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4384 to facilitate possible combining with a cmp into 'ands'. */
4385 if (mode == SImode
4386 && GET_CODE (*op0) == ZERO_EXTEND
4387 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4388 && GET_MODE (XEXP (*op0, 0)) == QImode
4389 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4390 && subreg_lowpart_p (XEXP (*op0, 0))
4391 && *op1 == const0_rtx)
4392 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4393 GEN_INT (255));
4394
4395 /* Comparisons smaller than DImode. Only adjust comparisons against
4396 an out-of-range constant. */
4397 if (!CONST_INT_P (*op1)
4398 || const_ok_for_arm (INTVAL (*op1))
4399 || const_ok_for_arm (- INTVAL (*op1)))
4400 return;
4401
4402 i = INTVAL (*op1);
4403
4404 switch (*code)
4405 {
4406 case EQ:
4407 case NE:
4408 return;
4409
4410 case GT:
4411 case LE:
4412 if (i != maxval
4413 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4414 {
4415 *op1 = GEN_INT (i + 1);
4416 *code = *code == GT ? GE : LT;
4417 return;
4418 }
4419 break;
4420
4421 case GE:
4422 case LT:
4423 if (i != ~maxval
4424 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4425 {
4426 *op1 = GEN_INT (i - 1);
4427 *code = *code == GE ? GT : LE;
4428 return;
4429 }
4430 break;
4431
4432 case GTU:
4433 case LEU:
4434 if (i != ~((unsigned HOST_WIDE_INT) 0)
4435 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4436 {
4437 *op1 = GEN_INT (i + 1);
4438 *code = *code == GTU ? GEU : LTU;
4439 return;
4440 }
4441 break;
4442
4443 case GEU:
4444 case LTU:
4445 if (i != 0
4446 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4447 {
4448 *op1 = GEN_INT (i - 1);
4449 *code = *code == GEU ? GTU : LEU;
4450 return;
4451 }
4452 break;
4453
4454 default:
4455 gcc_unreachable ();
4456 }
4457 }
4458
4459
4460 /* Define how to find the value returned by a function. */
4461
4462 static rtx
4463 arm_function_value(const_tree type, const_tree func,
4464 bool outgoing ATTRIBUTE_UNUSED)
4465 {
4466 enum machine_mode mode;
4467 int unsignedp ATTRIBUTE_UNUSED;
4468 rtx r ATTRIBUTE_UNUSED;
4469
4470 mode = TYPE_MODE (type);
4471
4472 if (TARGET_AAPCS_BASED)
4473 return aapcs_allocate_return_reg (mode, type, func);
4474
4475 /* Promote integer types. */
4476 if (INTEGRAL_TYPE_P (type))
4477 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4478
4479 /* Promote small structs returned in a register to full-word size
4480 for big-endian AAPCS. */
4481 if (arm_return_in_msb (type))
4482 {
4483 HOST_WIDE_INT size = int_size_in_bytes (type);
4484 if (size % UNITS_PER_WORD != 0)
4485 {
4486 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4487 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4488 }
4489 }
4490
4491 return arm_libcall_value_1 (mode);
4492 }
4493
4494 /* libcall hashtable helpers. */
4495
4496 struct libcall_hasher : typed_noop_remove <rtx_def>
4497 {
4498 typedef rtx_def value_type;
4499 typedef rtx_def compare_type;
4500 static inline hashval_t hash (const value_type *);
4501 static inline bool equal (const value_type *, const compare_type *);
4502 static inline void remove (value_type *);
4503 };
4504
4505 inline bool
4506 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4507 {
4508 return rtx_equal_p (p1, p2);
4509 }
4510
4511 inline hashval_t
4512 libcall_hasher::hash (const value_type *p1)
4513 {
4514 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4515 }
4516
4517 typedef hash_table <libcall_hasher> libcall_table_type;
4518
4519 static void
4520 add_libcall (libcall_table_type htab, rtx libcall)
4521 {
4522 *htab.find_slot (libcall, INSERT) = libcall;
4523 }
4524
4525 static bool
4526 arm_libcall_uses_aapcs_base (const_rtx libcall)
4527 {
4528 static bool init_done = false;
4529 static libcall_table_type libcall_htab;
4530
4531 if (!init_done)
4532 {
4533 init_done = true;
4534
4535 libcall_htab.create (31);
4536 add_libcall (libcall_htab,
4537 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4538 add_libcall (libcall_htab,
4539 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4540 add_libcall (libcall_htab,
4541 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4542 add_libcall (libcall_htab,
4543 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4544
4545 add_libcall (libcall_htab,
4546 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4547 add_libcall (libcall_htab,
4548 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4549 add_libcall (libcall_htab,
4550 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4551 add_libcall (libcall_htab,
4552 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4553
4554 add_libcall (libcall_htab,
4555 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4556 add_libcall (libcall_htab,
4557 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4558 add_libcall (libcall_htab,
4559 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4560 add_libcall (libcall_htab,
4561 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4562 add_libcall (libcall_htab,
4563 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4564 add_libcall (libcall_htab,
4565 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4566 add_libcall (libcall_htab,
4567 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4568 add_libcall (libcall_htab,
4569 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4570
4571 /* Values from double-precision helper functions are returned in core
4572 registers if the selected core only supports single-precision
4573 arithmetic, even if we are using the hard-float ABI. The same is
4574 true for single-precision helpers, but we will never be using the
4575 hard-float ABI on a CPU which doesn't support single-precision
4576 operations in hardware. */
4577 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4578 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4579 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4580 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4581 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4582 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4583 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4584 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4585 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4586 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4587 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4588 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4589 SFmode));
4590 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4591 DFmode));
4592 }
4593
4594 return libcall && libcall_htab.find (libcall) != NULL;
4595 }
4596
4597 static rtx
4598 arm_libcall_value_1 (enum machine_mode mode)
4599 {
4600 if (TARGET_AAPCS_BASED)
4601 return aapcs_libcall_value (mode);
4602 else if (TARGET_IWMMXT_ABI
4603 && arm_vector_mode_supported_p (mode))
4604 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4605 else
4606 return gen_rtx_REG (mode, ARG_REGISTER (1));
4607 }
4608
4609 /* Define how to find the value returned by a library function
4610 assuming the value has mode MODE. */
4611
4612 static rtx
4613 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4614 {
4615 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4616 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4617 {
4618 /* The following libcalls return their result in integer registers,
4619 even though they return a floating point value. */
4620 if (arm_libcall_uses_aapcs_base (libcall))
4621 return gen_rtx_REG (mode, ARG_REGISTER(1));
4622
4623 }
4624
4625 return arm_libcall_value_1 (mode);
4626 }
4627
4628 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4629
4630 static bool
4631 arm_function_value_regno_p (const unsigned int regno)
4632 {
4633 if (regno == ARG_REGISTER (1)
4634 || (TARGET_32BIT
4635 && TARGET_AAPCS_BASED
4636 && TARGET_VFP
4637 && TARGET_HARD_FLOAT
4638 && regno == FIRST_VFP_REGNUM)
4639 || (TARGET_IWMMXT_ABI
4640 && regno == FIRST_IWMMXT_REGNUM))
4641 return true;
4642
4643 return false;
4644 }
4645
4646 /* Determine the amount of memory needed to store the possible return
4647 registers of an untyped call. */
4648 int
4649 arm_apply_result_size (void)
4650 {
4651 int size = 16;
4652
4653 if (TARGET_32BIT)
4654 {
4655 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4656 size += 32;
4657 if (TARGET_IWMMXT_ABI)
4658 size += 8;
4659 }
4660
4661 return size;
4662 }
4663
4664 /* Decide whether TYPE should be returned in memory (true)
4665 or in a register (false). FNTYPE is the type of the function making
4666 the call. */
4667 static bool
4668 arm_return_in_memory (const_tree type, const_tree fntype)
4669 {
4670 HOST_WIDE_INT size;
4671
4672 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4673
4674 if (TARGET_AAPCS_BASED)
4675 {
4676 /* Simple, non-aggregate types (i.e. not including vectors and
4677 complex) are always returned in a register (or registers).
4678 We don't care about which register here, so we can short-cut
4679 some of the detail. */
4680 if (!AGGREGATE_TYPE_P (type)
4681 && TREE_CODE (type) != VECTOR_TYPE
4682 && TREE_CODE (type) != COMPLEX_TYPE)
4683 return false;
4684
4685 /* Any return value that is no larger than one word can be
4686 returned in r0. */
4687 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4688 return false;
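/* So, for instance, "struct { char c; short s; }" (4 bytes) comes back
   in r0.  A larger aggregate such as "struct { int a; int b; }" (8 bytes)
   ends up in memory, unless one of the co-processor checks below claims
   it -- which happens for homogeneous floating-point aggregates under the
   VFP variant, but not for an integer struct.  */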
4689
4690 /* Check any available co-processors to see if they accept the
4691 type as a register candidate (VFP, for example, can return
4692 some aggregates in consecutive registers). These aren't
4693 available if the call is variadic. */
4694 if (aapcs_select_return_coproc (type, fntype) >= 0)
4695 return false;
4696
4697 /* Vector values should be returned using ARM registers, not
4698 memory (unless they're over 16 bytes, which will break since
4699 we only have four call-clobbered registers to play with). */
4700 if (TREE_CODE (type) == VECTOR_TYPE)
4701 return (size < 0 || size > (4 * UNITS_PER_WORD));
4702
4703 /* The rest go in memory. */
4704 return true;
4705 }
4706
4707 if (TREE_CODE (type) == VECTOR_TYPE)
4708 return (size < 0 || size > (4 * UNITS_PER_WORD));
4709
4710 if (!AGGREGATE_TYPE_P (type) &&
4711 (TREE_CODE (type) != VECTOR_TYPE))
4712 /* All simple types are returned in registers. */
4713 return false;
4714
4715 if (arm_abi != ARM_ABI_APCS)
4716 {
4717 /* ATPCS and later return aggregate types in memory only if they are
4718 larger than a word (or are variable size). */
4719 return (size < 0 || size > UNITS_PER_WORD);
4720 }
4721
4722 /* For the arm-wince targets we choose to be compatible with Microsoft's
4723 ARM and Thumb compilers, which always return aggregates in memory. */
4724 #ifndef ARM_WINCE
4725 /* All structures/unions bigger than one word are returned in memory.
4726 Also catch the case where int_size_in_bytes returns -1. In this case
4727 the aggregate is either huge or of variable size, and in either case
4728 we will want to return it via memory and not in a register. */
4729 if (size < 0 || size > UNITS_PER_WORD)
4730 return true;
4731
4732 if (TREE_CODE (type) == RECORD_TYPE)
4733 {
4734 tree field;
4735
4736 /* For a struct the APCS says that we only return in a register
4737 if the type is 'integer like' and every addressable element
4738 has an offset of zero. For practical purposes this means
4739 that the structure can have at most one non bit-field element
4740 and that this element must be the first one in the structure. */
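/* A rough sketch of the rule: "struct { short s; }" can be returned in
   a register -- its only addressable field is first and integral --
   while "struct { float f; }" cannot, because the first field is a
   float; "struct { int a : 8; int b : 24; }" is also fine, since the
   remaining fields are all bit-fields.  */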
4741
4742 /* Find the first field, ignoring non FIELD_DECL things which will
4743 have been created by C++. */
4744 for (field = TYPE_FIELDS (type);
4745 field && TREE_CODE (field) != FIELD_DECL;
4746 field = DECL_CHAIN (field))
4747 continue;
4748
4749 if (field == NULL)
4750 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4751
4752 /* Check that the first field is valid for returning in a register. */
4753
4754 /* ... Floats are not allowed */
4755 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4756 return true;
4757
4758 /* ... Aggregates that are not themselves valid for returning in
4759 a register are not allowed. */
4760 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4761 return true;
4762
4763 /* Now check the remaining fields, if any. Only bitfields are allowed,
4764 since they are not addressable. */
4765 for (field = DECL_CHAIN (field);
4766 field;
4767 field = DECL_CHAIN (field))
4768 {
4769 if (TREE_CODE (field) != FIELD_DECL)
4770 continue;
4771
4772 if (!DECL_BIT_FIELD_TYPE (field))
4773 return true;
4774 }
4775
4776 return false;
4777 }
4778
4779 if (TREE_CODE (type) == UNION_TYPE)
4780 {
4781 tree field;
4782
4783 /* Unions can be returned in registers if every element is
4784 integral, or can be returned in an integer register. */
4785 for (field = TYPE_FIELDS (type);
4786 field;
4787 field = DECL_CHAIN (field))
4788 {
4789 if (TREE_CODE (field) != FIELD_DECL)
4790 continue;
4791
4792 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4793 return true;
4794
4795 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4796 return true;
4797 }
4798
4799 return false;
4800 }
4801 #endif /* not ARM_WINCE */
4802
4803 /* Return all other types in memory. */
4804 return true;
4805 }
4806
4807 const struct pcs_attribute_arg
4808 {
4809 const char *arg;
4810 enum arm_pcs value;
4811 } pcs_attribute_args[] =
4812 {
4813 {"aapcs", ARM_PCS_AAPCS},
4814 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4815 #if 0
4816 /* We could recognize these, but changes would be needed elsewhere
4817 * to implement them. */
4818 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4819 {"atpcs", ARM_PCS_ATPCS},
4820 {"apcs", ARM_PCS_APCS},
4821 #endif
4822 {NULL, ARM_PCS_UNKNOWN}
4823 };
4824
4825 static enum arm_pcs
4826 arm_pcs_from_attribute (tree attr)
4827 {
4828 const struct pcs_attribute_arg *ptr;
4829 const char *arg;
4830
4831 /* Get the value of the argument. */
4832 if (TREE_VALUE (attr) == NULL_TREE
4833 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4834 return ARM_PCS_UNKNOWN;
4835
4836 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4837
4838 /* Check it against the list of known arguments. */
4839 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4840 if (streq (arg, ptr->arg))
4841 return ptr->value;
4842
4843 /* An unrecognized PCS variant. */
4844 return ARM_PCS_UNKNOWN;
4845 }
4846
4847 /* Get the PCS variant to use for this call. TYPE is the function's type
4848 specification, DECL is the specific declaration. DECL may be null if
4849 the call could be indirect or if this is a library call. */
4850 static enum arm_pcs
4851 arm_get_pcs_model (const_tree type, const_tree decl)
4852 {
4853 bool user_convention = false;
4854 enum arm_pcs user_pcs = arm_pcs_default;
4855 tree attr;
4856
4857 gcc_assert (type);
4858
4859 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4860 if (attr)
4861 {
4862 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4863 user_convention = true;
4864 }
4865
4866 if (TARGET_AAPCS_BASED)
4867 {
4868 /* Detect varargs functions. These always use the base rules
4869 (no argument is ever a candidate for a co-processor
4870 register). */
4871 bool base_rules = stdarg_p (type);
4872
4873 if (user_convention)
4874 {
4875 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4876 sorry ("non-AAPCS derived PCS variant");
4877 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4878 error ("variadic functions must use the base AAPCS variant");
4879 }
4880
4881 if (base_rules)
4882 return ARM_PCS_AAPCS;
4883 else if (user_convention)
4884 return user_pcs;
4885 else if (decl && flag_unit_at_a_time)
4886 {
4887 /* Local functions never leak outside this compilation unit,
4888 so we are free to use whatever conventions are
4889 appropriate. */
4890 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4891 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4892 if (i && i->local)
4893 return ARM_PCS_AAPCS_LOCAL;
4894 }
4895 }
4896 else if (user_convention && user_pcs != arm_pcs_default)
4897 sorry ("PCS variant");
4898
4899 /* For everything else we use the target's default. */
4900 return arm_pcs_default;
4901 }
4902
4903
4904 static void
4905 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4906 const_tree fntype ATTRIBUTE_UNUSED,
4907 rtx libcall ATTRIBUTE_UNUSED,
4908 const_tree fndecl ATTRIBUTE_UNUSED)
4909 {
4910 /* Record the unallocated VFP registers. */
4911 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4912 pcum->aapcs_vfp_reg_alloc = 0;
4913 }
4914
4915 /* Walk down the type tree of TYPE counting consecutive base elements.
4916 If *MODEP is VOIDmode, then set it to the first valid floating point
4917 type. If a non-floating point type is found, or if a floating point
4918 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4919 otherwise return the count in the sub-tree. */
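/* For instance (illustrative): "struct { double x, y, z; }" yields 3 with
   *MODEP set to DFmode; "float[4]" yields 4 with *MODEP set to SFmode; and
   "struct { float f; double d; }" yields -1, because the element types do
   not agree.  */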
4920 static int
4921 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4922 {
4923 enum machine_mode mode;
4924 HOST_WIDE_INT size;
4925
4926 switch (TREE_CODE (type))
4927 {
4928 case REAL_TYPE:
4929 mode = TYPE_MODE (type);
4930 if (mode != DFmode && mode != SFmode)
4931 return -1;
4932
4933 if (*modep == VOIDmode)
4934 *modep = mode;
4935
4936 if (*modep == mode)
4937 return 1;
4938
4939 break;
4940
4941 case COMPLEX_TYPE:
4942 mode = TYPE_MODE (TREE_TYPE (type));
4943 if (mode != DFmode && mode != SFmode)
4944 return -1;
4945
4946 if (*modep == VOIDmode)
4947 *modep = mode;
4948
4949 if (*modep == mode)
4950 return 2;
4951
4952 break;
4953
4954 case VECTOR_TYPE:
4955 /* Use V2SImode and V4SImode as representatives of all 64-bit
4956 and 128-bit vector types, whether or not those modes are
4957 supported with the present options. */
4958 size = int_size_in_bytes (type);
4959 switch (size)
4960 {
4961 case 8:
4962 mode = V2SImode;
4963 break;
4964 case 16:
4965 mode = V4SImode;
4966 break;
4967 default:
4968 return -1;
4969 }
4970
4971 if (*modep == VOIDmode)
4972 *modep = mode;
4973
4974 /* Vector modes are considered to be opaque: two vectors are
4975 equivalent for the purposes of being homogeneous aggregates
4976 if they are the same size. */
4977 if (*modep == mode)
4978 return 1;
4979
4980 break;
4981
4982 case ARRAY_TYPE:
4983 {
4984 int count;
4985 tree index = TYPE_DOMAIN (type);
4986
4987 /* Can't handle incomplete types. */
4988 if (!COMPLETE_TYPE_P (type))
4989 return -1;
4990
4991 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4992 if (count == -1
4993 || !index
4994 || !TYPE_MAX_VALUE (index)
4995 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
4996 || !TYPE_MIN_VALUE (index)
4997 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
4998 || count < 0)
4999 return -1;
5000
5001 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5002 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5003
5004 /* There must be no padding. */
5005 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5006 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5007 != count * GET_MODE_BITSIZE (*modep)))
5008 return -1;
5009
5010 return count;
5011 }
5012
5013 case RECORD_TYPE:
5014 {
5015 int count = 0;
5016 int sub_count;
5017 tree field;
5018
5019 /* Can't handle incomplete types. */
5020 if (!COMPLETE_TYPE_P (type))
5021 return -1;
5022
5023 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5024 {
5025 if (TREE_CODE (field) != FIELD_DECL)
5026 continue;
5027
5028 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5029 if (sub_count < 0)
5030 return -1;
5031 count += sub_count;
5032 }
5033
5034 /* There must be no padding. */
5035 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5036 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5037 != count * GET_MODE_BITSIZE (*modep)))
5038 return -1;
5039
5040 return count;
5041 }
5042
5043 case UNION_TYPE:
5044 case QUAL_UNION_TYPE:
5045 {
5046 /* These aren't very interesting except in a degenerate case. */
5047 int count = 0;
5048 int sub_count;
5049 tree field;
5050
5051 /* Can't handle incomplete types. */
5052 if (!COMPLETE_TYPE_P (type))
5053 return -1;
5054
5055 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5056 {
5057 if (TREE_CODE (field) != FIELD_DECL)
5058 continue;
5059
5060 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5061 if (sub_count < 0)
5062 return -1;
5063 count = count > sub_count ? count : sub_count;
5064 }
5065
5066 /* There must be no padding. */
5067 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5068 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5069 != count * GET_MODE_BITSIZE (*modep)))
5070 return -1;
5071
5072 return count;
5073 }
5074
5075 default:
5076 break;
5077 }
5078
5079 return -1;
5080 }
5081
5082 /* Return true if PCS_VARIANT should use VFP registers. */
5083 static bool
5084 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5085 {
5086 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5087 {
5088 static bool seen_thumb1_vfp = false;
5089
5090 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5091 {
5092 sorry ("Thumb-1 hard-float VFP ABI");
5093 /* sorry() is not immediately fatal, so only display this once. */
5094 seen_thumb1_vfp = true;
5095 }
5096
5097 return true;
5098 }
5099
5100 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5101 return false;
5102
5103 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5104 (TARGET_VFP_DOUBLE || !is_double));
5105 }
5106
5107 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5108 suitable for passing or returning in VFP registers for the PCS
5109 variant selected. If it is, then *BASE_MODE is updated to contain
5110 a machine mode describing each element of the argument's type and
5111 *COUNT to hold the number of such elements. */
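/* E.g. (illustrative): a "double _Complex" argument gives *BASE_MODE ==
   DFmode and *COUNT == 2, while a homogeneous aggregate of four floats
   gives *BASE_MODE == SFmode and *COUNT == 4; mixed element types, or
   more than four elements, are rejected.  */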
5112 static bool
5113 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5114 enum machine_mode mode, const_tree type,
5115 enum machine_mode *base_mode, int *count)
5116 {
5117 enum machine_mode new_mode = VOIDmode;
5118
5119 /* If we have the type information, prefer that to working things
5120 out from the mode. */
5121 if (type)
5122 {
5123 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5124
5125 if (ag_count > 0 && ag_count <= 4)
5126 *count = ag_count;
5127 else
5128 return false;
5129 }
5130 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5131 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5132 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5133 {
5134 *count = 1;
5135 new_mode = mode;
5136 }
5137 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5138 {
5139 *count = 2;
5140 new_mode = (mode == DCmode ? DFmode : SFmode);
5141 }
5142 else
5143 return false;
5144
5145
5146 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5147 return false;
5148
5149 *base_mode = new_mode;
5150 return true;
5151 }
5152
5153 static bool
5154 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5155 enum machine_mode mode, const_tree type)
5156 {
5157 int count ATTRIBUTE_UNUSED;
5158 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5159
5160 if (!use_vfp_abi (pcs_variant, false))
5161 return false;
5162 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5163 &ag_mode, &count);
5164 }
5165
5166 static bool
5167 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5168 const_tree type)
5169 {
5170 if (!use_vfp_abi (pcum->pcs_variant, false))
5171 return false;
5172
5173 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5174 &pcum->aapcs_vfp_rmode,
5175 &pcum->aapcs_vfp_rcount);
5176 }
5177
5178 static bool
5179 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5180 const_tree type ATTRIBUTE_UNUSED)
5181 {
5182 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5183 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5184 int regno;
5185
5186 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5187 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5188 {
5189 pcum->aapcs_vfp_reg_alloc = mask << regno;
5190 if (mode == BLKmode
5191 || (mode == TImode && ! TARGET_NEON)
5192 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5193 {
5194 int i;
5195 int rcount = pcum->aapcs_vfp_rcount;
5196 int rshift = shift;
5197 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5198 rtx par;
5199 if (!TARGET_NEON)
5200 {
5201 /* Avoid using unsupported vector modes. */
5202 if (rmode == V2SImode)
5203 rmode = DImode;
5204 else if (rmode == V4SImode)
5205 {
5206 rmode = DImode;
5207 rcount *= 2;
5208 rshift /= 2;
5209 }
5210 }
5211 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5212 for (i = 0; i < rcount; i++)
5213 {
5214 rtx tmp = gen_rtx_REG (rmode,
5215 FIRST_VFP_REGNUM + regno + i * rshift);
5216 tmp = gen_rtx_EXPR_LIST
5217 (VOIDmode, tmp,
5218 GEN_INT (i * GET_MODE_SIZE (rmode)));
5219 XVECEXP (par, 0, i) = tmp;
5220 }
5221
5222 pcum->aapcs_reg = par;
5223 }
5224 else
5225 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5226 return true;
5227 }
5228 return false;
5229 }
5230
5231 static rtx
5232 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5233 enum machine_mode mode,
5234 const_tree type ATTRIBUTE_UNUSED)
5235 {
5236 if (!use_vfp_abi (pcs_variant, false))
5237 return NULL;
5238
5239 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5240 {
5241 int count;
5242 enum machine_mode ag_mode;
5243 int i;
5244 rtx par;
5245 int shift;
5246
5247 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5248 &ag_mode, &count);
5249
5250 if (!TARGET_NEON)
5251 {
5252 if (ag_mode == V2SImode)
5253 ag_mode = DImode;
5254 else if (ag_mode == V4SImode)
5255 {
5256 ag_mode = DImode;
5257 count *= 2;
5258 }
5259 }
5260 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
5261 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5262 for (i = 0; i < count; i++)
5263 {
5264 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5265 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5266 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5267 XVECEXP (par, 0, i) = tmp;
5268 }
5269
5270 return par;
5271 }
5272
5273 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5274 }
5275
5276 static void
5277 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5278 enum machine_mode mode ATTRIBUTE_UNUSED,
5279 const_tree type ATTRIBUTE_UNUSED)
5280 {
5281 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5282 pcum->aapcs_vfp_reg_alloc = 0;
5283 return;
5284 }
5285
5286 #define AAPCS_CP(X) \
5287 { \
5288 aapcs_ ## X ## _cum_init, \
5289 aapcs_ ## X ## _is_call_candidate, \
5290 aapcs_ ## X ## _allocate, \
5291 aapcs_ ## X ## _is_return_candidate, \
5292 aapcs_ ## X ## _allocate_return_reg, \
5293 aapcs_ ## X ## _advance \
5294 }
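/* For instance, AAPCS_CP(vfp) below expands to an initializer naming
aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate, aapcs_vfp_allocate,
aapcs_vfp_is_return_candidate, aapcs_vfp_allocate_return_reg and
aapcs_vfp_advance.  */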
5295
5296 /* Table of co-processors that can be used to pass arguments in
5297 registers. Ideally no argument should be a candidate for more than
5298 one co-processor table entry, but the table is processed in order
5299 and stops after the first match. If that entry then fails to put
5300 the argument into a co-processor register, the argument will go on
5301 the stack. */
5302 static struct
5303 {
5304 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5305 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5306
5307 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5308 BLKmode) is a candidate for this co-processor's registers; this
5309 function should ignore any position-dependent state in
5310 CUMULATIVE_ARGS and only use call-type dependent information. */
5311 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5312
5313 /* Return true if the argument does get a co-processor register; it
5314 should set aapcs_reg to an RTX for the register allocated, in the
5315 form required as the return value of FUNCTION_ARG. */
5316 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5317
5318 /* Return true if a result of mode MODE (or type TYPE if MODE is
5319 BLKmode) can be returned in this co-processor's registers. */
5320 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5321
5322 /* Allocate and return an RTX element to hold the return type of a
5323 call; this routine must not fail and will only be called if
5324 is_return_candidate returned true with the same parameters. */
5325 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5326
5327 /* Finish processing this argument and prepare to start processing
5328 the next one. */
5329 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5330 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5331 {
5332 AAPCS_CP(vfp)
5333 };
5334
5335 #undef AAPCS_CP
5336
5337 static int
5338 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5339 const_tree type)
5340 {
5341 int i;
5342
5343 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5344 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5345 return i;
5346
5347 return -1;
5348 }
5349
5350 static int
5351 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5352 {
5353 /* We aren't passed a decl, so we can't check that a call is local.
5354 However, it isn't clear that that would be a win anyway, since it
5355 might limit some tail-calling opportunities. */
5356 enum arm_pcs pcs_variant;
5357
5358 if (fntype)
5359 {
5360 const_tree fndecl = NULL_TREE;
5361
5362 if (TREE_CODE (fntype) == FUNCTION_DECL)
5363 {
5364 fndecl = fntype;
5365 fntype = TREE_TYPE (fntype);
5366 }
5367
5368 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5369 }
5370 else
5371 pcs_variant = arm_pcs_default;
5372
5373 if (pcs_variant != ARM_PCS_AAPCS)
5374 {
5375 int i;
5376
5377 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5378 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5379 TYPE_MODE (type),
5380 type))
5381 return i;
5382 }
5383 return -1;
5384 }
5385
5386 static rtx
5387 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5388 const_tree fntype)
5389 {
5390 /* We aren't passed a decl, so we can't check that a call is local.
5391 However, it isn't clear that that would be a win anyway, since it
5392 might limit some tail-calling opportunities. */
5393 enum arm_pcs pcs_variant;
5394 int unsignedp ATTRIBUTE_UNUSED;
5395
5396 if (fntype)
5397 {
5398 const_tree fndecl = NULL_TREE;
5399
5400 if (TREE_CODE (fntype) == FUNCTION_DECL)
5401 {
5402 fndecl = fntype;
5403 fntype = TREE_TYPE (fntype);
5404 }
5405
5406 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5407 }
5408 else
5409 pcs_variant = arm_pcs_default;
5410
5411 /* Promote integer types. */
5412 if (type && INTEGRAL_TYPE_P (type))
5413 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5414
5415 if (pcs_variant != ARM_PCS_AAPCS)
5416 {
5417 int i;
5418
5419 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5420 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5421 type))
5422 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5423 mode, type);
5424 }
5425
5426 /* Promote small structs returned in a register to full-word size
5427 for big-endian AAPCS. */
5428 if (type && arm_return_in_msb (type))
5429 {
5430 HOST_WIDE_INT size = int_size_in_bytes (type);
5431 if (size % UNITS_PER_WORD != 0)
5432 {
5433 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5434 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5435 }
5436 }
5437
5438 return gen_rtx_REG (mode, R0_REGNUM);
5439 }
5440
5441 static rtx
5442 aapcs_libcall_value (enum machine_mode mode)
5443 {
5444 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5445 && GET_MODE_SIZE (mode) <= 4)
5446 mode = SImode;
5447
5448 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5449 }
5450
5451 /* Lay out a function argument using the AAPCS rules. The rule
5452 numbers referred to here are those in the AAPCS. */
5453 static void
5454 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5455 const_tree type, bool named)
5456 {
5457 int nregs, nregs2;
5458 int ncrn;
5459
5460 /* We only need to do this once per argument. */
5461 if (pcum->aapcs_arg_processed)
5462 return;
5463
5464 pcum->aapcs_arg_processed = true;
5465
5466 /* Special case: if named is false then we are handling an incoming
5467 anonymous argument which is on the stack. */
5468 if (!named)
5469 return;
5470
5471 /* Is this a potential co-processor register candidate? */
5472 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5473 {
5474 int slot = aapcs_select_call_coproc (pcum, mode, type);
5475 pcum->aapcs_cprc_slot = slot;
5476
5477 /* We don't have to apply any of the rules from part B of the
5478 preparation phase, these are handled elsewhere in the
5479 compiler. */
5480
5481 if (slot >= 0)
5482 {
5483 /* A Co-processor register candidate goes either in its own
5484 class of registers or on the stack. */
5485 if (!pcum->aapcs_cprc_failed[slot])
5486 {
5487 /* C1.cp - Try to allocate the argument to co-processor
5488 registers. */
5489 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5490 return;
5491
5492 /* C2.cp - Put the argument on the stack and note that we
5493 can't assign any more candidates in this slot. We also
5494 need to note that we have allocated stack space, so that
5495 we won't later try to split a non-cprc candidate between
5496 core registers and the stack. */
5497 pcum->aapcs_cprc_failed[slot] = true;
5498 pcum->can_split = false;
5499 }
5500
5501 /* We didn't get a register, so this argument goes on the
5502 stack. */
5503 gcc_assert (pcum->can_split == false);
5504 return;
5505 }
5506 }
5507
5508 /* C3 - For double-word aligned arguments, round the NCRN up to the
5509 next even number. */
5510 ncrn = pcum->aapcs_ncrn;
5511 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5512 ncrn++;
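/* For example, a DImode argument arriving when NCRN is 1 is bumped to
start at r2 and occupies {r2, r3}; the skipped register r1 is not
back-filled by later arguments.  */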
5513
5514 nregs = ARM_NUM_REGS2 (mode, type);
5515
5516 /* Sigh, this test should really assert that nregs > 0, but a GCC
5517 extension allows empty structs and then gives them empty size; it
5518 then allows such a structure to be passed by value. For some of
5519 the code below we have to pretend that such an argument has
5520 non-zero size so that we 'locate' it correctly either in
5521 registers or on the stack. */
5522 gcc_assert (nregs >= 0);
5523
5524 nregs2 = nregs ? nregs : 1;
5525
5526 /* C4 - Argument fits entirely in core registers. */
5527 if (ncrn + nregs2 <= NUM_ARG_REGS)
5528 {
5529 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5530 pcum->aapcs_next_ncrn = ncrn + nregs;
5531 return;
5532 }
5533
5534 /* C5 - Some core registers left and there are no arguments already
5535 on the stack: split this argument between the remaining core
5536 registers and the stack. */
5537 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5538 {
5539 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5540 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5541 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
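/* For example, a 16-byte structure arriving when NCRN is 2 is split:
r2 and r3 hold the first 8 bytes (aapcs_partial == 8) and the
remaining 8 bytes go on the stack.  */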
5542 return;
5543 }
5544
5545 /* C6 - NCRN is set to 4. */
5546 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5547
5548 /* C7, C8 - argument goes on the stack. We have nothing to do here. */
5549 return;
5550 }
5551
5552 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5553 for a call to a function whose data type is FNTYPE.
5554 For a library call, FNTYPE is NULL. */
5555 void
5556 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5557 rtx libname,
5558 tree fndecl ATTRIBUTE_UNUSED)
5559 {
5560 /* Long call handling. */
5561 if (fntype)
5562 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5563 else
5564 pcum->pcs_variant = arm_pcs_default;
5565
5566 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5567 {
5568 if (arm_libcall_uses_aapcs_base (libname))
5569 pcum->pcs_variant = ARM_PCS_AAPCS;
5570
5571 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5572 pcum->aapcs_reg = NULL_RTX;
5573 pcum->aapcs_partial = 0;
5574 pcum->aapcs_arg_processed = false;
5575 pcum->aapcs_cprc_slot = -1;
5576 pcum->can_split = true;
5577
5578 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5579 {
5580 int i;
5581
5582 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5583 {
5584 pcum->aapcs_cprc_failed[i] = false;
5585 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5586 }
5587 }
5588 return;
5589 }
5590
5591 /* Legacy ABIs */
5592
5593 /* On the ARM, the offset starts at 0. */
5594 pcum->nregs = 0;
5595 pcum->iwmmxt_nregs = 0;
5596 pcum->can_split = true;
5597
5598 /* Varargs vectors are treated the same as long long.
5599 named_count avoids having to change the way arm handles 'named'. */
5600 pcum->named_count = 0;
5601 pcum->nargs = 0;
5602
5603 if (TARGET_REALLY_IWMMXT && fntype)
5604 {
5605 tree fn_arg;
5606
5607 for (fn_arg = TYPE_ARG_TYPES (fntype);
5608 fn_arg;
5609 fn_arg = TREE_CHAIN (fn_arg))
5610 pcum->named_count += 1;
5611
5612 if (! pcum->named_count)
5613 pcum->named_count = INT_MAX;
5614 }
5615 }
5616
5617 /* Return true if we use LRA instead of reload pass. */
5618 static bool
5619 arm_lra_p (void)
5620 {
5621 return arm_lra_flag;
5622 }
5623
5624 /* Return true if mode/type need doubleword alignment. */
5625 static bool
5626 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5627 {
5628 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5629 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5630 }
5631
5632
5633 /* Determine where to put an argument to a function.
5634 Value is zero to push the argument on the stack,
5635 or a hard register in which to store the argument.
5636
5637 MODE is the argument's machine mode.
5638 TYPE is the data type of the argument (as a tree).
5639 This is null for libcalls where that information may
5640 not be available.
5641 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5642 the preceding args and about the function being called.
5643 NAMED is nonzero if this argument is a named parameter
5644 (otherwise it is an extra parameter matching an ellipsis).
5645
5646 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5647 other arguments are passed on the stack. If (NAMED == 0) (which happens
5648 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5649 defined), say it is passed on the stack (function_prologue will
5650 indeed make it be passed on the stack if necessary). */
5651
5652 static rtx
5653 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5654 const_tree type, bool named)
5655 {
5656 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5657 int nregs;
5658
5659 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5660 a call insn (op3 of a call_value insn). */
5661 if (mode == VOIDmode)
5662 return const0_rtx;
5663
5664 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5665 {
5666 aapcs_layout_arg (pcum, mode, type, named);
5667 return pcum->aapcs_reg;
5668 }
5669
5670 /* Varargs vectors are treated the same as long long.
5671 named_count avoids having to change the way arm handles 'named'. */
5672 if (TARGET_IWMMXT_ABI
5673 && arm_vector_mode_supported_p (mode)
5674 && pcum->named_count > pcum->nargs + 1)
5675 {
5676 if (pcum->iwmmxt_nregs <= 9)
5677 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5678 else
5679 {
5680 pcum->can_split = false;
5681 return NULL_RTX;
5682 }
5683 }
5684
5685 /* Put doubleword aligned quantities in even register pairs. */
5686 if (pcum->nregs & 1
5687 && ARM_DOUBLEWORD_ALIGN
5688 && arm_needs_doubleword_align (mode, type))
5689 pcum->nregs++;
5690
5691 /* Only allow splitting an arg between regs and memory if all preceding
5692 args were allocated to regs. For args passed by reference we only count
5693 the reference pointer. */
5694 if (pcum->can_split)
5695 nregs = 1;
5696 else
5697 nregs = ARM_NUM_REGS2 (mode, type);
5698
5699 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5700 return NULL_RTX;
5701
5702 return gen_rtx_REG (mode, pcum->nregs);
5703 }
5704
5705 static unsigned int
5706 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5707 {
5708 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5709 ? DOUBLEWORD_ALIGNMENT
5710 : PARM_BOUNDARY);
5711 }
5712
5713 static int
5714 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5715 tree type, bool named)
5716 {
5717 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5718 int nregs = pcum->nregs;
5719
5720 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5721 {
5722 aapcs_layout_arg (pcum, mode, type, named);
5723 return pcum->aapcs_partial;
5724 }
5725
5726 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5727 return 0;
5728
5729 if (NUM_ARG_REGS > nregs
5730 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5731 && pcum->can_split)
5732 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5733
5734 return 0;
5735 }
5736
5737 /* Update the data in PCUM to advance over an argument
5738 of mode MODE and data type TYPE.
5739 (TYPE is null for libcalls where that information may not be available.) */
5740
5741 static void
5742 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5743 const_tree type, bool named)
5744 {
5745 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5746
5747 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5748 {
5749 aapcs_layout_arg (pcum, mode, type, named);
5750
5751 if (pcum->aapcs_cprc_slot >= 0)
5752 {
5753 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5754 type);
5755 pcum->aapcs_cprc_slot = -1;
5756 }
5757
5758 /* Generic stuff. */
5759 pcum->aapcs_arg_processed = false;
5760 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5761 pcum->aapcs_reg = NULL_RTX;
5762 pcum->aapcs_partial = 0;
5763 }
5764 else
5765 {
5766 pcum->nargs += 1;
5767 if (arm_vector_mode_supported_p (mode)
5768 && pcum->named_count > pcum->nargs
5769 && TARGET_IWMMXT_ABI)
5770 pcum->iwmmxt_nregs += 1;
5771 else
5772 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5773 }
5774 }
5775
5776 /* Variable sized types are passed by reference. This is a GCC
5777 extension to the ARM ABI. */
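/* For example, an argument whose type size is not a compile-time
constant (TYPE_SIZE is not an INTEGER_CST) is passed by invisible
reference: the caller passes the address of a copy rather than the
value itself.  */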
5778
5779 static bool
5780 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5781 enum machine_mode mode ATTRIBUTE_UNUSED,
5782 const_tree type, bool named ATTRIBUTE_UNUSED)
5783 {
5784 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5785 }
5786 \f
5787 /* Encode the current state of the #pragma [no_]long_calls. */
5788 typedef enum
5789 {
5790 OFF, /* No #pragma [no_]long_calls is in effect. */
5791 LONG, /* #pragma long_calls is in effect. */
5792 SHORT /* #pragma no_long_calls is in effect. */
5793 } arm_pragma_enum;
5794
5795 static arm_pragma_enum arm_pragma_long_calls = OFF;
5796
5797 void
5798 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5799 {
5800 arm_pragma_long_calls = LONG;
5801 }
5802
5803 void
5804 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5805 {
5806 arm_pragma_long_calls = SHORT;
5807 }
5808
5809 void
5810 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5811 {
5812 arm_pragma_long_calls = OFF;
5813 }
5814 \f
5815 /* Handle an attribute requiring a FUNCTION_DECL;
5816 arguments as in struct attribute_spec.handler. */
5817 static tree
5818 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5819 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5820 {
5821 if (TREE_CODE (*node) != FUNCTION_DECL)
5822 {
5823 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5824 name);
5825 *no_add_attrs = true;
5826 }
5827
5828 return NULL_TREE;
5829 }
5830
5831 /* Handle an "interrupt" or "isr" attribute;
5832 arguments as in struct attribute_spec.handler. */
5833 static tree
5834 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5835 bool *no_add_attrs)
5836 {
5837 if (DECL_P (*node))
5838 {
5839 if (TREE_CODE (*node) != FUNCTION_DECL)
5840 {
5841 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5842 name);
5843 *no_add_attrs = true;
5844 }
5845 /* FIXME: the argument if any is checked for type attributes;
5846 should it be checked for decl ones? */
5847 }
5848 else
5849 {
5850 if (TREE_CODE (*node) == FUNCTION_TYPE
5851 || TREE_CODE (*node) == METHOD_TYPE)
5852 {
5853 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5854 {
5855 warning (OPT_Wattributes, "%qE attribute ignored",
5856 name);
5857 *no_add_attrs = true;
5858 }
5859 }
5860 else if (TREE_CODE (*node) == POINTER_TYPE
5861 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5862 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5863 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5864 {
5865 *node = build_variant_type_copy (*node);
5866 TREE_TYPE (*node) = build_type_attribute_variant
5867 (TREE_TYPE (*node),
5868 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5869 *no_add_attrs = true;
5870 }
5871 else
5872 {
5873 /* Possibly pass this attribute on from the type to a decl. */
5874 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5875 | (int) ATTR_FLAG_FUNCTION_NEXT
5876 | (int) ATTR_FLAG_ARRAY_NEXT))
5877 {
5878 *no_add_attrs = true;
5879 return tree_cons (name, args, NULL_TREE);
5880 }
5881 else
5882 {
5883 warning (OPT_Wattributes, "%qE attribute ignored",
5884 name);
5885 }
5886 }
5887 }
5888
5889 return NULL_TREE;
5890 }
5891
5892 /* Handle a "pcs" attribute; arguments as in struct
5893 attribute_spec.handler. */
5894 static tree
5895 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5896 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5897 {
5898 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5899 {
5900 warning (OPT_Wattributes, "%qE attribute ignored", name);
5901 *no_add_attrs = true;
5902 }
5903 return NULL_TREE;
5904 }
5905
5906 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5907 /* Handle the "notshared" attribute. This attribute is another way of
5908 requesting hidden visibility. ARM's compiler supports
5909 "__declspec(notshared)"; we support the same thing via an
5910 attribute. */
5911
5912 static tree
5913 arm_handle_notshared_attribute (tree *node,
5914 tree name ATTRIBUTE_UNUSED,
5915 tree args ATTRIBUTE_UNUSED,
5916 int flags ATTRIBUTE_UNUSED,
5917 bool *no_add_attrs)
5918 {
5919 tree decl = TYPE_NAME (*node);
5920
5921 if (decl)
5922 {
5923 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5924 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5925 *no_add_attrs = false;
5926 }
5927 return NULL_TREE;
5928 }
5929 #endif
5930
5931 /* Return 0 if the attributes for two types are incompatible, 1 if they
5932 are compatible, and 2 if they are nearly compatible (which causes a
5933 warning to be generated). */
5934 static int
5935 arm_comp_type_attributes (const_tree type1, const_tree type2)
5936 {
5937 int l1, l2, s1, s2;
5938
5939 /* Check for mismatch of non-default calling convention. */
5940 if (TREE_CODE (type1) != FUNCTION_TYPE)
5941 return 1;
5942
5943 /* Check for mismatched call attributes. */
5944 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5945 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5946 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5947 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5948
5949 /* Only bother to check if an attribute is defined. */
5950 if (l1 | l2 | s1 | s2)
5951 {
5952 /* If one type has an attribute, the other must have the same attribute. */
5953 if ((l1 != l2) || (s1 != s2))
5954 return 0;
5955
5956 /* Disallow mixed attributes. */
5957 if ((l1 & s2) || (l2 & s1))
5958 return 0;
5959 }
5960
5961 /* Check for mismatched ISR attribute. */
5962 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5963 if (! l1)
5964 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5965 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5966 if (! l2)
5967 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5968 if (l1 != l2)
5969 return 0;
5970
5971 return 1;
5972 }
5973
5974 /* Assign default attributes to a newly defined type. This is used to
5975 set short_call/long_call attributes for function types of
5976 functions defined inside corresponding #pragma scopes. */
5977 static void
5978 arm_set_default_type_attributes (tree type)
5979 {
5980 /* Add __attribute__ ((long_call)) to all functions when inside
5981 #pragma long_calls, or __attribute__ ((short_call)) when inside
5982 #pragma no_long_calls. */
5983 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5984 {
5985 tree type_attr_list, attr_name;
5986 type_attr_list = TYPE_ATTRIBUTES (type);
5987
5988 if (arm_pragma_long_calls == LONG)
5989 attr_name = get_identifier ("long_call");
5990 else if (arm_pragma_long_calls == SHORT)
5991 attr_name = get_identifier ("short_call");
5992 else
5993 return;
5994
5995 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5996 TYPE_ATTRIBUTES (type) = type_attr_list;
5997 }
5998 }
5999 \f
6000 /* Return true if DECL is known to be linked into section SECTION. */
6001
6002 static bool
6003 arm_function_in_section_p (tree decl, section *section)
6004 {
6005 /* We can only be certain about functions defined in the same
6006 compilation unit. */
6007 if (!TREE_STATIC (decl))
6008 return false;
6009
6010 /* Make sure that SYMBOL always binds to the definition in this
6011 compilation unit. */
6012 if (!targetm.binds_local_p (decl))
6013 return false;
6014
6015 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6016 if (!DECL_SECTION_NAME (decl))
6017 {
6018 /* Make sure that we will not create a unique section for DECL. */
6019 if (flag_function_sections || DECL_ONE_ONLY (decl))
6020 return false;
6021 }
6022
6023 return function_section (decl) == section;
6024 }
6025
6026 /* Return nonzero if a 32-bit "long_call" should be generated for
6027 a call from the current function to DECL. We generate a long_call
6028 if the function:
6029
6030 a. has an __attribute__ ((long_call))
6031 or b. is within the scope of a #pragma long_calls
6032 or c. the -mlong-calls command line switch has been specified
6033
6034 However we do not generate a long call if the function:
6035
6036 d. has an __attribute__ ((short_call))
6037 or e. is inside the scope of a #pragma no_long_calls
6038 or f. is defined in the same section as the current function. */
6039
6040 bool
6041 arm_is_long_call_p (tree decl)
6042 {
6043 tree attrs;
6044
6045 if (!decl)
6046 return TARGET_LONG_CALLS;
6047
6048 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6049 if (lookup_attribute ("short_call", attrs))
6050 return false;
6051
6052 /* For "f", be conservative, and only cater for cases in which the
6053 whole of the current function is placed in the same section. */
6054 if (!flag_reorder_blocks_and_partition
6055 && TREE_CODE (decl) == FUNCTION_DECL
6056 && arm_function_in_section_p (decl, current_function_section ()))
6057 return false;
6058
6059 if (lookup_attribute ("long_call", attrs))
6060 return true;
6061
6062 return TARGET_LONG_CALLS;
6063 }
6064
6065 /* Return nonzero if it is ok to make a tail-call to DECL. */
6066 static bool
6067 arm_function_ok_for_sibcall (tree decl, tree exp)
6068 {
6069 unsigned long func_type;
6070
6071 if (cfun->machine->sibcall_blocked)
6072 return false;
6073
6074 /* Never tailcall something if we are generating code for Thumb-1. */
6075 if (TARGET_THUMB1)
6076 return false;
6077
6078 /* The PIC register is live on entry to VxWorks PLT entries, so we
6079 must make the call before restoring the PIC register. */
6080 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6081 return false;
6082
6083 /* Cannot tail-call to long calls, since these are out of range of
6084 a branch instruction. */
6085 if (decl && arm_is_long_call_p (decl))
6086 return false;
6087
6088 /* If we are interworking and the function is not declared static
6089 then we can't tail-call it unless we know that it exists in this
6090 compilation unit (since it might be a Thumb routine). */
6091 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6092 && !TREE_ASM_WRITTEN (decl))
6093 return false;
6094
6095 func_type = arm_current_func_type ();
6096 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6097 if (IS_INTERRUPT (func_type))
6098 return false;
6099
6100 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6101 {
6102 /* Check that the return value locations are the same. For
6103 example that we aren't returning a value from the sibling in
6104 a VFP register but then need to transfer it to a core
6105 register. */
6106 rtx a, b;
6107
6108 a = arm_function_value (TREE_TYPE (exp), decl, false);
6109 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6110 cfun->decl, false);
6111 if (!rtx_equal_p (a, b))
6112 return false;
6113 }
6114
6115 /* Never tailcall if function may be called with a misaligned SP. */
6116 if (IS_STACKALIGN (func_type))
6117 return false;
6118
6119 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6120 references should become a NOP. Don't convert such calls into
6121 sibling calls. */
6122 if (TARGET_AAPCS_BASED
6123 && arm_abi == ARM_ABI_AAPCS
6124 && decl
6125 && DECL_WEAK (decl))
6126 return false;
6127
6128 /* Everything else is ok. */
6129 return true;
6130 }
6131
6132 \f
6133 /* Addressing mode support functions. */
6134
6135 /* Return nonzero if X is a legitimate immediate operand when compiling
6136 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6137 int
6138 legitimate_pic_operand_p (rtx x)
6139 {
6140 if (GET_CODE (x) == SYMBOL_REF
6141 || (GET_CODE (x) == CONST
6142 && GET_CODE (XEXP (x, 0)) == PLUS
6143 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6144 return 0;
6145
6146 return 1;
6147 }
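/* So, for example, a bare SYMBOL_REF such as the address of a global
variable, or that address plus a constant offset, is not a legitimate
immediate under PIC and must be legitimized (typically via the GOT);
other constants remain usable directly.  */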
6148
6149 /* Record that the current function needs a PIC register. Initialize
6150 cfun->machine->pic_reg if we have not already done so. */
6151
6152 static void
6153 require_pic_register (void)
6154 {
6155 /* A lot of the logic here is made obscure by the fact that this
6156 routine gets called as part of the rtx cost estimation process.
6157 We don't want those calls to affect any assumptions about the real
6158 function; and further, we can't call entry_of_function() until we
6159 start the real expansion process. */
6160 if (!crtl->uses_pic_offset_table)
6161 {
6162 gcc_assert (can_create_pseudo_p ());
6163 if (arm_pic_register != INVALID_REGNUM
6164 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6165 {
6166 if (!cfun->machine->pic_reg)
6167 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6168
6169 /* Play games to avoid marking the function as needing pic
6170 if we are being called as part of the cost-estimation
6171 process. */
6172 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6173 crtl->uses_pic_offset_table = 1;
6174 }
6175 else
6176 {
6177 rtx seq, insn;
6178
6179 if (!cfun->machine->pic_reg)
6180 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6181
6182 /* Play games to avoid marking the function as needing pic
6183 if we are being called as part of the cost-estimation
6184 process. */
6185 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6186 {
6187 crtl->uses_pic_offset_table = 1;
6188 start_sequence ();
6189
6190 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6191 && arm_pic_register > LAST_LO_REGNUM)
6192 emit_move_insn (cfun->machine->pic_reg,
6193 gen_rtx_REG (Pmode, arm_pic_register));
6194 else
6195 arm_load_pic_register (0UL);
6196
6197 seq = get_insns ();
6198 end_sequence ();
6199
6200 for (insn = seq; insn; insn = NEXT_INSN (insn))
6201 if (INSN_P (insn))
6202 INSN_LOCATION (insn) = prologue_location;
6203
6204 /* We can be called during expansion of PHI nodes, where
6205 we can't yet emit instructions directly in the final
6206 insn stream. Queue the insns on the entry edge, they will
6207 be committed after everything else is expanded. */
6208 insert_insn_on_edge (seq,
6209 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6210 }
6211 }
6212 }
6213 }
6214
6215 rtx
6216 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6217 {
6218 if (GET_CODE (orig) == SYMBOL_REF
6219 || GET_CODE (orig) == LABEL_REF)
6220 {
6221 rtx insn;
6222
6223 if (reg == 0)
6224 {
6225 gcc_assert (can_create_pseudo_p ());
6226 reg = gen_reg_rtx (Pmode);
6227 }
6228
6229 /* VxWorks does not impose a fixed gap between segments; the run-time
6230 gap can be different from the object-file gap. We therefore can't
6231 use GOTOFF unless we are absolutely sure that the symbol is in the
6232 same segment as the GOT. Unfortunately, the flexibility of linker
6233 scripts means that we can't be sure of that in general, so assume
6234 that GOTOFF is never valid on VxWorks. */
6235 if ((GET_CODE (orig) == LABEL_REF
6236 || (GET_CODE (orig) == SYMBOL_REF &&
6237 SYMBOL_REF_LOCAL_P (orig)))
6238 && NEED_GOT_RELOC
6239 && arm_pic_data_is_text_relative)
6240 insn = arm_pic_static_addr (orig, reg);
6241 else
6242 {
6243 rtx pat;
6244 rtx mem;
6245
6246 /* If this function doesn't have a pic register, create one now. */
6247 require_pic_register ();
6248
6249 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6250
6251 /* Make the MEM as close to a constant as possible. */
6252 mem = SET_SRC (pat);
6253 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6254 MEM_READONLY_P (mem) = 1;
6255 MEM_NOTRAP_P (mem) = 1;
6256
6257 insn = emit_insn (pat);
6258 }
6259
6260 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6261 by loop. */
6262 set_unique_reg_note (insn, REG_EQUAL, orig);
6263
6264 return reg;
6265 }
6266 else if (GET_CODE (orig) == CONST)
6267 {
6268 rtx base, offset;
6269
6270 if (GET_CODE (XEXP (orig, 0)) == PLUS
6271 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6272 return orig;
6273
6274 /* Handle the case where we have: const (UNSPEC_TLS). */
6275 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6276 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6277 return orig;
6278
6279 /* Handle the case where we have:
6280 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6281 CONST_INT. */
6282 if (GET_CODE (XEXP (orig, 0)) == PLUS
6283 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6284 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6285 {
6286 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6287 return orig;
6288 }
6289
6290 if (reg == 0)
6291 {
6292 gcc_assert (can_create_pseudo_p ());
6293 reg = gen_reg_rtx (Pmode);
6294 }
6295
6296 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6297
6298 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6299 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6300 base == reg ? 0 : reg);
6301
6302 if (CONST_INT_P (offset))
6303 {
6304 /* The base register doesn't really matter, we only want to
6305 test the index for the appropriate mode. */
6306 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6307 {
6308 gcc_assert (can_create_pseudo_p ());
6309 offset = force_reg (Pmode, offset);
6310 }
6311
6312 if (CONST_INT_P (offset))
6313 return plus_constant (Pmode, base, INTVAL (offset));
6314 }
6315
6316 if (GET_MODE_SIZE (mode) > 4
6317 && (GET_MODE_CLASS (mode) == MODE_INT
6318 || TARGET_SOFT_FLOAT))
6319 {
6320 emit_insn (gen_addsi3 (reg, base, offset));
6321 return reg;
6322 }
6323
6324 return gen_rtx_PLUS (Pmode, base, offset);
6325 }
6326
6327 return orig;
6328 }
6329
6330
6331 /* Find a spare register to use during the prolog of a function. */
6332
6333 static int
6334 thumb_find_work_register (unsigned long pushed_regs_mask)
6335 {
6336 int reg;
6337
6338 /* Check the argument registers first as these are call-used. The
6339 register allocation order means that sometimes r3 might be used
6340 but earlier argument registers might not, so check them all. */
6341 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6342 if (!df_regs_ever_live_p (reg))
6343 return reg;
6344
6345 /* Before going on to check the call-saved registers we can try a couple
6346 more ways of deducing that r3 is available. The first is when we are
6347 pushing anonymous arguments onto the stack and we have fewer than 4
6348 registers' worth of fixed arguments (*). In this case r3 will be part of
6349 the variable argument list and so we can be sure that it will be
6350 pushed right at the start of the function. Hence it will be available
6351 for the rest of the prologue.
6352 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6353 if (cfun->machine->uses_anonymous_args
6354 && crtl->args.pretend_args_size > 0)
6355 return LAST_ARG_REGNUM;
6356
6357 /* The other case is when we have fixed arguments but less than 4 registers
6358 worth. In this case r3 might be used in the body of the function, but
6359 it is not being used to convey an argument into the function. In theory
6360 we could just check crtl->args.size to see how many bytes are
6361 being passed in argument registers, but it seems that it is unreliable.
6362 Sometimes it will have the value 0 when in fact arguments are being
6363 passed. (See testcase execute/20021111-1.c for an example). So we also
6364 check the args_info.nregs field as well. The problem with this field is
6365 that it makes no allowances for arguments that are passed to the
6366 function but which are not used. Hence we could miss an opportunity
6367 when a function has an unused argument in r3. But it is better to be
6368 safe than to be sorry. */
6369 if (! cfun->machine->uses_anonymous_args
6370 && crtl->args.size >= 0
6371 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6372 && (TARGET_AAPCS_BASED
6373 ? crtl->args.info.aapcs_ncrn < 4
6374 : crtl->args.info.nregs < 4))
6375 return LAST_ARG_REGNUM;
6376
6377 /* Otherwise look for a call-saved register that is going to be pushed. */
6378 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6379 if (pushed_regs_mask & (1 << reg))
6380 return reg;
6381
6382 if (TARGET_THUMB2)
6383 {
6384 /* Thumb-2 can use high regs. */
6385 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6386 if (pushed_regs_mask & (1 << reg))
6387 return reg;
6388 }
6389 /* Something went wrong - thumb_compute_save_reg_mask()
6390 should have arranged for a suitable register to be pushed. */
6391 gcc_unreachable ();
6392 }
6393
6394 static GTY(()) int pic_labelno;
6395
6396 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6397 low register. */
6398
6399 void
6400 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6401 {
6402 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6403
6404 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6405 return;
6406
6407 gcc_assert (flag_pic);
6408
6409 pic_reg = cfun->machine->pic_reg;
6410 if (TARGET_VXWORKS_RTP)
6411 {
6412 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6413 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6414 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6415
6416 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6417
6418 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6419 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6420 }
6421 else
6422 {
6423 /* We use an UNSPEC rather than a LABEL_REF because this label
6424 never appears in the code stream. */
6425
6426 labelno = GEN_INT (pic_labelno++);
6427 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6428 l1 = gen_rtx_CONST (VOIDmode, l1);
6429
6430 /* On the ARM the PC register contains 'dot + 8' at the time of the
6431 addition; on the Thumb it is 'dot + 4'. */
6432 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6433 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6434 UNSPEC_GOTSYM_OFF);
6435 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6436
6437 if (TARGET_32BIT)
6438 {
6439 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6440 }
6441 else /* TARGET_THUMB1 */
6442 {
6443 if (arm_pic_register != INVALID_REGNUM
6444 && REGNO (pic_reg) > LAST_LO_REGNUM)
6445 {
6446 /* We will have pushed the pic register, so we should always be
6447 able to find a work register. */
6448 pic_tmp = gen_rtx_REG (SImode,
6449 thumb_find_work_register (saved_regs));
6450 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6451 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6452 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6453 }
6454 else if (arm_pic_register != INVALID_REGNUM
6455 && arm_pic_register > LAST_LO_REGNUM
6456 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6457 {
6458 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6459 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6460 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6461 }
6462 else
6463 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6464 }
6465 }
6466
6467 /* Need to emit this whether or not we obey regdecls,
6468 since setjmp/longjmp can cause life info to screw up. */
6469 emit_use (pic_reg);
6470 }
6471
6472 /* Generate code to load the address of a static var when flag_pic is set. */
6473 static rtx
6474 arm_pic_static_addr (rtx orig, rtx reg)
6475 {
6476 rtx l1, labelno, offset_rtx, insn;
6477
6478 gcc_assert (flag_pic);
6479
6480 /* We use an UNSPEC rather than a LABEL_REF because this label
6481 never appears in the code stream. */
6482 labelno = GEN_INT (pic_labelno++);
6483 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6484 l1 = gen_rtx_CONST (VOIDmode, l1);
6485
6486 /* On the ARM the PC register contains 'dot + 8' at the time of the
6487 addition; on the Thumb it is 'dot + 4'. */
6488 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6489 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6490 UNSPEC_SYMBOL_OFFSET);
6491 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6492
6493 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6494 return insn;
6495 }
6496
6497 /* Return nonzero if X is valid as an ARM state addressing register. */
6498 static int
6499 arm_address_register_rtx_p (rtx x, int strict_p)
6500 {
6501 int regno;
6502
6503 if (!REG_P (x))
6504 return 0;
6505
6506 regno = REGNO (x);
6507
6508 if (strict_p)
6509 return ARM_REGNO_OK_FOR_BASE_P (regno);
6510
6511 return (regno <= LAST_ARM_REGNUM
6512 || regno >= FIRST_PSEUDO_REGISTER
6513 || regno == FRAME_POINTER_REGNUM
6514 || regno == ARG_POINTER_REGNUM);
6515 }
6516
6517 /* Return TRUE if this rtx is the difference of a symbol and a label,
6518 and will reduce to a PC-relative relocation in the object file.
6519 Expressions like this can be left alone when generating PIC, rather
6520 than forced through the GOT. */
6521 static int
6522 pcrel_constant_p (rtx x)
6523 {
6524 if (GET_CODE (x) == MINUS)
6525 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6526
6527 return FALSE;
6528 }
6529
6530 /* Return true if X will surely end up in an index register after the
6531 next splitting pass. */
6532 static bool
6533 will_be_in_index_register (const_rtx x)
6534 {
6535 /* arm.md: calculate_pic_address will split this into a register. */
6536 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6537 }
6538
6539 /* Return nonzero if X is a valid ARM state address operand. */
6540 int
6541 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6542 int strict_p)
6543 {
6544 bool use_ldrd;
6545 enum rtx_code code = GET_CODE (x);
6546
6547 if (arm_address_register_rtx_p (x, strict_p))
6548 return 1;
6549
6550 use_ldrd = (TARGET_LDRD
6551 && (mode == DImode
6552 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6553
6554 if (code == POST_INC || code == PRE_DEC
6555 || ((code == PRE_INC || code == POST_DEC)
6556 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6557 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6558
6559 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6560 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6561 && GET_CODE (XEXP (x, 1)) == PLUS
6562 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6563 {
6564 rtx addend = XEXP (XEXP (x, 1), 1);
6565
6566 /* Don't allow ldrd post increment by register because it's hard
6567 to fix up invalid register choices. */
6568 if (use_ldrd
6569 && GET_CODE (x) == POST_MODIFY
6570 && REG_P (addend))
6571 return 0;
6572
6573 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6574 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6575 }
6576
6577 /* After reload constants split into minipools will have addresses
6578 from a LABEL_REF. */
6579 else if (reload_completed
6580 && (code == LABEL_REF
6581 || (code == CONST
6582 && GET_CODE (XEXP (x, 0)) == PLUS
6583 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6584 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6585 return 1;
6586
6587 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6588 return 0;
6589
6590 else if (code == PLUS)
6591 {
6592 rtx xop0 = XEXP (x, 0);
6593 rtx xop1 = XEXP (x, 1);
6594
6595 return ((arm_address_register_rtx_p (xop0, strict_p)
6596 && ((CONST_INT_P (xop1)
6597 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6598 || (!strict_p && will_be_in_index_register (xop1))))
6599 || (arm_address_register_rtx_p (xop1, strict_p)
6600 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6601 }
6602
6603 #if 0
6604 /* Reload currently can't handle MINUS, so disable this for now */
6605 else if (GET_CODE (x) == MINUS)
6606 {
6607 rtx xop0 = XEXP (x, 0);
6608 rtx xop1 = XEXP (x, 1);
6609
6610 return (arm_address_register_rtx_p (xop0, strict_p)
6611 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6612 }
6613 #endif
6614
6615 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6616 && code == SYMBOL_REF
6617 && CONSTANT_POOL_ADDRESS_P (x)
6618 && ! (flag_pic
6619 && symbol_mentioned_p (get_pool_constant (x))
6620 && ! pcrel_constant_p (get_pool_constant (x))))
6621 return 1;
6622
6623 return 0;
6624 }
6625
6626 /* Return nonzero if X is a valid Thumb-2 address operand. */
6627 static int
6628 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6629 {
6630 bool use_ldrd;
6631 enum rtx_code code = GET_CODE (x);
6632
6633 if (arm_address_register_rtx_p (x, strict_p))
6634 return 1;
6635
6636 use_ldrd = (TARGET_LDRD
6637 && (mode == DImode
6638 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6639
6640 if (code == POST_INC || code == PRE_DEC
6641 || ((code == PRE_INC || code == POST_DEC)
6642 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6643 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6644
6645 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6646 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6647 && GET_CODE (XEXP (x, 1)) == PLUS
6648 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6649 {
6650 /* Thumb-2 only has autoincrement by constant. */
6651 rtx addend = XEXP (XEXP (x, 1), 1);
6652 HOST_WIDE_INT offset;
6653
6654 if (!CONST_INT_P (addend))
6655 return 0;
6656
6657 offset = INTVAL (addend);
6658 if (GET_MODE_SIZE (mode) <= 4)
6659 return (offset > -256 && offset < 256);
6660
6661 return (use_ldrd && offset > -1024 && offset < 1024
6662 && (offset & 3) == 0);
6663 }
6664
6665 /* After reload constants split into minipools will have addresses
6666 from a LABEL_REF. */
6667 else if (reload_completed
6668 && (code == LABEL_REF
6669 || (code == CONST
6670 && GET_CODE (XEXP (x, 0)) == PLUS
6671 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6672 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6673 return 1;
6674
6675 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6676 return 0;
6677
6678 else if (code == PLUS)
6679 {
6680 rtx xop0 = XEXP (x, 0);
6681 rtx xop1 = XEXP (x, 1);
6682
6683 return ((arm_address_register_rtx_p (xop0, strict_p)
6684 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6685 || (!strict_p && will_be_in_index_register (xop1))))
6686 || (arm_address_register_rtx_p (xop1, strict_p)
6687 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6688 }
6689
6690 /* Normally we can assign constant values to target registers without
6691 the help of the constant pool. But there are cases where we have to
6692 use the constant pool, such as:
6693 1) assign a label to a register.
6694 2) sign-extend an 8-bit value to 32 bits and then assign it to a register.
6695
6696 Constant pool access in format:
6697 (set (reg r0) (mem (symbol_ref (".LC0"))))
6698 will cause the use of literal pool (later in function arm_reorg).
6699 So here we mark such a format as invalid; the compiler will then
6700 adjust it into:
6701 (set (reg r0) (symbol_ref (".LC0")))
6702 (set (reg r0) (mem (reg r0))).
6703 No extra register is required, and (mem (reg r0)) won't cause the use
6704 of literal pools. */
6705 else if (arm_disable_literal_pool && code == SYMBOL_REF
6706 && CONSTANT_POOL_ADDRESS_P (x))
6707 return 0;
6708
6709 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6710 && code == SYMBOL_REF
6711 && CONSTANT_POOL_ADDRESS_P (x)
6712 && ! (flag_pic
6713 && symbol_mentioned_p (get_pool_constant (x))
6714 && ! pcrel_constant_p (get_pool_constant (x))))
6715 return 1;
6716
6717 return 0;
6718 }
6719
6720 /* Return nonzero if INDEX is valid for an address index operand in
6721 ARM state. */
6722 static int
6723 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6724 int strict_p)
6725 {
6726 HOST_WIDE_INT range;
6727 enum rtx_code code = GET_CODE (index);
6728
6729 /* Standard coprocessor addressing modes. */
6730 if (TARGET_HARD_FLOAT
6731 && TARGET_VFP
6732 && (mode == SFmode || mode == DFmode))
6733 return (code == CONST_INT && INTVAL (index) < 1024
6734 && INTVAL (index) > -1024
6735 && (INTVAL (index) & 3) == 0);
6736
6737 /* For quad modes, we restrict the constant offset to be slightly less
6738 than what the instruction format permits. We do this because for
6739 quad mode moves, we will actually decompose them into two separate
6740 double-mode reads or writes. INDEX must therefore be a valid
6741 (double-mode) offset and so should INDEX+8. */
6742 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6743 return (code == CONST_INT
6744 && INTVAL (index) < 1016
6745 && INTVAL (index) > -1024
6746 && (INTVAL (index) & 3) == 0);
6747
6748 /* We have no such constraint on double mode offsets, so we permit the
6749 full range of the instruction format. */
6750 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6751 return (code == CONST_INT
6752 && INTVAL (index) < 1024
6753 && INTVAL (index) > -1024
6754 && (INTVAL (index) & 3) == 0);
6755
6756 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6757 return (code == CONST_INT
6758 && INTVAL (index) < 1024
6759 && INTVAL (index) > -1024
6760 && (INTVAL (index) & 3) == 0);
6761
6762 if (arm_address_register_rtx_p (index, strict_p)
6763 && (GET_MODE_SIZE (mode) <= 4))
6764 return 1;
6765
6766 if (mode == DImode || mode == DFmode)
6767 {
6768 if (code == CONST_INT)
6769 {
6770 HOST_WIDE_INT val = INTVAL (index);
6771
6772 if (TARGET_LDRD)
6773 return val > -256 && val < 256;
6774 else
6775 return val > -4096 && val < 4092;
6776 }
6777
6778 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6779 }
6780
6781 if (GET_MODE_SIZE (mode) <= 4
6782 && ! (arm_arch4
6783 && (mode == HImode
6784 || mode == HFmode
6785 || (mode == QImode && outer == SIGN_EXTEND))))
6786 {
6787 if (code == MULT)
6788 {
6789 rtx xiop0 = XEXP (index, 0);
6790 rtx xiop1 = XEXP (index, 1);
6791
6792 return ((arm_address_register_rtx_p (xiop0, strict_p)
6793 && power_of_two_operand (xiop1, SImode))
6794 || (arm_address_register_rtx_p (xiop1, strict_p)
6795 && power_of_two_operand (xiop0, SImode)));
6796 }
6797 else if (code == LSHIFTRT || code == ASHIFTRT
6798 || code == ASHIFT || code == ROTATERT)
6799 {
6800 rtx op = XEXP (index, 1);
6801
6802 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6803 && CONST_INT_P (op)
6804 && INTVAL (op) > 0
6805 && INTVAL (op) <= 31);
6806 }
6807 }
6808
6809 /* For ARM v4 we may be doing a sign-extend operation during the
6810 load. */
6811 if (arm_arch4)
6812 {
6813 if (mode == HImode
6814 || mode == HFmode
6815 || (outer == SIGN_EXTEND && mode == QImode))
6816 range = 256;
6817 else
6818 range = 4096;
6819 }
6820 else
6821 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
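/* So, for example, a word or unsigned byte load accepts immediate
offsets in [-4095, 4095], while a halfword load on ARMv4 and later
(which must use LDRH/LDRSH) is limited to [-255, 255].  */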
6822
6823 return (code == CONST_INT
6824 && INTVAL (index) < range
6825 && INTVAL (index) > -range);
6826 }
6827
6828 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6829 index operand, i.e. 1, 2, 4 or 8. */
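/* These factors correspond to the LSL #0..#3 scalings available in
Thumb-2 register-offset addressing modes.  */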
6830 static bool
6831 thumb2_index_mul_operand (rtx op)
6832 {
6833 HOST_WIDE_INT val;
6834
6835 if (!CONST_INT_P (op))
6836 return false;
6837
6838 val = INTVAL (op);
6839 return (val == 1 || val == 2 || val == 4 || val == 8);
6840 }
6841
6842 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6843 static int
6844 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6845 {
6846 enum rtx_code code = GET_CODE (index);
6847
6848 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6849 /* Standard coprocessor addressing modes. */
6850 if (TARGET_HARD_FLOAT
6851 && TARGET_VFP
6852 && (mode == SFmode || mode == DFmode))
6853 return (code == CONST_INT && INTVAL (index) < 1024
6854 /* Thumb-2 allows only > -256 index range for its core register
6855 load/stores. Since we allow SF/DF in core registers, we have
6856 to use the intersection between -256~4096 (core) and -1024~1024
6857 (coprocessor). */
6858 && INTVAL (index) > -256
6859 && (INTVAL (index) & 3) == 0);
6860
6861 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6862 {
6863 /* For DImode assume values will usually live in core regs
6864 and only allow LDRD addressing modes. */
6865 if (!TARGET_LDRD || mode != DImode)
6866 return (code == CONST_INT
6867 && INTVAL (index) < 1024
6868 && INTVAL (index) > -1024
6869 && (INTVAL (index) & 3) == 0);
6870 }
6871
6872 /* For quad modes, we restrict the constant offset to be slightly less
6873 than what the instruction format permits. We do this because for
6874 quad mode moves, we will actually decompose them into two separate
6875 double-mode reads or writes. INDEX must therefore be a valid
6876 (double-mode) offset and so should INDEX+8. */
6877 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6878 return (code == CONST_INT
6879 && INTVAL (index) < 1016
6880 && INTVAL (index) > -1024
6881 && (INTVAL (index) & 3) == 0);
6882
6883 /* We have no such constraint on double mode offsets, so we permit the
6884 full range of the instruction format. */
6885 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6886 return (code == CONST_INT
6887 && INTVAL (index) < 1024
6888 && INTVAL (index) > -1024
6889 && (INTVAL (index) & 3) == 0);
6890
6891 if (arm_address_register_rtx_p (index, strict_p)
6892 && (GET_MODE_SIZE (mode) <= 4))
6893 return 1;
6894
6895 if (mode == DImode || mode == DFmode)
6896 {
6897 if (code == CONST_INT)
6898 {
6899 HOST_WIDE_INT val = INTVAL (index);
6900 /* ??? Can we assume ldrd for thumb2? */
6901 /* Thumb-2 ldrd only has reg+const addressing modes. */
6902 /* ldrd supports offsets of +-1020.
6903 However the ldr fallback does not. */
6904 return val > -256 && val < 256 && (val & 3) == 0;
6905 }
6906 else
6907 return 0;
6908 }
6909
6910 if (code == MULT)
6911 {
6912 rtx xiop0 = XEXP (index, 0);
6913 rtx xiop1 = XEXP (index, 1);
6914
6915 return ((arm_address_register_rtx_p (xiop0, strict_p)
6916 && thumb2_index_mul_operand (xiop1))
6917 || (arm_address_register_rtx_p (xiop1, strict_p)
6918 && thumb2_index_mul_operand (xiop0)));
6919 }
6920 else if (code == ASHIFT)
6921 {
6922 rtx op = XEXP (index, 1);
6923
6924 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6925 && CONST_INT_P (op)
6926 && INTVAL (op) > 0
6927 && INTVAL (op) <= 3);
6928 }
6929
6930 return (code == CONST_INT
6931 && INTVAL (index) < 4096
6932 && INTVAL (index) > -256);
6933 }
6934
6935 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6936 static int
6937 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6938 {
6939 int regno;
6940
6941 if (!REG_P (x))
6942 return 0;
6943
6944 regno = REGNO (x);
6945
6946 if (strict_p)
6947 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6948
6949 return (regno <= LAST_LO_REGNUM
6950 || regno > LAST_VIRTUAL_REGISTER
6951 || regno == FRAME_POINTER_REGNUM
6952 || (GET_MODE_SIZE (mode) >= 4
6953 && (regno == STACK_POINTER_REGNUM
6954 || regno >= FIRST_PSEUDO_REGISTER
6955 || x == hard_frame_pointer_rtx
6956 || x == arg_pointer_rtx)));
6957 }
6958
6959 /* Return nonzero if x is a legitimate index register. This is the case
6960 for any base register that can access a QImode object. */
6961 inline static int
6962 thumb1_index_register_rtx_p (rtx x, int strict_p)
6963 {
6964 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6965 }
6966
6967 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6968
6969 The AP may be eliminated to either the SP or the FP, so we use the
6970 least common denominator, e.g. SImode, and offsets from 0 to 64.
6971
6972 ??? Verify whether the above is the right approach.
6973
6974 ??? Also, the FP may be eliminated to the SP, so perhaps that
6975 needs special handling also.
6976
6977 ??? Look at how the mips16 port solves this problem. It probably uses
6978 better ways to solve some of these problems.
6979
6980 Although it is not incorrect, we don't accept QImode and HImode
6981 addresses based on the frame pointer or arg pointer until the
6982 reload pass starts. This is so that eliminating such addresses
6983 into stack based ones won't produce impossible code. */
6984 int
6985 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6986 {
6987 /* ??? Not clear if this is right. Experiment. */
6988 if (GET_MODE_SIZE (mode) < 4
6989 && !(reload_in_progress || reload_completed)
6990 && (reg_mentioned_p (frame_pointer_rtx, x)
6991 || reg_mentioned_p (arg_pointer_rtx, x)
6992 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6993 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6994 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6995 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6996 return 0;
6997
6998 /* Accept any base register. SP only in SImode or larger. */
6999 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7000 return 1;
7001
7002 /* This is PC relative data before arm_reorg runs. */
7003 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7004 && GET_CODE (x) == SYMBOL_REF
7005 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7006 return 1;
7007
7008 /* This is PC relative data after arm_reorg runs. */
7009 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7010 && reload_completed
7011 && (GET_CODE (x) == LABEL_REF
7012 || (GET_CODE (x) == CONST
7013 && GET_CODE (XEXP (x, 0)) == PLUS
7014 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7015 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7016 return 1;
7017
7018 /* Post-inc indexing only supported for SImode and larger. */
7019 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7020 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7021 return 1;
7022
7023 else if (GET_CODE (x) == PLUS)
7024 {
7025 /* REG+REG address can be any two index registers. */
7026 /* We disallow FRAME+REG addressing since we know that FRAME
7027 will be replaced with STACK, and SP relative addressing only
7028 permits SP+OFFSET. */
7029 if (GET_MODE_SIZE (mode) <= 4
7030 && XEXP (x, 0) != frame_pointer_rtx
7031 && XEXP (x, 1) != frame_pointer_rtx
7032 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7033 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7034 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7035 return 1;
7036
7037 /* REG+const has 5-7 bit offset for non-SP registers. */
7038 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7039 || XEXP (x, 0) == arg_pointer_rtx)
7040 && CONST_INT_P (XEXP (x, 1))
7041 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7042 return 1;
7043
7044 /* REG+const has 10-bit offset for SP, but only SImode and
7045 larger are supported. */
7046 /* ??? Should probably check for DI/DFmode overflow here
7047 just like GO_IF_LEGITIMATE_OFFSET does. */
7048 else if (REG_P (XEXP (x, 0))
7049 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7050 && GET_MODE_SIZE (mode) >= 4
7051 && CONST_INT_P (XEXP (x, 1))
7052 && INTVAL (XEXP (x, 1)) >= 0
7053 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7054 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7055 return 1;
7056
7057 else if (REG_P (XEXP (x, 0))
7058 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7059 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7060 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7061 && REGNO (XEXP (x, 0))
7062 <= LAST_VIRTUAL_POINTER_REGISTER))
7063 && GET_MODE_SIZE (mode) >= 4
7064 && CONST_INT_P (XEXP (x, 1))
7065 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7066 return 1;
7067 }
7068
7069 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7070 && GET_MODE_SIZE (mode) == 4
7071 && GET_CODE (x) == SYMBOL_REF
7072 && CONSTANT_POOL_ADDRESS_P (x)
7073 && ! (flag_pic
7074 && symbol_mentioned_p (get_pool_constant (x))
7075 && ! pcrel_constant_p (get_pool_constant (x))))
7076 return 1;
7077
7078 return 0;
7079 }
7080
7081 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7082 instruction of mode MODE. */
7083 int
7084 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7085 {
7086 switch (GET_MODE_SIZE (mode))
7087 {
7088 case 1:
7089 return val >= 0 && val < 32;
7090
7091 case 2:
7092 return val >= 0 && val < 64 && (val & 1) == 0;
7093
7094 default:
7095 return (val >= 0
7096 && (val + GET_MODE_SIZE (mode)) <= 128
7097 && (val & 3) == 0);
7098 }
7099 }
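/* For illustration, thumb_legitimate_offset_p above accepts byte offsets
   0-31 for QImode, even offsets 0-62 for HImode, and word-aligned offsets
   keeping the whole access within 128 bytes for SImode and larger
   (e.g. 0-124 for SImode), which lines up with the 5-bit scaled immediate
   fields of the 16-bit Thumb load/store encodings.  */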
7100
7101 bool
7102 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7103 {
7104 if (TARGET_ARM)
7105 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7106 else if (TARGET_THUMB2)
7107 return thumb2_legitimate_address_p (mode, x, strict_p);
7108 else /* if (TARGET_THUMB1) */
7109 return thumb1_legitimate_address_p (mode, x, strict_p);
7110 }
7111
7112 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7113
7114 Given an rtx X being reloaded into a reg required to be
7115 in class CLASS, return the class of reg to actually use.
7116 In general this is just CLASS, but for the Thumb core registers and
7117 immediate constants we prefer a LO_REGS class or a subset. */
7118
7119 static reg_class_t
7120 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7121 {
7122 if (TARGET_32BIT)
7123 return rclass;
7124 else
7125 {
7126 if (rclass == GENERAL_REGS)
7127 return LO_REGS;
7128 else
7129 return rclass;
7130 }
7131 }
7132
7133 /* Build the SYMBOL_REF for __tls_get_addr. */
7134
7135 static GTY(()) rtx tls_get_addr_libfunc;
7136
7137 static rtx
7138 get_tls_get_addr (void)
7139 {
7140 if (!tls_get_addr_libfunc)
7141 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7142 return tls_get_addr_libfunc;
7143 }
7144
7145 rtx
7146 arm_load_tp (rtx target)
7147 {
7148 if (!target)
7149 target = gen_reg_rtx (SImode);
7150
7151 if (TARGET_HARD_TP)
7152 {
7153 /* Can return in any reg. */
7154 emit_insn (gen_load_tp_hard (target));
7155 }
7156 else
7157 {
7158 /* Always returned in r0. Immediately copy the result into a pseudo,
7159 otherwise other uses of r0 (e.g. setting up function arguments) may
7160 clobber the value. */
7161
7162 rtx tmp;
7163
7164 emit_insn (gen_load_tp_soft ());
7165
7166 tmp = gen_rtx_REG (SImode, 0);
7167 emit_move_insn (target, tmp);
7168 }
7169 return target;
7170 }
7171
7172 static rtx
7173 load_tls_operand (rtx x, rtx reg)
7174 {
7175 rtx tmp;
7176
7177 if (reg == NULL_RTX)
7178 reg = gen_reg_rtx (SImode);
7179
7180 tmp = gen_rtx_CONST (SImode, x);
7181
7182 emit_move_insn (reg, tmp);
7183
7184 return reg;
7185 }
7186
7187 static rtx
7188 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7189 {
7190 rtx insns, label, labelno, sum;
7191
7192 gcc_assert (reloc != TLS_DESCSEQ);
7193 start_sequence ();
7194
7195 labelno = GEN_INT (pic_labelno++);
7196 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7197 label = gen_rtx_CONST (VOIDmode, label);
7198
7199 sum = gen_rtx_UNSPEC (Pmode,
7200 gen_rtvec (4, x, GEN_INT (reloc), label,
7201 GEN_INT (TARGET_ARM ? 8 : 4)),
7202 UNSPEC_TLS);
7203 reg = load_tls_operand (sum, reg);
7204
7205 if (TARGET_ARM)
7206 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7207 else
7208 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7209
7210 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7211 LCT_PURE, /* LCT_CONST? */
7212 Pmode, 1, reg, Pmode);
7213
7214 insns = get_insns ();
7215 end_sequence ();
7216
7217 return insns;
7218 }
7219
7220 static rtx
7221 arm_tls_descseq_addr (rtx x, rtx reg)
7222 {
7223 rtx labelno = GEN_INT (pic_labelno++);
7224 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7225 rtx sum = gen_rtx_UNSPEC (Pmode,
7226 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7227 gen_rtx_CONST (VOIDmode, label),
7228 GEN_INT (!TARGET_ARM)),
7229 UNSPEC_TLS);
7230 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7231
7232 emit_insn (gen_tlscall (x, labelno));
7233 if (!reg)
7234 reg = gen_reg_rtx (SImode);
7235 else
7236 gcc_assert (REGNO (reg) != 0);
7237
7238 emit_move_insn (reg, reg0);
7239
7240 return reg;
7241 }
7242
7243 rtx
7244 legitimize_tls_address (rtx x, rtx reg)
7245 {
7246 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7247 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7248
7249 switch (model)
7250 {
7251 case TLS_MODEL_GLOBAL_DYNAMIC:
7252 if (TARGET_GNU2_TLS)
7253 {
7254 reg = arm_tls_descseq_addr (x, reg);
7255
7256 tp = arm_load_tp (NULL_RTX);
7257
7258 dest = gen_rtx_PLUS (Pmode, tp, reg);
7259 }
7260 else
7261 {
7262 /* Original scheme */
7263 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7264 dest = gen_reg_rtx (Pmode);
7265 emit_libcall_block (insns, dest, ret, x);
7266 }
7267 return dest;
7268
7269 case TLS_MODEL_LOCAL_DYNAMIC:
7270 if (TARGET_GNU2_TLS)
7271 {
7272 reg = arm_tls_descseq_addr (x, reg);
7273
7274 tp = arm_load_tp (NULL_RTX);
7275
7276 dest = gen_rtx_PLUS (Pmode, tp, reg);
7277 }
7278 else
7279 {
7280 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7281
7282 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7283 share the LDM result with other LD model accesses. */
7284 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7285 UNSPEC_TLS);
7286 dest = gen_reg_rtx (Pmode);
7287 emit_libcall_block (insns, dest, ret, eqv);
7288
7289 /* Load the addend. */
7290 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7291 GEN_INT (TLS_LDO32)),
7292 UNSPEC_TLS);
7293 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7294 dest = gen_rtx_PLUS (Pmode, dest, addend);
7295 }
7296 return dest;
7297
7298 case TLS_MODEL_INITIAL_EXEC:
7299 labelno = GEN_INT (pic_labelno++);
7300 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7301 label = gen_rtx_CONST (VOIDmode, label);
7302 sum = gen_rtx_UNSPEC (Pmode,
7303 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7304 GEN_INT (TARGET_ARM ? 8 : 4)),
7305 UNSPEC_TLS);
7306 reg = load_tls_operand (sum, reg);
7307
7308 if (TARGET_ARM)
7309 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7310 else if (TARGET_THUMB2)
7311 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7312 else
7313 {
7314 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7315 emit_move_insn (reg, gen_const_mem (SImode, reg));
7316 }
7317
7318 tp = arm_load_tp (NULL_RTX);
7319
7320 return gen_rtx_PLUS (Pmode, tp, reg);
7321
7322 case TLS_MODEL_LOCAL_EXEC:
7323 tp = arm_load_tp (NULL_RTX);
7324
7325 reg = gen_rtx_UNSPEC (Pmode,
7326 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7327 UNSPEC_TLS);
7328 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7329
7330 return gen_rtx_PLUS (Pmode, tp, reg);
7331
7332 default:
7333 abort ();
7334 }
7335 }
7336
7337 /* Try machine-dependent ways of modifying an illegitimate address
7338 to be legitimate. If we find one, return the new, valid address. */
7339 rtx
7340 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7341 {
7342 if (arm_tls_referenced_p (x))
7343 {
7344 rtx addend = NULL;
7345
7346 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7347 {
7348 addend = XEXP (XEXP (x, 0), 1);
7349 x = XEXP (XEXP (x, 0), 0);
7350 }
7351
7352 if (GET_CODE (x) != SYMBOL_REF)
7353 return x;
7354
7355 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7356
7357 x = legitimize_tls_address (x, NULL_RTX);
7358
7359 if (addend)
7360 {
7361 x = gen_rtx_PLUS (SImode, x, addend);
7362 orig_x = x;
7363 }
7364 else
7365 return x;
7366 }
7367
7368 if (!TARGET_ARM)
7369 {
7370 /* TODO: legitimize_address for Thumb2. */
7371 if (TARGET_THUMB2)
7372 return x;
7373 return thumb_legitimize_address (x, orig_x, mode);
7374 }
7375
7376 if (GET_CODE (x) == PLUS)
7377 {
7378 rtx xop0 = XEXP (x, 0);
7379 rtx xop1 = XEXP (x, 1);
7380
7381 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7382 xop0 = force_reg (SImode, xop0);
7383
7384 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7385 && !symbol_mentioned_p (xop1))
7386 xop1 = force_reg (SImode, xop1);
7387
7388 if (ARM_BASE_REGISTER_RTX_P (xop0)
7389 && CONST_INT_P (xop1))
7390 {
7391 HOST_WIDE_INT n, low_n;
7392 rtx base_reg, val;
7393 n = INTVAL (xop1);
7394
7395 /* VFP addressing modes actually allow greater offsets, but for
7396 now we just stick with the lowest common denominator. */
7397 if (mode == DImode
7398 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7399 {
7400 low_n = n & 0x0f;
7401 n &= ~0x0f;
7402 if (low_n > 4)
7403 {
7404 n += 16;
7405 low_n -= 16;
7406 }
7407 }
7408 else
7409 {
7410 low_n = ((mode) == TImode ? 0
7411 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7412 n -= low_n;
7413 }
7414
7415 base_reg = gen_reg_rtx (SImode);
7416 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7417 emit_move_insn (base_reg, val);
7418 x = plus_constant (Pmode, base_reg, low_n);
7419 }
7420 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7421 x = gen_rtx_PLUS (SImode, xop0, xop1);
7422 }
7423
7424 /* XXX We don't allow MINUS any more -- see comment in
7425 arm_legitimate_address_outer_p (). */
7426 else if (GET_CODE (x) == MINUS)
7427 {
7428 rtx xop0 = XEXP (x, 0);
7429 rtx xop1 = XEXP (x, 1);
7430
7431 if (CONSTANT_P (xop0))
7432 xop0 = force_reg (SImode, xop0);
7433
7434 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7435 xop1 = force_reg (SImode, xop1);
7436
7437 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7438 x = gen_rtx_MINUS (SImode, xop0, xop1);
7439 }
7440
7441 /* Make sure to take full advantage of the pre-indexed addressing mode
7442 with absolute addresses, which often allows the base register to be
7443 factorized for multiple adjacent memory references, and might even
7444 allow the mini pool to be avoided entirely. */
7445 else if (CONST_INT_P (x) && optimize > 0)
7446 {
7447 unsigned int bits;
7448 HOST_WIDE_INT mask, base, index;
7449 rtx base_reg;
7450
7451 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7452 use an 8-bit index. So let's use a 12-bit index for SImode only and
7453 hope that arm_gen_constant will enable ldrb to use more bits. */
7454 bits = (mode == SImode) ? 12 : 8;
7455 mask = (1 << bits) - 1;
7456 base = INTVAL (x) & ~mask;
7457 index = INTVAL (x) & mask;
7458 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7459 {
7460 /* It'll most probably be more efficient to generate the base
7461 with more bits set and use a negative index instead. */
7462 base |= mask;
7463 index -= mask;
7464 }
7465 base_reg = force_reg (SImode, GEN_INT (base));
7466 x = plus_constant (Pmode, base_reg, index);
7467 }
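  /* A rough worked example of the splitting above, with illustrative
     values: for x = 0x7fff000f in SImode, base = 0x7fff0000 has 15 bits
     set, more than (32 - 12)/2, so base becomes 0x7fff0fff and index
     becomes 0xf - 0xfff = -0xff0; the sum is unchanged, and the base with
     more bits set is expected to be cheaper for arm_gen_constant.  */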
7468
7469 if (flag_pic)
7470 {
7471 /* We need to find and carefully transform any SYMBOL and LABEL
7472 references; so go back to the original address expression. */
7473 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7474
7475 if (new_x != orig_x)
7476 x = new_x;
7477 }
7478
7479 return x;
7480 }
7481
7482
7483 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7484 to be legitimate. If we find one, return the new, valid address. */
7485 rtx
7486 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7487 {
7488 if (GET_CODE (x) == PLUS
7489 && CONST_INT_P (XEXP (x, 1))
7490 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7491 || INTVAL (XEXP (x, 1)) < 0))
7492 {
7493 rtx xop0 = XEXP (x, 0);
7494 rtx xop1 = XEXP (x, 1);
7495 HOST_WIDE_INT offset = INTVAL (xop1);
7496
7497 /* Try and fold the offset into a biasing of the base register and
7498 then offsetting that. Don't do this when optimizing for space
7499 since it can cause too many CSEs. */
7500 if (optimize_size && offset >= 0
7501 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7502 {
7503 HOST_WIDE_INT delta;
7504
7505 if (offset >= 256)
7506 delta = offset - (256 - GET_MODE_SIZE (mode));
7507 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7508 delta = 31 * GET_MODE_SIZE (mode);
7509 else
7510 delta = offset & (~31 * GET_MODE_SIZE (mode));
7511
7512 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7513 NULL_RTX);
7514 x = plus_constant (Pmode, xop0, delta);
7515 }
7516 else if (offset < 0 && offset > -256)
7517 /* Small negative offsets are best done with a subtract before the
7518 dereference, forcing these into a register normally takes two
7519 instructions. */
7520 x = force_operand (x, NULL_RTX);
7521 else
7522 {
7523 /* For the remaining cases, force the constant into a register. */
7524 xop1 = force_reg (SImode, xop1);
7525 x = gen_rtx_PLUS (SImode, xop0, xop1);
7526 }
7527 }
7528 else if (GET_CODE (x) == PLUS
7529 && s_register_operand (XEXP (x, 1), SImode)
7530 && !s_register_operand (XEXP (x, 0), SImode))
7531 {
7532 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7533
7534 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7535 }
7536
7537 if (flag_pic)
7538 {
7539 /* We need to find and carefully transform any SYMBOL and LABEL
7540 references; so go back to the original address expression. */
7541 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7542
7543 if (new_x != orig_x)
7544 x = new_x;
7545 }
7546
7547 return x;
7548 }
7549
7550 bool
7551 arm_legitimize_reload_address (rtx *p,
7552 enum machine_mode mode,
7553 int opnum, int type,
7554 int ind_levels ATTRIBUTE_UNUSED)
7555 {
7556 /* We must recognize output that we have already generated ourselves. */
7557 if (GET_CODE (*p) == PLUS
7558 && GET_CODE (XEXP (*p, 0)) == PLUS
7559 && REG_P (XEXP (XEXP (*p, 0), 0))
7560 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7561 && CONST_INT_P (XEXP (*p, 1)))
7562 {
7563 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7564 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7565 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7566 return true;
7567 }
7568
7569 if (GET_CODE (*p) == PLUS
7570 && REG_P (XEXP (*p, 0))
7571 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7572 /* If the base register is equivalent to a constant, let the generic
7573 code handle it. Otherwise we will run into problems if a future
7574 reload pass decides to rematerialize the constant. */
7575 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7576 && CONST_INT_P (XEXP (*p, 1)))
7577 {
7578 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7579 HOST_WIDE_INT low, high;
7580
7581 /* Detect coprocessor load/stores. */
7582 bool coproc_p = ((TARGET_HARD_FLOAT
7583 && TARGET_VFP
7584 && (mode == SFmode || mode == DFmode))
7585 || (TARGET_REALLY_IWMMXT
7586 && VALID_IWMMXT_REG_MODE (mode))
7587 || (TARGET_NEON
7588 && (VALID_NEON_DREG_MODE (mode)
7589 || VALID_NEON_QREG_MODE (mode))));
7590
7591 /* For some access types, bail out when the offset is not a multiple of 4. */
7592 if ((val & 0x3) != 0
7593 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7594 && (coproc_p
7595 /* For DI, and DF under soft-float: */
7596 || ((mode == DImode || mode == DFmode)
7597 /* Without ldrd, we use stm/ldm, which does not
7598 fare well with unaligned bits. */
7599 && (! TARGET_LDRD
7600 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7601 || TARGET_THUMB2))))
7602 return false;
7603
7604 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7605 where the (reg+high) part gets turned into a reload add insn,
7606 we try to decompose the index into high/low values that can often
7607 also lead to better reload CSE.
7608 For example:
7609 ldr r0, [r2, #4100] // Offset too large
7610 ldr r1, [r2, #4104] // Offset too large
7611
7612 is best reloaded as:
7613 add t1, r2, #4096
7614 ldr r0, [t1, #4]
7615 add t2, r2, #4096
7616 ldr r1, [t2, #8]
7617
7618 which post-reload CSE can simplify in most cases to eliminate the
7619 second add instruction:
7620 add t1, r2, #4096
7621 ldr r0, [t1, #4]
7622 ldr r1, [t1, #8]
7623
7624 The idea here is that we want to split out the bits of the constant
7625 as a mask, rather than by subtracting the maximum offset that the
7626 respective type of load/store used can handle.
7627
7628 A negative low part can still be useful even if the overall offset is
7629 positive; sometimes this leads to an immediate that can be constructed
7630 with fewer instructions.
7631 For example:
7632 ldr r0, [r2, #0x3FFFFC]
7633
7634 This is best reloaded as:
7635 add t1, r2, #0x400000
7636 ldr r0, [t1, #-4]
7637
7638 The trick for spotting this for a load insn with N bits of offset
7639 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7640 negative offset that is going to make bit N and all the bits below
7641 it become zero in the remainder part.
7642
7643 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7644 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7645 used in most cases of ARM load/store instructions. */
7646
7647 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7648 (((VAL) & ((1 << (N)) - 1)) \
7649 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7650 : 0)
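      /* For illustration, SIGN_MAG_LOW_ADDR_BITS (0x3ffffc, 12) gives
         ((0x3ffffc & 0x1fff) ^ 0x1000) - 0x1000 = 0xffc - 0x1000 = -4,
         leaving a high part of 0x400000 -- the reload shown in the
         example above.  */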
7651
7652 if (coproc_p)
7653 {
7654 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7655
7656 /* NEON quad-word load/stores are made of two double-word accesses,
7657 so the valid index range is reduced by 8. Treat as 9-bit range if
7658 we go over it. */
7659 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7660 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7661 }
7662 else if (GET_MODE_SIZE (mode) == 8)
7663 {
7664 if (TARGET_LDRD)
7665 low = (TARGET_THUMB2
7666 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7667 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7668 else
7669 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7670 to access doublewords. The supported load/store offsets are
7671 -8, -4, and 4, which we try to produce here. */
7672 low = ((val & 0xf) ^ 0x8) - 0x8;
7673 }
7674 else if (GET_MODE_SIZE (mode) < 8)
7675 {
7676 /* NEON element load/stores do not have an offset. */
7677 if (TARGET_NEON_FP16 && mode == HFmode)
7678 return false;
7679
7680 if (TARGET_THUMB2)
7681 {
7682 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7683 Try the wider 12-bit range first, and re-try if the result
7684 is out of range. */
7685 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7686 if (low < -255)
7687 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7688 }
7689 else
7690 {
7691 if (mode == HImode || mode == HFmode)
7692 {
7693 if (arm_arch4)
7694 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7695 else
7696 {
7697 /* The storehi/movhi_bytes fallbacks can use only
7698 [-4094,+4094] of the full ldrb/strb index range. */
7699 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7700 if (low == 4095 || low == -4095)
7701 return false;
7702 }
7703 }
7704 else
7705 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7706 }
7707 }
7708 else
7709 return false;
7710
7711 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7712 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7713 - (unsigned HOST_WIDE_INT) 0x80000000);
7714 /* Check for overflow or zero. */
7715 if (low == 0 || high == 0 || (high + low != val))
7716 return false;
7717
7718 /* Reload the high part into a base reg; leave the low part
7719 in the mem.
7720 Note that replacing this gen_rtx_PLUS with plus_constant is
7721 wrong in this case because we rely on the
7722 (plus (plus reg c1) c2) structure being preserved so that
7723 XEXP (*p, 0) in push_reload below uses the correct term. */
7724 *p = gen_rtx_PLUS (GET_MODE (*p),
7725 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7726 GEN_INT (high)),
7727 GEN_INT (low));
7728 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7729 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7730 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7731 return true;
7732 }
7733
7734 return false;
7735 }
7736
7737 rtx
7738 thumb_legitimize_reload_address (rtx *x_p,
7739 enum machine_mode mode,
7740 int opnum, int type,
7741 int ind_levels ATTRIBUTE_UNUSED)
7742 {
7743 rtx x = *x_p;
7744
7745 if (GET_CODE (x) == PLUS
7746 && GET_MODE_SIZE (mode) < 4
7747 && REG_P (XEXP (x, 0))
7748 && XEXP (x, 0) == stack_pointer_rtx
7749 && CONST_INT_P (XEXP (x, 1))
7750 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7751 {
7752 rtx orig_x = x;
7753
7754 x = copy_rtx (x);
7755 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7756 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7757 return x;
7758 }
7759
7760 /* If both registers are hi-regs, then it's better to reload the
7761 entire expression rather than each register individually. That
7762 only requires one reload register rather than two. */
7763 if (GET_CODE (x) == PLUS
7764 && REG_P (XEXP (x, 0))
7765 && REG_P (XEXP (x, 1))
7766 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7767 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7768 {
7769 rtx orig_x = x;
7770
7771 x = copy_rtx (x);
7772 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7773 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7774 return x;
7775 }
7776
7777 return NULL;
7778 }
7779
7780 /* Test for various thread-local symbols. */
7781
7782 /* Helper for arm_tls_referenced_p. */
7783
7784 static int
7785 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7786 {
7787 if (GET_CODE (*x) == SYMBOL_REF)
7788 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7789
7790 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7791 TLS offsets, not real symbol references. */
7792 if (GET_CODE (*x) == UNSPEC
7793 && XINT (*x, 1) == UNSPEC_TLS)
7794 return -1;
7795
7796 return 0;
7797 }
7798
7799 /* Return TRUE if X contains any TLS symbol references. */
7800
7801 bool
7802 arm_tls_referenced_p (rtx x)
7803 {
7804 if (! TARGET_HAVE_TLS)
7805 return false;
7806
7807 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7808 }
7809
7810 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7811
7812 On the ARM, allow any integer (invalid ones are removed later by insn
7813 patterns), nice doubles and symbol_refs which refer to the function's
7814 constant pool XXX.
7815
7816 When generating pic allow anything. */
7817
7818 static bool
7819 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7820 {
7821 /* At present, we have no support for Neon structure constants, so forbid
7822 them here. It might be possible to handle simple cases like 0 and -1
7823 in future. */
7824 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7825 return false;
7826
7827 return flag_pic || !label_mentioned_p (x);
7828 }
7829
7830 static bool
7831 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7832 {
7833 return (CONST_INT_P (x)
7834 || CONST_DOUBLE_P (x)
7835 || CONSTANT_ADDRESS_P (x)
7836 || flag_pic);
7837 }
7838
7839 static bool
7840 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7841 {
7842 return (!arm_cannot_force_const_mem (mode, x)
7843 && (TARGET_32BIT
7844 ? arm_legitimate_constant_p_1 (mode, x)
7845 : thumb_legitimate_constant_p (mode, x)));
7846 }
7847
7848 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7849
7850 static bool
7851 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7852 {
7853 rtx base, offset;
7854
7855 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7856 {
7857 split_const (x, &base, &offset);
7858 if (GET_CODE (base) == SYMBOL_REF
7859 && !offset_within_block_p (base, INTVAL (offset)))
7860 return true;
7861 }
7862 return arm_tls_referenced_p (x);
7863 }
7864 \f
7865 #define REG_OR_SUBREG_REG(X) \
7866 (REG_P (X) \
7867 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7868
7869 #define REG_OR_SUBREG_RTX(X) \
7870 (REG_P (X) ? (X) : SUBREG_REG (X))
7871
7872 static inline int
7873 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7874 {
7875 enum machine_mode mode = GET_MODE (x);
7876 int total, words;
7877
7878 switch (code)
7879 {
7880 case ASHIFT:
7881 case ASHIFTRT:
7882 case LSHIFTRT:
7883 case ROTATERT:
7884 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7885
7886 case PLUS:
7887 case MINUS:
7888 case COMPARE:
7889 case NEG:
7890 case NOT:
7891 return COSTS_N_INSNS (1);
7892
7893 case MULT:
7894 if (CONST_INT_P (XEXP (x, 1)))
7895 {
7896 int cycles = 0;
7897 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7898
7899 while (i)
7900 {
7901 i >>= 2;
7902 cycles++;
7903 }
7904 return COSTS_N_INSNS (2) + cycles;
7905 }
7906 return COSTS_N_INSNS (1) + 16;
7907
7908 case SET:
7909 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7910 the mode. */
7911 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7912 return (COSTS_N_INSNS (words)
7913 + 4 * ((MEM_P (SET_SRC (x)))
7914 + MEM_P (SET_DEST (x))));
7915
7916 case CONST_INT:
7917 if (outer == SET)
7918 {
7919 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7920 return 0;
7921 if (thumb_shiftable_const (INTVAL (x)))
7922 return COSTS_N_INSNS (2);
7923 return COSTS_N_INSNS (3);
7924 }
7925 else if ((outer == PLUS || outer == COMPARE)
7926 && INTVAL (x) < 256 && INTVAL (x) > -256)
7927 return 0;
7928 else if ((outer == IOR || outer == XOR || outer == AND)
7929 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7930 return COSTS_N_INSNS (1);
7931 else if (outer == AND)
7932 {
7933 int i;
7934 /* This duplicates the tests in the andsi3 expander. */
7935 for (i = 9; i <= 31; i++)
7936 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7937 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7938 return COSTS_N_INSNS (2);
7939 }
7940 else if (outer == ASHIFT || outer == ASHIFTRT
7941 || outer == LSHIFTRT)
7942 return 0;
7943 return COSTS_N_INSNS (2);
7944
7945 case CONST:
7946 case CONST_DOUBLE:
7947 case LABEL_REF:
7948 case SYMBOL_REF:
7949 return COSTS_N_INSNS (3);
7950
7951 case UDIV:
7952 case UMOD:
7953 case DIV:
7954 case MOD:
7955 return 100;
7956
7957 case TRUNCATE:
7958 return 99;
7959
7960 case AND:
7961 case XOR:
7962 case IOR:
7963 /* XXX guess. */
7964 return 8;
7965
7966 case MEM:
7967 /* XXX another guess. */
7968 /* Memory costs quite a lot for the first word, but subsequent words
7969 load at the equivalent of a single insn each. */
7970 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7971 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7972 ? 4 : 0));
7973
7974 case IF_THEN_ELSE:
7975 /* XXX a guess. */
7976 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7977 return 14;
7978 return 2;
7979
7980 case SIGN_EXTEND:
7981 case ZERO_EXTEND:
7982 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7983 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7984
7985 if (mode == SImode)
7986 return total;
7987
7988 if (arm_arch6)
7989 return total + COSTS_N_INSNS (1);
7990
7991 /* Assume a two-shift sequence. Increase the cost slightly so
7992 we prefer actual shifts over an extend operation. */
7993 return total + 1 + COSTS_N_INSNS (2);
7994
7995 default:
7996 return 99;
7997 }
7998 }
7999
8000 static inline bool
8001 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8002 {
8003 enum machine_mode mode = GET_MODE (x);
8004 enum rtx_code subcode;
8005 rtx operand;
8006 enum rtx_code code = GET_CODE (x);
8007 *total = 0;
8008
8009 switch (code)
8010 {
8011 case MEM:
8012 /* Memory costs quite a lot for the first word, but subsequent words
8013 load at the equivalent of a single insn each. */
8014 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8015 return true;
8016
8017 case DIV:
8018 case MOD:
8019 case UDIV:
8020 case UMOD:
8021 if (TARGET_HARD_FLOAT && mode == SFmode)
8022 *total = COSTS_N_INSNS (2);
8023 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8024 *total = COSTS_N_INSNS (4);
8025 else
8026 *total = COSTS_N_INSNS (20);
8027 return false;
8028
8029 case ROTATE:
8030 if (REG_P (XEXP (x, 1)))
8031 *total = COSTS_N_INSNS (1); /* Need to subtract from 32. */
8032 else if (!CONST_INT_P (XEXP (x, 1)))
8033 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8034
8035 /* Fall through */
8036 case ROTATERT:
8037 if (mode != SImode)
8038 {
8039 *total += COSTS_N_INSNS (4);
8040 return true;
8041 }
8042
8043 /* Fall through */
8044 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8045 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8046 if (mode == DImode)
8047 {
8048 *total += COSTS_N_INSNS (3);
8049 return true;
8050 }
8051
8052 *total += COSTS_N_INSNS (1);
8053 /* Increase the cost of complex shifts because they aren't any faster,
8054 and they reduce dual issue opportunities. */
8055 if (arm_tune_cortex_a9
8056 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8057 ++*total;
8058
8059 return true;
8060
8061 case MINUS:
8062 if (mode == DImode)
8063 {
8064 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8065 if (CONST_INT_P (XEXP (x, 0))
8066 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8067 {
8068 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8069 return true;
8070 }
8071
8072 if (CONST_INT_P (XEXP (x, 1))
8073 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8074 {
8075 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8076 return true;
8077 }
8078
8079 return false;
8080 }
8081
8082 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8083 {
8084 if (TARGET_HARD_FLOAT
8085 && (mode == SFmode
8086 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8087 {
8088 *total = COSTS_N_INSNS (1);
8089 if (CONST_DOUBLE_P (XEXP (x, 0))
8090 && arm_const_double_rtx (XEXP (x, 0)))
8091 {
8092 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8093 return true;
8094 }
8095
8096 if (CONST_DOUBLE_P (XEXP (x, 1))
8097 && arm_const_double_rtx (XEXP (x, 1)))
8098 {
8099 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8100 return true;
8101 }
8102
8103 return false;
8104 }
8105 *total = COSTS_N_INSNS (20);
8106 return false;
8107 }
8108
8109 *total = COSTS_N_INSNS (1);
8110 if (CONST_INT_P (XEXP (x, 0))
8111 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8112 {
8113 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8114 return true;
8115 }
8116
8117 subcode = GET_CODE (XEXP (x, 1));
8118 if (subcode == ASHIFT || subcode == ASHIFTRT
8119 || subcode == LSHIFTRT
8120 || subcode == ROTATE || subcode == ROTATERT)
8121 {
8122 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8123 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8124 return true;
8125 }
8126
8127 /* A shift as a part of RSB costs no more than RSB itself. */
8128 if (GET_CODE (XEXP (x, 0)) == MULT
8129 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8130 {
8131 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8132 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8133 return true;
8134 }
8135
8136 if (subcode == MULT
8137 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8138 {
8139 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8140 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8141 return true;
8142 }
8143
8144 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8145 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8146 {
8147 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8148 if (REG_P (XEXP (XEXP (x, 1), 0))
8149 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8150 *total += COSTS_N_INSNS (1);
8151
8152 return true;
8153 }
8154
8155 /* Fall through */
8156
8157 case PLUS:
8158 if (code == PLUS && arm_arch6 && mode == SImode
8159 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8160 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8161 {
8162 *total = COSTS_N_INSNS (1);
8163 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8164 0, speed);
8165 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8166 return true;
8167 }
8168
8169 /* MLA: All arguments must be registers. We filter out
8170 multiplication by a power of two, so that we fall through to
8171 the code below. */
8172 if (GET_CODE (XEXP (x, 0)) == MULT
8173 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8174 {
8175 /* The cost comes from the cost of the multiply. */
8176 return false;
8177 }
8178
8179 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8180 {
8181 if (TARGET_HARD_FLOAT
8182 && (mode == SFmode
8183 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8184 {
8185 *total = COSTS_N_INSNS (1);
8186 if (CONST_DOUBLE_P (XEXP (x, 1))
8187 && arm_const_double_rtx (XEXP (x, 1)))
8188 {
8189 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8190 return true;
8191 }
8192
8193 return false;
8194 }
8195
8196 *total = COSTS_N_INSNS (20);
8197 return false;
8198 }
8199
8200 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8201 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8202 {
8203 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8204 if (REG_P (XEXP (XEXP (x, 0), 0))
8205 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8206 *total += COSTS_N_INSNS (1);
8207 return true;
8208 }
8209
8210 /* Fall through */
8211
8212 case AND: case XOR: case IOR:
8213
8214 /* Normally the frame registers will be split into reg+const during
8215 reload, so it is a bad idea to combine them with other instructions,
8216 since then they might not be moved outside of loops. As a compromise
8217 we allow integration with ops that have a constant as their second
8218 operand. */
8219 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8220 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8221 && !CONST_INT_P (XEXP (x, 1)))
8222 *total = COSTS_N_INSNS (1);
8223
8224 if (mode == DImode)
8225 {
8226 *total += COSTS_N_INSNS (2);
8227 if (CONST_INT_P (XEXP (x, 1))
8228 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8229 {
8230 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8231 return true;
8232 }
8233
8234 return false;
8235 }
8236
8237 *total += COSTS_N_INSNS (1);
8238 if (CONST_INT_P (XEXP (x, 1))
8239 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8240 {
8241 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8242 return true;
8243 }
8244 subcode = GET_CODE (XEXP (x, 0));
8245 if (subcode == ASHIFT || subcode == ASHIFTRT
8246 || subcode == LSHIFTRT
8247 || subcode == ROTATE || subcode == ROTATERT)
8248 {
8249 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8250 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8251 return true;
8252 }
8253
8254 if (subcode == MULT
8255 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8256 {
8257 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8258 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8259 return true;
8260 }
8261
8262 if (subcode == UMIN || subcode == UMAX
8263 || subcode == SMIN || subcode == SMAX)
8264 {
8265 *total = COSTS_N_INSNS (3);
8266 return true;
8267 }
8268
8269 return false;
8270
8271 case MULT:
8272 /* This should have been handled by the CPU specific routines. */
8273 gcc_unreachable ();
8274
8275 case TRUNCATE:
8276 if (arm_arch3m && mode == SImode
8277 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8278 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8279 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8280 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8281 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8282 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8283 {
8284 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8285 return true;
8286 }
8287 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8288 return false;
8289
8290 case NEG:
8291 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8292 {
8293 if (TARGET_HARD_FLOAT
8294 && (mode == SFmode
8295 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8296 {
8297 *total = COSTS_N_INSNS (1);
8298 return false;
8299 }
8300 *total = COSTS_N_INSNS (2);
8301 return false;
8302 }
8303
8304 /* Fall through */
8305 case NOT:
8306 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8307 if (mode == SImode && code == NOT)
8308 {
8309 subcode = GET_CODE (XEXP (x, 0));
8310 if (subcode == ASHIFT || subcode == ASHIFTRT
8311 || subcode == LSHIFTRT
8312 || subcode == ROTATE || subcode == ROTATERT
8313 || (subcode == MULT
8314 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8315 {
8316 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8317 /* Register shifts cost an extra cycle. */
8318 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8319 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8320 subcode, 1, speed);
8321 return true;
8322 }
8323 }
8324
8325 return false;
8326
8327 case IF_THEN_ELSE:
8328 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8329 {
8330 *total = COSTS_N_INSNS (4);
8331 return true;
8332 }
8333
8334 operand = XEXP (x, 0);
8335
8336 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8337 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8338 && REG_P (XEXP (operand, 0))
8339 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8340 *total += COSTS_N_INSNS (1);
8341 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8342 + rtx_cost (XEXP (x, 2), code, 2, speed));
8343 return true;
8344
8345 case NE:
8346 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8347 {
8348 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8349 return true;
8350 }
8351 goto scc_insn;
8352
8353 case GE:
8354 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8355 && mode == SImode && XEXP (x, 1) == const0_rtx)
8356 {
8357 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8358 return true;
8359 }
8360 goto scc_insn;
8361
8362 case LT:
8363 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8364 && mode == SImode && XEXP (x, 1) == const0_rtx)
8365 {
8366 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8367 return true;
8368 }
8369 goto scc_insn;
8370
8371 case EQ:
8372 case GT:
8373 case LE:
8374 case GEU:
8375 case LTU:
8376 case GTU:
8377 case LEU:
8378 case UNORDERED:
8379 case ORDERED:
8380 case UNEQ:
8381 case UNGE:
8382 case UNLT:
8383 case UNGT:
8384 case UNLE:
8385 scc_insn:
8386 /* SCC insns. If the comparison has already been performed, they
8387 cost 2 instructions. Otherwise they need an additional comparison
8388 before them. */
8389 *total = COSTS_N_INSNS (2);
8390 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8391 {
8392 return true;
8393 }
8394
8395 /* Fall through */
8396 case COMPARE:
8397 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8398 {
8399 *total = 0;
8400 return true;
8401 }
8402
8403 *total += COSTS_N_INSNS (1);
8404 if (CONST_INT_P (XEXP (x, 1))
8405 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8406 {
8407 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8408 return true;
8409 }
8410
8411 subcode = GET_CODE (XEXP (x, 0));
8412 if (subcode == ASHIFT || subcode == ASHIFTRT
8413 || subcode == LSHIFTRT
8414 || subcode == ROTATE || subcode == ROTATERT)
8415 {
8416 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8417 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8418 return true;
8419 }
8420
8421 if (subcode == MULT
8422 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8423 {
8424 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8425 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8426 return true;
8427 }
8428
8429 return false;
8430
8431 case UMIN:
8432 case UMAX:
8433 case SMIN:
8434 case SMAX:
8435 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8436 if (!CONST_INT_P (XEXP (x, 1))
8437 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8438 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8439 return true;
8440
8441 case ABS:
8442 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8443 {
8444 if (TARGET_HARD_FLOAT
8445 && (mode == SFmode
8446 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8447 {
8448 *total = COSTS_N_INSNS (1);
8449 return false;
8450 }
8451 *total = COSTS_N_INSNS (20);
8452 return false;
8453 }
8454 *total = COSTS_N_INSNS (1);
8455 if (mode == DImode)
8456 *total += COSTS_N_INSNS (3);
8457 return false;
8458
8459 case SIGN_EXTEND:
8460 case ZERO_EXTEND:
8461 *total = 0;
8462 if (GET_MODE_CLASS (mode) == MODE_INT)
8463 {
8464 rtx op = XEXP (x, 0);
8465 enum machine_mode opmode = GET_MODE (op);
8466
8467 if (mode == DImode)
8468 *total += COSTS_N_INSNS (1);
8469
8470 if (opmode != SImode)
8471 {
8472 if (MEM_P (op))
8473 {
8474 /* If !arm_arch4, we use one of the extendhisi2_mem
8475 or movhi_bytes patterns for HImode. For a QImode
8476 sign extension, we first zero-extend from memory
8477 and then perform a shift sequence. */
8478 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8479 *total += COSTS_N_INSNS (2);
8480 }
8481 else if (arm_arch6)
8482 *total += COSTS_N_INSNS (1);
8483
8484 /* We don't have the necessary insn, so we need to perform some
8485 other operation. */
8486 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8487 /* An and with constant 255. */
8488 *total += COSTS_N_INSNS (1);
8489 else
8490 /* A shift sequence. Increase costs slightly to avoid
8491 combining two shifts into an extend operation. */
8492 *total += COSTS_N_INSNS (2) + 1;
8493 }
8494
8495 return false;
8496 }
8497
8498 switch (GET_MODE (XEXP (x, 0)))
8499 {
8500 case V8QImode:
8501 case V4HImode:
8502 case V2SImode:
8503 case V4QImode:
8504 case V2HImode:
8505 *total = COSTS_N_INSNS (1);
8506 return false;
8507
8508 default:
8509 gcc_unreachable ();
8510 }
8511 gcc_unreachable ();
8512
8513 case ZERO_EXTRACT:
8514 case SIGN_EXTRACT:
8515 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8516 return true;
8517
8518 case CONST_INT:
8519 if (const_ok_for_arm (INTVAL (x))
8520 || const_ok_for_arm (~INTVAL (x)))
8521 *total = COSTS_N_INSNS (1);
8522 else
8523 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8524 INTVAL (x), NULL_RTX,
8525 NULL_RTX, 0, 0));
8526 return true;
8527
8528 case CONST:
8529 case LABEL_REF:
8530 case SYMBOL_REF:
8531 *total = COSTS_N_INSNS (3);
8532 return true;
8533
8534 case HIGH:
8535 *total = COSTS_N_INSNS (1);
8536 return true;
8537
8538 case LO_SUM:
8539 *total = COSTS_N_INSNS (1);
8540 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8541 return true;
8542
8543 case CONST_DOUBLE:
8544 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8545 && (mode == SFmode || !TARGET_VFP_SINGLE))
8546 *total = COSTS_N_INSNS (1);
8547 else
8548 *total = COSTS_N_INSNS (4);
8549 return true;
8550
8551 case SET:
8552 /* The vec_extract patterns accept memory operands that require an
8553 address reload. Account for the cost of that reload to give the
8554 auto-inc-dec pass an incentive to try to replace them. */
8555 if (TARGET_NEON && MEM_P (SET_DEST (x))
8556 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8557 {
8558 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8559 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8560 *total += COSTS_N_INSNS (1);
8561 return true;
8562 }
8563 /* Likewise for the vec_set patterns. */
8564 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8565 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8566 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8567 {
8568 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8569 *total = rtx_cost (mem, code, 0, speed);
8570 if (!neon_vector_mem_operand (mem, 2, true))
8571 *total += COSTS_N_INSNS (1);
8572 return true;
8573 }
8574 return false;
8575
8576 case UNSPEC:
8577 /* We cost this as high as our memory costs to allow this to
8578 be hoisted from loops. */
8579 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8580 {
8581 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8582 }
8583 return true;
8584
8585 case CONST_VECTOR:
8586 if (TARGET_NEON
8587 && TARGET_HARD_FLOAT
8588 && outer == SET
8589 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8590 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8591 *total = COSTS_N_INSNS (1);
8592 else
8593 *total = COSTS_N_INSNS (4);
8594 return true;
8595
8596 default:
8597 *total = COSTS_N_INSNS (4);
8598 return false;
8599 }
8600 }
8601
8602 /* Estimates the size cost of thumb1 instructions.
8603 For now most of the code is copied from thumb1_rtx_costs. We need more
8604 fine-grained tuning when we have more related test cases. */
8605 static inline int
8606 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8607 {
8608 enum machine_mode mode = GET_MODE (x);
8609 int words;
8610
8611 switch (code)
8612 {
8613 case ASHIFT:
8614 case ASHIFTRT:
8615 case LSHIFTRT:
8616 case ROTATERT:
8617 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8618
8619 case PLUS:
8620 case MINUS:
8621 /* Thumb-1 needs two instructions to fulfill the shiftadd/shiftsub0/shiftsub1
8622 patterns generated by RTL expansion, especially for the expansion of
8623 multiplication. */
8624 if ((GET_CODE (XEXP (x, 0)) == MULT
8625 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8626 || (GET_CODE (XEXP (x, 1)) == MULT
8627 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8628 return COSTS_N_INSNS (2);
8629 /* On purpose fall through for normal RTX. */
8630 case COMPARE:
8631 case NEG:
8632 case NOT:
8633 return COSTS_N_INSNS (1);
8634
8635 case MULT:
8636 if (CONST_INT_P (XEXP (x, 1)))
8637 {
8638 /* The Thumb-1 mul instruction can't operate on a constant; we must
8639 load it into a register first. */
8640 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8641 return COSTS_N_INSNS (1) + const_size;
8642 }
8643 return COSTS_N_INSNS (1);
8644
8645 case SET:
8646 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8647 the mode. */
8648 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8649 return (COSTS_N_INSNS (words)
8650 + 4 * ((MEM_P (SET_SRC (x)))
8651 + MEM_P (SET_DEST (x))));
8652
8653 case CONST_INT:
8654 if (outer == SET)
8655 {
8656 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8657 return COSTS_N_INSNS (1);
8658 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8659 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8660 return COSTS_N_INSNS (2);
8661 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8662 if (thumb_shiftable_const (INTVAL (x)))
8663 return COSTS_N_INSNS (2);
8664 return COSTS_N_INSNS (3);
8665 }
8666 else if ((outer == PLUS || outer == COMPARE)
8667 && INTVAL (x) < 256 && INTVAL (x) > -256)
8668 return 0;
8669 else if ((outer == IOR || outer == XOR || outer == AND)
8670 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8671 return COSTS_N_INSNS (1);
8672 else if (outer == AND)
8673 {
8674 int i;
8675 /* This duplicates the tests in the andsi3 expander. */
8676 for (i = 9; i <= 31; i++)
8677 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8678 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8679 return COSTS_N_INSNS (2);
8680 }
8681 else if (outer == ASHIFT || outer == ASHIFTRT
8682 || outer == LSHIFTRT)
8683 return 0;
8684 return COSTS_N_INSNS (2);
8685
8686 case CONST:
8687 case CONST_DOUBLE:
8688 case LABEL_REF:
8689 case SYMBOL_REF:
8690 return COSTS_N_INSNS (3);
8691
8692 case UDIV:
8693 case UMOD:
8694 case DIV:
8695 case MOD:
8696 return 100;
8697
8698 case TRUNCATE:
8699 return 99;
8700
8701 case AND:
8702 case XOR:
8703 case IOR:
8704 /* XXX guess. */
8705 return 8;
8706
8707 case MEM:
8708 /* XXX another guess. */
8709 /* Memory costs quite a lot for the first word, but subsequent words
8710 load at the equivalent of a single insn each. */
8711 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8712 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8713 ? 4 : 0));
8714
8715 case IF_THEN_ELSE:
8716 /* XXX a guess. */
8717 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8718 return 14;
8719 return 2;
8720
8721 case ZERO_EXTEND:
8722 /* XXX still guessing. */
8723 switch (GET_MODE (XEXP (x, 0)))
8724 {
8725 case QImode:
8726 return (1 + (mode == DImode ? 4 : 0)
8727 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8728
8729 case HImode:
8730 return (4 + (mode == DImode ? 4 : 0)
8731 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8732
8733 case SImode:
8734 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8735
8736 default:
8737 return 99;
8738 }
8739
8740 default:
8741 return 99;
8742 }
8743 }
8744
8745 /* RTX costs when optimizing for size. */
8746 static bool
8747 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8748 int *total)
8749 {
8750 enum machine_mode mode = GET_MODE (x);
8751 if (TARGET_THUMB1)
8752 {
8753 *total = thumb1_size_rtx_costs (x, code, outer_code);
8754 return true;
8755 }
8756
8757 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8758 switch (code)
8759 {
8760 case MEM:
8761 /* A memory access costs 1 insn if the mode is small, or the address is
8762 a single register; otherwise it costs one insn per word. */
8763 if (REG_P (XEXP (x, 0)))
8764 *total = COSTS_N_INSNS (1);
8765 else if (flag_pic
8766 && GET_CODE (XEXP (x, 0)) == PLUS
8767 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8768 /* This will be split into two instructions.
8769 See arm.md:calculate_pic_address. */
8770 *total = COSTS_N_INSNS (2);
8771 else
8772 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8773 return true;
8774
8775 case DIV:
8776 case MOD:
8777 case UDIV:
8778 case UMOD:
8779 /* Needs a libcall, so it costs about this. */
8780 *total = COSTS_N_INSNS (2);
8781 return false;
8782
8783 case ROTATE:
8784 if (mode == SImode && REG_P (XEXP (x, 1)))
8785 {
8786 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8787 return true;
8788 }
8789 /* Fall through */
8790 case ROTATERT:
8791 case ASHIFT:
8792 case LSHIFTRT:
8793 case ASHIFTRT:
8794 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8795 {
8796 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8797 return true;
8798 }
8799 else if (mode == SImode)
8800 {
8801 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8802 /* Slightly disparage register shifts, but not by much. */
8803 if (!CONST_INT_P (XEXP (x, 1)))
8804 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8805 return true;
8806 }
8807
8808 /* Needs a libcall. */
8809 *total = COSTS_N_INSNS (2);
8810 return false;
8811
8812 case MINUS:
8813 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8814 && (mode == SFmode || !TARGET_VFP_SINGLE))
8815 {
8816 *total = COSTS_N_INSNS (1);
8817 return false;
8818 }
8819
8820 if (mode == SImode)
8821 {
8822 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8823 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8824
8825 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8826 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8827 || subcode1 == ROTATE || subcode1 == ROTATERT
8828 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8829 || subcode1 == ASHIFTRT)
8830 {
8831 /* It's just the cost of the two operands. */
8832 *total = 0;
8833 return false;
8834 }
8835
8836 *total = COSTS_N_INSNS (1);
8837 return false;
8838 }
8839
8840 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8841 return false;
8842
8843 case PLUS:
8844 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8845 && (mode == SFmode || !TARGET_VFP_SINGLE))
8846 {
8847 *total = COSTS_N_INSNS (1);
8848 return false;
8849 }
8850
8851 /* A shift as a part of ADD costs nothing. */
8852 if (GET_CODE (XEXP (x, 0)) == MULT
8853 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8854 {
8855 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8856 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8857 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8858 return true;
8859 }
8860
8861 /* Fall through */
8862 case AND: case XOR: case IOR:
8863 if (mode == SImode)
8864 {
8865 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8866
8867 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8868 || subcode == LSHIFTRT || subcode == ASHIFTRT
8869 || (code == AND && subcode == NOT))
8870 {
8871 /* It's just the cost of the two operands. */
8872 *total = 0;
8873 return false;
8874 }
8875 }
8876
8877 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8878 return false;
8879
8880 case MULT:
8881 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8882 return false;
8883
8884 case NEG:
8885 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8886 && (mode == SFmode || !TARGET_VFP_SINGLE))
8887 {
8888 *total = COSTS_N_INSNS (1);
8889 return false;
8890 }
8891
8892 /* Fall through */
8893 case NOT:
8894 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8895
8896 return false;
8897
8898 case IF_THEN_ELSE:
8899 *total = 0;
8900 return false;
8901
8902 case COMPARE:
8903 if (cc_register (XEXP (x, 0), VOIDmode))
8904 * total = 0;
8905 else
8906 *total = COSTS_N_INSNS (1);
8907 return false;
8908
8909 case ABS:
8910 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8911 && (mode == SFmode || !TARGET_VFP_SINGLE))
8912 *total = COSTS_N_INSNS (1);
8913 else
8914 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8915 return false;
8916
8917 case SIGN_EXTEND:
8918 case ZERO_EXTEND:
8919 return arm_rtx_costs_1 (x, outer_code, total, 0);
8920
8921 case CONST_INT:
8922 if (const_ok_for_arm (INTVAL (x)))
8923 /* A multiplication by a constant requires another instruction
8924 to load the constant to a register. */
8925 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8926 ? 1 : 0);
8927 else if (const_ok_for_arm (~INTVAL (x)))
8928 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8929 else if (const_ok_for_arm (-INTVAL (x)))
8930 {
8931 if (outer_code == COMPARE || outer_code == PLUS
8932 || outer_code == MINUS)
8933 *total = 0;
8934 else
8935 *total = COSTS_N_INSNS (1);
8936 }
8937 else
8938 *total = COSTS_N_INSNS (2);
8939 return true;
8940
8941 case CONST:
8942 case LABEL_REF:
8943 case SYMBOL_REF:
8944 *total = COSTS_N_INSNS (2);
8945 return true;
8946
8947 case CONST_DOUBLE:
8948 *total = COSTS_N_INSNS (4);
8949 return true;
8950
8951 case CONST_VECTOR:
8952 if (TARGET_NEON
8953 && TARGET_HARD_FLOAT
8954 && outer_code == SET
8955 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8956 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8957 *total = COSTS_N_INSNS (1);
8958 else
8959 *total = COSTS_N_INSNS (4);
8960 return true;
8961
8962 case HIGH:
8963 case LO_SUM:
8964 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8965 cost of these slightly. */
8966 *total = COSTS_N_INSNS (1) + 1;
8967 return true;
8968
8969 case SET:
8970 return false;
8971
8972 default:
8973 if (mode != VOIDmode)
8974 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8975 else
8976 *total = COSTS_N_INSNS (4); /* Who knows? */
8977 return false;
8978 }
8979 }
8980
8981 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8982 operand, then return the operand that is being shifted. If the shift
8983 is not by a constant, then set SHIFT_REG to point to the operand.
8984 Return NULL if OP is not a shifter operand. */
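/* For example (illustrative RTL, not taken from any particular insn):
   (mult (reg A) (const_int 4)) -- the canonical form of a left shift by
   two -- yields (reg A), while (ashift (reg A) (reg B)) yields (reg A)
   and sets *SHIFT_REG to (reg B).  */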
8985 static rtx
8986 shifter_op_p (rtx op, rtx *shift_reg)
8987 {
8988 enum rtx_code code = GET_CODE (op);
8989
8990 if (code == MULT && CONST_INT_P (XEXP (op, 1))
8991 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
8992 return XEXP (op, 0);
8993 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
8994 return XEXP (op, 0);
8995 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
8996 || code == ASHIFTRT)
8997 {
8998 if (!CONST_INT_P (XEXP (op, 1)))
8999 *shift_reg = XEXP (op, 1);
9000 return XEXP (op, 0);
9001 }
9002
9003 return NULL;
9004 }
9005
9006 static bool
9007 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9008 {
9009 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9010 gcc_assert (GET_CODE (x) == UNSPEC);
9011
9012 switch (XINT (x, 1))
9013 {
9014 case UNSPEC_UNALIGNED_LOAD:
9015 /* We can only do unaligned loads into the integer unit, and we can't
9016 use LDM or LDRD. */
9017 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9018 if (speed_p)
9019 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9020 + extra_cost->ldst.load_unaligned);
9021
9022 #ifdef NOT_YET
9023 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9024 ADDR_SPACE_GENERIC, speed_p);
9025 #endif
9026 return true;
9027
9028 case UNSPEC_UNALIGNED_STORE:
9029 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9030 if (speed_p)
9031 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9032 + extra_cost->ldst.store_unaligned);
9033
9034 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9035 #ifdef NOT_YET
9036 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9037 ADDR_SPACE_GENERIC, speed_p);
9038 #endif
9039 return true;
9040
9041 case UNSPEC_VRINTZ:
9042 case UNSPEC_VRINTP:
9043 case UNSPEC_VRINTM:
9044 case UNSPEC_VRINTR:
9045 case UNSPEC_VRINTX:
9046 case UNSPEC_VRINTA:
9047 *cost = COSTS_N_INSNS (1);
9048 if (speed_p)
9049 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9050
9051 return true;
9052 default:
9053 *cost = COSTS_N_INSNS (2);
9054 break;
9055 }
9056 return false;
9057 }
9058
9059 /* Cost of a libcall. We assume one insn per argument, an amount for the
9060 call (one insn for -Os) and then one for processing the result. */
9061 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
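/* For example, the DImode divide and modulus cases below use
   LIBCALL_COST (2): with SPEED_P set that works out as COSTS_N_INSNS (20)
   (two insns for the arguments plus 18 for the call and its result),
   but only COSTS_N_INSNS (4) when optimizing for size.  */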
9062
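/* Helper for the PLUS and MINUS cases below when costing narrow (QImode
   or HImode) arithmetic synthesized in SImode: if operand IDX of X is a
   left shift that can be folded into the instruction as a shifter
   operand, add the shift and operand costs to *COST and return from the
   caller.  */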
9063 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9064 do \
9065 { \
9066 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9067 if (shift_op != NULL \
9068 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9069 { \
9070 if (shift_reg) \
9071 { \
9072 if (speed_p) \
9073 *cost += extra_cost->alu.arith_shift_reg; \
9074 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9075 } \
9076 else if (speed_p) \
9077 *cost += extra_cost->alu.arith_shift; \
9078 \
9079 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9080 + rtx_cost (XEXP (x, 1 - IDX), \
9081 OP, 1, speed_p)); \
9082 return true; \
9083 } \
9084 } \
9085 while (0);
9086
9087 /* RTX costs. Make an estimate of the cost of executing the operation
9088 X, which is contained within an operation with code OUTER_CODE.
9089 SPEED_P indicates whether the cost desired is the performance cost,
9090 or the size cost. The estimate is stored in COST and the return
9091 value is TRUE if the cost calculation is final, or FALSE if the
9092 caller should recurse through the operands of X to add additional
9093 costs.
9094
9095 We currently make no attempt to model the size savings of Thumb-2
9096 16-bit instructions. At the normal points in compilation where
9097 this code is called we have no measure of whether the condition
9098 flags are live or not, and thus no realistic way to determine what
9099 the size will eventually be. */
9100 static bool
9101 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9102 const struct cpu_cost_table *extra_cost,
9103 int *cost, bool speed_p)
9104 {
9105 enum machine_mode mode = GET_MODE (x);
9106
9107 if (TARGET_THUMB1)
9108 {
9109 if (speed_p)
9110 *cost = thumb1_rtx_costs (x, code, outer_code);
9111 else
9112 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9113 return true;
9114 }
9115
9116 switch (code)
9117 {
9118 case SET:
9119 *cost = 0;
9120 /* SET RTXs don't have a mode so we get it from the destination. */
9121 mode = GET_MODE (SET_DEST (x));
9122
9123 if (REG_P (SET_SRC (x))
9124 && REG_P (SET_DEST (x)))
9125 {
9126 /* Assume that most copies can be done with a single insn,
9127 unless we don't have HW FP, in which case everything
9128 larger than word mode will require two insns. */
9129 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9130 && GET_MODE_SIZE (mode) > 4)
9131 || mode == DImode)
9132 ? 2 : 1);
9133 /* Conditional register moves can be encoded
9134 in 16 bits in Thumb mode. */
9135 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9136 *cost >>= 1;
9137
9138 return true;
9139 }
9140
9141 if (CONST_INT_P (SET_SRC (x)))
9142 {
9143 /* Handle CONST_INT here, since the value doesn't have a mode
9144 and we would otherwise be unable to work out the true cost. */
9145 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9146 outer_code = SET;
9147 /* Slightly lower the cost of setting a core reg to a constant.
9148 This helps break up chains and allows for better scheduling. */
9149 if (REG_P (SET_DEST (x))
9150 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9151 *cost -= 1;
9152 x = SET_SRC (x);
9153 /* Immediate moves with an immediate in the range [0, 255] can be
9154 encoded in 16 bits in Thumb mode. */
9155 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9156 	      && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9157 *cost >>= 1;
9158 goto const_int_cost;
9159 }
9160
9161 return false;
9162
9163 case MEM:
9164 /* A memory access costs 1 insn if the mode is small, or the address is
9165 a single register, otherwise it costs one insn per word. */
9166 if (REG_P (XEXP (x, 0)))
9167 *cost = COSTS_N_INSNS (1);
9168 else if (flag_pic
9169 && GET_CODE (XEXP (x, 0)) == PLUS
9170 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9171 /* This will be split into two instructions.
9172 See arm.md:calculate_pic_address. */
9173 *cost = COSTS_N_INSNS (2);
9174 else
9175 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9176
9177 /* For speed optimizations, add the costs of the address and
9178 accessing memory. */
9179 if (speed_p)
9180 #ifdef NOT_YET
9181 *cost += (extra_cost->ldst.load
9182 + arm_address_cost (XEXP (x, 0), mode,
9183 ADDR_SPACE_GENERIC, speed_p));
9184 #else
9185 *cost += extra_cost->ldst.load;
9186 #endif
9187 return true;
9188
9189 case PARALLEL:
9190 {
9191 /* Calculations of LDM costs are complex. We assume an initial cost
9192 (ldm_1st) which will load the number of registers mentioned in
9193 ldm_regs_per_insn_1st registers; then each additional
9194 ldm_regs_per_insn_subsequent registers cost one more insn. The
9195 formula for N regs is thus:
9196
9197 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9198 + ldm_regs_per_insn_subsequent - 1)
9199 / ldm_regs_per_insn_subsequent).
9200
9201 Additional costs may also be added for addressing. A similar
9202 formula is used for STM. */
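         For example, with ldm_regs_per_insn_1st == 3 and
         ldm_regs_per_insn_subsequent == 2 (illustrative numbers, not any
         particular core's tuning), an 8-register LDM would cost
         ldm_1st + COSTS_N_INSNS ((MAX (8 - 3, 0) + 2 - 1) / 2)
         = ldm_1st + COSTS_N_INSNS (3).  */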
9203
9204 bool is_ldm = load_multiple_operation (x, SImode);
9205 bool is_stm = store_multiple_operation (x, SImode);
9206
9207 *cost = COSTS_N_INSNS (1);
9208
9209 if (is_ldm || is_stm)
9210 {
9211 if (speed_p)
9212 {
9213 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9214 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9215 ? extra_cost->ldst.ldm_regs_per_insn_1st
9216 : extra_cost->ldst.stm_regs_per_insn_1st;
9217 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9218 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9219 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9220
9221 *cost += regs_per_insn_1st
9222 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9223 + regs_per_insn_sub - 1)
9224 / regs_per_insn_sub);
9225 return true;
9226 }
9227
9228 }
9229 return false;
9230 }
9231 case DIV:
9232 case UDIV:
9233 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9234 && (mode == SFmode || !TARGET_VFP_SINGLE))
9235 *cost = COSTS_N_INSNS (speed_p
9236 ? extra_cost->fp[mode != SFmode].div : 1);
9237 else if (mode == SImode && TARGET_IDIV)
9238 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9239 else
9240 *cost = LIBCALL_COST (2);
9241 return false; /* All arguments must be in registers. */
9242
9243 case MOD:
9244 case UMOD:
9245 *cost = LIBCALL_COST (2);
9246 return false; /* All arguments must be in registers. */
9247
9248 case ROTATE:
9249 if (mode == SImode && REG_P (XEXP (x, 1)))
9250 {
9251 *cost = (COSTS_N_INSNS (2)
9252 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9253 if (speed_p)
9254 *cost += extra_cost->alu.shift_reg;
9255 return true;
9256 }
9257 /* Fall through */
9258 case ROTATERT:
9259 case ASHIFT:
9260 case LSHIFTRT:
9261 case ASHIFTRT:
9262 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9263 {
9264 *cost = (COSTS_N_INSNS (3)
9265 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9266 if (speed_p)
9267 *cost += 2 * extra_cost->alu.shift;
9268 return true;
9269 }
9270 else if (mode == SImode)
9271 {
9272 *cost = (COSTS_N_INSNS (1)
9273 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9274 /* Slightly disparage register shifts at -Os, but not by much. */
9275 if (!CONST_INT_P (XEXP (x, 1)))
9276 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9277 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9278 return true;
9279 }
9280 else if (GET_MODE_CLASS (mode) == MODE_INT
9281 && GET_MODE_SIZE (mode) < 4)
9282 {
9283 if (code == ASHIFT)
9284 {
9285 *cost = (COSTS_N_INSNS (1)
9286 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9287 /* Slightly disparage register shifts at -Os, but not by
9288 much. */
9289 if (!CONST_INT_P (XEXP (x, 1)))
9290 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9291 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9292 }
9293 else if (code == LSHIFTRT || code == ASHIFTRT)
9294 {
9295 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9296 {
9297 /* Can use SBFX/UBFX. */
9298 *cost = COSTS_N_INSNS (1);
9299 if (speed_p)
9300 *cost += extra_cost->alu.bfx;
9301 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9302 }
9303 else
9304 {
9305 *cost = COSTS_N_INSNS (2);
9306 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9307 if (speed_p)
9308 {
9309 if (CONST_INT_P (XEXP (x, 1)))
9310 *cost += 2 * extra_cost->alu.shift;
9311 else
9312 *cost += (extra_cost->alu.shift
9313 + extra_cost->alu.shift_reg);
9314 }
9315 else
9316 /* Slightly disparage register shifts. */
9317 *cost += !CONST_INT_P (XEXP (x, 1));
9318 }
9319 }
9320 else /* Rotates. */
9321 {
9322 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9323 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9324 if (speed_p)
9325 {
9326 if (CONST_INT_P (XEXP (x, 1)))
9327 *cost += (2 * extra_cost->alu.shift
9328 + extra_cost->alu.log_shift);
9329 else
9330 *cost += (extra_cost->alu.shift
9331 + extra_cost->alu.shift_reg
9332 + extra_cost->alu.log_shift_reg);
9333 }
9334 }
9335 return true;
9336 }
9337
9338 *cost = LIBCALL_COST (2);
9339 return false;
9340
9341 case MINUS:
9342 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9343 && (mode == SFmode || !TARGET_VFP_SINGLE))
9344 {
9345 *cost = COSTS_N_INSNS (1);
9346 if (GET_CODE (XEXP (x, 0)) == MULT
9347 || GET_CODE (XEXP (x, 1)) == MULT)
9348 {
9349 rtx mul_op0, mul_op1, sub_op;
9350
9351 if (speed_p)
9352 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9353
9354 if (GET_CODE (XEXP (x, 0)) == MULT)
9355 {
9356 mul_op0 = XEXP (XEXP (x, 0), 0);
9357 mul_op1 = XEXP (XEXP (x, 0), 1);
9358 sub_op = XEXP (x, 1);
9359 }
9360 else
9361 {
9362 mul_op0 = XEXP (XEXP (x, 1), 0);
9363 mul_op1 = XEXP (XEXP (x, 1), 1);
9364 sub_op = XEXP (x, 0);
9365 }
9366
9367 /* The first operand of the multiply may be optionally
9368 negated. */
9369 if (GET_CODE (mul_op0) == NEG)
9370 mul_op0 = XEXP (mul_op0, 0);
9371
9372 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9373 + rtx_cost (mul_op1, code, 0, speed_p)
9374 + rtx_cost (sub_op, code, 0, speed_p));
9375
9376 return true;
9377 }
9378
9379 if (speed_p)
9380 *cost += extra_cost->fp[mode != SFmode].addsub;
9381 return false;
9382 }
9383
9384 if (mode == SImode)
9385 {
9386 rtx shift_by_reg = NULL;
9387 rtx shift_op;
9388 rtx non_shift_op;
9389
9390 *cost = COSTS_N_INSNS (1);
9391
9392 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9393 if (shift_op == NULL)
9394 {
9395 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9396 non_shift_op = XEXP (x, 0);
9397 }
9398 else
9399 non_shift_op = XEXP (x, 1);
9400
9401 if (shift_op != NULL)
9402 {
9403 if (shift_by_reg != NULL)
9404 {
9405 if (speed_p)
9406 *cost += extra_cost->alu.arith_shift_reg;
9407 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9408 }
9409 else if (speed_p)
9410 *cost += extra_cost->alu.arith_shift;
9411
9412 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9413 + rtx_cost (non_shift_op, code, 0, speed_p));
9414 return true;
9415 }
9416
9417 if (arm_arch_thumb2
9418 && GET_CODE (XEXP (x, 1)) == MULT)
9419 {
9420 /* MLS. */
9421 if (speed_p)
9422 *cost += extra_cost->mult[0].add;
9423 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9424 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9425 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9426 return true;
9427 }
9428
9429 if (CONST_INT_P (XEXP (x, 0)))
9430 {
9431 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9432 INTVAL (XEXP (x, 0)), NULL_RTX,
9433 NULL_RTX, 1, 0);
9434 *cost = COSTS_N_INSNS (insns);
9435 if (speed_p)
9436 *cost += insns * extra_cost->alu.arith;
9437 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9438 return true;
9439 }
9440
9441 return false;
9442 }
9443
9444 if (GET_MODE_CLASS (mode) == MODE_INT
9445 && GET_MODE_SIZE (mode) < 4)
9446 {
9447 rtx shift_op, shift_reg;
9448 shift_reg = NULL;
9449
9450 /* We check both sides of the MINUS for shifter operands since,
9451 unlike PLUS, it's not commutative. */
9452
9453 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9454 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9455
9456 /* Slightly disparage, as we might need to widen the result. */
9457 *cost = 1 + COSTS_N_INSNS (1);
9458 if (speed_p)
9459 *cost += extra_cost->alu.arith;
9460
9461 if (CONST_INT_P (XEXP (x, 0)))
9462 {
9463 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9464 return true;
9465 }
9466
9467 return false;
9468 }
9469
9470 if (mode == DImode)
9471 {
9472 *cost = COSTS_N_INSNS (2);
9473
9474 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9475 {
9476 rtx op1 = XEXP (x, 1);
9477
9478 if (speed_p)
9479 *cost += 2 * extra_cost->alu.arith;
9480
9481 if (GET_CODE (op1) == ZERO_EXTEND)
9482 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9483 else
9484 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9485 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9486 0, speed_p);
9487 return true;
9488 }
9489 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9490 {
9491 if (speed_p)
9492 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9493 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9494 0, speed_p)
9495 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9496 return true;
9497 }
9498 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9499 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9500 {
9501 if (speed_p)
9502 *cost += (extra_cost->alu.arith
9503 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9504 ? extra_cost->alu.arith
9505 : extra_cost->alu.arith_shift));
9506 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9507 + rtx_cost (XEXP (XEXP (x, 1), 0),
9508 GET_CODE (XEXP (x, 1)), 0, speed_p));
9509 return true;
9510 }
9511
9512 if (speed_p)
9513 *cost += 2 * extra_cost->alu.arith;
9514 return false;
9515 }
9516
9517 /* Vector mode? */
9518
9519 *cost = LIBCALL_COST (2);
9520 return false;
9521
9522 case PLUS:
9523 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9524 && (mode == SFmode || !TARGET_VFP_SINGLE))
9525 {
9526 *cost = COSTS_N_INSNS (1);
9527 if (GET_CODE (XEXP (x, 0)) == MULT)
9528 {
9529 rtx mul_op0, mul_op1, add_op;
9530
9531 if (speed_p)
9532 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9533
9534 mul_op0 = XEXP (XEXP (x, 0), 0);
9535 mul_op1 = XEXP (XEXP (x, 0), 1);
9536 add_op = XEXP (x, 1);
9537
9538 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9539 + rtx_cost (mul_op1, code, 0, speed_p)
9540 + rtx_cost (add_op, code, 0, speed_p));
9541
9542 return true;
9543 }
9544
9545 if (speed_p)
9546 *cost += extra_cost->fp[mode != SFmode].addsub;
9547 return false;
9548 }
9549 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9550 {
9551 *cost = LIBCALL_COST (2);
9552 return false;
9553 }
9554
9555 /* Narrow modes can be synthesized in SImode, but the range
9556 of useful sub-operations is limited. Check for shift operations
9557 on one of the operands. Only left shifts can be used in the
9558 narrow modes. */
9559 if (GET_MODE_CLASS (mode) == MODE_INT
9560 && GET_MODE_SIZE (mode) < 4)
9561 {
9562 rtx shift_op, shift_reg;
9563 shift_reg = NULL;
9564
9565 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9566
9567 if (CONST_INT_P (XEXP (x, 1)))
9568 {
9569 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9570 INTVAL (XEXP (x, 1)), NULL_RTX,
9571 NULL_RTX, 1, 0);
9572 *cost = COSTS_N_INSNS (insns);
9573 if (speed_p)
9574 *cost += insns * extra_cost->alu.arith;
9575 /* Slightly penalize a narrow operation as the result may
9576 need widening. */
9577 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9578 return true;
9579 }
9580
9581 /* Slightly penalize a narrow operation as the result may
9582 need widening. */
9583 *cost = 1 + COSTS_N_INSNS (1);
9584 if (speed_p)
9585 *cost += extra_cost->alu.arith;
9586
9587 return false;
9588 }
9589
9590 if (mode == SImode)
9591 {
9592 rtx shift_op, shift_reg;
9593
9594 *cost = COSTS_N_INSNS (1);
9595 if (TARGET_INT_SIMD
9596 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9597 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9598 {
9599 /* UXTA[BH] or SXTA[BH]. */
9600 if (speed_p)
9601 *cost += extra_cost->alu.extend_arith;
9602 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9603 speed_p)
9604 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9605 return true;
9606 }
9607
9608 shift_reg = NULL;
9609 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9610 if (shift_op != NULL)
9611 {
9612 if (shift_reg)
9613 {
9614 if (speed_p)
9615 *cost += extra_cost->alu.arith_shift_reg;
9616 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9617 }
9618 else if (speed_p)
9619 *cost += extra_cost->alu.arith_shift;
9620
9621 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9622 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9623 return true;
9624 }
9625 if (GET_CODE (XEXP (x, 0)) == MULT)
9626 {
9627 rtx mul_op = XEXP (x, 0);
9628
9629 *cost = COSTS_N_INSNS (1);
9630
9631 if (TARGET_DSP_MULTIPLY
9632 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9633 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9634 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9635 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9636 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9637 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9638 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9639 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9640 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9641 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9642 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9643 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9644 == 16))))))
9645 {
9646 /* SMLA[BT][BT]. */
9647 if (speed_p)
9648 *cost += extra_cost->mult[0].extend_add;
9649 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9650 SIGN_EXTEND, 0, speed_p)
9651 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9652 SIGN_EXTEND, 0, speed_p)
9653 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9654 return true;
9655 }
9656
9657 if (speed_p)
9658 *cost += extra_cost->mult[0].add;
9659 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9660 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9661 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9662 return true;
9663 }
9664 if (CONST_INT_P (XEXP (x, 1)))
9665 {
9666 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9667 INTVAL (XEXP (x, 1)), NULL_RTX,
9668 NULL_RTX, 1, 0);
9669 *cost = COSTS_N_INSNS (insns);
9670 if (speed_p)
9671 *cost += insns * extra_cost->alu.arith;
9672 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9673 return true;
9674 }
9675 return false;
9676 }
9677
9678 if (mode == DImode)
9679 {
9680 if (arm_arch3m
9681 && GET_CODE (XEXP (x, 0)) == MULT
9682 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9683 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9684 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9685 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9686 {
9687 *cost = COSTS_N_INSNS (1);
9688 if (speed_p)
9689 *cost += extra_cost->mult[1].extend_add;
9690 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9691 ZERO_EXTEND, 0, speed_p)
9692 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9693 ZERO_EXTEND, 0, speed_p)
9694 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9695 return true;
9696 }
9697
9698 *cost = COSTS_N_INSNS (2);
9699
9700 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9701 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9702 {
9703 if (speed_p)
9704 *cost += (extra_cost->alu.arith
9705 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9706 ? extra_cost->alu.arith
9707 : extra_cost->alu.arith_shift));
9708
9709 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9710 speed_p)
9711 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9712 return true;
9713 }
9714
9715 if (speed_p)
9716 *cost += 2 * extra_cost->alu.arith;
9717 return false;
9718 }
9719
9720 /* Vector mode? */
9721 *cost = LIBCALL_COST (2);
9722 return false;
9723
9724 case AND: case XOR: case IOR:
9725 if (mode == SImode)
9726 {
9727 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9728 rtx op0 = XEXP (x, 0);
9729 rtx shift_op, shift_reg;
9730
9731 *cost = COSTS_N_INSNS (1);
9732
9733 if (subcode == NOT
9734 && (code == AND
9735 || (code == IOR && TARGET_THUMB2)))
9736 op0 = XEXP (op0, 0);
9737
9738 shift_reg = NULL;
9739 shift_op = shifter_op_p (op0, &shift_reg);
9740 if (shift_op != NULL)
9741 {
9742 if (shift_reg)
9743 {
9744 if (speed_p)
9745 *cost += extra_cost->alu.log_shift_reg;
9746 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9747 }
9748 else if (speed_p)
9749 *cost += extra_cost->alu.log_shift;
9750
9751 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9752 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9753 return true;
9754 }
9755
9756 if (CONST_INT_P (XEXP (x, 1)))
9757 {
9758 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9759 INTVAL (XEXP (x, 1)), NULL_RTX,
9760 NULL_RTX, 1, 0);
9761
9762 *cost = COSTS_N_INSNS (insns);
9763 if (speed_p)
9764 *cost += insns * extra_cost->alu.logical;
9765 *cost += rtx_cost (op0, code, 0, speed_p);
9766 return true;
9767 }
9768
9769 if (speed_p)
9770 *cost += extra_cost->alu.logical;
9771 *cost += (rtx_cost (op0, code, 0, speed_p)
9772 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9773 return true;
9774 }
9775
9776 if (mode == DImode)
9777 {
9778 rtx op0 = XEXP (x, 0);
9779 enum rtx_code subcode = GET_CODE (op0);
9780
9781 *cost = COSTS_N_INSNS (2);
9782
9783 if (subcode == NOT
9784 && (code == AND
9785 || (code == IOR && TARGET_THUMB2)))
9786 op0 = XEXP (op0, 0);
9787
9788 if (GET_CODE (op0) == ZERO_EXTEND)
9789 {
9790 if (speed_p)
9791 *cost += 2 * extra_cost->alu.logical;
9792
9793 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9794 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9795 return true;
9796 }
9797 else if (GET_CODE (op0) == SIGN_EXTEND)
9798 {
9799 if (speed_p)
9800 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9801
9802 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9803 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9804 return true;
9805 }
9806
9807 if (speed_p)
9808 *cost += 2 * extra_cost->alu.logical;
9809
9810 return true;
9811 }
9812 /* Vector mode? */
9813
9814 *cost = LIBCALL_COST (2);
9815 return false;
9816
9817 case MULT:
9818 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9819 && (mode == SFmode || !TARGET_VFP_SINGLE))
9820 {
9821 rtx op0 = XEXP (x, 0);
9822
9823 *cost = COSTS_N_INSNS (1);
9824
9825 if (GET_CODE (op0) == NEG)
9826 op0 = XEXP (op0, 0);
9827
9828 if (speed_p)
9829 *cost += extra_cost->fp[mode != SFmode].mult;
9830
9831 *cost += (rtx_cost (op0, MULT, 0, speed_p)
9832 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
9833 return true;
9834 }
9835 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9836 {
9837 *cost = LIBCALL_COST (2);
9838 return false;
9839 }
9840
9841 if (mode == SImode)
9842 {
9843 *cost = COSTS_N_INSNS (1);
9844 if (TARGET_DSP_MULTIPLY
9845 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9846 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9847 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9848 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9849 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9850 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9851 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9852 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9853 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9854 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9855 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9856 && (INTVAL (XEXP (XEXP (x, 1), 1))
9857 == 16))))))
9858 {
9859 /* SMUL[TB][TB]. */
9860 if (speed_p)
9861 *cost += extra_cost->mult[0].extend;
9862 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
9863 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
9864 return true;
9865 }
9866 if (speed_p)
9867 *cost += extra_cost->mult[0].simple;
9868 return false;
9869 }
9870
9871 if (mode == DImode)
9872 {
9873 if (arm_arch3m
9874 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9875 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9876 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9877 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9878 {
9879 *cost = COSTS_N_INSNS (1);
9880 if (speed_p)
9881 *cost += extra_cost->mult[1].extend;
9882 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
9883 ZERO_EXTEND, 0, speed_p)
9884 + rtx_cost (XEXP (XEXP (x, 1), 0),
9885 ZERO_EXTEND, 0, speed_p));
9886 return true;
9887 }
9888
9889 *cost = LIBCALL_COST (2);
9890 return false;
9891 }
9892
9893 /* Vector mode? */
9894 *cost = LIBCALL_COST (2);
9895 return false;
9896
9897 case NEG:
9898 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9899 && (mode == SFmode || !TARGET_VFP_SINGLE))
9900 {
9901 *cost = COSTS_N_INSNS (1);
9902 if (speed_p)
9903 *cost += extra_cost->fp[mode != SFmode].neg;
9904
9905 return false;
9906 }
9907 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9908 {
9909 *cost = LIBCALL_COST (1);
9910 return false;
9911 }
9912
9913 if (mode == SImode)
9914 {
9915 if (GET_CODE (XEXP (x, 0)) == ABS)
9916 {
9917 *cost = COSTS_N_INSNS (2);
9918 /* Assume the non-flag-changing variant. */
9919 if (speed_p)
9920 *cost += (extra_cost->alu.log_shift
9921 + extra_cost->alu.arith_shift);
9922 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
9923 return true;
9924 }
9925
9926 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
9927 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
9928 {
9929 *cost = COSTS_N_INSNS (2);
9930 /* No extra cost for MOV imm and MVN imm. */
9931 /* If the comparison op is using the flags, there's no further
9932 cost, otherwise we need to add the cost of the comparison. */
9933 if (!(REG_P (XEXP (XEXP (x, 0), 0))
9934 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
9935 && XEXP (XEXP (x, 0), 1) == const0_rtx))
9936 {
9937 *cost += (COSTS_N_INSNS (1)
9938 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
9939 speed_p)
9940 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
9941 speed_p));
9942 if (speed_p)
9943 *cost += extra_cost->alu.arith;
9944 }
9945 return true;
9946 }
9947 *cost = COSTS_N_INSNS (1);
9948 if (speed_p)
9949 *cost += extra_cost->alu.arith;
9950 return false;
9951 }
9952
9953 if (GET_MODE_CLASS (mode) == MODE_INT
9954 && GET_MODE_SIZE (mode) < 4)
9955 {
9956 /* Slightly disparage, as we might need an extend operation. */
9957 *cost = 1 + COSTS_N_INSNS (1);
9958 if (speed_p)
9959 *cost += extra_cost->alu.arith;
9960 return false;
9961 }
9962
9963 if (mode == DImode)
9964 {
9965 *cost = COSTS_N_INSNS (2);
9966 if (speed_p)
9967 *cost += 2 * extra_cost->alu.arith;
9968 return false;
9969 }
9970
9971 /* Vector mode? */
9972 *cost = LIBCALL_COST (1);
9973 return false;
9974
9975 case NOT:
9976 if (mode == SImode)
9977 {
9978 rtx shift_op;
9979 rtx shift_reg = NULL;
9980
9981 *cost = COSTS_N_INSNS (1);
9982 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9983
9984 if (shift_op)
9985 {
9986 if (shift_reg != NULL)
9987 {
9988 if (speed_p)
9989 *cost += extra_cost->alu.log_shift_reg;
9990 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9991 }
9992 else if (speed_p)
9993 *cost += extra_cost->alu.log_shift;
9994 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
9995 return true;
9996 }
9997
9998 if (speed_p)
9999 *cost += extra_cost->alu.logical;
10000 return false;
10001 }
10002 if (mode == DImode)
10003 {
10004 *cost = COSTS_N_INSNS (2);
10005 return false;
10006 }
10007
10008 /* Vector mode? */
10009
10010     *cost = LIBCALL_COST (1);
10011 return false;
10012
10013 case IF_THEN_ELSE:
10014 {
10015 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10016 {
10017 *cost = COSTS_N_INSNS (4);
10018 return true;
10019 }
10020 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10021 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10022
10023 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10024 /* Assume that if one arm of the if_then_else is a register,
10025 that it will be tied with the result and eliminate the
10026 conditional insn. */
10027 if (REG_P (XEXP (x, 1)))
10028 *cost += op2cost;
10029 else if (REG_P (XEXP (x, 2)))
10030 *cost += op1cost;
10031 else
10032 {
10033 if (speed_p)
10034 {
10035 if (extra_cost->alu.non_exec_costs_exec)
10036 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10037 else
10038 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10039 }
10040 else
10041 *cost += op1cost + op2cost;
10042 }
10043 }
10044 return true;
10045
10046 case COMPARE:
10047 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10048 *cost = 0;
10049 else
10050 {
10051 enum machine_mode op0mode;
10052 /* We'll mostly assume that the cost of a compare is the cost of the
10053 LHS. However, there are some notable exceptions. */
10054
10055 /* Floating point compares are never done as side-effects. */
10056 op0mode = GET_MODE (XEXP (x, 0));
10057 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10058 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10059 {
10060 *cost = COSTS_N_INSNS (1);
10061 if (speed_p)
10062 *cost += extra_cost->fp[op0mode != SFmode].compare;
10063
10064 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10065 {
10066 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10067 return true;
10068 }
10069
10070 return false;
10071 }
10072 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10073 {
10074 *cost = LIBCALL_COST (2);
10075 return false;
10076 }
10077
10078 /* DImode compares normally take two insns. */
10079 if (op0mode == DImode)
10080 {
10081 *cost = COSTS_N_INSNS (2);
10082 if (speed_p)
10083 *cost += 2 * extra_cost->alu.arith;
10084 return false;
10085 }
10086
10087 if (op0mode == SImode)
10088 {
10089 rtx shift_op;
10090 rtx shift_reg;
10091
10092 if (XEXP (x, 1) == const0_rtx
10093 && !(REG_P (XEXP (x, 0))
10094 || (GET_CODE (XEXP (x, 0)) == SUBREG
10095 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10096 {
10097 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10098
10099 /* Multiply operations that set the flags are often
10100 significantly more expensive. */
10101 if (speed_p
10102 && GET_CODE (XEXP (x, 0)) == MULT
10103 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10104 *cost += extra_cost->mult[0].flag_setting;
10105
10106 if (speed_p
10107 && GET_CODE (XEXP (x, 0)) == PLUS
10108 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10109 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10110 0), 1), mode))
10111 *cost += extra_cost->mult[0].flag_setting;
10112 return true;
10113 }
10114
10115 shift_reg = NULL;
10116 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10117 if (shift_op != NULL)
10118 {
10119 *cost = COSTS_N_INSNS (1);
10120 if (shift_reg != NULL)
10121 {
10122 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10123 if (speed_p)
10124 *cost += extra_cost->alu.arith_shift_reg;
10125 }
10126 else if (speed_p)
10127 *cost += extra_cost->alu.arith_shift;
10128 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10129 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10130 return true;
10131 }
10132
10133 *cost = COSTS_N_INSNS (1);
10134 if (speed_p)
10135 *cost += extra_cost->alu.arith;
10136 if (CONST_INT_P (XEXP (x, 1))
10137 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10138 {
10139 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10140 return true;
10141 }
10142 return false;
10143 }
10144
10145 /* Vector mode? */
10146
10147 *cost = LIBCALL_COST (2);
10148 return false;
10149 }
10150 return true;
10151
10152 case EQ:
10153 case NE:
10154 case LT:
10155 case LE:
10156 case GT:
10157 case GE:
10158 case LTU:
10159 case LEU:
10160 case GEU:
10161 case GTU:
10162 case ORDERED:
10163 case UNORDERED:
10164 case UNEQ:
10165 case UNLE:
10166 case UNLT:
10167 case UNGE:
10168 case UNGT:
10169 case LTGT:
10170 if (outer_code == SET)
10171 {
10172 /* Is it a store-flag operation? */
10173 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10174 && XEXP (x, 1) == const0_rtx)
10175 {
10176 /* Thumb also needs an IT insn. */
10177 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10178 return true;
10179 }
10180 if (XEXP (x, 1) == const0_rtx)
10181 {
10182 switch (code)
10183 {
10184 case LT:
10185 /* LSR Rd, Rn, #31. */
10186 *cost = COSTS_N_INSNS (1);
10187 if (speed_p)
10188 *cost += extra_cost->alu.shift;
10189 break;
10190
10191 case EQ:
10192 /* RSBS T1, Rn, #0
10193 ADC Rd, Rn, T1. */
10194
10195 case NE:
10196 /* SUBS T1, Rn, #1
10197 SBC Rd, Rn, T1. */
10198 *cost = COSTS_N_INSNS (2);
10199 break;
10200
10201 case LE:
10202 /* RSBS T1, Rn, Rn, LSR #31
10203 ADC Rd, Rn, T1. */
10204 *cost = COSTS_N_INSNS (2);
10205 if (speed_p)
10206 *cost += extra_cost->alu.arith_shift;
10207 break;
10208
10209 case GT:
10210 /* RSB Rd, Rn, Rn, ASR #1
10211 LSR Rd, Rd, #31. */
10212 *cost = COSTS_N_INSNS (2);
10213 if (speed_p)
10214 *cost += (extra_cost->alu.arith_shift
10215 + extra_cost->alu.shift);
10216 break;
10217
10218 case GE:
10219 /* ASR Rd, Rn, #31
10220 	         ADD Rd, Rd, #1. */
10221 *cost = COSTS_N_INSNS (2);
10222 if (speed_p)
10223 *cost += extra_cost->alu.shift;
10224 break;
10225
10226 default:
10227 /* Remaining cases are either meaningless or would take
10228 three insns anyway. */
10229 *cost = COSTS_N_INSNS (3);
10230 break;
10231 }
10232 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10233 return true;
10234 }
10235 else
10236 {
10237 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10238 if (CONST_INT_P (XEXP (x, 1))
10239 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10240 {
10241 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10242 return true;
10243 }
10244
10245 return false;
10246 }
10247 }
10248 /* Not directly inside a set. If it involves the condition code
10249 register it must be the condition for a branch, cond_exec or
10250 I_T_E operation. Since the comparison is performed elsewhere
10251 this is just the control part which has no additional
10252 cost. */
10253 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10254 && XEXP (x, 1) == const0_rtx)
10255 {
10256 *cost = 0;
10257 return true;
10258 }
10259 return false;
10260
10261 case ABS:
10262 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10263 && (mode == SFmode || !TARGET_VFP_SINGLE))
10264 {
10265 *cost = COSTS_N_INSNS (1);
10266 if (speed_p)
10267 *cost += extra_cost->fp[mode != SFmode].neg;
10268
10269 return false;
10270 }
10271 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10272 {
10273 *cost = LIBCALL_COST (1);
10274 return false;
10275 }
10276
10277 if (mode == SImode)
10278 {
10279 *cost = COSTS_N_INSNS (1);
10280 if (speed_p)
10281 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10282 return false;
10283 }
10284 /* Vector mode? */
10285 *cost = LIBCALL_COST (1);
10286 return false;
10287
10288 case SIGN_EXTEND:
10289 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10290 && MEM_P (XEXP (x, 0)))
10291 {
10292 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10293
10294 if (mode == DImode)
10295 *cost += COSTS_N_INSNS (1);
10296
10297 if (!speed_p)
10298 return true;
10299
10300 if (GET_MODE (XEXP (x, 0)) == SImode)
10301 *cost += extra_cost->ldst.load;
10302 else
10303 *cost += extra_cost->ldst.load_sign_extend;
10304
10305 if (mode == DImode)
10306 *cost += extra_cost->alu.shift;
10307
10308 return true;
10309 }
10310
10311 /* Widening from less than 32-bits requires an extend operation. */
10312 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10313 {
10314 /* We have SXTB/SXTH. */
10315 *cost = COSTS_N_INSNS (1);
10316 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10317 if (speed_p)
10318 *cost += extra_cost->alu.extend;
10319 }
10320 else if (GET_MODE (XEXP (x, 0)) != SImode)
10321 {
10322 /* Needs two shifts. */
10323 *cost = COSTS_N_INSNS (2);
10324 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10325 if (speed_p)
10326 *cost += 2 * extra_cost->alu.shift;
10327 }
10328
10329 /* Widening beyond 32-bits requires one more insn. */
10330 if (mode == DImode)
10331 {
10332 *cost += COSTS_N_INSNS (1);
10333 if (speed_p)
10334 *cost += extra_cost->alu.shift;
10335 }
10336
10337 return true;
10338
10339 case ZERO_EXTEND:
10340 if ((arm_arch4
10341 || GET_MODE (XEXP (x, 0)) == SImode
10342 || GET_MODE (XEXP (x, 0)) == QImode)
10343 && MEM_P (XEXP (x, 0)))
10344 {
10345 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10346
10347 if (mode == DImode)
10348 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10349
10350 return true;
10351 }
10352
10353 /* Widening from less than 32-bits requires an extend operation. */
10354 if (GET_MODE (XEXP (x, 0)) == QImode)
10355 {
10356 /* UXTB can be a shorter instruction in Thumb2, but it might
10357 be slower than the AND Rd, Rn, #255 alternative. When
10358 optimizing for speed it should never be slower to use
10359 AND, and we don't really model 16-bit vs 32-bit insns
10360 here. */
10361 *cost = COSTS_N_INSNS (1);
10362 if (speed_p)
10363 *cost += extra_cost->alu.logical;
10364 }
10365 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10366 {
10367 /* We have UXTB/UXTH. */
10368 *cost = COSTS_N_INSNS (1);
10369 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10370 if (speed_p)
10371 *cost += extra_cost->alu.extend;
10372 }
10373 else if (GET_MODE (XEXP (x, 0)) != SImode)
10374 {
10375 /* Needs two shifts. It's marginally preferable to use
10376 shifts rather than two BIC instructions as the second
10377 shift may merge with a subsequent insn as a shifter
10378 op. */
10379 *cost = COSTS_N_INSNS (2);
10380 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10381 if (speed_p)
10382 *cost += 2 * extra_cost->alu.shift;
10383 }
10384 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10385 *cost = COSTS_N_INSNS (1);
10386
10387 /* Widening beyond 32-bits requires one more insn. */
10388 if (mode == DImode)
10389 {
10390 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10391 }
10392
10393 return true;
10394
10395 case CONST_INT:
10396 *cost = 0;
10397 /* CONST_INT has no mode, so we cannot tell for sure how many
10398 insns are really going to be needed. The best we can do is
10399 look at the value passed. If it fits in SImode, then assume
10400 that's the mode it will be used for. Otherwise assume it
10401 will be used in DImode. */
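      /* For example, (const_int -1) survives truncation to SImode and is
         costed as a single 32-bit constant, whereas a value such as
         0x100000000 does not, so it is costed as a DImode constant and
         split into two 32-bit halves below.  */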
10402 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10403 mode = SImode;
10404 else
10405 mode = DImode;
10406
10407 /* Avoid blowing up in arm_gen_constant (). */
10408 if (!(outer_code == PLUS
10409 || outer_code == AND
10410 || outer_code == IOR
10411 || outer_code == XOR
10412 || outer_code == MINUS))
10413 outer_code = SET;
10414
10415 const_int_cost:
10416 if (mode == SImode)
10417 {
10418 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10419 INTVAL (x), NULL, NULL,
10420 0, 0));
10421 /* Extra costs? */
10422 }
10423 else
10424 {
10425 *cost += COSTS_N_INSNS (arm_gen_constant
10426 (outer_code, SImode, NULL,
10427 trunc_int_for_mode (INTVAL (x), SImode),
10428 NULL, NULL, 0, 0)
10429 + arm_gen_constant (outer_code, SImode, NULL,
10430 INTVAL (x) >> 32, NULL,
10431 NULL, 0, 0));
10432 /* Extra costs? */
10433 }
10434
10435 return true;
10436
10437 case CONST:
10438 case LABEL_REF:
10439 case SYMBOL_REF:
10440 if (speed_p)
10441 {
10442 if (arm_arch_thumb2 && !flag_pic)
10443 *cost = COSTS_N_INSNS (2);
10444 else
10445 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10446 }
10447 else
10448 *cost = COSTS_N_INSNS (2);
10449
10450 if (flag_pic)
10451 {
10452 *cost += COSTS_N_INSNS (1);
10453 if (speed_p)
10454 *cost += extra_cost->alu.arith;
10455 }
10456
10457 return true;
10458
10459 case CONST_FIXED:
10460 *cost = COSTS_N_INSNS (4);
10461 /* Fixme. */
10462 return true;
10463
10464 case CONST_DOUBLE:
10465 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10466 && (mode == SFmode || !TARGET_VFP_SINGLE))
10467 {
10468 if (vfp3_const_double_rtx (x))
10469 {
10470 *cost = COSTS_N_INSNS (1);
10471 if (speed_p)
10472 *cost += extra_cost->fp[mode == DFmode].fpconst;
10473 return true;
10474 }
10475
10476 if (speed_p)
10477 {
10478 *cost = COSTS_N_INSNS (1);
10479 if (mode == DFmode)
10480 *cost += extra_cost->ldst.loadd;
10481 else
10482 *cost += extra_cost->ldst.loadf;
10483 }
10484 else
10485 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10486
10487 return true;
10488 }
10489 *cost = COSTS_N_INSNS (4);
10490 return true;
10491
10492 case CONST_VECTOR:
10493 /* Fixme. */
10494 if (TARGET_NEON
10495 && TARGET_HARD_FLOAT
10496 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10497 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10498 *cost = COSTS_N_INSNS (1);
10499 else
10500 *cost = COSTS_N_INSNS (4);
10501 return true;
10502
10503 case HIGH:
10504 case LO_SUM:
10505 *cost = COSTS_N_INSNS (1);
10506 /* When optimizing for size, we prefer constant pool entries to
10507 MOVW/MOVT pairs, so bump the cost of these slightly. */
10508 if (!speed_p)
10509 *cost += 1;
10510 return true;
10511
10512 case CLZ:
10513 *cost = COSTS_N_INSNS (1);
10514 if (speed_p)
10515 *cost += extra_cost->alu.clz;
10516 return false;
10517
10518 case SMIN:
10519 if (XEXP (x, 1) == const0_rtx)
10520 {
10521 *cost = COSTS_N_INSNS (1);
10522 if (speed_p)
10523 *cost += extra_cost->alu.log_shift;
10524 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10525 return true;
10526 }
10527 /* Fall through. */
10528 case SMAX:
10529 case UMIN:
10530 case UMAX:
10531 *cost = COSTS_N_INSNS (2);
10532 return false;
10533
10534 case TRUNCATE:
10535 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10536 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10537 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10538 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10539 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10540 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10541 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10542 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10543 == ZERO_EXTEND))))
10544 {
10545 *cost = COSTS_N_INSNS (1);
10546 if (speed_p)
10547 *cost += extra_cost->mult[1].extend;
10548 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10549 speed_p)
10550 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10551 0, speed_p));
10552 return true;
10553 }
10554 *cost = LIBCALL_COST (1);
10555 return false;
10556
10557 case UNSPEC:
10558 return arm_unspec_cost (x, outer_code, speed_p, cost);
10559
10560 case PC:
10561 /* Reading the PC is like reading any other register. Writing it
10562 is more expensive, but we take that into account elsewhere. */
10563 *cost = 0;
10564 return true;
10565
10566 case ZERO_EXTRACT:
10567 /* TODO: Simple zero_extract of bottom bits using AND. */
10568 /* Fall through. */
10569 case SIGN_EXTRACT:
10570 if (arm_arch6
10571 && mode == SImode
10572 && CONST_INT_P (XEXP (x, 1))
10573 && CONST_INT_P (XEXP (x, 2)))
10574 {
10575 *cost = COSTS_N_INSNS (1);
10576 if (speed_p)
10577 *cost += extra_cost->alu.bfx;
10578 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10579 return true;
10580 }
10581 /* Without UBFX/SBFX, need to resort to shift operations. */
10582 *cost = COSTS_N_INSNS (2);
10583 if (speed_p)
10584 *cost += 2 * extra_cost->alu.shift;
10585 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10586 return true;
10587
10588 case FLOAT_EXTEND:
10589 if (TARGET_HARD_FLOAT)
10590 {
10591 *cost = COSTS_N_INSNS (1);
10592 if (speed_p)
10593 *cost += extra_cost->fp[mode == DFmode].widen;
10594 if (!TARGET_FPU_ARMV8
10595 && GET_MODE (XEXP (x, 0)) == HFmode)
10596 {
10597 /* Pre v8, widening HF->DF is a two-step process, first
10598 widening to SFmode. */
10599 *cost += COSTS_N_INSNS (1);
10600 if (speed_p)
10601 *cost += extra_cost->fp[0].widen;
10602 }
10603 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10604 return true;
10605 }
10606
10607 *cost = LIBCALL_COST (1);
10608 return false;
10609
10610 case FLOAT_TRUNCATE:
10611 if (TARGET_HARD_FLOAT)
10612 {
10613 *cost = COSTS_N_INSNS (1);
10614 if (speed_p)
10615 *cost += extra_cost->fp[mode == DFmode].narrow;
10616 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10617 return true;
10618 /* Vector modes? */
10619 }
10620 *cost = LIBCALL_COST (1);
10621 return false;
10622
10623 case FIX:
10624 case UNSIGNED_FIX:
10625 if (TARGET_HARD_FLOAT)
10626 {
10627 if (GET_MODE_CLASS (mode) == MODE_INT)
10628 {
10629 *cost = COSTS_N_INSNS (1);
10630 if (speed_p)
10631 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10632 	    /* Strip off the 'cost' of rounding towards zero. */
10633 if (GET_CODE (XEXP (x, 0)) == FIX)
10634 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10635 else
10636 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10637 /* ??? Increase the cost to deal with transferring from
10638 FP -> CORE registers? */
10639 return true;
10640 }
10641 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10642 && TARGET_FPU_ARMV8)
10643 {
10644 *cost = COSTS_N_INSNS (1);
10645 if (speed_p)
10646 *cost += extra_cost->fp[mode == DFmode].roundint;
10647 return false;
10648 }
10649 /* Vector costs? */
10650 }
10651 *cost = LIBCALL_COST (1);
10652 return false;
10653
10654 case FLOAT:
10655 case UNSIGNED_FLOAT:
10656 if (TARGET_HARD_FLOAT)
10657 {
10658 /* ??? Increase the cost to deal with transferring from CORE
10659 -> FP registers? */
10660 *cost = COSTS_N_INSNS (1);
10661 if (speed_p)
10662 *cost += extra_cost->fp[mode == DFmode].fromint;
10663 return false;
10664 }
10665 *cost = LIBCALL_COST (1);
10666 return false;
10667
10668 case CALL:
10669 *cost = COSTS_N_INSNS (1);
10670 return true;
10671
10672 case ASM_OPERANDS:
10673 {
10674 /* Just a guess. Guess number of instructions in the asm
10675 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10676 though (see PR60663). */
10677 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10678 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10679
10680 *cost = COSTS_N_INSNS (asm_length + num_operands);
10681 return true;
10682 }
10683 default:
10684 if (mode != VOIDmode)
10685 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10686 else
10687 *cost = COSTS_N_INSNS (4); /* Who knows? */
10688 return false;
10689 }
10690 }
10691
10692 #undef HANDLE_NARROW_SHIFT_ARITH
10693
10694 /* RTX costs: dispatch to the appropriate implementation for size or speed. */
10695 static bool
10696 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10697 int *total, bool speed)
10698 {
10699 bool result;
10700
10701 if (TARGET_OLD_RTX_COSTS
10702 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10703 {
10704 /* Old way. (Deprecated.) */
10705 if (!speed)
10706 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10707 (enum rtx_code) outer_code, total);
10708 else
10709 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10710 (enum rtx_code) outer_code, total,
10711 speed);
10712 }
10713 else
10714 {
10715 /* New way. */
10716 if (current_tune->insn_extra_cost)
10717 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10718 (enum rtx_code) outer_code,
10719 current_tune->insn_extra_cost,
10720 total, speed);
10721 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10722 	 && current_tune->insn_extra_cost == NULL */
10723 else
10724 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10725 (enum rtx_code) outer_code,
10726 &generic_extra_costs, total, speed);
10727 }
10728
10729 if (dump_file && (dump_flags & TDF_DETAILS))
10730 {
10731 print_rtl_single (dump_file, x);
10732 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10733 *total, result ? "final" : "partial");
10734 }
10735 return result;
10736 }
10737
10738 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10739 supported on any "slowmul" cores, so it can be ignored. */
10740
10741 static bool
10742 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10743 int *total, bool speed)
10744 {
10745 enum machine_mode mode = GET_MODE (x);
10746
10747 if (TARGET_THUMB)
10748 {
10749 *total = thumb1_rtx_costs (x, code, outer_code);
10750 return true;
10751 }
10752
10753 switch (code)
10754 {
10755 case MULT:
10756 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10757 || mode == DImode)
10758 {
10759 *total = COSTS_N_INSNS (20);
10760 return false;
10761 }
10762
10763 if (CONST_INT_P (XEXP (x, 1)))
10764 {
10765 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10766 & (unsigned HOST_WIDE_INT) 0xffffffff);
10767 int cost, const_ok = const_ok_for_arm (i);
10768 int j, booth_unit_size;
10769
10770 /* Tune as appropriate. */
10771 cost = const_ok ? 4 : 8;
10772 booth_unit_size = 2;
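	  /* The loop below adds one unit of COST for each BOOTH_UNIT_SIZE
	     bits that remain significant in the constant; e.g. a 10-bit
	     constant adds 5 with the 2-bit-per-cycle multiplier assumed
	     here.  */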
10773 for (j = 0; i && j < 32; j += booth_unit_size)
10774 {
10775 i >>= booth_unit_size;
10776 cost++;
10777 }
10778
10779 *total = COSTS_N_INSNS (cost);
10780 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
10781 return true;
10782 }
10783
10784 *total = COSTS_N_INSNS (20);
10785 return false;
10786
10787 default:
10788       return arm_rtx_costs_1 (x, outer_code, total, speed);
10789 }
10790 }
10791
10792
10793 /* RTX cost for cores with a fast multiply unit (M variants). */
10794
10795 static bool
10796 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10797 int *total, bool speed)
10798 {
10799 enum machine_mode mode = GET_MODE (x);
10800
10801 if (TARGET_THUMB1)
10802 {
10803 *total = thumb1_rtx_costs (x, code, outer_code);
10804 return true;
10805 }
10806
10807   /* ??? Should Thumb-2 use different costs? */
10808 switch (code)
10809 {
10810 case MULT:
10811 /* There is no point basing this on the tuning, since it is always the
10812 fast variant if it exists at all. */
10813 if (mode == DImode
10814 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10815 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10816 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10817 {
10818 	  *total = COSTS_N_INSNS (2);
10819 return false;
10820 }
10821
10822
10823 if (mode == DImode)
10824 {
10825 *total = COSTS_N_INSNS (5);
10826 return false;
10827 }
10828
10829 if (CONST_INT_P (XEXP (x, 1)))
10830 {
10831 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10832 & (unsigned HOST_WIDE_INT) 0xffffffff);
10833 int cost, const_ok = const_ok_for_arm (i);
10834 int j, booth_unit_size;
10835
10836 /* Tune as appropriate. */
10837 cost = const_ok ? 4 : 8;
10838 booth_unit_size = 8;
10839 for (j = 0; i && j < 32; j += booth_unit_size)
10840 {
10841 i >>= booth_unit_size;
10842 cost++;
10843 }
10844
10845 	  *total = COSTS_N_INSNS (cost);
10846 return false;
10847 }
10848
10849 if (mode == SImode)
10850 {
10851 *total = COSTS_N_INSNS (4);
10852 return false;
10853 }
10854
10855 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10856 {
10857 if (TARGET_HARD_FLOAT
10858 && (mode == SFmode
10859 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10860 {
10861 *total = COSTS_N_INSNS (1);
10862 return false;
10863 }
10864 }
10865
10866 /* Requires a lib call */
10867 *total = COSTS_N_INSNS (20);
10868 return false;
10869
10870 default:
10871 return arm_rtx_costs_1 (x, outer_code, total, speed);
10872 }
10873 }
10874
10875
10876 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
10877 so it can be ignored. */
10878
10879 static bool
10880 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10881 int *total, bool speed)
10882 {
10883 enum machine_mode mode = GET_MODE (x);
10884
10885 if (TARGET_THUMB)
10886 {
10887 *total = thumb1_rtx_costs (x, code, outer_code);
10888 return true;
10889 }
10890
10891 switch (code)
10892 {
10893 case COMPARE:
10894 if (GET_CODE (XEXP (x, 0)) != MULT)
10895 return arm_rtx_costs_1 (x, outer_code, total, speed);
10896
10897 /* A COMPARE of a MULT is slow on XScale; the muls instruction
10898 will stall until the multiplication is complete. */
10899 *total = COSTS_N_INSNS (3);
10900 return false;
10901
10902 case MULT:
10903 /* There is no point basing this on the tuning, since it is always the
10904 fast variant if it exists at all. */
10905 if (mode == DImode
10906 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10907 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10908 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10909 {
10910 *total = COSTS_N_INSNS (2);
10911 return false;
10912 }
10913
10914
10915 if (mode == DImode)
10916 {
10917 *total = COSTS_N_INSNS (5);
10918 return false;
10919 }
10920
10921 if (CONST_INT_P (XEXP (x, 1)))
10922 {
10923 /* If operand 1 is a constant we can more accurately
10924 calculate the cost of the multiply. The multiplier can
10925 retire 15 bits on the first cycle and a further 12 on the
10926 second. We do, of course, have to load the constant into
10927 a register first. */
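	  /* Concretely: a (possibly inverted) constant that fits in the low
	     15 bits incurs just the one cycle of overhead, one that needs
	     up to 27 bits costs two cycles, and anything wider costs
	     three.  */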
10928 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
10929 /* There's a general overhead of one cycle. */
10930 int cost = 1;
10931 unsigned HOST_WIDE_INT masked_const;
10932
10933 if (i & 0x80000000)
10934 i = ~i;
10935
10936 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
10937
10938 masked_const = i & 0xffff8000;
10939 if (masked_const != 0)
10940 {
10941 cost++;
10942 masked_const = i & 0xf8000000;
10943 if (masked_const != 0)
10944 cost++;
10945 }
10946 *total = COSTS_N_INSNS (cost);
10947 return false;
10948 }
10949
10950 if (mode == SImode)
10951 {
10952 *total = COSTS_N_INSNS (3);
10953 return false;
10954 }
10955
10956 /* Requires a lib call */
10957 *total = COSTS_N_INSNS (20);
10958 return false;
10959
10960 default:
10961 return arm_rtx_costs_1 (x, outer_code, total, speed);
10962 }
10963 }
10964
10965
10966 /* RTX costs for 9e (and later) cores. */
10967
10968 static bool
10969 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10970 int *total, bool speed)
10971 {
10972 enum machine_mode mode = GET_MODE (x);
10973
10974 if (TARGET_THUMB1)
10975 {
10976 switch (code)
10977 {
10978 case MULT:
10979 *total = COSTS_N_INSNS (3);
10980 return true;
10981
10982 default:
10983 *total = thumb1_rtx_costs (x, code, outer_code);
10984 return true;
10985 }
10986 }
10987
10988 switch (code)
10989 {
10990 case MULT:
10991 /* There is no point basing this on the tuning, since it is always the
10992 fast variant if it exists at all. */
10993 if (mode == DImode
10994 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10995 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10996 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10997 {
10998 *total = COSTS_N_INSNS (2);
10999 return false;
11000 }
11001
11002
11003 if (mode == DImode)
11004 {
11005 *total = COSTS_N_INSNS (5);
11006 return false;
11007 }
11008
11009 if (mode == SImode)
11010 {
11011 *total = COSTS_N_INSNS (2);
11012 return false;
11013 }
11014
11015 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11016 {
11017 if (TARGET_HARD_FLOAT
11018 && (mode == SFmode
11019 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11020 {
11021 *total = COSTS_N_INSNS (1);
11022 return false;
11023 }
11024 }
11025
11026 *total = COSTS_N_INSNS (20);
11027 return false;
11028
11029 default:
11030 return arm_rtx_costs_1 (x, outer_code, total, speed);
11031 }
11032 }
11033 /* All address computations that can be done are free, but rtx cost returns
11034 the same for practically all of them. So we weight the different types
11035 of address here in the order (most pref first):
11036 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11037 static inline int
11038 arm_arm_address_cost (rtx x)
11039 {
11040 enum rtx_code c = GET_CODE (x);
11041
11042 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11043 return 0;
11044 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11045 return 10;
11046
11047 if (c == PLUS)
11048 {
11049 if (CONST_INT_P (XEXP (x, 1)))
11050 return 2;
11051
11052 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11053 return 3;
11054
11055 return 4;
11056 }
11057
11058 return 6;
11059 }
11060
11061 static inline int
11062 arm_thumb_address_cost (rtx x)
11063 {
11064 enum rtx_code c = GET_CODE (x);
11065
11066 if (c == REG)
11067 return 1;
11068 if (c == PLUS
11069 && REG_P (XEXP (x, 0))
11070 && CONST_INT_P (XEXP (x, 1)))
11071 return 1;
11072
11073 return 2;
11074 }
11075
11076 static int
11077 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11078 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11079 {
11080 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11081 }
11082
11083 /* Adjust cost hook for XScale. */
11084 static bool
11085 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11086 {
11087 /* Some true dependencies can have a higher cost depending
11088 on precisely how certain input operands are used. */
11089 if (REG_NOTE_KIND(link) == 0
11090 && recog_memoized (insn) >= 0
11091 && recog_memoized (dep) >= 0)
11092 {
11093 int shift_opnum = get_attr_shift (insn);
11094 enum attr_type attr_type = get_attr_type (dep);
11095
11096 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11097 operand for INSN. If we have a shifted input operand and the
11098 instruction we depend on is another ALU instruction, then we may
11099 have to account for an additional stall. */
11100 if (shift_opnum != 0
11101 && (attr_type == TYPE_ALU_SHIFT_IMM
11102 || attr_type == TYPE_ALUS_SHIFT_IMM
11103 || attr_type == TYPE_LOGIC_SHIFT_IMM
11104 || attr_type == TYPE_LOGICS_SHIFT_IMM
11105 || attr_type == TYPE_ALU_SHIFT_REG
11106 || attr_type == TYPE_ALUS_SHIFT_REG
11107 || attr_type == TYPE_LOGIC_SHIFT_REG
11108 || attr_type == TYPE_LOGICS_SHIFT_REG
11109 || attr_type == TYPE_MOV_SHIFT
11110 || attr_type == TYPE_MVN_SHIFT
11111 || attr_type == TYPE_MOV_SHIFT_REG
11112 || attr_type == TYPE_MVN_SHIFT_REG))
11113 {
11114 rtx shifted_operand;
11115 int opno;
11116
11117 /* Get the shifted operand. */
11118 extract_insn (insn);
11119 shifted_operand = recog_data.operand[shift_opnum];
11120
11121 /* Iterate over all the operands in DEP. If we write an operand
 11122 	     that overlaps with SHIFTED_OPERAND, then we have to increase the
11123 cost of this dependency. */
11124 extract_insn (dep);
11125 preprocess_constraints ();
11126 for (opno = 0; opno < recog_data.n_operands; opno++)
11127 {
11128 /* We can ignore strict inputs. */
11129 if (recog_data.operand_type[opno] == OP_IN)
11130 continue;
11131
11132 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11133 shifted_operand))
11134 {
11135 *cost = 2;
11136 return false;
11137 }
11138 }
11139 }
11140 }
11141 return true;
11142 }
11143
11144 /* Adjust cost hook for Cortex A9. */
11145 static bool
11146 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11147 {
11148 switch (REG_NOTE_KIND (link))
11149 {
11150 case REG_DEP_ANTI:
11151 *cost = 0;
11152 return false;
11153
11154 case REG_DEP_TRUE:
11155 case REG_DEP_OUTPUT:
11156 if (recog_memoized (insn) >= 0
11157 && recog_memoized (dep) >= 0)
11158 {
11159 if (GET_CODE (PATTERN (insn)) == SET)
11160 {
11161 if (GET_MODE_CLASS
11162 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11163 || GET_MODE_CLASS
11164 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11165 {
11166 enum attr_type attr_type_insn = get_attr_type (insn);
11167 enum attr_type attr_type_dep = get_attr_type (dep);
11168
11169 /* By default all dependencies of the form
11170 s0 = s0 <op> s1
11171 s0 = s0 <op> s2
11172 have an extra latency of 1 cycle because
11173 of the input and output dependency in this
 11174 		     case.  However, this gets modeled as a true
 11175 		     dependency, hence all these checks.  */
11176 if (REG_P (SET_DEST (PATTERN (insn)))
11177 && REG_P (SET_DEST (PATTERN (dep)))
11178 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11179 SET_DEST (PATTERN (dep))))
11180 {
11181 /* FMACS is a special case where the dependent
11182 instruction can be issued 3 cycles before
11183 the normal latency in case of an output
11184 dependency. */
11185 if ((attr_type_insn == TYPE_FMACS
11186 || attr_type_insn == TYPE_FMACD)
11187 && (attr_type_dep == TYPE_FMACS
11188 || attr_type_dep == TYPE_FMACD))
11189 {
11190 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11191 *cost = insn_default_latency (dep) - 3;
11192 else
11193 *cost = insn_default_latency (dep);
11194 return false;
11195 }
11196 else
11197 {
11198 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11199 *cost = insn_default_latency (dep) + 1;
11200 else
11201 *cost = insn_default_latency (dep);
11202 }
11203 return false;
11204 }
11205 }
11206 }
11207 }
11208 break;
11209
11210 default:
11211 gcc_unreachable ();
11212 }
11213
11214 return true;
11215 }
11216
11217 /* Adjust cost hook for FA726TE. */
11218 static bool
11219 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11220 {
 11221   /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
 11222      has a penalty of 3.  */
11223 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11224 && recog_memoized (insn) >= 0
11225 && recog_memoized (dep) >= 0
11226 && get_attr_conds (dep) == CONDS_SET)
11227 {
11228 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11229 if (get_attr_conds (insn) == CONDS_USE
11230 && get_attr_type (insn) != TYPE_BRANCH)
11231 {
11232 *cost = 3;
11233 return false;
11234 }
11235
11236 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11237 || get_attr_conds (insn) == CONDS_USE)
11238 {
11239 *cost = 0;
11240 return false;
11241 }
11242 }
11243
11244 return true;
11245 }
11246
11247 /* Implement TARGET_REGISTER_MOVE_COST.
11248
11249 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
 11250    such a move is typically more expensive than a single memory access.  We set
11251 the cost to less than two memory accesses so that floating
11252 point to integer conversion does not go through memory. */
11253
11254 int
11255 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11256 reg_class_t from, reg_class_t to)
11257 {
11258 if (TARGET_32BIT)
11259 {
11260 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11261 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11262 return 15;
11263 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11264 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11265 return 4;
11266 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11267 return 20;
11268 else
11269 return 2;
11270 }
11271 else
11272 {
11273 if (from == HI_REGS || to == HI_REGS)
11274 return 4;
11275 else
11276 return 2;
11277 }
11278 }
11279
11280 /* Implement TARGET_MEMORY_MOVE_COST. */
11281
11282 int
11283 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11284 bool in ATTRIBUTE_UNUSED)
11285 {
11286 if (TARGET_32BIT)
11287 return 10;
11288 else
11289 {
11290 if (GET_MODE_SIZE (mode) < 4)
11291 return 8;
11292 else
11293 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11294 }
11295 }
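/* Illustrative examples (added commentary): on 32-bit (ARM/Thumb-2)
   targets every memory move is costed at 10.  For Thumb-1, an SImode
   move through LO_REGS costs 2 * 4 * 1 = 8, while a DImode move through
   a non-LO class costs 2 * 8 * 2 = 32.  */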
11296
11297 /* Vectorizer cost model implementation. */
11298
11299 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11300 static int
11301 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11302 tree vectype,
11303 int misalign ATTRIBUTE_UNUSED)
11304 {
11305 unsigned elements;
11306
11307 switch (type_of_cost)
11308 {
11309 case scalar_stmt:
11310 return current_tune->vec_costs->scalar_stmt_cost;
11311
11312 case scalar_load:
11313 return current_tune->vec_costs->scalar_load_cost;
11314
11315 case scalar_store:
11316 return current_tune->vec_costs->scalar_store_cost;
11317
11318 case vector_stmt:
11319 return current_tune->vec_costs->vec_stmt_cost;
11320
11321 case vector_load:
11322 return current_tune->vec_costs->vec_align_load_cost;
11323
11324 case vector_store:
11325 return current_tune->vec_costs->vec_store_cost;
11326
11327 case vec_to_scalar:
11328 return current_tune->vec_costs->vec_to_scalar_cost;
11329
11330 case scalar_to_vec:
11331 return current_tune->vec_costs->scalar_to_vec_cost;
11332
11333 case unaligned_load:
11334 return current_tune->vec_costs->vec_unalign_load_cost;
11335
11336 case unaligned_store:
11337 return current_tune->vec_costs->vec_unalign_store_cost;
11338
11339 case cond_branch_taken:
11340 return current_tune->vec_costs->cond_taken_branch_cost;
11341
11342 case cond_branch_not_taken:
11343 return current_tune->vec_costs->cond_not_taken_branch_cost;
11344
11345 case vec_perm:
11346 case vec_promote_demote:
11347 return current_tune->vec_costs->vec_stmt_cost;
11348
11349 case vec_construct:
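      /* Illustrative example (added commentary): building a V4SI vector
	 from scalars is costed as 4 / 2 + 1 = 3 units.  */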
11350 elements = TYPE_VECTOR_SUBPARTS (vectype);
11351 return elements / 2 + 1;
11352
11353 default:
11354 gcc_unreachable ();
11355 }
11356 }
11357
11358 /* Implement targetm.vectorize.add_stmt_cost. */
11359
11360 static unsigned
11361 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11362 struct _stmt_vec_info *stmt_info, int misalign,
11363 enum vect_cost_model_location where)
11364 {
11365 unsigned *cost = (unsigned *) data;
11366 unsigned retval = 0;
11367
11368 if (flag_vect_cost_model)
11369 {
11370 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11371 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11372
11373 /* Statements in an inner loop relative to the loop being
11374 vectorized are weighted more heavily. The value here is
11375 arbitrary and could potentially be improved with analysis. */
11376 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11377 count *= 50; /* FIXME. */
11378
11379 retval = (unsigned) (count * stmt_cost);
11380 cost[where] += retval;
11381 }
11382
11383 return retval;
11384 }
11385
11386 /* Return true if and only if this insn can dual-issue only as older. */
11387 static bool
11388 cortexa7_older_only (rtx insn)
11389 {
11390 if (recog_memoized (insn) < 0)
11391 return false;
11392
11393 switch (get_attr_type (insn))
11394 {
11395 case TYPE_ALU_REG:
11396 case TYPE_ALUS_REG:
11397 case TYPE_LOGIC_REG:
11398 case TYPE_LOGICS_REG:
11399 case TYPE_ADC_REG:
11400 case TYPE_ADCS_REG:
11401 case TYPE_ADR:
11402 case TYPE_BFM:
11403 case TYPE_REV:
11404 case TYPE_MVN_REG:
11405 case TYPE_SHIFT_IMM:
11406 case TYPE_SHIFT_REG:
11407 case TYPE_LOAD_BYTE:
11408 case TYPE_LOAD1:
11409 case TYPE_STORE1:
11410 case TYPE_FFARITHS:
11411 case TYPE_FADDS:
11412 case TYPE_FFARITHD:
11413 case TYPE_FADDD:
11414 case TYPE_FMOV:
11415 case TYPE_F_CVT:
11416 case TYPE_FCMPS:
11417 case TYPE_FCMPD:
11418 case TYPE_FCONSTS:
11419 case TYPE_FCONSTD:
11420 case TYPE_FMULS:
11421 case TYPE_FMACS:
11422 case TYPE_FMULD:
11423 case TYPE_FMACD:
11424 case TYPE_FDIVS:
11425 case TYPE_FDIVD:
11426 case TYPE_F_MRC:
11427 case TYPE_F_MRRC:
11428 case TYPE_F_FLAG:
11429 case TYPE_F_LOADS:
11430 case TYPE_F_STORES:
11431 return true;
11432 default:
11433 return false;
11434 }
11435 }
11436
11437 /* Return true if and only if this insn can dual-issue as younger. */
11438 static bool
11439 cortexa7_younger (FILE *file, int verbose, rtx insn)
11440 {
11441 if (recog_memoized (insn) < 0)
11442 {
11443 if (verbose > 5)
11444 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11445 return false;
11446 }
11447
11448 switch (get_attr_type (insn))
11449 {
11450 case TYPE_ALU_IMM:
11451 case TYPE_ALUS_IMM:
11452 case TYPE_LOGIC_IMM:
11453 case TYPE_LOGICS_IMM:
11454 case TYPE_EXTEND:
11455 case TYPE_MVN_IMM:
11456 case TYPE_MOV_IMM:
11457 case TYPE_MOV_REG:
11458 case TYPE_MOV_SHIFT:
11459 case TYPE_MOV_SHIFT_REG:
11460 case TYPE_BRANCH:
11461 case TYPE_CALL:
11462 return true;
11463 default:
11464 return false;
11465 }
11466 }
11467
11468
11469 /* Look for an instruction that can dual issue only as an older
11470 instruction, and move it in front of any instructions that can
11471 dual-issue as younger, while preserving the relative order of all
 11472    other instructions in the ready list.  This is a heuristic to help
11473 dual-issue in later cycles, by postponing issue of more flexible
11474 instructions. This heuristic may affect dual issue opportunities
11475 in the current cycle. */
11476 static void
11477 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11478 int clock)
11479 {
11480 int i;
11481 int first_older_only = -1, first_younger = -1;
11482
11483 if (verbose > 5)
11484 fprintf (file,
11485 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11486 clock,
11487 *n_readyp);
11488
11489 /* Traverse the ready list from the head (the instruction to issue
 11490      first), looking for the first instruction that can issue as
11491 younger and the first instruction that can dual-issue only as
11492 older. */
11493 for (i = *n_readyp - 1; i >= 0; i--)
11494 {
11495 rtx insn = ready[i];
11496 if (cortexa7_older_only (insn))
11497 {
11498 first_older_only = i;
11499 if (verbose > 5)
11500 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11501 break;
11502 }
11503 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11504 first_younger = i;
11505 }
11506
 11507   /* Nothing to reorder because either no younger insn was found, or an
 11508      insn that can dual-issue only as older appears before any insn that
 11509      can dual-issue as younger.  */
11510 if (first_younger == -1)
11511 {
11512 if (verbose > 5)
11513 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11514 return;
11515 }
11516
11517 /* Nothing to reorder because no older-only insn in the ready list. */
11518 if (first_older_only == -1)
11519 {
11520 if (verbose > 5)
11521 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11522 return;
11523 }
11524
11525 /* Move first_older_only insn before first_younger. */
11526 if (verbose > 5)
11527 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11528 INSN_UID(ready [first_older_only]),
11529 INSN_UID(ready [first_younger]));
11530 rtx first_older_only_insn = ready [first_older_only];
11531 for (i = first_older_only; i < first_younger; i++)
11532 {
11533 ready[i] = ready[i+1];
11534 }
11535
11536 ready[i] = first_older_only_insn;
11537 return;
11538 }
11539
11540 /* Implement TARGET_SCHED_REORDER. */
11541 static int
11542 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11543 int clock)
11544 {
11545 switch (arm_tune)
11546 {
11547 case cortexa7:
11548 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11549 break;
11550 default:
11551 /* Do nothing for other cores. */
11552 break;
11553 }
11554
11555 return arm_issue_rate ();
11556 }
11557
 11558 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11559 It corrects the value of COST based on the relationship between
11560 INSN and DEP through the dependence LINK. It returns the new
11561 value. There is a per-core adjust_cost hook to adjust scheduler costs
11562 and the per-core hook can choose to completely override the generic
11563 adjust_cost function. Only put bits of code into arm_adjust_cost that
11564 are common across all cores. */
11565 static int
11566 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11567 {
11568 rtx i_pat, d_pat;
11569
11570 /* When generating Thumb-1 code, we want to place flag-setting operations
11571 close to a conditional branch which depends on them, so that we can
11572 omit the comparison. */
11573 if (TARGET_THUMB1
11574 && REG_NOTE_KIND (link) == 0
11575 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11576 && recog_memoized (dep) >= 0
11577 && get_attr_conds (dep) == CONDS_SET)
11578 return 0;
11579
11580 if (current_tune->sched_adjust_cost != NULL)
11581 {
11582 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11583 return cost;
11584 }
11585
11586 /* XXX Is this strictly true? */
11587 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11588 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11589 return 0;
11590
11591 /* Call insns don't incur a stall, even if they follow a load. */
11592 if (REG_NOTE_KIND (link) == 0
11593 && CALL_P (insn))
11594 return 1;
11595
11596 if ((i_pat = single_set (insn)) != NULL
11597 && MEM_P (SET_SRC (i_pat))
11598 && (d_pat = single_set (dep)) != NULL
11599 && MEM_P (SET_DEST (d_pat)))
11600 {
11601 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
 11602       /* This is a load after a store; there is no conflict if the load reads
11603 from a cached area. Assume that loads from the stack, and from the
11604 constant pool are cached, and that others will miss. This is a
11605 hack. */
11606
11607 if ((GET_CODE (src_mem) == SYMBOL_REF
11608 && CONSTANT_POOL_ADDRESS_P (src_mem))
11609 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11610 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11611 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11612 return 1;
11613 }
11614
11615 return cost;
11616 }
11617
11618 int
11619 arm_max_conditional_execute (void)
11620 {
11621 return max_insns_skipped;
11622 }
11623
11624 static int
11625 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11626 {
11627 if (TARGET_32BIT)
11628 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11629 else
11630 return (optimize > 0) ? 2 : 0;
11631 }
11632
11633 static int
11634 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11635 {
11636 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11637 }
11638
11639 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11640 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11641 sequences of non-executed instructions in IT blocks probably take the same
11642 amount of time as executed instructions (and the IT instruction itself takes
11643 space in icache). This function was experimentally determined to give good
11644 results on a popular embedded benchmark. */
11645
11646 static int
11647 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11648 {
11649 return (TARGET_32BIT && speed_p) ? 1
11650 : arm_default_branch_cost (speed_p, predictable_p);
11651 }
11652
11653 static bool fp_consts_inited = false;
11654
11655 static REAL_VALUE_TYPE value_fp0;
11656
11657 static void
11658 init_fp_table (void)
11659 {
11660 REAL_VALUE_TYPE r;
11661
11662 r = REAL_VALUE_ATOF ("0", DFmode);
11663 value_fp0 = r;
11664 fp_consts_inited = true;
11665 }
11666
11667 /* Return TRUE if rtx X is a valid immediate FP constant. */
11668 int
11669 arm_const_double_rtx (rtx x)
11670 {
11671 REAL_VALUE_TYPE r;
11672
11673 if (!fp_consts_inited)
11674 init_fp_table ();
11675
11676 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11677 if (REAL_VALUE_MINUS_ZERO (r))
11678 return 0;
11679
11680 if (REAL_VALUES_EQUAL (r, value_fp0))
11681 return 1;
11682
11683 return 0;
11684 }
11685
11686 /* VFPv3 has a fairly wide range of representable immediates, formed from
11687 "quarter-precision" floating-point values. These can be evaluated using this
11688 formula (with ^ for exponentiation):
11689
11690 -1^s * n * 2^-r
11691
11692 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11693 16 <= n <= 31 and 0 <= r <= 7.
11694
11695 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11696
11697 - A (most-significant) is the sign bit.
11698 - BCD are the exponent (encoded as r XOR 3).
11699 - EFGH are the mantissa (encoded as n - 16).
11700 */
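/* Worked example (illustrative commentary, not part of the original
   description): 1.5 = 24 * 2^-4, i.e. s = 0, n = 24, r = 4, so the
   encoded byte is (0 << 7) | ((4 ^ 3) << 4) | (24 - 16) = 0x78.  */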
11701
11702 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11703 fconst[sd] instruction, or -1 if X isn't suitable. */
11704 static int
11705 vfp3_const_double_index (rtx x)
11706 {
11707 REAL_VALUE_TYPE r, m;
11708 int sign, exponent;
11709 unsigned HOST_WIDE_INT mantissa, mant_hi;
11710 unsigned HOST_WIDE_INT mask;
11711 HOST_WIDE_INT m1, m2;
11712 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11713
11714 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11715 return -1;
11716
11717 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11718
11719 /* We can't represent these things, so detect them first. */
11720 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11721 return -1;
11722
11723 /* Extract sign, exponent and mantissa. */
11724 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11725 r = real_value_abs (&r);
11726 exponent = REAL_EXP (&r);
11727 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11728 highest (sign) bit, with a fixed binary point at bit point_pos.
11729 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11730 bits for the mantissa, this may fail (low bits would be lost). */
11731 real_ldexp (&m, &r, point_pos - exponent);
11732 REAL_VALUE_TO_INT (&m1, &m2, m);
11733 mantissa = m1;
11734 mant_hi = m2;
11735
11736 /* If there are bits set in the low part of the mantissa, we can't
11737 represent this value. */
11738 if (mantissa != 0)
11739 return -1;
11740
11741 /* Now make it so that mantissa contains the most-significant bits, and move
11742 the point_pos to indicate that the least-significant bits have been
11743 discarded. */
11744 point_pos -= HOST_BITS_PER_WIDE_INT;
11745 mantissa = mant_hi;
11746
11747 /* We can permit four significant bits of mantissa only, plus a high bit
11748 which is always 1. */
11749 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11750 if ((mantissa & mask) != 0)
11751 return -1;
11752
11753 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11754 mantissa >>= point_pos - 5;
11755
11756 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11757 floating-point immediate zero with Neon using an integer-zero load, but
11758 that case is handled elsewhere.) */
11759 if (mantissa == 0)
11760 return -1;
11761
11762 gcc_assert (mantissa >= 16 && mantissa <= 31);
11763
11764 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11765 normalized significands are in the range [1, 2). (Our mantissa is shifted
11766 left 4 places at this point relative to normalized IEEE754 values). GCC
11767 internally uses [0.5, 1) (see real.c), so the exponent returned from
11768 REAL_EXP must be altered. */
11769 exponent = 5 - exponent;
11770
11771 if (exponent < 0 || exponent > 7)
11772 return -1;
11773
11774 /* Sign, mantissa and exponent are now in the correct form to plug into the
11775 formula described in the comment above. */
11776 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11777 }
11778
11779 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11780 int
11781 vfp3_const_double_rtx (rtx x)
11782 {
11783 if (!TARGET_VFP3)
11784 return 0;
11785
11786 return vfp3_const_double_index (x) != -1;
11787 }
11788
11789 /* Recognize immediates which can be used in various Neon instructions. Legal
11790 immediates are described by the following table (for VMVN variants, the
11791 bitwise inverse of the constant shown is recognized. In either case, VMOV
11792 is output and the correct instruction to use for a given constant is chosen
11793 by the assembler). The constant shown is replicated across all elements of
11794 the destination vector.
11795
11796 insn elems variant constant (binary)
11797 ---- ----- ------- -----------------
11798 vmov i32 0 00000000 00000000 00000000 abcdefgh
11799 vmov i32 1 00000000 00000000 abcdefgh 00000000
11800 vmov i32 2 00000000 abcdefgh 00000000 00000000
11801 vmov i32 3 abcdefgh 00000000 00000000 00000000
11802 vmov i16 4 00000000 abcdefgh
11803 vmov i16 5 abcdefgh 00000000
11804 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11805 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11806 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11807 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11808 vmvn i16 10 00000000 abcdefgh
11809 vmvn i16 11 abcdefgh 00000000
11810 vmov i32 12 00000000 00000000 abcdefgh 11111111
11811 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11812 vmov i32 14 00000000 abcdefgh 11111111 11111111
11813 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11814 vmov i8 16 abcdefgh
11815 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11816 eeeeeeee ffffffff gggggggg hhhhhhhh
11817 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11818 vmov f32 19 00000000 00000000 00000000 00000000
11819
11820 For case 18, B = !b. Representable values are exactly those accepted by
11821 vfp3_const_double_index, but are output as floating-point numbers rather
11822 than indices.
11823
11824 For case 19, we will change it to vmov.i32 when assembling.
11825
11826 Variants 0-5 (inclusive) may also be used as immediates for the second
11827 operand of VORR/VBIC instructions.
11828
11829 The INVERSE argument causes the bitwise inverse of the given operand to be
11830 recognized instead (used for recognizing legal immediates for the VAND/VORN
11831 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11832 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11833 output, rather than the real insns vbic/vorr).
11834
11835 INVERSE makes no difference to the recognition of float vectors.
11836
11837 The return value is the variant of immediate as shown in the above table, or
11838 -1 if the given value doesn't match any of the listed patterns.
11839 */
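/* Illustrative example (added commentary): a V4SImode CONST_VECTOR whose
   elements are all 0x0000ab00 matches variant 1 above, so the function
   below returns 1, sets *ELEMENTWIDTH to 32 and *MODCONST to 0x0000ab00,
   and the constant can be emitted as a vmov.i32 with that immediate.  */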
11840 static int
11841 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
11842 rtx *modconst, int *elementwidth)
11843 {
11844 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11845 matches = 1; \
11846 for (i = 0; i < idx; i += (STRIDE)) \
11847 if (!(TEST)) \
11848 matches = 0; \
11849 if (matches) \
11850 { \
11851 immtype = (CLASS); \
11852 elsize = (ELSIZE); \
11853 break; \
11854 }
11855
11856 unsigned int i, elsize = 0, idx = 0, n_elts;
11857 unsigned int innersize;
11858 unsigned char bytes[16];
11859 int immtype = -1, matches;
11860 unsigned int invmask = inverse ? 0xff : 0;
11861 bool vector = GET_CODE (op) == CONST_VECTOR;
11862
11863 if (vector)
11864 {
11865 n_elts = CONST_VECTOR_NUNITS (op);
11866 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
11867 }
11868 else
11869 {
11870 n_elts = 1;
11871 if (mode == VOIDmode)
11872 mode = DImode;
11873 innersize = GET_MODE_SIZE (mode);
11874 }
11875
11876 /* Vectors of float constants. */
11877 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11878 {
11879 rtx el0 = CONST_VECTOR_ELT (op, 0);
11880 REAL_VALUE_TYPE r0;
11881
11882 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11883 return -1;
11884
11885 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
11886
11887 for (i = 1; i < n_elts; i++)
11888 {
11889 rtx elt = CONST_VECTOR_ELT (op, i);
11890 REAL_VALUE_TYPE re;
11891
11892 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
11893
11894 if (!REAL_VALUES_EQUAL (r0, re))
11895 return -1;
11896 }
11897
11898 if (modconst)
11899 *modconst = CONST_VECTOR_ELT (op, 0);
11900
11901 if (elementwidth)
11902 *elementwidth = 0;
11903
11904 if (el0 == CONST0_RTX (GET_MODE (el0)))
11905 return 19;
11906 else
11907 return 18;
11908 }
11909
11910 /* Splat vector constant out into a byte vector. */
11911 for (i = 0; i < n_elts; i++)
11912 {
11913 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11914 unsigned HOST_WIDE_INT elpart;
11915 unsigned int part, parts;
11916
11917 if (CONST_INT_P (el))
11918 {
11919 elpart = INTVAL (el);
11920 parts = 1;
11921 }
11922 else if (CONST_DOUBLE_P (el))
11923 {
11924 elpart = CONST_DOUBLE_LOW (el);
11925 parts = 2;
11926 }
11927 else
11928 gcc_unreachable ();
11929
11930 for (part = 0; part < parts; part++)
11931 {
11932 unsigned int byte;
11933 for (byte = 0; byte < innersize; byte++)
11934 {
11935 bytes[idx++] = (elpart & 0xff) ^ invmask;
11936 elpart >>= BITS_PER_UNIT;
11937 }
11938 if (CONST_DOUBLE_P (el))
11939 elpart = CONST_DOUBLE_HIGH (el);
11940 }
11941 }
11942
11943 /* Sanity check. */
11944 gcc_assert (idx == GET_MODE_SIZE (mode));
11945
11946 do
11947 {
11948 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11949 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11950
11951 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11952 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11953
11954 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11955 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11956
11957 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11958 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11959
11960 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11961
11962 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11963
11964 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11965 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11966
11967 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11968 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11969
11970 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11971 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11972
11973 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11974 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11975
11976 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11977
11978 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11979
11980 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11981 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11982
11983 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11984 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11985
11986 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11987 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11988
11989 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11990 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11991
11992 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11993
11994 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11995 && bytes[i] == bytes[(i + 8) % idx]);
11996 }
11997 while (0);
11998
11999 if (immtype == -1)
12000 return -1;
12001
12002 if (elementwidth)
12003 *elementwidth = elsize;
12004
12005 if (modconst)
12006 {
12007 unsigned HOST_WIDE_INT imm = 0;
12008
12009 /* Un-invert bytes of recognized vector, if necessary. */
12010 if (invmask != 0)
12011 for (i = 0; i < idx; i++)
12012 bytes[i] ^= invmask;
12013
12014 if (immtype == 17)
12015 {
12016 /* FIXME: Broken on 32-bit H_W_I hosts. */
12017 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12018
12019 for (i = 0; i < 8; i++)
12020 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12021 << (i * BITS_PER_UNIT);
12022
12023 *modconst = GEN_INT (imm);
12024 }
12025 else
12026 {
12027 unsigned HOST_WIDE_INT imm = 0;
12028
12029 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12030 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12031
12032 *modconst = GEN_INT (imm);
12033 }
12034 }
12035
12036 return immtype;
12037 #undef CHECK
12038 }
12039
12040 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12041 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12042 float elements), and a modified constant (whatever should be output for a
12043 VMOV) in *MODCONST. */
12044
12045 int
12046 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12047 rtx *modconst, int *elementwidth)
12048 {
12049 rtx tmpconst;
12050 int tmpwidth;
12051 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12052
12053 if (retval == -1)
12054 return 0;
12055
12056 if (modconst)
12057 *modconst = tmpconst;
12058
12059 if (elementwidth)
12060 *elementwidth = tmpwidth;
12061
12062 return 1;
12063 }
12064
12065 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12066 the immediate is valid, write a constant suitable for using as an operand
12067 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12068 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12069
12070 int
12071 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12072 rtx *modconst, int *elementwidth)
12073 {
12074 rtx tmpconst;
12075 int tmpwidth;
12076 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12077
12078 if (retval < 0 || retval > 5)
12079 return 0;
12080
12081 if (modconst)
12082 *modconst = tmpconst;
12083
12084 if (elementwidth)
12085 *elementwidth = tmpwidth;
12086
12087 return 1;
12088 }
12089
12090 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12091 the immediate is valid, write a constant suitable for using as an operand
12092 to VSHR/VSHL to *MODCONST and the corresponding element width to
 12093    *ELEMENTWIDTH.  ISLEFTSHIFT selects between left and right shifts,
 12094    which have different limitations.  */
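/* Illustrative example (added commentary): a V8HImode vector whose
   elements are all 5 is accepted in both directions, since the inner
   size is 2 bytes, so MAXSHIFT is 16 and 1 <= 5 < 16; *ELEMENTWIDTH is
   set to 16 and *MODCONST to the element value 5.  */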
12095
12096 int
12097 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12098 rtx *modconst, int *elementwidth,
12099 bool isleftshift)
12100 {
12101 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12102 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12103 unsigned HOST_WIDE_INT last_elt = 0;
12104 unsigned HOST_WIDE_INT maxshift;
12105
 12106   /* Check that all elements of the vector are identical integer constants.  */
12107 for (i = 0; i < n_elts; i++)
12108 {
12109 rtx el = CONST_VECTOR_ELT (op, i);
12110 unsigned HOST_WIDE_INT elpart;
12111
12112 if (CONST_INT_P (el))
12113 elpart = INTVAL (el);
12114 else if (CONST_DOUBLE_P (el))
12115 return 0;
12116 else
12117 gcc_unreachable ();
12118
12119 if (i != 0 && elpart != last_elt)
12120 return 0;
12121
12122 last_elt = elpart;
12123 }
12124
12125 /* Shift less than element size. */
12126 maxshift = innersize * 8;
12127
12128 if (isleftshift)
12129 {
12130 /* Left shift immediate value can be from 0 to <size>-1. */
12131 if (last_elt >= maxshift)
12132 return 0;
12133 }
12134 else
12135 {
12136 /* Right shift immediate value can be from 1 to <size>. */
12137 if (last_elt == 0 || last_elt > maxshift)
12138 return 0;
12139 }
12140
12141 if (elementwidth)
12142 *elementwidth = innersize * 8;
12143
12144 if (modconst)
12145 *modconst = CONST_VECTOR_ELT (op, 0);
12146
12147 return 1;
12148 }
12149
12150 /* Return a string suitable for output of Neon immediate logic operation
12151 MNEM. */
12152
12153 char *
12154 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12155 int inverse, int quad)
12156 {
12157 int width, is_valid;
12158 static char templ[40];
12159
12160 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12161
12162 gcc_assert (is_valid != 0);
12163
12164 if (quad)
12165 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12166 else
12167 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12168
12169 return templ;
12170 }
12171
12172 /* Return a string suitable for output of Neon immediate shift operation
12173 (VSHR or VSHL) MNEM. */
12174
12175 char *
12176 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12177 enum machine_mode mode, int quad,
12178 bool isleftshift)
12179 {
12180 int width, is_valid;
12181 static char templ[40];
12182
12183 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12184 gcc_assert (is_valid != 0);
12185
12186 if (quad)
12187 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12188 else
12189 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12190
12191 return templ;
12192 }
12193
12194 /* Output a sequence of pairwise operations to implement a reduction.
12195 NOTE: We do "too much work" here, because pairwise operations work on two
 12196    registers-worth of operands in one go.  Unfortunately, I don't think we can
 12197    exploit those extra calculations to do the full operation in fewer steps.
12198 Although all vector elements of the result but the first are ignored, we
12199 actually calculate the same result in each of the elements. An alternative
12200 such as initially loading a vector with zero to use as each of the second
12201 operands would use up an additional register and take an extra instruction,
12202 for no particular gain. */
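/* Illustrative example (added commentary): reducing a V4SImode value
   takes two pairwise steps; the first REDUC call folds four elements
   into two partial sums in a scratch register, and the second folds
   those into OP0, whose element 0 then holds the full reduction.  */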
12203
12204 void
12205 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12206 rtx (*reduc) (rtx, rtx, rtx))
12207 {
12208 enum machine_mode inner = GET_MODE_INNER (mode);
12209 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12210 rtx tmpsum = op1;
12211
12212 for (i = parts / 2; i >= 1; i /= 2)
12213 {
12214 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12215 emit_insn (reduc (dest, tmpsum, tmpsum));
12216 tmpsum = dest;
12217 }
12218 }
12219
12220 /* If VALS is a vector constant that can be loaded into a register
12221 using VDUP, generate instructions to do so and return an RTX to
12222 assign to the register. Otherwise return NULL_RTX. */
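/* Illustrative example (added commentary): called on a V8QImode
   CONST_VECTOR whose elements are all 7, the function below copies the
   element into a core register and returns (vec_duplicate:V8QI (reg)),
   which the move patterns can emit as a single VDUP.8.  */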
12223
12224 static rtx
12225 neon_vdup_constant (rtx vals)
12226 {
12227 enum machine_mode mode = GET_MODE (vals);
12228 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12229 int n_elts = GET_MODE_NUNITS (mode);
12230 bool all_same = true;
12231 rtx x;
12232 int i;
12233
12234 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12235 return NULL_RTX;
12236
12237 for (i = 0; i < n_elts; ++i)
12238 {
12239 x = XVECEXP (vals, 0, i);
12240 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12241 all_same = false;
12242 }
12243
12244 if (!all_same)
12245 /* The elements are not all the same. We could handle repeating
12246 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12247 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12248 vdup.i16). */
12249 return NULL_RTX;
12250
12251 /* We can load this constant by using VDUP and a constant in a
12252 single ARM register. This will be cheaper than a vector
12253 load. */
12254
12255 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12256 return gen_rtx_VEC_DUPLICATE (mode, x);
12257 }
12258
12259 /* Generate code to load VALS, which is a PARALLEL containing only
12260 constants (for vec_init) or CONST_VECTOR, efficiently into a
12261 register. Returns an RTX to copy into the register, or NULL_RTX
12262 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12263
12264 rtx
12265 neon_make_constant (rtx vals)
12266 {
12267 enum machine_mode mode = GET_MODE (vals);
12268 rtx target;
12269 rtx const_vec = NULL_RTX;
12270 int n_elts = GET_MODE_NUNITS (mode);
12271 int n_const = 0;
12272 int i;
12273
12274 if (GET_CODE (vals) == CONST_VECTOR)
12275 const_vec = vals;
12276 else if (GET_CODE (vals) == PARALLEL)
12277 {
12278 /* A CONST_VECTOR must contain only CONST_INTs and
12279 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12280 Only store valid constants in a CONST_VECTOR. */
12281 for (i = 0; i < n_elts; ++i)
12282 {
12283 rtx x = XVECEXP (vals, 0, i);
12284 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12285 n_const++;
12286 }
12287 if (n_const == n_elts)
12288 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12289 }
12290 else
12291 gcc_unreachable ();
12292
12293 if (const_vec != NULL
12294 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12295 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12296 return const_vec;
12297 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12298 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12299 pipeline cycle; creating the constant takes one or two ARM
12300 pipeline cycles. */
12301 return target;
12302 else if (const_vec != NULL_RTX)
12303 /* Load from constant pool. On Cortex-A8 this takes two cycles
12304 (for either double or quad vectors). We can not take advantage
12305 of single-cycle VLD1 because we need a PC-relative addressing
12306 mode. */
12307 return const_vec;
12308 else
12309 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12310 We can not construct an initializer. */
12311 return NULL_RTX;
12312 }
12313
12314 /* Initialize vector TARGET to VALS. */
12315
12316 void
12317 neon_expand_vector_init (rtx target, rtx vals)
12318 {
12319 enum machine_mode mode = GET_MODE (target);
12320 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12321 int n_elts = GET_MODE_NUNITS (mode);
12322 int n_var = 0, one_var = -1;
12323 bool all_same = true;
12324 rtx x, mem;
12325 int i;
12326
12327 for (i = 0; i < n_elts; ++i)
12328 {
12329 x = XVECEXP (vals, 0, i);
12330 if (!CONSTANT_P (x))
12331 ++n_var, one_var = i;
12332
12333 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12334 all_same = false;
12335 }
12336
12337 if (n_var == 0)
12338 {
12339 rtx constant = neon_make_constant (vals);
12340 if (constant != NULL_RTX)
12341 {
12342 emit_move_insn (target, constant);
12343 return;
12344 }
12345 }
12346
12347 /* Splat a single non-constant element if we can. */
12348 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12349 {
12350 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12351 emit_insn (gen_rtx_SET (VOIDmode, target,
12352 gen_rtx_VEC_DUPLICATE (mode, x)));
12353 return;
12354 }
12355
12356 /* One field is non-constant. Load constant then overwrite varying
12357 field. This is more efficient than using the stack. */
12358 if (n_var == 1)
12359 {
12360 rtx copy = copy_rtx (vals);
12361 rtx index = GEN_INT (one_var);
12362
12363 /* Load constant part of vector, substitute neighboring value for
12364 varying element. */
12365 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12366 neon_expand_vector_init (target, copy);
12367
12368 /* Insert variable. */
12369 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12370 switch (mode)
12371 {
12372 case V8QImode:
12373 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12374 break;
12375 case V16QImode:
12376 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12377 break;
12378 case V4HImode:
12379 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12380 break;
12381 case V8HImode:
12382 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12383 break;
12384 case V2SImode:
12385 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12386 break;
12387 case V4SImode:
12388 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12389 break;
12390 case V2SFmode:
12391 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12392 break;
12393 case V4SFmode:
12394 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12395 break;
12396 case V2DImode:
12397 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12398 break;
12399 default:
12400 gcc_unreachable ();
12401 }
12402 return;
12403 }
12404
12405 /* Construct the vector in memory one field at a time
12406 and load the whole vector. */
12407 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12408 for (i = 0; i < n_elts; i++)
12409 emit_move_insn (adjust_address_nv (mem, inner_mode,
12410 i * GET_MODE_SIZE (inner_mode)),
12411 XVECEXP (vals, 0, i));
12412 emit_move_insn (target, mem);
12413 }
12414
12415 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12416 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12417 reported source locations are bogus. */
12418
12419 static void
12420 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12421 const char *err)
12422 {
12423 HOST_WIDE_INT lane;
12424
12425 gcc_assert (CONST_INT_P (operand));
12426
12427 lane = INTVAL (operand);
12428
12429 if (lane < low || lane >= high)
12430 error (err);
12431 }
12432
12433 /* Bounds-check lanes. */
12434
12435 void
12436 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12437 {
12438 bounds_check (operand, low, high, "lane out of range");
12439 }
12440
12441 /* Bounds-check constants. */
12442
12443 void
12444 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12445 {
12446 bounds_check (operand, low, high, "constant out of range");
12447 }
12448
12449 HOST_WIDE_INT
12450 neon_element_bits (enum machine_mode mode)
12451 {
12452 if (mode == DImode)
12453 return GET_MODE_BITSIZE (mode);
12454 else
12455 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12456 }
12457
12458 \f
12459 /* Predicates for `match_operand' and `match_operator'. */
12460
12461 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12462 WB is true if full writeback address modes are allowed and is false
12463 if limited writeback address modes (POST_INC and PRE_DEC) are
12464 allowed. */
12465
12466 int
12467 arm_coproc_mem_operand (rtx op, bool wb)
12468 {
12469 rtx ind;
12470
12471 /* Reject eliminable registers. */
12472 if (! (reload_in_progress || reload_completed || lra_in_progress)
12473 && ( reg_mentioned_p (frame_pointer_rtx, op)
12474 || reg_mentioned_p (arg_pointer_rtx, op)
12475 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12476 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12477 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12478 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12479 return FALSE;
12480
12481 /* Constants are converted into offsets from labels. */
12482 if (!MEM_P (op))
12483 return FALSE;
12484
12485 ind = XEXP (op, 0);
12486
12487 if (reload_completed
12488 && (GET_CODE (ind) == LABEL_REF
12489 || (GET_CODE (ind) == CONST
12490 && GET_CODE (XEXP (ind, 0)) == PLUS
12491 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12492 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12493 return TRUE;
12494
12495 /* Match: (mem (reg)). */
12496 if (REG_P (ind))
12497 return arm_address_register_rtx_p (ind, 0);
12498
 12499   /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
12500 acceptable in any case (subject to verification by
12501 arm_address_register_rtx_p). We need WB to be true to accept
12502 PRE_INC and POST_DEC. */
12503 if (GET_CODE (ind) == POST_INC
12504 || GET_CODE (ind) == PRE_DEC
12505 || (wb
12506 && (GET_CODE (ind) == PRE_INC
12507 || GET_CODE (ind) == POST_DEC)))
12508 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12509
12510 if (wb
12511 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12512 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12513 && GET_CODE (XEXP (ind, 1)) == PLUS
12514 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12515 ind = XEXP (ind, 1);
12516
12517 /* Match:
12518 (plus (reg)
12519 (const)). */
12520 if (GET_CODE (ind) == PLUS
12521 && REG_P (XEXP (ind, 0))
12522 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12523 && CONST_INT_P (XEXP (ind, 1))
12524 && INTVAL (XEXP (ind, 1)) > -1024
12525 && INTVAL (XEXP (ind, 1)) < 1024
12526 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12527 return TRUE;
12528
12529 return FALSE;
12530 }
12531
12532 /* Return TRUE if OP is a memory operand which we can load or store a vector
12533 to/from. TYPE is one of the following values:
 12534    0 - Vector load/store (vldr)
12535 1 - Core registers (ldm)
12536 2 - Element/structure loads (vld1)
12537 */
12538 int
12539 neon_vector_mem_operand (rtx op, int type, bool strict)
12540 {
12541 rtx ind;
12542
12543 /* Reject eliminable registers. */
12544 if (! (reload_in_progress || reload_completed)
12545 && ( reg_mentioned_p (frame_pointer_rtx, op)
12546 || reg_mentioned_p (arg_pointer_rtx, op)
12547 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12548 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12549 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12550 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12551 return !strict;
12552
12553 /* Constants are converted into offsets from labels. */
12554 if (!MEM_P (op))
12555 return FALSE;
12556
12557 ind = XEXP (op, 0);
12558
12559 if (reload_completed
12560 && (GET_CODE (ind) == LABEL_REF
12561 || (GET_CODE (ind) == CONST
12562 && GET_CODE (XEXP (ind, 0)) == PLUS
12563 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12564 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12565 return TRUE;
12566
12567 /* Match: (mem (reg)). */
12568 if (REG_P (ind))
12569 return arm_address_register_rtx_p (ind, 0);
12570
12571 /* Allow post-increment with Neon registers. */
12572 if ((type != 1 && GET_CODE (ind) == POST_INC)
12573 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12574 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12575
12576 /* FIXME: vld1 allows register post-modify. */
12577
12578 /* Match:
12579 (plus (reg)
12580 (const)). */
12581 if (type == 0
12582 && GET_CODE (ind) == PLUS
12583 && REG_P (XEXP (ind, 0))
12584 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12585 && CONST_INT_P (XEXP (ind, 1))
12586 && INTVAL (XEXP (ind, 1)) > -1024
12587 /* For quad modes, we restrict the constant offset to be slightly less
12588 than what the instruction format permits. We have no such constraint
12589 on double mode offsets. (This must match arm_legitimate_index_p.) */
12590 && (INTVAL (XEXP (ind, 1))
12591 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12592 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12593 return TRUE;
12594
12595 return FALSE;
12596 }
12597
12598 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12599 type. */
12600 int
12601 neon_struct_mem_operand (rtx op)
12602 {
12603 rtx ind;
12604
12605 /* Reject eliminable registers. */
12606 if (! (reload_in_progress || reload_completed)
12607 && ( reg_mentioned_p (frame_pointer_rtx, op)
12608 || reg_mentioned_p (arg_pointer_rtx, op)
12609 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12610 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12611 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12612 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12613 return FALSE;
12614
12615 /* Constants are converted into offsets from labels. */
12616 if (!MEM_P (op))
12617 return FALSE;
12618
12619 ind = XEXP (op, 0);
12620
12621 if (reload_completed
12622 && (GET_CODE (ind) == LABEL_REF
12623 || (GET_CODE (ind) == CONST
12624 && GET_CODE (XEXP (ind, 0)) == PLUS
12625 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12626 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12627 return TRUE;
12628
12629 /* Match: (mem (reg)). */
12630 if (REG_P (ind))
12631 return arm_address_register_rtx_p (ind, 0);
12632
12633 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12634 if (GET_CODE (ind) == POST_INC
12635 || GET_CODE (ind) == PRE_DEC)
12636 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12637
12638 return FALSE;
12639 }
12640
12641 /* Return true if X is a register that will be eliminated later on. */
12642 int
12643 arm_eliminable_register (rtx x)
12644 {
12645 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12646 || REGNO (x) == ARG_POINTER_REGNUM
12647 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12648 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12649 }
12650
 12651 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12652 coprocessor registers. Otherwise return NO_REGS. */
12653
12654 enum reg_class
12655 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12656 {
12657 if (mode == HFmode)
12658 {
12659 if (!TARGET_NEON_FP16)
12660 return GENERAL_REGS;
12661 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12662 return NO_REGS;
12663 return GENERAL_REGS;
12664 }
12665
12666 /* The neon move patterns handle all legitimate vector and struct
12667 addresses. */
12668 if (TARGET_NEON
12669 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12670 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12671 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12672 || VALID_NEON_STRUCT_MODE (mode)))
12673 return NO_REGS;
12674
12675 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12676 return NO_REGS;
12677
12678 return GENERAL_REGS;
12679 }
12680
12681 /* Values which must be returned in the most-significant end of the return
12682 register. */
12683
12684 static bool
12685 arm_return_in_msb (const_tree valtype)
12686 {
12687 return (TARGET_AAPCS_BASED
12688 && BYTES_BIG_ENDIAN
12689 && (AGGREGATE_TYPE_P (valtype)
12690 || TREE_CODE (valtype) == COMPLEX_TYPE
12691 || FIXED_POINT_TYPE_P (valtype)));
12692 }
12693
12694 /* Return TRUE if X references a SYMBOL_REF. */
12695 int
12696 symbol_mentioned_p (rtx x)
12697 {
12698 const char * fmt;
12699 int i;
12700
12701 if (GET_CODE (x) == SYMBOL_REF)
12702 return 1;
12703
12704 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12705 are constant offsets, not symbols. */
12706 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12707 return 0;
12708
12709 fmt = GET_RTX_FORMAT (GET_CODE (x));
12710
12711 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12712 {
12713 if (fmt[i] == 'E')
12714 {
12715 int j;
12716
12717 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12718 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12719 return 1;
12720 }
12721 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12722 return 1;
12723 }
12724
12725 return 0;
12726 }
12727
12728 /* Return TRUE if X references a LABEL_REF. */
12729 int
12730 label_mentioned_p (rtx x)
12731 {
12732 const char * fmt;
12733 int i;
12734
12735 if (GET_CODE (x) == LABEL_REF)
12736 return 1;
12737
12738 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12739 instruction, but they are constant offsets, not symbols. */
12740 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12741 return 0;
12742
12743 fmt = GET_RTX_FORMAT (GET_CODE (x));
12744 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12745 {
12746 if (fmt[i] == 'E')
12747 {
12748 int j;
12749
12750 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12751 if (label_mentioned_p (XVECEXP (x, i, j)))
12752 return 1;
12753 }
12754 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12755 return 1;
12756 }
12757
12758 return 0;
12759 }
12760
12761 int
12762 tls_mentioned_p (rtx x)
12763 {
12764 switch (GET_CODE (x))
12765 {
12766 case CONST:
12767 return tls_mentioned_p (XEXP (x, 0));
12768
12769 case UNSPEC:
12770 if (XINT (x, 1) == UNSPEC_TLS)
12771 return 1;
12772
12773 default:
12774 return 0;
12775 }
12776 }
12777
12778 /* Must not copy any rtx that uses a pc-relative address. */
12779
12780 static int
 12781 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
12782 {
12783 if (GET_CODE (*x) == UNSPEC
12784 && (XINT (*x, 1) == UNSPEC_PIC_BASE
12785 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
12786 return 1;
12787 return 0;
12788 }
12789
12790 static bool
12791 arm_cannot_copy_insn_p (rtx insn)
12792 {
12793 /* The tls call insn cannot be copied, as it is paired with a data
12794 word. */
12795 if (recog_memoized (insn) == CODE_FOR_tlscall)
12796 return true;
12797
12798 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
12799 }
12800
12801 enum rtx_code
12802 minmax_code (rtx x)
12803 {
12804 enum rtx_code code = GET_CODE (x);
12805
12806 switch (code)
12807 {
12808 case SMAX:
12809 return GE;
12810 case SMIN:
12811 return LE;
12812 case UMIN:
12813 return LEU;
12814 case UMAX:
12815 return GEU;
12816 default:
12817 gcc_unreachable ();
12818 }
12819 }
12820
12821 /* Match pair of min/max operators that can be implemented via usat/ssat. */
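/* Illustrative examples (added commentary): LO_BOUND == 0 and
   HI_BOUND == 255 match USAT with *MASK == 8 (exact_log2 (256));
   LO_BOUND == -128 and HI_BOUND == 127 match SSAT with *MASK == 8
   (exact_log2 (128) + 1).  */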
12822
12823 bool
12824 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12825 int *mask, bool *signed_sat)
12826 {
12827 /* The high bound must be a power of two minus one. */
12828 int log = exact_log2 (INTVAL (hi_bound) + 1);
12829 if (log == -1)
12830 return false;
12831
12832 /* The low bound is either zero (for usat) or one less than the
12833 negation of the high bound (for ssat). */
12834 if (INTVAL (lo_bound) == 0)
12835 {
12836 if (mask)
12837 *mask = log;
12838 if (signed_sat)
12839 *signed_sat = false;
12840
12841 return true;
12842 }
12843
12844 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12845 {
12846 if (mask)
12847 *mask = log + 1;
12848 if (signed_sat)
12849 *signed_sat = true;
12850
12851 return true;
12852 }
12853
12854 return false;
12855 }
12856
12857 /* Return 1 if memory locations are adjacent. */
12858 int
12859 adjacent_mem_locations (rtx a, rtx b)
12860 {
12861 /* We don't guarantee to preserve the order of these memory refs. */
12862 if (volatile_refs_p (a) || volatile_refs_p (b))
12863 return 0;
12864
12865 if ((REG_P (XEXP (a, 0))
12866 || (GET_CODE (XEXP (a, 0)) == PLUS
12867 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12868 && (REG_P (XEXP (b, 0))
12869 || (GET_CODE (XEXP (b, 0)) == PLUS
12870 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12871 {
12872 HOST_WIDE_INT val0 = 0, val1 = 0;
12873 rtx reg0, reg1;
12874 int val_diff;
12875
12876 if (GET_CODE (XEXP (a, 0)) == PLUS)
12877 {
12878 reg0 = XEXP (XEXP (a, 0), 0);
12879 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12880 }
12881 else
12882 reg0 = XEXP (a, 0);
12883
12884 if (GET_CODE (XEXP (b, 0)) == PLUS)
12885 {
12886 reg1 = XEXP (XEXP (b, 0), 0);
12887 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12888 }
12889 else
12890 reg1 = XEXP (b, 0);
12891
12892 /* Don't accept any offset that will require multiple
12893 instructions to handle, since this would cause the
12894 arith_adjacentmem pattern to output an overlong sequence. */
12895 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12896 return 0;
12897
12898 /* Don't allow an eliminable register: register elimination can make
12899 the offset too large. */
12900 if (arm_eliminable_register (reg0))
12901 return 0;
12902
12903 val_diff = val1 - val0;
12904
12905 if (arm_ld_sched)
12906 {
12907 /* If the target has load delay slots, then there's no benefit
12908 to using an ldm instruction unless the offset is zero and
12909 we are optimizing for size. */
12910 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12911 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12912 && (val_diff == 4 || val_diff == -4));
12913 }
12914
12915 return ((REGNO (reg0) == REGNO (reg1))
12916 && (val_diff == 4 || val_diff == -4));
12917 }
12918
12919 return 0;
12920 }
12921
12922 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12923 for load operations, false for store operations. CONSECUTIVE is true
12924 if the register numbers in the operation must be consecutive in the register
12925 bank.  RETURN_PC is true if the value is to be loaded into the PC.
12926 The pattern we are trying to match for load is:
12927 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12928 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12929 :
12930 :
12931 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12932 ]
12933 where
12934 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12935 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12936 3. If consecutive is TRUE, then for kth register being loaded,
12937 REGNO (R_dk) = REGNO (R_d0) + k.
12938 The pattern for store is similar. */
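/* For example, a two-register load such as "ldm r0, {r4, r5}" corresponds to
   a PARALLEL of roughly the form
     [(set (reg:SI 4) (mem:SI (reg:SI 0)))
      (set (reg:SI 5) (mem:SI (plus:SI (reg:SI 0) (const_int 4))))]
   (an illustrative sketch only; the real RTL also carries mode and alias
   information).  */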
12939 bool
12940 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
12941 bool consecutive, bool return_pc)
12942 {
12943 HOST_WIDE_INT count = XVECLEN (op, 0);
12944 rtx reg, mem, addr;
12945 unsigned regno;
12946 unsigned first_regno;
12947 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12948 rtx elt;
12949 bool addr_reg_in_reglist = false;
12950 bool update = false;
12951 int reg_increment;
12952 int offset_adj;
12953 int regs_per_val;
12954
12955 /* If not in SImode, then registers must be consecutive
12956 (e.g., VLDM instructions for DFmode). */
12957 gcc_assert ((mode == SImode) || consecutive);
12958 /* Setting return_pc for stores is illegal. */
12959 gcc_assert (!return_pc || load);
12960
12961 /* Set up the increments and the regs per val based on the mode. */
12962 reg_increment = GET_MODE_SIZE (mode);
12963 regs_per_val = reg_increment / 4;
12964 offset_adj = return_pc ? 1 : 0;
12965
12966 if (count <= 1
12967 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12968 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12969 return false;
12970
12971 /* Check if this is a write-back. */
12972 elt = XVECEXP (op, 0, offset_adj);
12973 if (GET_CODE (SET_SRC (elt)) == PLUS)
12974 {
12975 i++;
12976 base = 1;
12977 update = true;
12978
12979 /* The offset adjustment must be the number of registers being
12980 popped times the size of a single register. */
12981 if (!REG_P (SET_DEST (elt))
12982 || !REG_P (XEXP (SET_SRC (elt), 0))
12983 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12984 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12985 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12986 ((count - 1 - offset_adj) * reg_increment))
12987 return false;
12988 }
12989
12990 i = i + offset_adj;
12991 base = base + offset_adj;
12992 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12993 success depends on the type: VLDM can do just one reg,
12994 LDM must do at least two. */
12995 if ((count <= i) && (mode == SImode))
12996 return false;
12997
12998 elt = XVECEXP (op, 0, i - 1);
12999 if (GET_CODE (elt) != SET)
13000 return false;
13001
13002 if (load)
13003 {
13004 reg = SET_DEST (elt);
13005 mem = SET_SRC (elt);
13006 }
13007 else
13008 {
13009 reg = SET_SRC (elt);
13010 mem = SET_DEST (elt);
13011 }
13012
13013 if (!REG_P (reg) || !MEM_P (mem))
13014 return false;
13015
13016 regno = REGNO (reg);
13017 first_regno = regno;
13018 addr = XEXP (mem, 0);
13019 if (GET_CODE (addr) == PLUS)
13020 {
13021 if (!CONST_INT_P (XEXP (addr, 1)))
13022 return false;
13023
13024 offset = INTVAL (XEXP (addr, 1));
13025 addr = XEXP (addr, 0);
13026 }
13027
13028 if (!REG_P (addr))
13029 return false;
13030
13031 /* Don't allow SP to be loaded unless it is also the base register. It
13032 guarantees that SP is reset correctly when an LDM instruction
13033 is interrupted. Otherwise, we might end up with a corrupt stack. */
13034 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13035 return false;
13036
13037 for (; i < count; i++)
13038 {
13039 elt = XVECEXP (op, 0, i);
13040 if (GET_CODE (elt) != SET)
13041 return false;
13042
13043 if (load)
13044 {
13045 reg = SET_DEST (elt);
13046 mem = SET_SRC (elt);
13047 }
13048 else
13049 {
13050 reg = SET_SRC (elt);
13051 mem = SET_DEST (elt);
13052 }
13053
13054 if (!REG_P (reg)
13055 || GET_MODE (reg) != mode
13056 || REGNO (reg) <= regno
13057 || (consecutive
13058 && (REGNO (reg) !=
13059 (unsigned int) (first_regno + regs_per_val * (i - base))))
13060 /* Don't allow SP to be loaded unless it is also the base register. It
13061 guarantees that SP is reset correctly when an LDM instruction
13062 is interrupted. Otherwise, we might end up with a corrupt stack. */
13063 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13064 || !MEM_P (mem)
13065 || GET_MODE (mem) != mode
13066 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13067 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13068 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13069 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13070 offset + (i - base) * reg_increment))
13071 && (!REG_P (XEXP (mem, 0))
13072 || offset + (i - base) * reg_increment != 0)))
13073 return false;
13074
13075 regno = REGNO (reg);
13076 if (regno == REGNO (addr))
13077 addr_reg_in_reglist = true;
13078 }
13079
13080 if (load)
13081 {
13082 if (update && addr_reg_in_reglist)
13083 return false;
13084
13085 /* For Thumb-1, the address register is always modified - either by write-back
13086 or by an explicit load.  If the pattern does not describe an update,
13087 then the address register must be in the list of loaded registers.  */
13088 if (TARGET_THUMB1)
13089 return update || addr_reg_in_reglist;
13090 }
13091
13092 return true;
13093 }
13094
13095 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13096 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13097 instruction. ADD_OFFSET is nonzero if the base address register needs
13098 to be modified with an add instruction before we can use it. */
13099
13100 static bool
13101 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13102 int nops, HOST_WIDE_INT add_offset)
13103 {
13104 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13105 if the offset isn't small enough. The reason 2 ldrs are faster
13106 is because these ARMs are able to do more than one cache access
13107 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13108 whilst the ARM8 has a double bandwidth cache. This means that
13109 these cores can do both an instruction fetch and a data fetch in
13110 a single cycle, so the trick of calculating the address into a
13111 scratch register (one of the result regs) and then doing a load
13112 multiple actually becomes slower (and no smaller in code size).
13113 That is the transformation
13114
13115 ldr rd1, [rbase + offset]
13116 ldr rd2, [rbase + offset + 4]
13117
13118 to
13119
13120 add rd1, rbase, offset
13121 ldmia rd1, {rd1, rd2}
13122
13123 produces worse code -- '3 cycles + any stalls on rd2' instead of
13124 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13125 access per cycle, the first sequence could never complete in less
13126 than 6 cycles, whereas the ldm sequence would only take 5 and
13127 would make better use of sequential accesses if not hitting the
13128 cache.
13129
13130 We cheat here and test 'arm_ld_sched' which we currently know to
13131 only be true for the ARM8, ARM9 and StrongARM. If this ever
13132 changes, then the test below needs to be reworked. */
13133 if (nops == 2 && arm_ld_sched && add_offset != 0)
13134 return false;
13135
13136 /* XScale has load-store double instructions, but they have stricter
13137 alignment requirements than load-store multiple, so we cannot
13138 use them.
13139
13140 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13141 the pipeline until completion.
13142
13143 NREGS CYCLES
13144 1 3
13145 2 4
13146 3 5
13147 4 6
13148
13149 An ldr instruction takes 1-3 cycles, but does not block the
13150 pipeline.
13151
13152 NREGS CYCLES
13153 1 1-3
13154 2 2-6
13155 3 3-9
13156 4 4-12
13157
13158 Best case ldr will always win. However, the more ldr instructions
13159 we issue, the less likely we are to be able to schedule them well.
13160 Using ldr instructions also increases code size.
13161
13162 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13163 for counts of 3 or 4 regs. */
13164 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13165 return false;
13166 return true;
13167 }
13168
13169 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13170 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13171 an array ORDER which describes the sequence to use when accessing the
13172 offsets that produces an ascending order. In this sequence, each
13173 offset must be larger by exactly 4 than the previous one. ORDER[0]
13174 must have been filled in with the lowest offset by the caller.
13175 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13176 we use to verify that ORDER produces an ascending order of registers.
13177 Return true if it was possible to construct such an order, false if
13178 not. */
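/* For example, given UNSORTED_OFFSETS == {8, 0, 4, 12} the caller sets
   ORDER[0] = 1 (the index of offset 0), and this function fills in
   ORDER == {1, 2, 0, 3}, i.e. offsets 0, 4, 8, 12 in ascending order.  */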
13179
13180 static bool
13181 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13182 int *unsorted_regs)
13183 {
13184 int i;
13185 for (i = 1; i < nops; i++)
13186 {
13187 int j;
13188
13189 order[i] = order[i - 1];
13190 for (j = 0; j < nops; j++)
13191 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13192 {
13193 /* We must find exactly one offset that is higher than the
13194 previous one by 4. */
13195 if (order[i] != order[i - 1])
13196 return false;
13197 order[i] = j;
13198 }
13199 if (order[i] == order[i - 1])
13200 return false;
13201 /* The register numbers must be ascending. */
13202 if (unsorted_regs != NULL
13203 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13204 return false;
13205 }
13206 return true;
13207 }
13208
13209 /* Used to determine in a peephole whether a sequence of load
13210 instructions can be changed into a load-multiple instruction.
13211 NOPS is the number of separate load instructions we are examining. The
13212 first NOPS entries in OPERANDS are the destination registers, the
13213 next NOPS entries are memory operands. If this function is
13214 successful, *BASE is set to the common base register of the memory
13215 accesses; *LOAD_OFFSET is set to the first memory location's offset
13216 from that base register.
13217 REGS is an array filled in with the destination register numbers.
13218 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13219 insn numbers to an ascending order of loads.  If CHECK_REGS is true,
13220 the sequence of registers in REGS matches the loads from ascending memory
13221 locations, and the function verifies that the register numbers are
13222 themselves ascending. If CHECK_REGS is false, the register numbers
13223 are stored in the order they are found in the operands. */
13224 static int
13225 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13226 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13227 {
13228 int unsorted_regs[MAX_LDM_STM_OPS];
13229 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13230 int order[MAX_LDM_STM_OPS];
13231 rtx base_reg_rtx = NULL;
13232 int base_reg = -1;
13233 int i, ldm_case;
13234
13235 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13236 easily extended if required. */
13237 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13238
13239 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13240
13241 /* Loop over the operands and check that the memory references are
13242 suitable (i.e. immediate offsets from the same base register). At
13243 the same time, extract the target register, and the memory
13244 offsets. */
13245 for (i = 0; i < nops; i++)
13246 {
13247 rtx reg;
13248 rtx offset;
13249
13250 /* Convert a subreg of a mem into the mem itself. */
13251 if (GET_CODE (operands[nops + i]) == SUBREG)
13252 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13253
13254 gcc_assert (MEM_P (operands[nops + i]));
13255
13256 /* Don't reorder volatile memory references; it doesn't seem worth
13257 looking for the case where the order is ok anyway. */
13258 if (MEM_VOLATILE_P (operands[nops + i]))
13259 return 0;
13260
13261 offset = const0_rtx;
13262
13263 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13264 || (GET_CODE (reg) == SUBREG
13265 && REG_P (reg = SUBREG_REG (reg))))
13266 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13267 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13268 || (GET_CODE (reg) == SUBREG
13269 && REG_P (reg = SUBREG_REG (reg))))
13270 && (CONST_INT_P (offset
13271 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13272 {
13273 if (i == 0)
13274 {
13275 base_reg = REGNO (reg);
13276 base_reg_rtx = reg;
13277 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13278 return 0;
13279 }
13280 else if (base_reg != (int) REGNO (reg))
13281 /* Not addressed from the same base register. */
13282 return 0;
13283
13284 unsorted_regs[i] = (REG_P (operands[i])
13285 ? REGNO (operands[i])
13286 : REGNO (SUBREG_REG (operands[i])));
13287
13288 /* If it isn't an integer register, or if it overwrites the
13289 base register but isn't the last insn in the list, then
13290 we can't do this. */
13291 if (unsorted_regs[i] < 0
13292 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13293 || unsorted_regs[i] > 14
13294 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13295 return 0;
13296
13297 /* Don't allow SP to be loaded unless it is also the base
13298 register. It guarantees that SP is reset correctly when
13299 an LDM instruction is interrupted. Otherwise, we might
13300 end up with a corrupt stack. */
13301 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13302 return 0;
13303
13304 unsorted_offsets[i] = INTVAL (offset);
13305 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13306 order[0] = i;
13307 }
13308 else
13309 /* Not a suitable memory address. */
13310 return 0;
13311 }
13312
13313 /* All the useful information has now been extracted from the
13314 operands into unsorted_regs and unsorted_offsets; additionally,
13315 order[0] has been set to the lowest offset in the list. Sort
13316 the offsets into order, verifying that they are adjacent, and
13317 check that the register numbers are ascending. */
13318 if (!compute_offset_order (nops, unsorted_offsets, order,
13319 check_regs ? unsorted_regs : NULL))
13320 return 0;
13321
13322 if (saved_order)
13323 memcpy (saved_order, order, sizeof order);
13324
13325 if (base)
13326 {
13327 *base = base_reg;
13328
13329 for (i = 0; i < nops; i++)
13330 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13331
13332 *load_offset = unsorted_offsets[order[0]];
13333 }
13334
13335 if (TARGET_THUMB1
13336 && !peep2_reg_dead_p (nops, base_reg_rtx))
13337 return 0;
13338
13339 if (unsorted_offsets[order[0]] == 0)
13340 ldm_case = 1; /* ldmia */
13341 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13342 ldm_case = 2; /* ldmib */
13343 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13344 ldm_case = 3; /* ldmda */
13345 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13346 ldm_case = 4; /* ldmdb */
13347 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13348 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13349 ldm_case = 5;
13350 else
13351 return 0;
13352
13353 if (!multiple_operation_profitable_p (false, nops,
13354 ldm_case == 5
13355 ? unsorted_offsets[order[0]] : 0))
13356 return 0;
13357
13358 return ldm_case;
13359 }
13360
13361 /* Used to determine in a peephole whether a sequence of store instructions can
13362 be changed into a store-multiple instruction.
13363 NOPS is the number of separate store instructions we are examining.
13364 NOPS_TOTAL is the total number of instructions recognized by the peephole
13365 pattern.
13366 The first NOPS entries in OPERANDS are the source registers, the next
13367 NOPS entries are memory operands. If this function is successful, *BASE is
13368 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13369 to the first memory location's offset from that base register. REGS is an
13370 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13371 likewise filled with the corresponding rtx's.
13372 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13373 numbers to an ascending order of stores.
13374 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13375 from ascending memory locations, and the function verifies that the register
13376 numbers are themselves ascending. If CHECK_REGS is false, the register
13377 numbers are stored in the order they are found in the operands. */
13378 static int
13379 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13380 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13381 HOST_WIDE_INT *load_offset, bool check_regs)
13382 {
13383 int unsorted_regs[MAX_LDM_STM_OPS];
13384 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13385 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13386 int order[MAX_LDM_STM_OPS];
13387 int base_reg = -1;
13388 rtx base_reg_rtx = NULL;
13389 int i, stm_case;
13390
13391 /* Write back of base register is currently only supported for Thumb 1. */
13392 int base_writeback = TARGET_THUMB1;
13393
13394 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13395 easily extended if required. */
13396 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13397
13398 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13399
13400 /* Loop over the operands and check that the memory references are
13401 suitable (i.e. immediate offsets from the same base register). At
13402 the same time, extract the target register, and the memory
13403 offsets. */
13404 for (i = 0; i < nops; i++)
13405 {
13406 rtx reg;
13407 rtx offset;
13408
13409 /* Convert a subreg of a mem into the mem itself. */
13410 if (GET_CODE (operands[nops + i]) == SUBREG)
13411 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13412
13413 gcc_assert (MEM_P (operands[nops + i]));
13414
13415 /* Don't reorder volatile memory references; it doesn't seem worth
13416 looking for the case where the order is ok anyway. */
13417 if (MEM_VOLATILE_P (operands[nops + i]))
13418 return 0;
13419
13420 offset = const0_rtx;
13421
13422 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13423 || (GET_CODE (reg) == SUBREG
13424 && REG_P (reg = SUBREG_REG (reg))))
13425 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13426 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13427 || (GET_CODE (reg) == SUBREG
13428 && REG_P (reg = SUBREG_REG (reg))))
13429 && (CONST_INT_P (offset
13430 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13431 {
13432 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13433 ? operands[i] : SUBREG_REG (operands[i]));
13434 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13435
13436 if (i == 0)
13437 {
13438 base_reg = REGNO (reg);
13439 base_reg_rtx = reg;
13440 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13441 return 0;
13442 }
13443 else if (base_reg != (int) REGNO (reg))
13444 /* Not addressed from the same base register. */
13445 return 0;
13446
13447 /* If it isn't an integer register, then we can't do this. */
13448 if (unsorted_regs[i] < 0
13449 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13450 /* The effects are unpredictable if the base register is
13451 both updated and stored. */
13452 || (base_writeback && unsorted_regs[i] == base_reg)
13453 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13454 || unsorted_regs[i] > 14)
13455 return 0;
13456
13457 unsorted_offsets[i] = INTVAL (offset);
13458 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13459 order[0] = i;
13460 }
13461 else
13462 /* Not a suitable memory address. */
13463 return 0;
13464 }
13465
13466 /* All the useful information has now been extracted from the
13467 operands into unsorted_regs and unsorted_offsets; additionally,
13468 order[0] has been set to the lowest offset in the list. Sort
13469 the offsets into order, verifying that they are adjacent, and
13470 check that the register numbers are ascending. */
13471 if (!compute_offset_order (nops, unsorted_offsets, order,
13472 check_regs ? unsorted_regs : NULL))
13473 return 0;
13474
13475 if (saved_order)
13476 memcpy (saved_order, order, sizeof order);
13477
13478 if (base)
13479 {
13480 *base = base_reg;
13481
13482 for (i = 0; i < nops; i++)
13483 {
13484 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13485 if (reg_rtxs)
13486 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13487 }
13488
13489 *load_offset = unsorted_offsets[order[0]];
13490 }
13491
13492 if (TARGET_THUMB1
13493 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13494 return 0;
13495
13496 if (unsorted_offsets[order[0]] == 0)
13497 stm_case = 1; /* stmia */
13498 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13499 stm_case = 2; /* stmib */
13500 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13501 stm_case = 3; /* stmda */
13502 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13503 stm_case = 4; /* stmdb */
13504 else
13505 return 0;
13506
13507 if (!multiple_operation_profitable_p (false, nops, 0))
13508 return 0;
13509
13510 return stm_case;
13511 }
13512 \f
13513 /* Routines for use in generating RTL. */
13514
13515 /* Generate a load-multiple instruction. COUNT is the number of loads in
13516 the instruction; REGS and MEMS are arrays containing the operands.
13517 BASEREG is the base register to be used in addressing the memory operands.
13518 WBACK_OFFSET is nonzero if the instruction should update the base
13519 register. */
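/* For instance, with COUNT == 2, REGS == {4, 5} and WBACK_OFFSET == 8 the
   result is roughly
     (parallel [(set basereg (plus basereg (const_int 8)))
                (set (reg:SI 4) mems[0])
                (set (reg:SI 5) mems[1])])
   which the load-multiple patterns can emit as "ldmia rb!, {r4, r5}"
   (a sketch; the non-profitable case below falls back to separate loads).  */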
13520
13521 static rtx
13522 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13523 HOST_WIDE_INT wback_offset)
13524 {
13525 int i = 0, j;
13526 rtx result;
13527
13528 if (!multiple_operation_profitable_p (false, count, 0))
13529 {
13530 rtx seq;
13531
13532 start_sequence ();
13533
13534 for (i = 0; i < count; i++)
13535 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13536
13537 if (wback_offset != 0)
13538 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13539
13540 seq = get_insns ();
13541 end_sequence ();
13542
13543 return seq;
13544 }
13545
13546 result = gen_rtx_PARALLEL (VOIDmode,
13547 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13548 if (wback_offset != 0)
13549 {
13550 XVECEXP (result, 0, 0)
13551 = gen_rtx_SET (VOIDmode, basereg,
13552 plus_constant (Pmode, basereg, wback_offset));
13553 i = 1;
13554 count++;
13555 }
13556
13557 for (j = 0; i < count; i++, j++)
13558 XVECEXP (result, 0, i)
13559 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13560
13561 return result;
13562 }
13563
13564 /* Generate a store-multiple instruction. COUNT is the number of stores in
13565 the instruction; REGS and MEMS are arrays containing the operands.
13566 BASEREG is the base register to be used in addressing the memory operands.
13567 WBACK_OFFSET is nonzero if the instruction should update the base
13568 register. */
13569
13570 static rtx
13571 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13572 HOST_WIDE_INT wback_offset)
13573 {
13574 int i = 0, j;
13575 rtx result;
13576
13577 if (GET_CODE (basereg) == PLUS)
13578 basereg = XEXP (basereg, 0);
13579
13580 if (!multiple_operation_profitable_p (false, count, 0))
13581 {
13582 rtx seq;
13583
13584 start_sequence ();
13585
13586 for (i = 0; i < count; i++)
13587 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13588
13589 if (wback_offset != 0)
13590 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13591
13592 seq = get_insns ();
13593 end_sequence ();
13594
13595 return seq;
13596 }
13597
13598 result = gen_rtx_PARALLEL (VOIDmode,
13599 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13600 if (wback_offset != 0)
13601 {
13602 XVECEXP (result, 0, 0)
13603 = gen_rtx_SET (VOIDmode, basereg,
13604 plus_constant (Pmode, basereg, wback_offset));
13605 i = 1;
13606 count++;
13607 }
13608
13609 for (j = 0; i < count; i++, j++)
13610 XVECEXP (result, 0, i)
13611 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13612
13613 return result;
13614 }
13615
13616 /* Generate either a load-multiple or a store-multiple instruction. This
13617 function can be used in situations where we can start with a single MEM
13618 rtx and adjust its address upwards.
13619 COUNT is the number of operations in the instruction, not counting a
13620 possible update of the base register. REGS is an array containing the
13621 register operands.
13622 BASEREG is the base register to be used in addressing the memory operands,
13623 which are constructed from BASEMEM.
13624 WRITE_BACK specifies whether the generated instruction should include an
13625 update of the base register.
13626 OFFSETP is used to pass an offset to and from this function; this offset
13627 is not used when constructing the address (instead BASEMEM should have an
13628 appropriate offset in its address), it is used only for setting
13629 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13630
13631 static rtx
13632 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13633 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13634 {
13635 rtx mems[MAX_LDM_STM_OPS];
13636 HOST_WIDE_INT offset = *offsetp;
13637 int i;
13638
13639 gcc_assert (count <= MAX_LDM_STM_OPS);
13640
13641 if (GET_CODE (basereg) == PLUS)
13642 basereg = XEXP (basereg, 0);
13643
13644 for (i = 0; i < count; i++)
13645 {
13646 rtx addr = plus_constant (Pmode, basereg, i * 4);
13647 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13648 offset += 4;
13649 }
13650
13651 if (write_back)
13652 *offsetp = offset;
13653
13654 if (is_load)
13655 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13656 write_back ? 4 * count : 0);
13657 else
13658 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13659 write_back ? 4 * count : 0);
13660 }
13661
13662 rtx
13663 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13664 rtx basemem, HOST_WIDE_INT *offsetp)
13665 {
13666 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13667 offsetp);
13668 }
13669
13670 rtx
13671 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13672 rtx basemem, HOST_WIDE_INT *offsetp)
13673 {
13674 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13675 offsetp);
13676 }
13677
13678 /* Called from a peephole2 expander to turn a sequence of loads into an
13679 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13680 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13681 is true if we can reorder the registers because they are used commutatively
13682 subsequently.
13683 Returns true iff we could generate a new instruction. */
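/* For instance, four loads of r4..r7 from [rb], [rb, #4], [rb, #8] and
   [rb, #12] can be combined into a single "ldmia rb, {r4-r7}" (ldm_case 1
   below); offsets that merely fit an add-immediate use ldm_case 5 and emit
   an extra add of the base first.  */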
13684
13685 bool
13686 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13687 {
13688 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13689 rtx mems[MAX_LDM_STM_OPS];
13690 int i, j, base_reg;
13691 rtx base_reg_rtx;
13692 HOST_WIDE_INT offset;
13693 int write_back = FALSE;
13694 int ldm_case;
13695 rtx addr;
13696
13697 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13698 &base_reg, &offset, !sort_regs);
13699
13700 if (ldm_case == 0)
13701 return false;
13702
13703 if (sort_regs)
13704 for (i = 0; i < nops - 1; i++)
13705 for (j = i + 1; j < nops; j++)
13706 if (regs[i] > regs[j])
13707 {
13708 int t = regs[i];
13709 regs[i] = regs[j];
13710 regs[j] = t;
13711 }
13712 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13713
13714 if (TARGET_THUMB1)
13715 {
13716 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13717 gcc_assert (ldm_case == 1 || ldm_case == 5);
13718 write_back = TRUE;
13719 }
13720
13721 if (ldm_case == 5)
13722 {
13723 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13724 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13725 offset = 0;
13726 if (!TARGET_THUMB1)
13727 {
13728 base_reg = regs[0];
13729 base_reg_rtx = newbase;
13730 }
13731 }
13732
13733 for (i = 0; i < nops; i++)
13734 {
13735 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13736 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13737 SImode, addr, 0);
13738 }
13739 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13740 write_back ? offset + i * 4 : 0));
13741 return true;
13742 }
13743
13744 /* Called from a peephole2 expander to turn a sequence of stores into an
13745 STM instruction. OPERANDS are the operands found by the peephole matcher;
13746 NOPS indicates how many separate stores we are trying to combine.
13747 Returns true iff we could generate a new instruction. */
13748
13749 bool
13750 gen_stm_seq (rtx *operands, int nops)
13751 {
13752 int i;
13753 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13754 rtx mems[MAX_LDM_STM_OPS];
13755 int base_reg;
13756 rtx base_reg_rtx;
13757 HOST_WIDE_INT offset;
13758 int write_back = FALSE;
13759 int stm_case;
13760 rtx addr;
13761 bool base_reg_dies;
13762
13763 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13764 mem_order, &base_reg, &offset, true);
13765
13766 if (stm_case == 0)
13767 return false;
13768
13769 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13770
13771 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13772 if (TARGET_THUMB1)
13773 {
13774 gcc_assert (base_reg_dies);
13775 write_back = TRUE;
13776 }
13777
13778 if (stm_case == 5)
13779 {
13780 gcc_assert (base_reg_dies);
13781 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13782 offset = 0;
13783 }
13784
13785 addr = plus_constant (Pmode, base_reg_rtx, offset);
13786
13787 for (i = 0; i < nops; i++)
13788 {
13789 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13790 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13791 SImode, addr, 0);
13792 }
13793 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13794 write_back ? offset + i * 4 : 0));
13795 return true;
13796 }
13797
13798 /* Called from a peephole2 expander to turn a sequence of stores that are
13799 preceded by constant loads into an STM instruction. OPERANDS are the
13800 operands found by the peephole matcher; NOPS indicates how many
13801 separate stores we are trying to combine; there are 2 * NOPS
13802 instructions in the peephole.
13803 Returns true iff we could generate a new instruction. */
13804
13805 bool
13806 gen_const_stm_seq (rtx *operands, int nops)
13807 {
13808 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13809 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13810 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13811 rtx mems[MAX_LDM_STM_OPS];
13812 int base_reg;
13813 rtx base_reg_rtx;
13814 HOST_WIDE_INT offset;
13815 int write_back = FALSE;
13816 int stm_case;
13817 rtx addr;
13818 bool base_reg_dies;
13819 int i, j;
13820 HARD_REG_SET allocated;
13821
13822 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13823 mem_order, &base_reg, &offset, false);
13824
13825 if (stm_case == 0)
13826 return false;
13827
13828 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13829
13830 /* If the same register is used more than once, try to find a free
13831 register. */
13832 CLEAR_HARD_REG_SET (allocated);
13833 for (i = 0; i < nops; i++)
13834 {
13835 for (j = i + 1; j < nops; j++)
13836 if (regs[i] == regs[j])
13837 {
13838 rtx t = peep2_find_free_register (0, nops * 2,
13839 TARGET_THUMB1 ? "l" : "r",
13840 SImode, &allocated);
13841 if (t == NULL_RTX)
13842 return false;
13843 reg_rtxs[i] = t;
13844 regs[i] = REGNO (t);
13845 }
13846 }
13847
13848 /* Compute an ordering that maps the register numbers to an ascending
13849 sequence. */
13850 reg_order[0] = 0;
13851 for (i = 0; i < nops; i++)
13852 if (regs[i] < regs[reg_order[0]])
13853 reg_order[0] = i;
13854
13855 for (i = 1; i < nops; i++)
13856 {
13857 int this_order = reg_order[i - 1];
13858 for (j = 0; j < nops; j++)
13859 if (regs[j] > regs[reg_order[i - 1]]
13860 && (this_order == reg_order[i - 1]
13861 || regs[j] < regs[this_order]))
13862 this_order = j;
13863 reg_order[i] = this_order;
13864 }
13865
13866 /* Ensure that registers that must be live after the instruction end
13867 up with the correct value. */
13868 for (i = 0; i < nops; i++)
13869 {
13870 int this_order = reg_order[i];
13871 if ((this_order != mem_order[i]
13872 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13873 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13874 return false;
13875 }
13876
13877 /* Load the constants. */
13878 for (i = 0; i < nops; i++)
13879 {
13880 rtx op = operands[2 * nops + mem_order[i]];
13881 sorted_regs[i] = regs[reg_order[i]];
13882 emit_move_insn (reg_rtxs[reg_order[i]], op);
13883 }
13884
13885 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13886
13887 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13888 if (TARGET_THUMB1)
13889 {
13890 gcc_assert (base_reg_dies);
13891 write_back = TRUE;
13892 }
13893
13894 if (stm_case == 5)
13895 {
13896 gcc_assert (base_reg_dies);
13897 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13898 offset = 0;
13899 }
13900
13901 addr = plus_constant (Pmode, base_reg_rtx, offset);
13902
13903 for (i = 0; i < nops; i++)
13904 {
13905 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13906 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13907 SImode, addr, 0);
13908 }
13909 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13910 write_back ? offset + i * 4 : 0));
13911 return true;
13912 }
13913
13914 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13915 unaligned copies on processors which support unaligned semantics for those
13916 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13917 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13918 An interleave factor of 1 (the minimum) will perform no interleaving.
13919 Load/store multiple are used for aligned addresses where possible. */
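/* For example, with INTERLEAVE_FACTOR == 2 and word-aligned source and
   destination, each iteration of the main copy loop below is roughly
       ldmia   rsrc!, {r0, r1}
       stmia   rdst!, {r0, r1}
   whereas unaligned buffers use pairs of unaligned ldr/str with explicit
   offsets instead (a sketch; hard registers are only used when ldm/stm is
   possible).  */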
13920
13921 static void
13922 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13923 HOST_WIDE_INT length,
13924 unsigned int interleave_factor)
13925 {
13926 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13927 int *regnos = XALLOCAVEC (int, interleave_factor);
13928 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13929 HOST_WIDE_INT i, j;
13930 HOST_WIDE_INT remaining = length, words;
13931 rtx halfword_tmp = NULL, byte_tmp = NULL;
13932 rtx dst, src;
13933 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13934 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13935 HOST_WIDE_INT srcoffset, dstoffset;
13936 HOST_WIDE_INT src_autoinc, dst_autoinc;
13937 rtx mem, addr;
13938
13939 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13940
13941 /* Use hard registers if we have aligned source or destination so we can use
13942 load/store multiple with contiguous registers. */
13943 if (dst_aligned || src_aligned)
13944 for (i = 0; i < interleave_factor; i++)
13945 regs[i] = gen_rtx_REG (SImode, i);
13946 else
13947 for (i = 0; i < interleave_factor; i++)
13948 regs[i] = gen_reg_rtx (SImode);
13949
13950 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13951 src = copy_addr_to_reg (XEXP (srcbase, 0));
13952
13953 srcoffset = dstoffset = 0;
13954
13955 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13956 For copying the last bytes we want to subtract this offset again. */
13957 src_autoinc = dst_autoinc = 0;
13958
13959 for (i = 0; i < interleave_factor; i++)
13960 regnos[i] = i;
13961
13962 /* Copy BLOCK_SIZE_BYTES chunks. */
13963
13964 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13965 {
13966 /* Load words. */
13967 if (src_aligned && interleave_factor > 1)
13968 {
13969 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13970 TRUE, srcbase, &srcoffset));
13971 src_autoinc += UNITS_PER_WORD * interleave_factor;
13972 }
13973 else
13974 {
13975 for (j = 0; j < interleave_factor; j++)
13976 {
13977 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13978 - src_autoinc));
13979 mem = adjust_automodify_address (srcbase, SImode, addr,
13980 srcoffset + j * UNITS_PER_WORD);
13981 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13982 }
13983 srcoffset += block_size_bytes;
13984 }
13985
13986 /* Store words. */
13987 if (dst_aligned && interleave_factor > 1)
13988 {
13989 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13990 TRUE, dstbase, &dstoffset));
13991 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13992 }
13993 else
13994 {
13995 for (j = 0; j < interleave_factor; j++)
13996 {
13997 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13998 - dst_autoinc));
13999 mem = adjust_automodify_address (dstbase, SImode, addr,
14000 dstoffset + j * UNITS_PER_WORD);
14001 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14002 }
14003 dstoffset += block_size_bytes;
14004 }
14005
14006 remaining -= block_size_bytes;
14007 }
14008
14009 /* Copy any whole words left (note these aren't interleaved with any
14010 subsequent halfword/byte load/stores in the interests of simplicity). */
14011
14012 words = remaining / UNITS_PER_WORD;
14013
14014 gcc_assert (words < interleave_factor);
14015
14016 if (src_aligned && words > 1)
14017 {
14018 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14019 &srcoffset));
14020 src_autoinc += UNITS_PER_WORD * words;
14021 }
14022 else
14023 {
14024 for (j = 0; j < words; j++)
14025 {
14026 addr = plus_constant (Pmode, src,
14027 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14028 mem = adjust_automodify_address (srcbase, SImode, addr,
14029 srcoffset + j * UNITS_PER_WORD);
14030 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14031 }
14032 srcoffset += words * UNITS_PER_WORD;
14033 }
14034
14035 if (dst_aligned && words > 1)
14036 {
14037 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14038 &dstoffset));
14039 dst_autoinc += words * UNITS_PER_WORD;
14040 }
14041 else
14042 {
14043 for (j = 0; j < words; j++)
14044 {
14045 addr = plus_constant (Pmode, dst,
14046 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14047 mem = adjust_automodify_address (dstbase, SImode, addr,
14048 dstoffset + j * UNITS_PER_WORD);
14049 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14050 }
14051 dstoffset += words * UNITS_PER_WORD;
14052 }
14053
14054 remaining -= words * UNITS_PER_WORD;
14055
14056 gcc_assert (remaining < 4);
14057
14058 /* Copy a halfword if necessary. */
14059
14060 if (remaining >= 2)
14061 {
14062 halfword_tmp = gen_reg_rtx (SImode);
14063
14064 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14065 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14066 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14067
14068 /* Either write out immediately, or delay until we've loaded the last
14069 byte, depending on interleave factor. */
14070 if (interleave_factor == 1)
14071 {
14072 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14073 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14074 emit_insn (gen_unaligned_storehi (mem,
14075 gen_lowpart (HImode, halfword_tmp)));
14076 halfword_tmp = NULL;
14077 dstoffset += 2;
14078 }
14079
14080 remaining -= 2;
14081 srcoffset += 2;
14082 }
14083
14084 gcc_assert (remaining < 2);
14085
14086 /* Copy last byte. */
14087
14088 if ((remaining & 1) != 0)
14089 {
14090 byte_tmp = gen_reg_rtx (SImode);
14091
14092 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14093 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14094 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14095
14096 if (interleave_factor == 1)
14097 {
14098 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14099 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14100 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14101 byte_tmp = NULL;
14102 dstoffset++;
14103 }
14104
14105 remaining--;
14106 srcoffset++;
14107 }
14108
14109 /* Store last halfword if we haven't done so already. */
14110
14111 if (halfword_tmp)
14112 {
14113 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14114 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14115 emit_insn (gen_unaligned_storehi (mem,
14116 gen_lowpart (HImode, halfword_tmp)));
14117 dstoffset += 2;
14118 }
14119
14120 /* Likewise for last byte. */
14121
14122 if (byte_tmp)
14123 {
14124 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14125 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14126 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14127 dstoffset++;
14128 }
14129
14130 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14131 }
14132
14133 /* From mips_adjust_block_mem:
14134
14135 Helper function for doing a loop-based block operation on memory
14136 reference MEM. Each iteration of the loop will operate on LENGTH
14137 bytes of MEM.
14138
14139 Create a new base register for use within the loop and point it to
14140 the start of MEM. Create a new memory reference that uses this
14141 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14142
14143 static void
14144 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14145 rtx *loop_mem)
14146 {
14147 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14148
14149 /* Although the new mem does not refer to a known location,
14150 it does keep up to LENGTH bytes of alignment. */
14151 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14152 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14153 }
14154
14155 /* From mips_block_move_loop:
14156
14157 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14158 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14159 the memory regions do not overlap. */
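/* For example, copying LENGTH == 70 bytes with BYTES_PER_ITER == 16 runs the
   loop over the first 64 bytes (four iterations) and then copies the
   remaining 6 bytes with a straight-line sequence.  */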
14160
14161 static void
14162 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14163 unsigned int interleave_factor,
14164 HOST_WIDE_INT bytes_per_iter)
14165 {
14166 rtx label, src_reg, dest_reg, final_src, test;
14167 HOST_WIDE_INT leftover;
14168
14169 leftover = length % bytes_per_iter;
14170 length -= leftover;
14171
14172 /* Create registers and memory references for use within the loop. */
14173 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14174 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14175
14176 /* Calculate the value that SRC_REG should have after the last iteration of
14177 the loop. */
14178 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14179 0, 0, OPTAB_WIDEN);
14180
14181 /* Emit the start of the loop. */
14182 label = gen_label_rtx ();
14183 emit_label (label);
14184
14185 /* Emit the loop body. */
14186 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14187 interleave_factor);
14188
14189 /* Move on to the next block. */
14190 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14191 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14192
14193 /* Emit the loop condition. */
14194 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14195 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14196
14197 /* Mop up any left-over bytes. */
14198 if (leftover)
14199 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14200 }
14201
14202 /* Emit a block move when either the source or destination is unaligned (not
14203 aligned to a four-byte boundary). This may need further tuning depending on
14204 core type, optimize_size setting, etc. */
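/* For instance, when not optimizing for size a 64-byte unaligned copy uses
   the loop above at 16 bytes per iteration with an interleave factor of 4,
   while a 24-byte copy is expanded entirely straight-line.  */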
14205
14206 static int
14207 arm_movmemqi_unaligned (rtx *operands)
14208 {
14209 HOST_WIDE_INT length = INTVAL (operands[2]);
14210
14211 if (optimize_size)
14212 {
14213 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14214 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14215 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14216 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14217 or dst_aligned though: allow more interleaving in those cases since the
14218 resulting code can be smaller. */
14219 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14220 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14221
14222 if (length > 12)
14223 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14224 interleave_factor, bytes_per_iter);
14225 else
14226 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14227 interleave_factor);
14228 }
14229 else
14230 {
14231 /* Note that the loop created by arm_block_move_unaligned_loop may be
14232 subject to loop unrolling, which makes tuning this condition a little
14233 redundant. */
14234 if (length > 32)
14235 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14236 else
14237 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14238 }
14239
14240 return 1;
14241 }
14242
14243 int
14244 arm_gen_movmemqi (rtx *operands)
14245 {
14246 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14247 HOST_WIDE_INT srcoffset, dstoffset;
14248 int i;
14249 rtx src, dst, srcbase, dstbase;
14250 rtx part_bytes_reg = NULL;
14251 rtx mem;
14252
14253 if (!CONST_INT_P (operands[2])
14254 || !CONST_INT_P (operands[3])
14255 || INTVAL (operands[2]) > 64)
14256 return 0;
14257
14258 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14259 return arm_movmemqi_unaligned (operands);
14260
14261 if (INTVAL (operands[3]) & 3)
14262 return 0;
14263
14264 dstbase = operands[0];
14265 srcbase = operands[1];
14266
14267 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14268 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14269
14270 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14271 out_words_to_go = INTVAL (operands[2]) / 4;
14272 last_bytes = INTVAL (operands[2]) & 3;
14273 dstoffset = srcoffset = 0;
14274
14275 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14276 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14277
14278 for (i = 0; in_words_to_go >= 2; i+=4)
14279 {
14280 if (in_words_to_go > 4)
14281 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14282 TRUE, srcbase, &srcoffset));
14283 else
14284 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14285 src, FALSE, srcbase,
14286 &srcoffset));
14287
14288 if (out_words_to_go)
14289 {
14290 if (out_words_to_go > 4)
14291 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14292 TRUE, dstbase, &dstoffset));
14293 else if (out_words_to_go != 1)
14294 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14295 out_words_to_go, dst,
14296 (last_bytes == 0
14297 ? FALSE : TRUE),
14298 dstbase, &dstoffset));
14299 else
14300 {
14301 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14302 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14303 if (last_bytes != 0)
14304 {
14305 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14306 dstoffset += 4;
14307 }
14308 }
14309 }
14310
14311 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14312 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14313 }
14314
14315 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14316 if (out_words_to_go)
14317 {
14318 rtx sreg;
14319
14320 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14321 sreg = copy_to_reg (mem);
14322
14323 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14324 emit_move_insn (mem, sreg);
14325 in_words_to_go--;
14326
14327 gcc_assert (!in_words_to_go); /* Sanity check */
14328 }
14329
14330 if (in_words_to_go)
14331 {
14332 gcc_assert (in_words_to_go > 0);
14333
14334 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14335 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14336 }
14337
14338 gcc_assert (!last_bytes || part_bytes_reg);
14339
14340 if (BYTES_BIG_ENDIAN && last_bytes)
14341 {
14342 rtx tmp = gen_reg_rtx (SImode);
14343
14344 /* The bytes we want are in the top end of the word. */
14345 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14346 GEN_INT (8 * (4 - last_bytes))));
14347 part_bytes_reg = tmp;
14348
14349 while (last_bytes)
14350 {
14351 mem = adjust_automodify_address (dstbase, QImode,
14352 plus_constant (Pmode, dst,
14353 last_bytes - 1),
14354 dstoffset + last_bytes - 1);
14355 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14356
14357 if (--last_bytes)
14358 {
14359 tmp = gen_reg_rtx (SImode);
14360 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14361 part_bytes_reg = tmp;
14362 }
14363 }
14364
14365 }
14366 else
14367 {
14368 if (last_bytes > 1)
14369 {
14370 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14371 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14372 last_bytes -= 2;
14373 if (last_bytes)
14374 {
14375 rtx tmp = gen_reg_rtx (SImode);
14376 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14377 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14378 part_bytes_reg = tmp;
14379 dstoffset += 2;
14380 }
14381 }
14382
14383 if (last_bytes)
14384 {
14385 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14386 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14387 }
14388 }
14389
14390 return 1;
14391 }
14392
14393 /* Helper for gen_movmem_ldrd_strd.  Increase the address of the memory
14394 rtx MEM by the size of its mode.  */
14395 inline static rtx
14396 next_consecutive_mem (rtx mem)
14397 {
14398 enum machine_mode mode = GET_MODE (mem);
14399 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14400 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14401
14402 return adjust_automodify_address (mem, mode, addr, offset);
14403 }
14404
14405 /* Copy using LDRD/STRD instructions whenever possible.
14406 Returns true upon success. */
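/* For example, a 14-byte copy between word-aligned buffers is expanded as
   one DImode move (typically LDRD/STRD), one SImode move and one HImode
   move; unaligned buffers use the equivalent unaligned load/store patterns
   instead.  */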
14407 bool
14408 gen_movmem_ldrd_strd (rtx *operands)
14409 {
14410 unsigned HOST_WIDE_INT len;
14411 HOST_WIDE_INT align;
14412 rtx src, dst, base;
14413 rtx reg0;
14414 bool src_aligned, dst_aligned;
14415 bool src_volatile, dst_volatile;
14416
14417 gcc_assert (CONST_INT_P (operands[2]));
14418 gcc_assert (CONST_INT_P (operands[3]));
14419
14420 len = UINTVAL (operands[2]);
14421 if (len > 64)
14422 return false;
14423
14424 /* Maximum alignment we can assume for both src and dst buffers. */
14425 align = INTVAL (operands[3]);
14426
14427 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14428 return false;
14429
14430 /* Place src and dst addresses in registers
14431 and update the corresponding mem rtx. */
14432 dst = operands[0];
14433 dst_volatile = MEM_VOLATILE_P (dst);
14434 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14435 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14436 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14437
14438 src = operands[1];
14439 src_volatile = MEM_VOLATILE_P (src);
14440 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14441 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14442 src = adjust_automodify_address (src, VOIDmode, base, 0);
14443
14444 if (!unaligned_access && !(src_aligned && dst_aligned))
14445 return false;
14446
14447 if (src_volatile || dst_volatile)
14448 return false;
14449
14450 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14451 if (!(dst_aligned || src_aligned))
14452 return arm_gen_movmemqi (operands);
14453
14454 src = adjust_address (src, DImode, 0);
14455 dst = adjust_address (dst, DImode, 0);
14456 while (len >= 8)
14457 {
14458 len -= 8;
14459 reg0 = gen_reg_rtx (DImode);
14460 if (src_aligned)
14461 emit_move_insn (reg0, src);
14462 else
14463 emit_insn (gen_unaligned_loaddi (reg0, src));
14464
14465 if (dst_aligned)
14466 emit_move_insn (dst, reg0);
14467 else
14468 emit_insn (gen_unaligned_storedi (dst, reg0));
14469
14470 src = next_consecutive_mem (src);
14471 dst = next_consecutive_mem (dst);
14472 }
14473
14474 gcc_assert (len < 8);
14475 if (len >= 4)
14476 {
14477 /* More than a word but less than a double-word to copy. Copy a word. */
14478 reg0 = gen_reg_rtx (SImode);
14479 src = adjust_address (src, SImode, 0);
14480 dst = adjust_address (dst, SImode, 0);
14481 if (src_aligned)
14482 emit_move_insn (reg0, src);
14483 else
14484 emit_insn (gen_unaligned_loadsi (reg0, src));
14485
14486 if (dst_aligned)
14487 emit_move_insn (dst, reg0);
14488 else
14489 emit_insn (gen_unaligned_storesi (dst, reg0));
14490
14491 src = next_consecutive_mem (src);
14492 dst = next_consecutive_mem (dst);
14493 len -= 4;
14494 }
14495
14496 if (len == 0)
14497 return true;
14498
14499 /* Copy the remaining bytes. */
14500 if (len >= 2)
14501 {
14502 dst = adjust_address (dst, HImode, 0);
14503 src = adjust_address (src, HImode, 0);
14504 reg0 = gen_reg_rtx (SImode);
14505 if (src_aligned)
14506 emit_insn (gen_zero_extendhisi2 (reg0, src));
14507 else
14508 emit_insn (gen_unaligned_loadhiu (reg0, src));
14509
14510 if (dst_aligned)
14511 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14512 else
14513 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14514
14515 src = next_consecutive_mem (src);
14516 dst = next_consecutive_mem (dst);
14517 if (len == 2)
14518 return true;
14519 }
14520
14521 dst = adjust_address (dst, QImode, 0);
14522 src = adjust_address (src, QImode, 0);
14523 reg0 = gen_reg_rtx (QImode);
14524 emit_move_insn (reg0, src);
14525 emit_move_insn (dst, reg0);
14526 return true;
14527 }
14528
14529 /* Select a dominance comparison mode if possible for a test of the general
14530 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14531 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14532 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14533 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14534 In all cases OP will be either EQ or NE, but we don't need to know which
14535 here. If we are unable to support a dominance comparison we return
14536 CC mode. This will then fail to match for the RTL expressions that
14537 generate this call. */
14538 enum machine_mode
14539 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14540 {
14541 enum rtx_code cond1, cond2;
14542 int swapped = 0;
14543
14544 /* Currently we will probably get the wrong result if the individual
14545 comparisons are not simple. This also ensures that it is safe to
14546 reverse a comparison if necessary. */
14547 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14548 != CCmode)
14549 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14550 != CCmode))
14551 return CCmode;
14552
14553 /* The if_then_else variant of this tests the second condition if the
14554 first passes, but is true if the first fails. Reverse the first
14555 condition to get a true "inclusive-or" expression. */
14556 if (cond_or == DOM_CC_NX_OR_Y)
14557 cond1 = reverse_condition (cond1);
14558
14559 /* If the comparisons are not equal, and one doesn't dominate the other,
14560 then we can't do this. */
14561 if (cond1 != cond2
14562 && !comparison_dominates_p (cond1, cond2)
14563 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14564 return CCmode;
14565
14566 if (swapped)
14567 {
14568 enum rtx_code temp = cond1;
14569 cond1 = cond2;
14570 cond2 = temp;
14571 }
14572
14573 switch (cond1)
14574 {
14575 case EQ:
14576 if (cond_or == DOM_CC_X_AND_Y)
14577 return CC_DEQmode;
14578
14579 switch (cond2)
14580 {
14581 case EQ: return CC_DEQmode;
14582 case LE: return CC_DLEmode;
14583 case LEU: return CC_DLEUmode;
14584 case GE: return CC_DGEmode;
14585 case GEU: return CC_DGEUmode;
14586 default: gcc_unreachable ();
14587 }
14588
14589 case LT:
14590 if (cond_or == DOM_CC_X_AND_Y)
14591 return CC_DLTmode;
14592
14593 switch (cond2)
14594 {
14595 case LT:
14596 return CC_DLTmode;
14597 case LE:
14598 return CC_DLEmode;
14599 case NE:
14600 return CC_DNEmode;
14601 default:
14602 gcc_unreachable ();
14603 }
14604
14605 case GT:
14606 if (cond_or == DOM_CC_X_AND_Y)
14607 return CC_DGTmode;
14608
14609 switch (cond2)
14610 {
14611 case GT:
14612 return CC_DGTmode;
14613 case GE:
14614 return CC_DGEmode;
14615 case NE:
14616 return CC_DNEmode;
14617 default:
14618 gcc_unreachable ();
14619 }
14620
14621 case LTU:
14622 if (cond_or == DOM_CC_X_AND_Y)
14623 return CC_DLTUmode;
14624
14625 switch (cond2)
14626 {
14627 case LTU:
14628 return CC_DLTUmode;
14629 case LEU:
14630 return CC_DLEUmode;
14631 case NE:
14632 return CC_DNEmode;
14633 default:
14634 gcc_unreachable ();
14635 }
14636
14637 case GTU:
14638 if (cond_or == DOM_CC_X_AND_Y)
14639 return CC_DGTUmode;
14640
14641 switch (cond2)
14642 {
14643 case GTU:
14644 return CC_DGTUmode;
14645 case GEU:
14646 return CC_DGEUmode;
14647 case NE:
14648 return CC_DNEmode;
14649 default:
14650 gcc_unreachable ();
14651 }
14652
14653 /* The remaining cases only occur when both comparisons are the
14654 same. */
14655 case NE:
14656 gcc_assert (cond1 == cond2);
14657 return CC_DNEmode;
14658
14659 case LE:
14660 gcc_assert (cond1 == cond2);
14661 return CC_DLEmode;
14662
14663 case GE:
14664 gcc_assert (cond1 == cond2);
14665 return CC_DGEmode;
14666
14667 case LEU:
14668 gcc_assert (cond1 == cond2);
14669 return CC_DLEUmode;
14670
14671 case GEU:
14672 gcc_assert (cond1 == cond2);
14673 return CC_DGEUmode;
14674
14675 default:
14676 gcc_unreachable ();
14677 }
14678 }
14679
14680 enum machine_mode
14681 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14682 {
14683 /* All floating point compares return CCFP if it is an equality
14684 comparison, and CCFPE otherwise. */
14685 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14686 {
14687 switch (op)
14688 {
14689 case EQ:
14690 case NE:
14691 case UNORDERED:
14692 case ORDERED:
14693 case UNLT:
14694 case UNLE:
14695 case UNGT:
14696 case UNGE:
14697 case UNEQ:
14698 case LTGT:
14699 return CCFPmode;
14700
14701 case LT:
14702 case LE:
14703 case GT:
14704 case GE:
14705 return CCFPEmode;
14706
14707 default:
14708 gcc_unreachable ();
14709 }
14710 }
14711
14712 /* A compare with a shifted operand. Because of canonicalization, the
14713 comparison will have to be swapped when we emit the assembler. */
14714 if (GET_MODE (y) == SImode
14715 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14716 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14717 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14718 || GET_CODE (x) == ROTATERT))
14719 return CC_SWPmode;
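/* For instance (illustrative), (gt (ashift x 2) y) is emitted as a compare
   of Y against X shifted left by two, so the condition actually tested is
   the swapped one (LT in this example).  */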
14720
14721 /* This operation is performed swapped, but since we only rely on the Z
14722 flag we don't need an additional mode. */
14723 if (GET_MODE (y) == SImode
14724 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14725 && GET_CODE (x) == NEG
14726 && (op == EQ || op == NE))
14727 return CC_Zmode;
14728
14729 /* This is a special case that is used by combine to allow a
14730 comparison of a shifted byte load to be split into a zero-extend
14731 followed by a comparison of the shifted integer (only valid for
14732 equalities and unsigned inequalities). */
14733 if (GET_MODE (x) == SImode
14734 && GET_CODE (x) == ASHIFT
14735 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14736 && GET_CODE (XEXP (x, 0)) == SUBREG
14737 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14738 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14739 && (op == EQ || op == NE
14740 || op == GEU || op == GTU || op == LTU || op == LEU)
14741 && CONST_INT_P (y))
14742 return CC_Zmode;
14743
14744 /* A construct for a conditional compare: if the false arm contains
14745 0, then both conditions must be true, otherwise either condition
14746 must be true. Not all conditions are possible, so CCmode is
14747 returned if it can't be done. */
14748 if (GET_CODE (x) == IF_THEN_ELSE
14749 && (XEXP (x, 2) == const0_rtx
14750 || XEXP (x, 2) == const1_rtx)
14751 && COMPARISON_P (XEXP (x, 0))
14752 && COMPARISON_P (XEXP (x, 1)))
14753 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14754 INTVAL (XEXP (x, 2)));
14755
14756 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14757 if (GET_CODE (x) == AND
14758 && (op == EQ || op == NE)
14759 && COMPARISON_P (XEXP (x, 0))
14760 && COMPARISON_P (XEXP (x, 1)))
14761 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14762 DOM_CC_X_AND_Y);
14763
14764 if (GET_CODE (x) == IOR
14765 && (op == EQ || op == NE)
14766 && COMPARISON_P (XEXP (x, 0))
14767 && COMPARISON_P (XEXP (x, 1)))
14768 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14769 DOM_CC_X_OR_Y);
14770
14771 /* An operation (on Thumb) where we want to test for a single bit.
14772 This is done by shifting that bit up into the top bit of a
14773 scratch register; we can then branch on the sign bit. */
14774 if (TARGET_THUMB1
14775 && GET_MODE (x) == SImode
14776 && (op == EQ || op == NE)
14777 && GET_CODE (x) == ZERO_EXTRACT
14778 && XEXP (x, 1) == const1_rtx)
14779 return CC_Nmode;
14780
14781 /* For an operation that sets the condition codes as a side-effect, the
14782 V flag is not set correctly, so we can only use comparisons where
14783 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14784 instead.) */
14785 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14786 if (GET_MODE (x) == SImode
14787 && y == const0_rtx
14788 && (op == EQ || op == NE || op == LT || op == GE)
14789 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14790 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14791 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14792 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14793 || GET_CODE (x) == LSHIFTRT
14794 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14795 || GET_CODE (x) == ROTATERT
14796 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14797 return CC_NOOVmode;
14798
14799 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14800 return CC_Zmode;
14801
14802 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14803 && GET_CODE (x) == PLUS
14804 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14805 return CC_Cmode;
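/* This matches, for instance, the unsigned overflow idiom 'if (a + b < a)':
   the sum is compared against one of its own operands, so only the carry
   flag is needed.  */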
14806
14807 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14808 {
14809 switch (op)
14810 {
14811 case EQ:
14812 case NE:
14813 /* A DImode comparison against zero can be implemented by
14814 or'ing the two halves together. */
14815 if (y == const0_rtx)
14816 return CC_Zmode;
14817
14818 /* We can do an equality test in three Thumb instructions. */
14819 if (!TARGET_32BIT)
14820 return CC_Zmode;
14821
14822 /* FALLTHROUGH */
14823
14824 case LTU:
14825 case LEU:
14826 case GTU:
14827 case GEU:
14828 /* DImode unsigned comparisons can be implemented by cmp +
14829 cmpeq without a scratch register. Not worth doing in
14830 Thumb-2. */
14831 if (TARGET_32BIT)
14832 return CC_CZmode;
14833
14834 /* FALLTHROUGH */
14835
14836 case LT:
14837 case LE:
14838 case GT:
14839 case GE:
14840 /* DImode signed and unsigned comparisons can be implemented
14841 by cmp + sbcs with a scratch register, but that does not
14842 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14843 gcc_assert (op != EQ && op != NE);
14844 return CC_NCVmode;
14845
14846 default:
14847 gcc_unreachable ();
14848 }
14849 }
14850
14851 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14852 return GET_MODE (x);
14853
14854 return CCmode;
14855 }
14856
14857 /* X and Y are two things to compare using CODE. Emit the compare insn and
14858 return the rtx for the CC register in the proper mode.  SCRATCH is an
14859 SImode scratch register used by some DImode comparisons after reload. */
14860 rtx
14861 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14862 {
14863 enum machine_mode mode;
14864 rtx cc_reg;
14865 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14866
14867 /* We might have X as a constant, Y as a register because of the predicates
14868 used for cmpdi. If so, force X to a register here. */
14869 if (dimode_comparison && !REG_P (x))
14870 x = force_reg (DImode, x);
14871
14872 mode = SELECT_CC_MODE (code, x, y);
14873 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14874
14875 if (dimode_comparison
14876 && mode != CC_CZmode)
14877 {
14878 rtx clobber, set;
14879
14880 /* To compare two non-zero values for equality, XOR them and
14881 then compare against zero. Not used for ARM mode; there
14882 CC_CZmode is cheaper. */
14883 if (mode == CC_Zmode && y != const0_rtx)
14884 {
14885 gcc_assert (!reload_completed);
14886 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14887 y = const0_rtx;
14888 }
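/* For example (illustrative): an equality test of two DImode pseudos A and
   B becomes T = A ^ B followed by a compare of T against zero, and the
   zero test of a DImode value only needs the Z flag.  */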
14889
14890 /* A scratch register is required. */
14891 if (reload_completed)
14892 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14893 else
14894 scratch = gen_rtx_SCRATCH (SImode);
14895
14896 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14897 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
14898 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14899 }
14900 else
14901 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14902
14903 return cc_reg;
14904 }
14905
14906 /* Generate a sequence of insns that will generate the correct return
14907 address mask depending on the physical architecture that the program
14908 is running on. */
14909 rtx
14910 arm_gen_return_addr_mask (void)
14911 {
14912 rtx reg = gen_reg_rtx (Pmode);
14913
14914 emit_insn (gen_return_addr_mask (reg));
14915 return reg;
14916 }
14917
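/* Handle loading a half-word from memory during reload by synthesizing it as
   two byte loads: each byte is zero-extended into an SImode register and the
   two are combined with a shift-by-8 and an IOR, honouring BYTES_BIG_ENDIAN.
   Roughly, for little-endian (illustrative register names only):
	ldrb	scratch, [base, #offset]
	ldrb	out, [base, #offset + 1]
	orr	out, scratch, out, lsl #8  */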
14918 void
14919 arm_reload_in_hi (rtx *operands)
14920 {
14921 rtx ref = operands[1];
14922 rtx base, scratch;
14923 HOST_WIDE_INT offset = 0;
14924
14925 if (GET_CODE (ref) == SUBREG)
14926 {
14927 offset = SUBREG_BYTE (ref);
14928 ref = SUBREG_REG (ref);
14929 }
14930
14931 if (REG_P (ref))
14932 {
14933 /* We have a pseudo which has been spilt onto the stack; there
14934 are two cases here: the first where there is a simple
14935 stack-slot replacement and a second where the stack-slot is
14936 out of range, or is used as a subreg. */
14937 if (reg_equiv_mem (REGNO (ref)))
14938 {
14939 ref = reg_equiv_mem (REGNO (ref));
14940 base = find_replacement (&XEXP (ref, 0));
14941 }
14942 else
14943 /* The slot is out of range, or was dressed up in a SUBREG. */
14944 base = reg_equiv_address (REGNO (ref));
14945 }
14946 else
14947 base = find_replacement (&XEXP (ref, 0));
14948
14949 /* Handle the case where the address is too complex to be offset by 1. */
14950 if (GET_CODE (base) == MINUS
14951 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14952 {
14953 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14954
14955 emit_set_insn (base_plus, base);
14956 base = base_plus;
14957 }
14958 else if (GET_CODE (base) == PLUS)
14959 {
14960 /* The addend must be CONST_INT, or we would have dealt with it above. */
14961 HOST_WIDE_INT hi, lo;
14962
14963 offset += INTVAL (XEXP (base, 1));
14964 base = XEXP (base, 0);
14965
14966 /* Rework the address into a legal sequence of insns. */
14967 /* Valid range for lo is -4095 -> 4095 */
14968 lo = (offset >= 0
14969 ? (offset & 0xfff)
14970 : -((-offset) & 0xfff));
14971
14972 /* Corner case, if lo is the max offset then we would be out of range
14973 once we have added the additional 1 below, so bump the msb into the
14974 pre-loading insn(s). */
14975 if (lo == 4095)
14976 lo &= 0x7ff;
14977
14978 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14979 ^ (HOST_WIDE_INT) 0x80000000)
14980 - (HOST_WIDE_INT) 0x80000000);
14981
14982 gcc_assert (hi + lo == offset);
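/* For illustration: offset 0x12345 splits into hi = 0x12000 and lo = 0x345;
   the corner case offset 4095 splits into hi = 2048 and lo = 2047 so that
   lo + 1 below stays in range.  */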
14983
14984 if (hi != 0)
14985 {
14986 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14987
14988 /* Get the base address; addsi3 knows how to handle constants
14989 that require more than one insn. */
14990 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14991 base = base_plus;
14992 offset = lo;
14993 }
14994 }
14995
14996 /* Operands[2] may overlap operands[0] (though it won't overlap
14997 operands[1]), that's why we asked for a DImode reg -- so we can
14998 use the bit that does not overlap. */
14999 if (REGNO (operands[2]) == REGNO (operands[0]))
15000 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15001 else
15002 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15003
15004 emit_insn (gen_zero_extendqisi2 (scratch,
15005 gen_rtx_MEM (QImode,
15006 plus_constant (Pmode, base,
15007 offset))));
15008 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15009 gen_rtx_MEM (QImode,
15010 plus_constant (Pmode, base,
15011 offset + 1))));
15012 if (!BYTES_BIG_ENDIAN)
15013 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15014 gen_rtx_IOR (SImode,
15015 gen_rtx_ASHIFT
15016 (SImode,
15017 gen_rtx_SUBREG (SImode, operands[0], 0),
15018 GEN_INT (8)),
15019 scratch));
15020 else
15021 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15022 gen_rtx_IOR (SImode,
15023 gen_rtx_ASHIFT (SImode, scratch,
15024 GEN_INT (8)),
15025 gen_rtx_SUBREG (SImode, operands[0], 0)));
15026 }
15027
15028 /* Handle storing a half-word to memory during reload by synthesizing as two
15029 byte stores. Take care not to clobber the input values until after we
15030 have moved them somewhere safe. This code assumes that if the DImode
15031 scratch in operands[2] overlaps either the input value or output address
15032 in some way, then that value must die in this insn (we absolutely need
15033 two scratch registers for some corner cases). */
15034 void
15035 arm_reload_out_hi (rtx *operands)
15036 {
15037 rtx ref = operands[0];
15038 rtx outval = operands[1];
15039 rtx base, scratch;
15040 HOST_WIDE_INT offset = 0;
15041
15042 if (GET_CODE (ref) == SUBREG)
15043 {
15044 offset = SUBREG_BYTE (ref);
15045 ref = SUBREG_REG (ref);
15046 }
15047
15048 if (REG_P (ref))
15049 {
15050 /* We have a pseudo which has been spilt onto the stack; there
15051 are two cases here: the first where there is a simple
15052 stack-slot replacement and a second where the stack-slot is
15053 out of range, or is used as a subreg. */
15054 if (reg_equiv_mem (REGNO (ref)))
15055 {
15056 ref = reg_equiv_mem (REGNO (ref));
15057 base = find_replacement (&XEXP (ref, 0));
15058 }
15059 else
15060 /* The slot is out of range, or was dressed up in a SUBREG. */
15061 base = reg_equiv_address (REGNO (ref));
15062 }
15063 else
15064 base = find_replacement (&XEXP (ref, 0));
15065
15066 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15067
15068 /* Handle the case where the address is too complex to be offset by 1. */
15069 if (GET_CODE (base) == MINUS
15070 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15071 {
15072 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15073
15074 /* Be careful not to destroy OUTVAL. */
15075 if (reg_overlap_mentioned_p (base_plus, outval))
15076 {
15077 /* Updating base_plus might destroy outval, see if we can
15078 swap the scratch and base_plus. */
15079 if (!reg_overlap_mentioned_p (scratch, outval))
15080 {
15081 rtx tmp = scratch;
15082 scratch = base_plus;
15083 base_plus = tmp;
15084 }
15085 else
15086 {
15087 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15088
15089 /* Be conservative and copy OUTVAL into the scratch now,
15090 this should only be necessary if outval is a subreg
15091 of something larger than a word. */
15092 /* XXX Might this clobber base? I can't see how it can,
15093 since scratch is known to overlap with OUTVAL, and
15094 must be wider than a word. */
15095 emit_insn (gen_movhi (scratch_hi, outval));
15096 outval = scratch_hi;
15097 }
15098 }
15099
15100 emit_set_insn (base_plus, base);
15101 base = base_plus;
15102 }
15103 else if (GET_CODE (base) == PLUS)
15104 {
15105 /* The addend must be CONST_INT, or we would have dealt with it above. */
15106 HOST_WIDE_INT hi, lo;
15107
15108 offset += INTVAL (XEXP (base, 1));
15109 base = XEXP (base, 0);
15110
15111 /* Rework the address into a legal sequence of insns. */
15112 /* Valid range for lo is -4095 -> 4095 */
15113 lo = (offset >= 0
15114 ? (offset & 0xfff)
15115 : -((-offset) & 0xfff));
15116
15117 /* Corner case, if lo is the max offset then we would be out of range
15118 once we have added the additional 1 below, so bump the msb into the
15119 pre-loading insn(s). */
15120 if (lo == 4095)
15121 lo &= 0x7ff;
15122
15123 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15124 ^ (HOST_WIDE_INT) 0x80000000)
15125 - (HOST_WIDE_INT) 0x80000000);
15126
15127 gcc_assert (hi + lo == offset);
15128
15129 if (hi != 0)
15130 {
15131 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15132
15133 /* Be careful not to destroy OUTVAL. */
15134 if (reg_overlap_mentioned_p (base_plus, outval))
15135 {
15136 /* Updating base_plus might destroy outval, see if we
15137 can swap the scratch and base_plus. */
15138 if (!reg_overlap_mentioned_p (scratch, outval))
15139 {
15140 rtx tmp = scratch;
15141 scratch = base_plus;
15142 base_plus = tmp;
15143 }
15144 else
15145 {
15146 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15147
15148 /* Be conservative and copy outval into scratch now,
15149 this should only be necessary if outval is a
15150 subreg of something larger than a word. */
15151 /* XXX Might this clobber base? I can't see how it
15152 can, since scratch is known to overlap with
15153 outval. */
15154 emit_insn (gen_movhi (scratch_hi, outval));
15155 outval = scratch_hi;
15156 }
15157 }
15158
15159 /* Get the base address; addsi3 knows how to handle constants
15160 that require more than one insn. */
15161 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15162 base = base_plus;
15163 offset = lo;
15164 }
15165 }
15166
15167 if (BYTES_BIG_ENDIAN)
15168 {
15169 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15170 plus_constant (Pmode, base,
15171 offset + 1)),
15172 gen_lowpart (QImode, outval)));
15173 emit_insn (gen_lshrsi3 (scratch,
15174 gen_rtx_SUBREG (SImode, outval, 0),
15175 GEN_INT (8)));
15176 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15177 offset)),
15178 gen_lowpart (QImode, scratch)));
15179 }
15180 else
15181 {
15182 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15183 offset)),
15184 gen_lowpart (QImode, outval)));
15185 emit_insn (gen_lshrsi3 (scratch,
15186 gen_rtx_SUBREG (SImode, outval, 0),
15187 GEN_INT (8)));
15188 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15189 plus_constant (Pmode, base,
15190 offset + 1)),
15191 gen_lowpart (QImode, scratch)));
15192 }
15193 }
15194
15195 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15196 (padded to the size of a word) should be passed in a register. */
15197
15198 static bool
15199 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15200 {
15201 if (TARGET_AAPCS_BASED)
15202 return must_pass_in_stack_var_size (mode, type);
15203 else
15204 return must_pass_in_stack_var_size_or_pad (mode, type);
15205 }
15206
15207
15208 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15209 Return true if an argument passed on the stack should be padded upwards,
15210 i.e. if the least-significant byte has useful data.
15211 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15212 aggregate types are placed in the lowest memory address. */
15213
15214 bool
15215 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15216 {
15217 if (!TARGET_AAPCS_BASED)
15218 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15219
15220 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15221 return false;
15222
15223 return true;
15224 }
15225
15226
15227 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15228 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15229 register has useful data, and return the opposite if the most
15230 significant byte does. */
15231
15232 bool
15233 arm_pad_reg_upward (enum machine_mode mode,
15234 tree type, int first ATTRIBUTE_UNUSED)
15235 {
15236 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15237 {
15238 /* For AAPCS, small aggregates, small fixed-point types,
15239 and small complex types are always padded upwards. */
15240 if (type)
15241 {
15242 if ((AGGREGATE_TYPE_P (type)
15243 || TREE_CODE (type) == COMPLEX_TYPE
15244 || FIXED_POINT_TYPE_P (type))
15245 && int_size_in_bytes (type) <= 4)
15246 return true;
15247 }
15248 else
15249 {
15250 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15251 && GET_MODE_SIZE (mode) <= 4)
15252 return true;
15253 }
15254 }
15255
15256 /* Otherwise, use default padding. */
15257 return !BYTES_BIG_ENDIAN;
15258 }
15259
15260 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15261 assuming that the address in the base register is word aligned. */
15262 bool
15263 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15264 {
15265 HOST_WIDE_INT max_offset;
15266
15267 /* Offset must be a multiple of 4 in Thumb mode. */
15268 if (TARGET_THUMB2 && ((offset & 3) != 0))
15269 return false;
15270
15271 if (TARGET_THUMB2)
15272 max_offset = 1020;
15273 else if (TARGET_ARM)
15274 max_offset = 255;
15275 else
15276 return false;
15277
15278 return ((offset <= max_offset) && (offset >= -max_offset));
15279 }
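/* For example: 1020 is accepted in Thumb-2 state but 1022 is not (not a
   multiple of 4), while ARM state accepts any offset in [-255, 255], such
   as -200.  */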
15280
15281 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15282 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15283 Assumes that the address in the base register RN is word aligned. Pattern
15284 guarantees that both memory accesses use the same base register,
15285 the offsets are constants within the range, and the gap between the offsets is 4.
15286 If reload is complete then check that registers are legal. WBACK indicates whether
15287 address is updated. LOAD indicates whether memory access is load or store. */
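/* For example (illustrative): in ARM state { r4, r5 } with offset 8 passes
   the checks below, whereas { r5, r6 } does not (the first register must be
   even and the two consecutive), and any form where the written-back base
   register is also a destination is rejected.  */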
15288 bool
15289 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15290 bool wback, bool load)
15291 {
15292 unsigned int t, t2, n;
15293
15294 if (!reload_completed)
15295 return true;
15296
15297 if (!offset_ok_for_ldrd_strd (offset))
15298 return false;
15299
15300 t = REGNO (rt);
15301 t2 = REGNO (rt2);
15302 n = REGNO (rn);
15303
15304 if ((TARGET_THUMB2)
15305 && ((wback && (n == t || n == t2))
15306 || (t == SP_REGNUM)
15307 || (t == PC_REGNUM)
15308 || (t2 == SP_REGNUM)
15309 || (t2 == PC_REGNUM)
15310 || (!load && (n == PC_REGNUM))
15311 || (load && (t == t2))
15312 /* Triggers Cortex-M3 LDRD errata. */
15313 || (!wback && load && fix_cm3_ldrd && (n == t))))
15314 return false;
15315
15316 if ((TARGET_ARM)
15317 && ((wback && (n == t || n == t2))
15318 || (t2 == PC_REGNUM)
15319 || (t % 2 != 0) /* First destination register is not even. */
15320 || (t2 != t + 1)
15321 /* PC can be used as base register (for offset addressing only),
15322 but it is deprecated. */
15323 || (n == PC_REGNUM)))
15324 return false;
15325
15326 return true;
15327 }
15328
15329 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15330 operand MEM's address contains an immediate offset from the base
15331 register and has no side effects, in which case it sets BASE and
15332 OFFSET accordingly. */
15333 static bool
15334 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15335 {
15336 rtx addr;
15337
15338 gcc_assert (base != NULL && offset != NULL);
15339
15340 /* TODO: Handle more general memory operand patterns, such as
15341 PRE_DEC and PRE_INC. */
15342
15343 if (side_effects_p (mem))
15344 return false;
15345
15346 /* Can't deal with subregs. */
15347 if (GET_CODE (mem) == SUBREG)
15348 return false;
15349
15350 gcc_assert (MEM_P (mem));
15351
15352 *offset = const0_rtx;
15353
15354 addr = XEXP (mem, 0);
15355
15356 /* If addr isn't valid for DImode, then we can't handle it. */
15357 if (!arm_legitimate_address_p (DImode, addr,
15358 reload_in_progress || reload_completed))
15359 return false;
15360
15361 if (REG_P (addr))
15362 {
15363 *base = addr;
15364 return true;
15365 }
15366 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15367 {
15368 *base = XEXP (addr, 0);
15369 *offset = XEXP (addr, 1);
15370 return (REG_P (*base) && CONST_INT_P (*offset));
15371 }
15372
15373 return false;
15374 }
15375
15376 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15377
15378 /* Called from a peephole2 to replace two word-size accesses with a
15379 single LDRD/STRD instruction. Returns true iff we can generate a
15380 new instruction sequence. That is, both accesses use the same base
15381 register and the gap between constant offsets is 4. This function
15382 may reorder its operands to match ldrd/strd RTL templates.
15383 OPERANDS are the operands found by the peephole matcher;
15384 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15385 corresponding memory operands. LOAD indicates whether the access
15386 is load or store. CONST_STORE indicates a store of constant
15387 integer values held in OPERANDS[4,5] and assumes that the pattern
15388 is 4 insns long, for the purpose of checking dead registers.
15389 COMMUTE indicates that register operands may be reordered. */
15390 bool
15391 gen_operands_ldrd_strd (rtx *operands, bool load,
15392 bool const_store, bool commute)
15393 {
15394 int nops = 2;
15395 HOST_WIDE_INT offsets[2], offset;
15396 rtx base = NULL_RTX;
15397 rtx cur_base, cur_offset, tmp;
15398 int i, gap;
15399 HARD_REG_SET regset;
15400
15401 gcc_assert (!const_store || !load);
15402 /* Check that the memory references are immediate offsets from the
15403 same base register. Extract the base register, the destination
15404 registers, and the corresponding memory offsets. */
15405 for (i = 0; i < nops; i++)
15406 {
15407 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15408 return false;
15409
15410 if (i == 0)
15411 base = cur_base;
15412 else if (REGNO (base) != REGNO (cur_base))
15413 return false;
15414
15415 offsets[i] = INTVAL (cur_offset);
15416 if (GET_CODE (operands[i]) == SUBREG)
15417 {
15418 tmp = SUBREG_REG (operands[i]);
15419 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15420 operands[i] = tmp;
15421 }
15422 }
15423
15424 /* Make sure there is no dependency between the individual loads. */
15425 if (load && REGNO (operands[0]) == REGNO (base))
15426 return false; /* RAW */
15427
15428 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15429 return false; /* WAW */
15430
15431 /* If the same input register is used in both stores
15432 when storing different constants, try to find a free register.
15433 For example, the code
15434 mov r0, 0
15435 str r0, [r2]
15436 mov r0, 1
15437 str r0, [r2, #4]
15438 can be transformed into
15439 mov r1, 0
15440 strd r1, r0, [r2]
15441 in Thumb mode assuming that r1 is free. */
15442 if (const_store
15443 && REGNO (operands[0]) == REGNO (operands[1])
15444 && INTVAL (operands[4]) != INTVAL (operands[5]))
15445 {
15446 if (TARGET_THUMB2)
15447 {
15448 CLEAR_HARD_REG_SET (regset);
15449 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15450 if (tmp == NULL_RTX)
15451 return false;
15452
15453 /* Use the new register in the first load to ensure that
15454 if the original input register is not dead after peephole,
15455 then it will have the correct constant value. */
15456 operands[0] = tmp;
15457 }
15458 else if (TARGET_ARM)
15459 {
15460 return false;
15461 int regno = REGNO (operands[0]);
15462 if (!peep2_reg_dead_p (4, operands[0]))
15463 {
15464 /* When the input register is even and is not dead after the
15465 pattern, it has to hold the second constant but we cannot
15466 form a legal STRD in ARM mode with this register as the second
15467 register. */
15468 if (regno % 2 == 0)
15469 return false;
15470
15471 /* Is regno-1 free? */
15472 SET_HARD_REG_SET (regset);
15473 CLEAR_HARD_REG_BIT(regset, regno - 1);
15474 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15475 if (tmp == NULL_RTX)
15476 return false;
15477
15478 operands[0] = tmp;
15479 }
15480 else
15481 {
15482 /* Find a DImode register. */
15483 CLEAR_HARD_REG_SET (regset);
15484 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15485 if (tmp != NULL_RTX)
15486 {
15487 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15488 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15489 }
15490 else
15491 {
15492 /* Can we use the input register to form a DI register? */
15493 SET_HARD_REG_SET (regset);
15494 CLEAR_HARD_REG_BIT(regset,
15495 regno % 2 == 0 ? regno + 1 : regno - 1);
15496 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15497 if (tmp == NULL_RTX)
15498 return false;
15499 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15500 }
15501 }
15502
15503 gcc_assert (operands[0] != NULL_RTX);
15504 gcc_assert (operands[1] != NULL_RTX);
15505 gcc_assert (REGNO (operands[0]) % 2 == 0);
15506 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15507 }
15508 }
15509
15510 /* Make sure the instructions are ordered with lower memory access first. */
15511 if (offsets[0] > offsets[1])
15512 {
15513 gap = offsets[0] - offsets[1];
15514 offset = offsets[1];
15515
15516 /* Swap the instructions such that lower memory is accessed first. */
15517 SWAP_RTX (operands[0], operands[1]);
15518 SWAP_RTX (operands[2], operands[3]);
15519 if (const_store)
15520 SWAP_RTX (operands[4], operands[5]);
15521 }
15522 else
15523 {
15524 gap = offsets[1] - offsets[0];
15525 offset = offsets[0];
15526 }
15527
15528 /* Make sure accesses are to consecutive memory locations. */
15529 if (gap != 4)
15530 return false;
15531
15532 /* Make sure we generate legal instructions. */
15533 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15534 false, load))
15535 return true;
15536
15537 /* In Thumb-2 state the LDRD/STRD register constraints are already very loose,
15538 so a pair that fails the check cannot be fixed by reordering the registers. */
15539 if (TARGET_THUMB2)
15540 return false;
15541
15542 if (load && commute)
15543 {
15544 /* Try reordering registers. */
15545 SWAP_RTX (operands[0], operands[1]);
15546 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15547 false, load))
15548 return true;
15549 }
15550
15551 if (const_store)
15552 {
15553 /* If input registers are dead after this pattern, they can be
15554 reordered or replaced by other registers that are free in the
15555 current pattern. */
15556 if (!peep2_reg_dead_p (4, operands[0])
15557 || !peep2_reg_dead_p (4, operands[1]))
15558 return false;
15559
15560 /* Try to reorder the input registers. */
15561 /* For example, the code
15562 mov r0, 0
15563 mov r1, 1
15564 str r1, [r2]
15565 str r0, [r2, #4]
15566 can be transformed into
15567 mov r1, 0
15568 mov r0, 1
15569 strd r0, [r2]
15570 */
15571 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15572 false, false))
15573 {
15574 SWAP_RTX (operands[0], operands[1]);
15575 return true;
15576 }
15577
15578 /* Try to find a free DI register. */
15579 CLEAR_HARD_REG_SET (regset);
15580 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15581 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15582 while (true)
15583 {
15584 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15585 if (tmp == NULL_RTX)
15586 return false;
15587
15588 /* DREG must be an even-numbered register in DImode.
15589 Split it into SI registers. */
15590 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15591 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15592 gcc_assert (operands[0] != NULL_RTX);
15593 gcc_assert (operands[1] != NULL_RTX);
15594 gcc_assert (REGNO (operands[0]) % 2 == 0);
15595 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15596
15597 return (operands_ok_ldrd_strd (operands[0], operands[1],
15598 base, offset,
15599 false, load));
15600 }
15601 }
15602
15603 return false;
15604 }
15605 #undef SWAP_RTX
15606
15607
15608
15609 \f
15610 /* Print a symbolic form of X to the debug file, F. */
15611 static void
15612 arm_print_value (FILE *f, rtx x)
15613 {
15614 switch (GET_CODE (x))
15615 {
15616 case CONST_INT:
15617 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15618 return;
15619
15620 case CONST_DOUBLE:
15621 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15622 return;
15623
15624 case CONST_VECTOR:
15625 {
15626 int i;
15627
15628 fprintf (f, "<");
15629 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15630 {
15631 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15632 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15633 fputc (',', f);
15634 }
15635 fprintf (f, ">");
15636 }
15637 return;
15638
15639 case CONST_STRING:
15640 fprintf (f, "\"%s\"", XSTR (x, 0));
15641 return;
15642
15643 case SYMBOL_REF:
15644 fprintf (f, "`%s'", XSTR (x, 0));
15645 return;
15646
15647 case LABEL_REF:
15648 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15649 return;
15650
15651 case CONST:
15652 arm_print_value (f, XEXP (x, 0));
15653 return;
15654
15655 case PLUS:
15656 arm_print_value (f, XEXP (x, 0));
15657 fprintf (f, "+");
15658 arm_print_value (f, XEXP (x, 1));
15659 return;
15660
15661 case PC:
15662 fprintf (f, "pc");
15663 return;
15664
15665 default:
15666 fprintf (f, "????");
15667 return;
15668 }
15669 }
15670 \f
15671 /* Routines for manipulation of the constant pool. */
15672
15673 /* Arm instructions cannot load a large constant directly into a
15674 register; they have to come from a pc relative load. The constant
15675 must therefore be placed in the addressable range of the pc
15676 relative load. Depending on the precise pc relative load
15677 instruction the range is somewhere between 256 bytes and 4k. This
15678 means that we often have to dump a constant inside a function, and
15679 generate code to branch around it.
15680
15681 It is important to minimize this, since the branches will slow
15682 things down and make the code larger.
15683
15684 Normally we can hide the table after an existing unconditional
15685 branch so that there is no interruption of the flow, but in the
15686 worst case the code looks like this:
15687
15688 ldr rn, L1
15689 ...
15690 b L2
15691 align
15692 L1: .long value
15693 L2:
15694 ...
15695
15696 ldr rn, L3
15697 ...
15698 b L4
15699 align
15700 L3: .long value
15701 L4:
15702 ...
15703
15704 We fix this by performing a scan after scheduling, which notices
15705 which instructions need to have their operands fetched from the
15706 constant table and builds the table.
15707
15708 The algorithm starts by building a table of all the constants that
15709 need fixing up and all the natural barriers in the function (places
15710 where a constant table can be dropped without breaking the flow).
15711 For each fixup we note how far the pc-relative replacement will be
15712 able to reach and the offset of the instruction into the function.
15713
15714 Having built the table we then group the fixes together to form
15715 tables that are as large as possible (subject to addressing
15716 constraints) and emit each table of constants after the last
15717 barrier that is within range of all the instructions in the group.
15718 If a group does not contain a barrier, then we forcibly create one
15719 by inserting a jump instruction into the flow. Once the table has
15720 been inserted, the insns are then modified to reference the
15721 relevant entry in the pool.
15722
15723 Possible enhancements to the algorithm (not implemented) are:
15724
15725 1) For some processors and object formats, there may be benefit in
15726 aligning the pools to the start of cache lines; this alignment
15727 would need to be taken into account when calculating addressability
15728 of a pool. */
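/* As a rough illustration of the ranges involved: an ARM-state literal load
   can reach about 4095 bytes ahead of the instruction, whereas a Thumb-1
   pc-relative load only reaches about 1020 bytes, so Thumb code needs pools
   dropped considerably more often.  (Approximate figures only; the exact
   ranges come from the pool_range and neg_pool_range insn attributes.)  */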
15729
15730 /* These typedefs are located at the start of this file, so that
15731 they can be used in the prototypes there. This comment is to
15732 remind readers of that fact so that the following structures
15733 can be understood more easily.
15734
15735 typedef struct minipool_node Mnode;
15736 typedef struct minipool_fixup Mfix; */
15737
15738 struct minipool_node
15739 {
15740 /* Doubly linked chain of entries. */
15741 Mnode * next;
15742 Mnode * prev;
15743 /* The maximum offset into the code at which this entry can be placed. While
15744 pushing fixes for forward references, all entries are sorted in order
15745 of increasing max_address. */
15746 HOST_WIDE_INT max_address;
15747 /* Similarly for an entry inserted for a backwards ref. */
15748 HOST_WIDE_INT min_address;
15749 /* The number of fixes referencing this entry. This can become zero
15750 if we "unpush" an entry. In this case we ignore the entry when we
15751 come to emit the code. */
15752 int refcount;
15753 /* The offset from the start of the minipool. */
15754 HOST_WIDE_INT offset;
15755 /* The value in table. */
15756 rtx value;
15757 /* The mode of value. */
15758 enum machine_mode mode;
15759 /* The size of the value. With iWMMXt enabled
15760 sizes > 4 also imply an alignment of 8-bytes. */
15761 int fix_size;
15762 };
15763
15764 struct minipool_fixup
15765 {
15766 Mfix * next;
15767 rtx insn;
15768 HOST_WIDE_INT address;
15769 rtx * loc;
15770 enum machine_mode mode;
15771 int fix_size;
15772 rtx value;
15773 Mnode * minipool;
15774 HOST_WIDE_INT forwards;
15775 HOST_WIDE_INT backwards;
15776 };
15777
15778 /* Fixes less than a word need padding out to a word boundary. */
15779 #define MINIPOOL_FIX_SIZE(mode) \
15780 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
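/* For example, an HImode (2-byte) fix still occupies 4 bytes in the pool,
   while a DImode fix occupies its full 8 bytes.  */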
15781
15782 static Mnode * minipool_vector_head;
15783 static Mnode * minipool_vector_tail;
15784 static rtx minipool_vector_label;
15785 static int minipool_pad;
15786
15787 /* The linked list of all minipool fixes required for this function. */
15788 Mfix * minipool_fix_head;
15789 Mfix * minipool_fix_tail;
15790 /* The fix entry for the current minipool, once it has been placed. */
15791 Mfix * minipool_barrier;
15792
15793 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15794 #define JUMP_TABLES_IN_TEXT_SECTION 0
15795 #endif
15796
15797 static HOST_WIDE_INT
15798 get_jump_table_size (rtx insn)
15799 {
15800 /* ADDR_VECs only take room if read-only data goes into the text
15801 section. */
15802 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15803 {
15804 rtx body = PATTERN (insn);
15805 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15806 HOST_WIDE_INT size;
15807 HOST_WIDE_INT modesize;
15808
15809 modesize = GET_MODE_SIZE (GET_MODE (body));
15810 size = modesize * XVECLEN (body, elt);
15811 switch (modesize)
15812 {
15813 case 1:
15814 /* Round up size of TBB table to a halfword boundary. */
15815 size = (size + 1) & ~(HOST_WIDE_INT)1;
15816 break;
15817 case 2:
15818 /* No padding necessary for TBH. */
15819 break;
15820 case 4:
15821 /* Add two bytes for alignment on Thumb. */
15822 if (TARGET_THUMB)
15823 size += 2;
15824 break;
15825 default:
15826 gcc_unreachable ();
15827 }
15828 return size;
15829 }
15830
15831 return 0;
15832 }
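/* Worked example (illustrative): a TBB table (1-byte entries) with 5 entries
   is counted as 6 bytes after rounding to a halfword boundary; a TBH table
   needs no padding; a 4-byte-entry table on Thumb gets 2 extra bytes for
   alignment.  */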
15833
15834 /* Return the maximum amount of padding that will be inserted before
15835 label LABEL. */
15836
15837 static HOST_WIDE_INT
15838 get_label_padding (rtx label)
15839 {
15840 HOST_WIDE_INT align, min_insn_size;
15841
15842 align = 1 << label_to_alignment (label);
15843 min_insn_size = TARGET_THUMB ? 2 : 4;
15844 return align > min_insn_size ? align - min_insn_size : 0;
15845 }
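/* For example, a label aligned to 8 bytes in Thumb code (minimum insn size
   2 bytes) may be preceded by up to 6 bytes of padding.  */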
15846
15847 /* Move a minipool fix MP from its current location to before MAX_MP.
15848 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15849 constraints may need updating. */
15850 static Mnode *
15851 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15852 HOST_WIDE_INT max_address)
15853 {
15854 /* The code below assumes these are different. */
15855 gcc_assert (mp != max_mp);
15856
15857 if (max_mp == NULL)
15858 {
15859 if (max_address < mp->max_address)
15860 mp->max_address = max_address;
15861 }
15862 else
15863 {
15864 if (max_address > max_mp->max_address - mp->fix_size)
15865 mp->max_address = max_mp->max_address - mp->fix_size;
15866 else
15867 mp->max_address = max_address;
15868
15869 /* Unlink MP from its current position. Since max_mp is non-null,
15870 mp->prev must be non-null. */
15871 mp->prev->next = mp->next;
15872 if (mp->next != NULL)
15873 mp->next->prev = mp->prev;
15874 else
15875 minipool_vector_tail = mp->prev;
15876
15877 /* Re-insert it before MAX_MP. */
15878 mp->next = max_mp;
15879 mp->prev = max_mp->prev;
15880 max_mp->prev = mp;
15881
15882 if (mp->prev != NULL)
15883 mp->prev->next = mp;
15884 else
15885 minipool_vector_head = mp;
15886 }
15887
15888 /* Save the new entry. */
15889 max_mp = mp;
15890
15891 /* Scan over the preceding entries and adjust their addresses as
15892 required. */
15893 while (mp->prev != NULL
15894 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15895 {
15896 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15897 mp = mp->prev;
15898 }
15899
15900 return max_mp;
15901 }
15902
15903 /* Add a constant to the minipool for a forward reference. Returns the
15904 node added or NULL if the constant will not fit in this pool. */
15905 static Mnode *
15906 add_minipool_forward_ref (Mfix *fix)
15907 {
15908 /* If set, max_mp is the first pool_entry that has a lower
15909 constraint than the one we are trying to add. */
15910 Mnode * max_mp = NULL;
15911 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15912 Mnode * mp;
15913
15914 /* If the minipool starts before the end of FIX->INSN then this FIX
15915 cannot be placed into the current pool. Furthermore, adding the
15916 new constant pool entry may cause the pool to start FIX_SIZE bytes
15917 earlier. */
15918 if (minipool_vector_head &&
15919 (fix->address + get_attr_length (fix->insn)
15920 >= minipool_vector_head->max_address - fix->fix_size))
15921 return NULL;
15922
15923 /* Scan the pool to see if a constant with the same value has
15924 already been added. While we are doing this, also note the
15925 location where we must insert the constant if it doesn't already
15926 exist. */
15927 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15928 {
15929 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15930 && fix->mode == mp->mode
15931 && (!LABEL_P (fix->value)
15932 || (CODE_LABEL_NUMBER (fix->value)
15933 == CODE_LABEL_NUMBER (mp->value)))
15934 && rtx_equal_p (fix->value, mp->value))
15935 {
15936 /* More than one fix references this entry. */
15937 mp->refcount++;
15938 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15939 }
15940
15941 /* Note the insertion point if necessary. */
15942 if (max_mp == NULL
15943 && mp->max_address > max_address)
15944 max_mp = mp;
15945
15946 /* If we are inserting an 8-byte aligned quantity and
15947 we have not already found an insertion point, then
15948 make sure that all such 8-byte aligned quantities are
15949 placed at the start of the pool. */
15950 if (ARM_DOUBLEWORD_ALIGN
15951 && max_mp == NULL
15952 && fix->fix_size >= 8
15953 && mp->fix_size < 8)
15954 {
15955 max_mp = mp;
15956 max_address = mp->max_address;
15957 }
15958 }
15959
15960 /* The value is not currently in the minipool, so we need to create
15961 a new entry for it. If MAX_MP is NULL, the entry will be put on
15962 the end of the list since the placement is less constrained than
15963 any existing entry. Otherwise, we insert the new fix before
15964 MAX_MP and, if necessary, adjust the constraints on the other
15965 entries. */
15966 mp = XNEW (Mnode);
15967 mp->fix_size = fix->fix_size;
15968 mp->mode = fix->mode;
15969 mp->value = fix->value;
15970 mp->refcount = 1;
15971 /* Not yet required for a backwards ref. */
15972 mp->min_address = -65536;
15973
15974 if (max_mp == NULL)
15975 {
15976 mp->max_address = max_address;
15977 mp->next = NULL;
15978 mp->prev = minipool_vector_tail;
15979
15980 if (mp->prev == NULL)
15981 {
15982 minipool_vector_head = mp;
15983 minipool_vector_label = gen_label_rtx ();
15984 }
15985 else
15986 mp->prev->next = mp;
15987
15988 minipool_vector_tail = mp;
15989 }
15990 else
15991 {
15992 if (max_address > max_mp->max_address - mp->fix_size)
15993 mp->max_address = max_mp->max_address - mp->fix_size;
15994 else
15995 mp->max_address = max_address;
15996
15997 mp->next = max_mp;
15998 mp->prev = max_mp->prev;
15999 max_mp->prev = mp;
16000 if (mp->prev != NULL)
16001 mp->prev->next = mp;
16002 else
16003 minipool_vector_head = mp;
16004 }
16005
16006 /* Save the new entry. */
16007 max_mp = mp;
16008
16009 /* Scan over the preceding entries and adjust their addresses as
16010 required. */
16011 while (mp->prev != NULL
16012 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16013 {
16014 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16015 mp = mp->prev;
16016 }
16017
16018 return max_mp;
16019 }
16020
16021 static Mnode *
16022 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16023 HOST_WIDE_INT min_address)
16024 {
16025 HOST_WIDE_INT offset;
16026
16027 /* The code below assumes these are different. */
16028 gcc_assert (mp != min_mp);
16029
16030 if (min_mp == NULL)
16031 {
16032 if (min_address > mp->min_address)
16033 mp->min_address = min_address;
16034 }
16035 else
16036 {
16037 /* We will adjust this below if it is too loose. */
16038 mp->min_address = min_address;
16039
16040 /* Unlink MP from its current position. Since min_mp is non-null,
16041 mp->next must be non-null. */
16042 mp->next->prev = mp->prev;
16043 if (mp->prev != NULL)
16044 mp->prev->next = mp->next;
16045 else
16046 minipool_vector_head = mp->next;
16047
16048 /* Reinsert it after MIN_MP. */
16049 mp->prev = min_mp;
16050 mp->next = min_mp->next;
16051 min_mp->next = mp;
16052 if (mp->next != NULL)
16053 mp->next->prev = mp;
16054 else
16055 minipool_vector_tail = mp;
16056 }
16057
16058 min_mp = mp;
16059
16060 offset = 0;
16061 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16062 {
16063 mp->offset = offset;
16064 if (mp->refcount > 0)
16065 offset += mp->fix_size;
16066
16067 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16068 mp->next->min_address = mp->min_address + mp->fix_size;
16069 }
16070
16071 return min_mp;
16072 }
16073
16074 /* Add a constant to the minipool for a backward reference. Returns the
16075 node added or NULL if the constant will not fit in this pool.
16076
16077 Note that the code for insertion for a backwards reference can be
16078 somewhat confusing because the calculated offsets for each fix do
16079 not take into account the size of the pool (which is still under
16080 construction). */
16081 static Mnode *
16082 add_minipool_backward_ref (Mfix *fix)
16083 {
16084 /* If set, min_mp is the last pool_entry that has a lower constraint
16085 than the one we are trying to add. */
16086 Mnode *min_mp = NULL;
16087 /* This can be negative, since it is only a constraint. */
16088 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16089 Mnode *mp;
16090
16091 /* If we can't reach the current pool from this insn, or if we can't
16092 insert this entry at the end of the pool without pushing other
16093 fixes out of range, then we don't try. This ensures that we
16094 can't fail later on. */
16095 if (min_address >= minipool_barrier->address
16096 || (minipool_vector_tail->min_address + fix->fix_size
16097 >= minipool_barrier->address))
16098 return NULL;
16099
16100 /* Scan the pool to see if a constant with the same value has
16101 already been added. While we are doing this, also note the
16102 location where we must insert the constant if it doesn't already
16103 exist. */
16104 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16105 {
16106 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16107 && fix->mode == mp->mode
16108 && (!LABEL_P (fix->value)
16109 || (CODE_LABEL_NUMBER (fix->value)
16110 == CODE_LABEL_NUMBER (mp->value)))
16111 && rtx_equal_p (fix->value, mp->value)
16112 /* Check that there is enough slack to move this entry to the
16113 end of the table (this is conservative). */
16114 && (mp->max_address
16115 > (minipool_barrier->address
16116 + minipool_vector_tail->offset
16117 + minipool_vector_tail->fix_size)))
16118 {
16119 mp->refcount++;
16120 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16121 }
16122
16123 if (min_mp != NULL)
16124 mp->min_address += fix->fix_size;
16125 else
16126 {
16127 /* Note the insertion point if necessary. */
16128 if (mp->min_address < min_address)
16129 {
16130 /* For now, we do not allow the insertion of 8-byte alignment
16131 requiring nodes anywhere but at the start of the pool. */
16132 if (ARM_DOUBLEWORD_ALIGN
16133 && fix->fix_size >= 8 && mp->fix_size < 8)
16134 return NULL;
16135 else
16136 min_mp = mp;
16137 }
16138 else if (mp->max_address
16139 < minipool_barrier->address + mp->offset + fix->fix_size)
16140 {
16141 /* Inserting before this entry would push the fix beyond
16142 its maximum address (which can happen if we have
16143 re-located a forwards fix); force the new fix to come
16144 after it. */
16145 if (ARM_DOUBLEWORD_ALIGN
16146 && fix->fix_size >= 8 && mp->fix_size < 8)
16147 return NULL;
16148 else
16149 {
16150 min_mp = mp;
16151 min_address = mp->min_address + fix->fix_size;
16152 }
16153 }
16154 /* Do not insert a non-8-byte aligned quantity before 8-byte
16155 aligned quantities. */
16156 else if (ARM_DOUBLEWORD_ALIGN
16157 && fix->fix_size < 8
16158 && mp->fix_size >= 8)
16159 {
16160 min_mp = mp;
16161 min_address = mp->min_address + fix->fix_size;
16162 }
16163 }
16164 }
16165
16166 /* We need to create a new entry. */
16167 mp = XNEW (Mnode);
16168 mp->fix_size = fix->fix_size;
16169 mp->mode = fix->mode;
16170 mp->value = fix->value;
16171 mp->refcount = 1;
16172 mp->max_address = minipool_barrier->address + 65536;
16173
16174 mp->min_address = min_address;
16175
16176 if (min_mp == NULL)
16177 {
16178 mp->prev = NULL;
16179 mp->next = minipool_vector_head;
16180
16181 if (mp->next == NULL)
16182 {
16183 minipool_vector_tail = mp;
16184 minipool_vector_label = gen_label_rtx ();
16185 }
16186 else
16187 mp->next->prev = mp;
16188
16189 minipool_vector_head = mp;
16190 }
16191 else
16192 {
16193 mp->next = min_mp->next;
16194 mp->prev = min_mp;
16195 min_mp->next = mp;
16196
16197 if (mp->next != NULL)
16198 mp->next->prev = mp;
16199 else
16200 minipool_vector_tail = mp;
16201 }
16202
16203 /* Save the new entry. */
16204 min_mp = mp;
16205
16206 if (mp->prev)
16207 mp = mp->prev;
16208 else
16209 mp->offset = 0;
16210
16211 /* Scan over the following entries and adjust their offsets. */
16212 while (mp->next != NULL)
16213 {
16214 if (mp->next->min_address < mp->min_address + mp->fix_size)
16215 mp->next->min_address = mp->min_address + mp->fix_size;
16216
16217 if (mp->refcount)
16218 mp->next->offset = mp->offset + mp->fix_size;
16219 else
16220 mp->next->offset = mp->offset;
16221
16222 mp = mp->next;
16223 }
16224
16225 return min_mp;
16226 }
16227
16228 static void
16229 assign_minipool_offsets (Mfix *barrier)
16230 {
16231 HOST_WIDE_INT offset = 0;
16232 Mnode *mp;
16233
16234 minipool_barrier = barrier;
16235
16236 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16237 {
16238 mp->offset = offset;
16239
16240 if (mp->refcount > 0)
16241 offset += mp->fix_size;
16242 }
16243 }
16244
16245 /* Output the literal table */
16246 static void
16247 dump_minipool (rtx scan)
16248 {
16249 Mnode * mp;
16250 Mnode * nmp;
16251 int align64 = 0;
16252
16253 if (ARM_DOUBLEWORD_ALIGN)
16254 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16255 if (mp->refcount > 0 && mp->fix_size >= 8)
16256 {
16257 align64 = 1;
16258 break;
16259 }
16260
16261 if (dump_file)
16262 fprintf (dump_file,
16263 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16264 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16265
16266 scan = emit_label_after (gen_label_rtx (), scan);
16267 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16268 scan = emit_label_after (minipool_vector_label, scan);
16269
16270 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16271 {
16272 if (mp->refcount > 0)
16273 {
16274 if (dump_file)
16275 {
16276 fprintf (dump_file,
16277 ";; Offset %u, min %ld, max %ld ",
16278 (unsigned) mp->offset, (unsigned long) mp->min_address,
16279 (unsigned long) mp->max_address);
16280 arm_print_value (dump_file, mp->value);
16281 fputc ('\n', dump_file);
16282 }
16283
16284 switch (mp->fix_size)
16285 {
16286 #ifdef HAVE_consttable_1
16287 case 1:
16288 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16289 break;
16290
16291 #endif
16292 #ifdef HAVE_consttable_2
16293 case 2:
16294 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16295 break;
16296
16297 #endif
16298 #ifdef HAVE_consttable_4
16299 case 4:
16300 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16301 break;
16302
16303 #endif
16304 #ifdef HAVE_consttable_8
16305 case 8:
16306 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16307 break;
16308
16309 #endif
16310 #ifdef HAVE_consttable_16
16311 case 16:
16312 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16313 break;
16314
16315 #endif
16316 default:
16317 gcc_unreachable ();
16318 }
16319 }
16320
16321 nmp = mp->next;
16322 free (mp);
16323 }
16324
16325 minipool_vector_head = minipool_vector_tail = NULL;
16326 scan = emit_insn_after (gen_consttable_end (), scan);
16327 scan = emit_barrier_after (scan);
16328 }
16329
16330 /* Return the cost of forcibly inserting a barrier after INSN. */
16331 static int
16332 arm_barrier_cost (rtx insn)
16333 {
16334 /* Basing the location of the pool on the loop depth is preferable,
16335 but at the moment, the basic block information seems to be
16336 corrupt by this stage of the compilation. */
16337 int base_cost = 50;
16338 rtx next = next_nonnote_insn (insn);
16339
16340 if (next != NULL && LABEL_P (next))
16341 base_cost -= 20;
16342
16343 switch (GET_CODE (insn))
16344 {
16345 case CODE_LABEL:
16346 /* It will always be better to place the table before the label, rather
16347 than after it. */
16348 return 50;
16349
16350 case INSN:
16351 case CALL_INSN:
16352 return base_cost;
16353
16354 case JUMP_INSN:
16355 return base_cost - 10;
16356
16357 default:
16358 return base_cost + 10;
16359 }
16360 }
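/* Illustration: an unconditional jump that is immediately followed by a
   label costs 20 (50 - 20 - 10) and is therefore the preferred place to
   force a barrier, while a label itself always costs 50.  */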
16361
16362 /* Find the best place in the insn stream in the range
16363 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16364 Create the barrier by inserting a jump and add a new fix entry for
16365 it. */
16366 static Mfix *
16367 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16368 {
16369 HOST_WIDE_INT count = 0;
16370 rtx barrier;
16371 rtx from = fix->insn;
16372 /* The instruction after which we will insert the jump. */
16373 rtx selected = NULL;
16374 int selected_cost;
16375 /* The address at which the jump instruction will be placed. */
16376 HOST_WIDE_INT selected_address;
16377 Mfix * new_fix;
16378 HOST_WIDE_INT max_count = max_address - fix->address;
16379 rtx label = gen_label_rtx ();
16380
16381 selected_cost = arm_barrier_cost (from);
16382 selected_address = fix->address;
16383
16384 while (from && count < max_count)
16385 {
16386 rtx tmp;
16387 int new_cost;
16388
16389 /* This code shouldn't have been called if there was a natural barrier
16390 within range. */
16391 gcc_assert (!BARRIER_P (from));
16392
16393 /* Count the length of this insn. This must stay in sync with the
16394 code that pushes minipool fixes. */
16395 if (LABEL_P (from))
16396 count += get_label_padding (from);
16397 else
16398 count += get_attr_length (from);
16399
16400 /* If there is a jump table, add its length. */
16401 if (tablejump_p (from, NULL, &tmp))
16402 {
16403 count += get_jump_table_size (tmp);
16404
16405 /* Jump tables aren't in a basic block, so base the cost on
16406 the dispatch insn. If we select this location, we will
16407 still put the pool after the table. */
16408 new_cost = arm_barrier_cost (from);
16409
16410 if (count < max_count
16411 && (!selected || new_cost <= selected_cost))
16412 {
16413 selected = tmp;
16414 selected_cost = new_cost;
16415 selected_address = fix->address + count;
16416 }
16417
16418 /* Continue after the dispatch table. */
16419 from = NEXT_INSN (tmp);
16420 continue;
16421 }
16422
16423 new_cost = arm_barrier_cost (from);
16424
16425 if (count < max_count
16426 && (!selected || new_cost <= selected_cost))
16427 {
16428 selected = from;
16429 selected_cost = new_cost;
16430 selected_address = fix->address + count;
16431 }
16432
16433 from = NEXT_INSN (from);
16434 }
16435
16436 /* Make sure that we found a place to insert the jump. */
16437 gcc_assert (selected);
16438
16439 /* Make sure we do not split a call and its corresponding
16440 CALL_ARG_LOCATION note. */
16441 if (CALL_P (selected))
16442 {
16443 rtx next = NEXT_INSN (selected);
16444 if (next && NOTE_P (next)
16445 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16446 selected = next;
16447 }
16448
16449 /* Create a new JUMP_INSN that branches around a barrier. */
16450 from = emit_jump_insn_after (gen_jump (label), selected);
16451 JUMP_LABEL (from) = label;
16452 barrier = emit_barrier_after (from);
16453 emit_label_after (label, barrier);
16454
16455 /* Create a minipool barrier entry for the new barrier. */
16456 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16457 new_fix->insn = barrier;
16458 new_fix->address = selected_address;
16459 new_fix->next = fix->next;
16460 fix->next = new_fix;
16461
16462 return new_fix;
16463 }
16464
16465 /* Record that there is a natural barrier in the insn stream at
16466 ADDRESS. */
16467 static void
16468 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16469 {
16470 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16471
16472 fix->insn = insn;
16473 fix->address = address;
16474
16475 fix->next = NULL;
16476 if (minipool_fix_head != NULL)
16477 minipool_fix_tail->next = fix;
16478 else
16479 minipool_fix_head = fix;
16480
16481 minipool_fix_tail = fix;
16482 }
16483
16484 /* Record INSN, which will need fixing up to load a value from the
16485 minipool. ADDRESS is the offset of the insn since the start of the
16486 function; LOC is a pointer to the part of the insn which requires
16487 fixing; VALUE is the constant that must be loaded, which is of type
16488 MODE. */
16489 static void
16490 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16491 enum machine_mode mode, rtx value)
16492 {
16493 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16494
16495 fix->insn = insn;
16496 fix->address = address;
16497 fix->loc = loc;
16498 fix->mode = mode;
16499 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16500 fix->value = value;
16501 fix->forwards = get_attr_pool_range (insn);
16502 fix->backwards = get_attr_neg_pool_range (insn);
16503 fix->minipool = NULL;
16504
16505 /* If an insn doesn't have a range defined for it, then it isn't
16506 expecting to be reworked by this code. Better to stop now than
16507 to generate duff assembly code. */
16508 gcc_assert (fix->forwards || fix->backwards);
16509
16510 /* If an entry requires 8-byte alignment then assume all constant pools
16511 require 4 bytes of padding. Trying to do this later on a per-pool
16512 basis is awkward because existing pool entries have to be modified. */
16513 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16514 minipool_pad = 4;
16515
16516 if (dump_file)
16517 {
16518 fprintf (dump_file,
16519 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16520 GET_MODE_NAME (mode),
16521 INSN_UID (insn), (unsigned long) address,
16522 -1 * (long)fix->backwards, (long)fix->forwards);
16523 arm_print_value (dump_file, fix->value);
16524 fprintf (dump_file, "\n");
16525 }
16526
16527 /* Add it to the chain of fixes. */
16528 fix->next = NULL;
16529
16530 if (minipool_fix_head != NULL)
16531 minipool_fix_tail->next = fix;
16532 else
16533 minipool_fix_head = fix;
16534
16535 minipool_fix_tail = fix;
16536 }
16537
16538 /* Return the maximum allowed cost of synthesizing a 64-bit constant inline,
16539 measured in insns; return 99 if we always want the value to be
16540 synthesized rather than placed in a literal pool. */
16541 int
16542 arm_max_const_double_inline_cost ()
16543 {
16544 /* Let the value get synthesized to avoid the use of literal pools. */
16545 if (arm_disable_literal_pool)
16546 return 99;
16547
16548 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16549 }
16550
16551 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16552 Returns the number of insns needed, or 99 if we don't know how to
16553 do it. */
16554 int
16555 arm_const_double_inline_cost (rtx val)
16556 {
16557 rtx lowpart, highpart;
16558 enum machine_mode mode;
16559
16560 mode = GET_MODE (val);
16561
16562 if (mode == VOIDmode)
16563 mode = DImode;
16564
16565 gcc_assert (GET_MODE_SIZE (mode) == 8);
16566
16567 lowpart = gen_lowpart (SImode, val);
16568 highpart = gen_highpart_mode (SImode, mode, val);
16569
16570 gcc_assert (CONST_INT_P (lowpart));
16571 gcc_assert (CONST_INT_P (highpart));
16572
16573 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16574 NULL_RTX, NULL_RTX, 0, 0)
16575 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16576 NULL_RTX, NULL_RTX, 0, 0));
16577 }
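
/* A hedged worked example (not taken from the original sources): the DImode
constant 0x0000000100000000 has a low part of 0 and a high part of 1;
each part is a single MOV immediate, so the cost returned is 2. Constants
whose halves need several ORR/MVN steps cost more and are correspondingly
more likely to end up in the literal pool. */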
16578
16579 /* Return true if it is worthwhile to split a 64-bit constant into two
16580 32-bit operations. This is the case if optimizing for size, or
16581 if we have load delay slots, or if one 32-bit part can be done with
16582 a single data operation. */
16583 bool
16584 arm_const_double_by_parts (rtx val)
16585 {
16586 enum machine_mode mode = GET_MODE (val);
16587 rtx part;
16588
16589 if (optimize_size || arm_ld_sched)
16590 return true;
16591
16592 if (mode == VOIDmode)
16593 mode = DImode;
16594
16595 part = gen_highpart_mode (SImode, mode, val);
16596
16597 gcc_assert (CONST_INT_P (part));
16598
16599 if (const_ok_for_arm (INTVAL (part))
16600 || const_ok_for_arm (~INTVAL (part)))
16601 return true;
16602
16603 part = gen_lowpart (SImode, val);
16604
16605 gcc_assert (CONST_INT_P (part));
16606
16607 if (const_ok_for_arm (INTVAL (part))
16608 || const_ok_for_arm (~INTVAL (part)))
16609 return true;
16610
16611 return false;
16612 }
16613
16614 /* Return true if it is possible to inline both the high and low parts
16615 of a 64-bit constant into 32-bit data processing instructions. */
16616 bool
16617 arm_const_double_by_immediates (rtx val)
16618 {
16619 enum machine_mode mode = GET_MODE (val);
16620 rtx part;
16621
16622 if (mode == VOIDmode)
16623 mode = DImode;
16624
16625 part = gen_highpart_mode (SImode, mode, val);
16626
16627 gcc_assert (CONST_INT_P (part));
16628
16629 if (!const_ok_for_arm (INTVAL (part)))
16630 return false;
16631
16632 part = gen_lowpart (SImode, val);
16633
16634 gcc_assert (CONST_INT_P (part));
16635
16636 if (!const_ok_for_arm (INTVAL (part)))
16637 return false;
16638
16639 return true;
16640 }
16641
16642 /* Scan INSN and note any of its operands that need fixing.
16643 If DO_PUSHES is false we do not actually push any of the fixups
16644 needed. */
16645 static void
16646 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16647 {
16648 int opno;
16649
16650 extract_insn (insn);
16651
16652 if (!constrain_operands (1))
16653 fatal_insn_not_found (insn);
16654
16655 if (recog_data.n_alternatives == 0)
16656 return;
16657
16658 /* Fill in recog_op_alt with information about the constraints of
16659 this insn. */
16660 preprocess_constraints ();
16661
16662 for (opno = 0; opno < recog_data.n_operands; opno++)
16663 {
16664 /* Things we need to fix can only occur in inputs. */
16665 if (recog_data.operand_type[opno] != OP_IN)
16666 continue;
16667
16668 /* If this alternative is a memory reference, then any mention
16669 of constants in this alternative is really to fool reload
16670 into allowing us to accept one there. We need to fix them up
16671 now so that we output the right code. */
16672 if (recog_op_alt[opno][which_alternative].memory_ok)
16673 {
16674 rtx op = recog_data.operand[opno];
16675
16676 if (CONSTANT_P (op))
16677 {
16678 if (do_pushes)
16679 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16680 recog_data.operand_mode[opno], op);
16681 }
16682 else if (MEM_P (op)
16683 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16684 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16685 {
16686 if (do_pushes)
16687 {
16688 rtx cop = avoid_constant_pool_reference (op);
16689
16690 /* Casting the address of something to a mode narrower
16691 than a word can cause avoid_constant_pool_reference()
16692 to return the pool reference itself. That's no good to
16693 us here. Let's just hope that we can use the
16694 constant pool value directly. */
16695 if (op == cop)
16696 cop = get_pool_constant (XEXP (op, 0));
16697
16698 push_minipool_fix (insn, address,
16699 recog_data.operand_loc[opno],
16700 recog_data.operand_mode[opno], cop);
16701 }
16702
16703 }
16704 }
16705 }
16706
16707 return;
16708 }
16709
16710 /* Rewrite move insn into subtract of 0 if the condition codes will
16711 be useful in the next conditional jump insn. */
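
/* A rough, hypothetical sketch of the transformation (illustrative RTL, not
taken from a real dump): a copy feeding a compare-with-zero branch,
(set (reg:SI 1) (reg:SI 0)) ... cbranchsi4_insn against (const_int 0),
is rewritten as
(set (reg:SI 1) (minus:SI (reg:SI 0) (const_int 0)))
so that the flag-setting subtract makes the condition codes available to the
following conditional jump and the explicit compare can be dropped. */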
16712
16713 static void
16714 thumb1_reorg (void)
16715 {
16716 basic_block bb;
16717
16718 FOR_EACH_BB_FN (bb, cfun)
16719 {
16720 rtx dest, src;
16721 rtx pat, op0, set = NULL;
16722 rtx prev, insn = BB_END (bb);
16723 bool insn_clobbered = false;
16724
16725 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
16726 insn = PREV_INSN (insn);
16727
16728 /* Find the last cbranchsi4_insn in basic block BB. */
16729 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16730 continue;
16731
16732 /* Get the register with which we are comparing. */
16733 pat = PATTERN (insn);
16734 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16735
16736 /* Find the first flag setting insn before INSN in basic block BB. */
16737 gcc_assert (insn != BB_HEAD (bb));
16738 for (prev = PREV_INSN (insn);
16739 (!insn_clobbered
16740 && prev != BB_HEAD (bb)
16741 && (NOTE_P (prev)
16742 || DEBUG_INSN_P (prev)
16743 || ((set = single_set (prev)) != NULL
16744 && get_attr_conds (prev) == CONDS_NOCOND)));
16745 prev = PREV_INSN (prev))
16746 {
16747 if (reg_set_p (op0, prev))
16748 insn_clobbered = true;
16749 }
16750
16751 /* Skip if op0 is clobbered by insn other than prev. */
16752 if (insn_clobbered)
16753 continue;
16754
16755 if (!set)
16756 continue;
16757
16758 dest = SET_DEST (set);
16759 src = SET_SRC (set);
16760 if (!low_register_operand (dest, SImode)
16761 || !low_register_operand (src, SImode))
16762 continue;
16763
16764 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16765 in INSN. Both src and dest of the move insn are checked. */
16766 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16767 {
16768 dest = copy_rtx (dest);
16769 src = copy_rtx (src);
16770 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16771 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16772 INSN_CODE (prev) = -1;
16773 /* Set test register in INSN to dest. */
16774 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16775 INSN_CODE (insn) = -1;
16776 }
16777 }
16778 }
16779
16780 /* Convert instructions to their cc-clobbering variant if possible, since
16781 that allows us to use smaller encodings. */
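
/* A hedged illustration (not from the original sources): with all-low
registers and the condition codes dead, a 32-bit encoding such as
add r0, r1, r2
can be replaced by the flag-setting form
adds r0, r1, r2
which has a 16-bit encoding. In RTL terms the SET is wrapped in a PARALLEL
with a (clobber (reg:CC CC_REGNUM)) so later passes know the flags are
overwritten. */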
16782
16783 static void
16784 thumb2_reorg (void)
16785 {
16786 basic_block bb;
16787 regset_head live;
16788
16789 INIT_REG_SET (&live);
16790
16791 /* We are freeing block_for_insn in the toplev to keep compatibility
16792 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16793 compute_bb_for_insn ();
16794 df_analyze ();
16795
16796 FOR_EACH_BB_FN (bb, cfun)
16797 {
16798 rtx insn;
16799
16800 COPY_REG_SET (&live, DF_LR_OUT (bb));
16801 df_simulate_initialize_backwards (bb, &live);
16802 FOR_BB_INSNS_REVERSE (bb, insn)
16803 {
16804 if (NONJUMP_INSN_P (insn)
16805 && !REGNO_REG_SET_P (&live, CC_REGNUM)
16806 && GET_CODE (PATTERN (insn)) == SET)
16807 {
16808 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
16809 rtx pat = PATTERN (insn);
16810 rtx dst = XEXP (pat, 0);
16811 rtx src = XEXP (pat, 1);
16812 rtx op0 = NULL_RTX, op1 = NULL_RTX;
16813
16814 if (!OBJECT_P (src))
16815 op0 = XEXP (src, 0);
16816
16817 if (BINARY_P (src))
16818 op1 = XEXP (src, 1);
16819
16820 if (low_register_operand (dst, SImode))
16821 {
16822 switch (GET_CODE (src))
16823 {
16824 case PLUS:
16825 /* Adding two registers and storing the result
16826 in the first source is already a 16-bit
16827 operation. */
16828 if (rtx_equal_p (dst, op0)
16829 && register_operand (op1, SImode))
16830 break;
16831
16832 if (low_register_operand (op0, SImode))
16833 {
16834 /* ADDS <Rd>,<Rn>,<Rm> */
16835 if (low_register_operand (op1, SImode))
16836 action = CONV;
16837 /* ADDS <Rdn>,#<imm8> */
16838 /* SUBS <Rdn>,#<imm8> */
16839 else if (rtx_equal_p (dst, op0)
16840 && CONST_INT_P (op1)
16841 && IN_RANGE (INTVAL (op1), -255, 255))
16842 action = CONV;
16843 /* ADDS <Rd>,<Rn>,#<imm3> */
16844 /* SUBS <Rd>,<Rn>,#<imm3> */
16845 else if (CONST_INT_P (op1)
16846 && IN_RANGE (INTVAL (op1), -7, 7))
16847 action = CONV;
16848 }
16849 /* ADCS <Rd>, <Rn> */
16850 else if (GET_CODE (XEXP (src, 0)) == PLUS
16851 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
16852 && low_register_operand (XEXP (XEXP (src, 0), 1),
16853 SImode)
16854 && COMPARISON_P (op1)
16855 && cc_register (XEXP (op1, 0), VOIDmode)
16856 && maybe_get_arm_condition_code (op1) == ARM_CS
16857 && XEXP (op1, 1) == const0_rtx)
16858 action = CONV;
16859 break;
16860
16861 case MINUS:
16862 /* RSBS <Rd>,<Rn>,#0
16863 Not handled here: see NEG below. */
16864 /* SUBS <Rd>,<Rn>,#<imm3>
16865 SUBS <Rdn>,#<imm8>
16866 Not handled here: see PLUS above. */
16867 /* SUBS <Rd>,<Rn>,<Rm> */
16868 if (low_register_operand (op0, SImode)
16869 && low_register_operand (op1, SImode))
16870 action = CONV;
16871 break;
16872
16873 case MULT:
16874 /* MULS <Rdm>,<Rn>,<Rdm>
16875 As an exception to the rule, this is only used
16876 when optimizing for size since MULS is slow on all
16877 known implementations. We do not even want to use
16878 MULS in cold code, if optimizing for speed, so we
16879 test the global flag here. */
16880 if (!optimize_size)
16881 break;
16882 /* else fall through. */
16883 case AND:
16884 case IOR:
16885 case XOR:
16886 /* ANDS <Rdn>,<Rm> */
16887 if (rtx_equal_p (dst, op0)
16888 && low_register_operand (op1, SImode))
16889 action = CONV;
16890 else if (rtx_equal_p (dst, op1)
16891 && low_register_operand (op0, SImode))
16892 action = SWAP_CONV;
16893 break;
16894
16895 case ASHIFTRT:
16896 case ASHIFT:
16897 case LSHIFTRT:
16898 /* ASRS <Rdn>,<Rm> */
16899 /* LSRS <Rdn>,<Rm> */
16900 /* LSLS <Rdn>,<Rm> */
16901 if (rtx_equal_p (dst, op0)
16902 && low_register_operand (op1, SImode))
16903 action = CONV;
16904 /* ASRS <Rd>,<Rm>,#<imm5> */
16905 /* LSRS <Rd>,<Rm>,#<imm5> */
16906 /* LSLS <Rd>,<Rm>,#<imm5> */
16907 else if (low_register_operand (op0, SImode)
16908 && CONST_INT_P (op1)
16909 && IN_RANGE (INTVAL (op1), 0, 31))
16910 action = CONV;
16911 break;
16912
16913 case ROTATERT:
16914 /* RORS <Rdn>,<Rm> */
16915 if (rtx_equal_p (dst, op0)
16916 && low_register_operand (op1, SImode))
16917 action = CONV;
16918 break;
16919
16920 case NOT:
16921 case NEG:
16922 /* MVNS <Rd>,<Rm> */
16923 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16924 if (low_register_operand (op0, SImode))
16925 action = CONV;
16926 break;
16927
16928 case CONST_INT:
16929 /* MOVS <Rd>,#<imm8> */
16930 if (CONST_INT_P (src)
16931 && IN_RANGE (INTVAL (src), 0, 255))
16932 action = CONV;
16933 break;
16934
16935 case REG:
16936 /* MOVS and MOV<c> with registers have different
16937 encodings, so are not relevant here. */
16938 break;
16939
16940 default:
16941 break;
16942 }
16943 }
16944
16945 if (action != SKIP)
16946 {
16947 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
16948 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
16949 rtvec vec;
16950
16951 if (action == SWAP_CONV)
16952 {
16953 src = copy_rtx (src);
16954 XEXP (src, 0) = op1;
16955 XEXP (src, 1) = op0;
16956 pat = gen_rtx_SET (VOIDmode, dst, src);
16957 vec = gen_rtvec (2, pat, clobber);
16958 }
16959 else /* action == CONV */
16960 vec = gen_rtvec (2, pat, clobber);
16961
16962 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
16963 INSN_CODE (insn) = -1;
16964 }
16965 }
16966
16967 if (NONDEBUG_INSN_P (insn))
16968 df_simulate_one_insn_backwards (bb, insn, &live);
16969 }
16970 }
16971
16972 CLEAR_REG_SET (&live);
16973 }
16974
16975 /* GCC puts the pool in the wrong place for ARM, since we can only
16976 load addresses a limited distance around the pc. We do some
16977 special munging to move the constant pool values to the correct
16978 point in the code. */
16979 static void
16980 arm_reorg (void)
16981 {
16982 rtx insn;
16983 HOST_WIDE_INT address = 0;
16984 Mfix * fix;
16985
16986 if (TARGET_THUMB1)
16987 thumb1_reorg ();
16988 else if (TARGET_THUMB2)
16989 thumb2_reorg ();
16990
16991 /* Ensure all insns that must be split have been split at this point.
16992 Otherwise, the pool placement code below may compute incorrect
16993 insn lengths. Note that when optimizing, all insns have already
16994 been split at this point. */
16995 if (!optimize)
16996 split_all_insns_noflow ();
16997
16998 minipool_fix_head = minipool_fix_tail = NULL;
16999
17000 /* The first insn must always be a note, or the code below won't
17001 scan it properly. */
17002 insn = get_insns ();
17003 gcc_assert (NOTE_P (insn));
17004 minipool_pad = 0;
17005
17006 /* Scan all the insns and record the operands that will need fixing. */
17007 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17008 {
17009 if (BARRIER_P (insn))
17010 push_minipool_barrier (insn, address);
17011 else if (INSN_P (insn))
17012 {
17013 rtx table;
17014
17015 note_invalid_constants (insn, address, true);
17016 address += get_attr_length (insn);
17017
17018 /* If the insn is a vector jump, add the size of the table
17019 and skip the table. */
17020 if (tablejump_p (insn, NULL, &table))
17021 {
17022 address += get_jump_table_size (table);
17023 insn = table;
17024 }
17025 }
17026 else if (LABEL_P (insn))
17027 /* Add the worst-case padding due to alignment. We don't add
17028 the _current_ padding because the minipool insertions
17029 themselves might change it. */
17030 address += get_label_padding (insn);
17031 }
17032
17033 fix = minipool_fix_head;
17034
17035 /* Now scan the fixups and perform the required changes. */
17036 while (fix)
17037 {
17038 Mfix * ftmp;
17039 Mfix * fdel;
17040 Mfix * last_added_fix;
17041 Mfix * last_barrier = NULL;
17042 Mfix * this_fix;
17043
17044 /* Skip any further barriers before the next fix. */
17045 while (fix && BARRIER_P (fix->insn))
17046 fix = fix->next;
17047
17048 /* No more fixes. */
17049 if (fix == NULL)
17050 break;
17051
17052 last_added_fix = NULL;
17053
17054 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17055 {
17056 if (BARRIER_P (ftmp->insn))
17057 {
17058 if (ftmp->address >= minipool_vector_head->max_address)
17059 break;
17060
17061 last_barrier = ftmp;
17062 }
17063 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17064 break;
17065
17066 last_added_fix = ftmp; /* Keep track of the last fix added. */
17067 }
17068
17069 /* If we found a barrier, drop back to that; any fixes that we
17070 could have reached but come after the barrier will now go in
17071 the next mini-pool. */
17072 if (last_barrier != NULL)
17073 {
17074 /* Reduce the refcount for those fixes that won't go into this
17075 pool after all. */
17076 for (fdel = last_barrier->next;
17077 fdel && fdel != ftmp;
17078 fdel = fdel->next)
17079 {
17080 fdel->minipool->refcount--;
17081 fdel->minipool = NULL;
17082 }
17083
17084 ftmp = last_barrier;
17085 }
17086 else
17087 {
17088 /* ftmp is the first fix that we can't fit into this pool and
17089 there are no natural barriers that we could use. Insert a
17090 new barrier in the code somewhere between the previous
17091 fix and this one, and arrange to jump around it. */
17092 HOST_WIDE_INT max_address;
17093
17094 /* The last item on the list of fixes must be a barrier, so
17095 we can never run off the end of the list of fixes without
17096 last_barrier being set. */
17097 gcc_assert (ftmp);
17098
17099 max_address = minipool_vector_head->max_address;
17100 /* Check that there isn't another fix that is in range that
17101 we couldn't fit into this pool because the pool was
17102 already too large: we need to put the pool before such an
17103 instruction. The pool itself may come just after the
17104 fix because create_fix_barrier also allows space for a
17105 jump instruction. */
17106 if (ftmp->address < max_address)
17107 max_address = ftmp->address + 1;
17108
17109 last_barrier = create_fix_barrier (last_added_fix, max_address);
17110 }
17111
17112 assign_minipool_offsets (last_barrier);
17113
17114 while (ftmp)
17115 {
17116 if (!BARRIER_P (ftmp->insn)
17117 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17118 == NULL))
17119 break;
17120
17121 ftmp = ftmp->next;
17122 }
17123
17124 /* Scan over the fixes we have identified for this pool, fixing them
17125 up and adding the constants to the pool itself. */
17126 for (this_fix = fix; this_fix && ftmp != this_fix;
17127 this_fix = this_fix->next)
17128 if (!BARRIER_P (this_fix->insn))
17129 {
17130 rtx addr
17131 = plus_constant (Pmode,
17132 gen_rtx_LABEL_REF (VOIDmode,
17133 minipool_vector_label),
17134 this_fix->minipool->offset);
17135 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17136 }
17137
17138 dump_minipool (last_barrier->insn);
17139 fix = ftmp;
17140 }
17141
17142 /* From now on we must synthesize any constants that we can't handle
17143 directly. This can happen if the RTL gets split during final
17144 instruction generation. */
17145 after_arm_reorg = 1;
17146
17147 /* Free the minipool memory. */
17148 obstack_free (&minipool_obstack, minipool_startobj);
17149 }
17150 \f
17151 /* Routines to output assembly language. */
17152
17153 /* If the rtx is the correct value, then return the string of the number.
17154 In this way we can ensure that valid double constants are generated even
17155 when cross-compiling. */
17156 const char *
17157 fp_immediate_constant (rtx x)
17158 {
17159 REAL_VALUE_TYPE r;
17160
17161 if (!fp_consts_inited)
17162 init_fp_table ();
17163
17164 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17165
17166 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17167 return "0";
17168 }
17169
17170 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17171 static const char *
17172 fp_const_from_val (REAL_VALUE_TYPE *r)
17173 {
17174 if (!fp_consts_inited)
17175 init_fp_table ();
17176
17177 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17178 return "0";
17179 }
17180
17181 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17182 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17183 insn is in the list, and UPDATE is true iff the list contains an
17184 explicit update of the base register. */
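/* A hedged example of the output (register choice illustrative only):
popping {r4, r5, pc} with SP as the base and writeback, in unified syntax,
prints as
pop {r4, r5, pc}
while the same registers with a non-SP base such as r7 and no writeback
print as an LDM, e.g. "ldmia r7, {r4, r5}". */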
17185 void
17186 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17187 bool update)
17188 {
17189 int i;
17190 char pattern[100];
17191 int offset;
17192 const char *conditional;
17193 int num_saves = XVECLEN (operands[0], 0);
17194 unsigned int regno;
17195 unsigned int regno_base = REGNO (operands[1]);
17196
17197 offset = 0;
17198 offset += update ? 1 : 0;
17199 offset += return_pc ? 1 : 0;
17200
17201 /* Is the base register in the list? */
17202 for (i = offset; i < num_saves; i++)
17203 {
17204 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17205 /* If SP is in the list, then the base register must be SP. */
17206 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17207 /* If base register is in the list, there must be no explicit update. */
17208 if (regno == regno_base)
17209 gcc_assert (!update);
17210 }
17211
17212 conditional = reverse ? "%?%D0" : "%?%d0";
17213 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17214 {
17215 /* Output pop (not ldmfd) because it has a shorter encoding. */
17216 gcc_assert (update);
17217 sprintf (pattern, "pop%s\t{", conditional);
17218 }
17219 else
17220 {
17221 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17222 It's just a convention, their semantics are identical. */
17223 if (regno_base == SP_REGNUM)
17224 sprintf (pattern, "ldm%sfd\t", conditional);
17225 else if (TARGET_UNIFIED_ASM)
17226 sprintf (pattern, "ldmia%s\t", conditional);
17227 else
17228 sprintf (pattern, "ldm%sia\t", conditional);
17229
17230 strcat (pattern, reg_names[regno_base]);
17231 if (update)
17232 strcat (pattern, "!, {");
17233 else
17234 strcat (pattern, ", {");
17235 }
17236
17237 /* Output the first destination register. */
17238 strcat (pattern,
17239 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17240
17241 /* Output the rest of the destination registers. */
17242 for (i = offset + 1; i < num_saves; i++)
17243 {
17244 strcat (pattern, ", ");
17245 strcat (pattern,
17246 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17247 }
17248
17249 strcat (pattern, "}");
17250
17251 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17252 strcat (pattern, "^");
17253
17254 output_asm_insn (pattern, &cond);
17255 }
17256
17257
17258 /* Output the assembly for a store multiple. */
17259
17260 const char *
17261 vfp_output_fstmd (rtx * operands)
17262 {
17263 char pattern[100];
17264 int p;
17265 int base;
17266 int i;
17267
17268 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17269 p = strlen (pattern);
17270
17271 gcc_assert (REG_P (operands[1]));
17272
17273 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17274 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17275 {
17276 p += sprintf (&pattern[p], ", d%d", base + i);
17277 }
17278 strcpy (&pattern[p], "}");
17279
17280 output_asm_insn (pattern, operands);
17281 return "";
17282 }
17283
17284
17285 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17286 number of bytes pushed. */
17287
17288 static int
17289 vfp_emit_fstmd (int base_reg, int count)
17290 {
17291 rtx par;
17292 rtx dwarf;
17293 rtx tmp, reg;
17294 int i;
17295
17296 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly
17297 two register pairs are stored by a store multiple insn. We avoid this
17298 by pushing an extra pair. */
17299 if (count == 2 && !arm_arch6)
17300 {
17301 if (base_reg == LAST_VFP_REGNUM - 3)
17302 base_reg -= 2;
17303 count++;
17304 }
17305
17306 /* FSTMD may not store more than 16 doubleword registers at once. Split
17307 larger stores into multiple parts (up to a maximum of two, in
17308 practice). */
17309 if (count > 16)
17310 {
17311 int saved;
17312 /* NOTE: base_reg is an internal register number, so each D register
17313 counts as 2. */
17314 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17315 saved += vfp_emit_fstmd (base_reg, 16);
17316 return saved;
17317 }
17318
17319 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17320 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17321
17322 reg = gen_rtx_REG (DFmode, base_reg);
17323 base_reg += 2;
17324
17325 XVECEXP (par, 0, 0)
17326 = gen_rtx_SET (VOIDmode,
17327 gen_frame_mem
17328 (BLKmode,
17329 gen_rtx_PRE_MODIFY (Pmode,
17330 stack_pointer_rtx,
17331 plus_constant
17332 (Pmode, stack_pointer_rtx,
17333 - (count * 8)))
17334 ),
17335 gen_rtx_UNSPEC (BLKmode,
17336 gen_rtvec (1, reg),
17337 UNSPEC_PUSH_MULT));
17338
17339 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17340 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17341 RTX_FRAME_RELATED_P (tmp) = 1;
17342 XVECEXP (dwarf, 0, 0) = tmp;
17343
17344 tmp = gen_rtx_SET (VOIDmode,
17345 gen_frame_mem (DFmode, stack_pointer_rtx),
17346 reg);
17347 RTX_FRAME_RELATED_P (tmp) = 1;
17348 XVECEXP (dwarf, 0, 1) = tmp;
17349
17350 for (i = 1; i < count; i++)
17351 {
17352 reg = gen_rtx_REG (DFmode, base_reg);
17353 base_reg += 2;
17354 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17355
17356 tmp = gen_rtx_SET (VOIDmode,
17357 gen_frame_mem (DFmode,
17358 plus_constant (Pmode,
17359 stack_pointer_rtx,
17360 i * 8)),
17361 reg);
17362 RTX_FRAME_RELATED_P (tmp) = 1;
17363 XVECEXP (dwarf, 0, i + 1) = tmp;
17364 }
17365
17366 par = emit_insn (par);
17367 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17368 RTX_FRAME_RELATED_P (par) = 1;
17369
17370 return count * 8;
17371 }
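
/* A hedged illustration of the splitting rules above: a request to push 20
D registers is emitted as one FSTMD of the top 4 followed by one FSTMD of
the remaining 16, and on a pre-v6 core a request for exactly 2 registers is
widened to 3 so the ARM10 VFPr1 erratum is never triggered. */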
17372
17373 /* Emit a call instruction with pattern PAT. ADDR is the address of
17374 the call target. */
17375
17376 void
17377 arm_emit_call_insn (rtx pat, rtx addr)
17378 {
17379 rtx insn;
17380
17381 insn = emit_call_insn (pat);
17382
17383 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17384 If the call might use such an entry, add a use of the PIC register
17385 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17386 if (TARGET_VXWORKS_RTP
17387 && flag_pic
17388 && GET_CODE (addr) == SYMBOL_REF
17389 && (SYMBOL_REF_DECL (addr)
17390 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17391 : !SYMBOL_REF_LOCAL_P (addr)))
17392 {
17393 require_pic_register ();
17394 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17395 }
17396 }
17397
17398 /* Output a 'call' insn. */
17399 const char *
17400 output_call (rtx *operands)
17401 {
17402 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17403
17404 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17405 if (REGNO (operands[0]) == LR_REGNUM)
17406 {
17407 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17408 output_asm_insn ("mov%?\t%0, %|lr", operands);
17409 }
17410
17411 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17412
17413 if (TARGET_INTERWORK || arm_arch4t)
17414 output_asm_insn ("bx%?\t%0", operands);
17415 else
17416 output_asm_insn ("mov%?\t%|pc, %0", operands);
17417
17418 return "";
17419 }
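
/* A hedged example of the sequences produced above (pre-v5 targets only,
register choice illustrative): with interworking or on ARMv4T a call
through r3 is
mov lr, pc
bx r3
and otherwise the final branch degrades to "mov pc, r3". A call through
lr first copies the target into ip. */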
17420
17421 /* Output a 'call' insn that is a reference in memory. This is
17422 disabled on ARMv5 and later, where we prefer a blx instead, because
17423 otherwise there's a significant performance overhead. */
17424 const char *
17425 output_call_mem (rtx *operands)
17426 {
17427 gcc_assert (!arm_arch5);
17428 if (TARGET_INTERWORK)
17429 {
17430 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17431 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17432 output_asm_insn ("bx%?\t%|ip", operands);
17433 }
17434 else if (regno_use_in (LR_REGNUM, operands[0]))
17435 {
17436 /* LR is used in the memory address. We load the address in the
17437 first instruction. It's safe to use IP as the target of the
17438 load since the call will kill it anyway. */
17439 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17440 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17441 if (arm_arch4t)
17442 output_asm_insn ("bx%?\t%|ip", operands);
17443 else
17444 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17445 }
17446 else
17447 {
17448 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17449 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17450 }
17451
17452 return "";
17453 }
17454
17455
17456 /* Output a move from arm registers to arm registers of a long double
17457 OPERANDS[0] is the destination.
17458 OPERANDS[1] is the source. */
17459 const char *
17460 output_mov_long_double_arm_from_arm (rtx *operands)
17461 {
17462 /* We have to be careful here because the two might overlap. */
17463 int dest_start = REGNO (operands[0]);
17464 int src_start = REGNO (operands[1]);
17465 rtx ops[2];
17466 int i;
17467
17468 if (dest_start < src_start)
17469 {
17470 for (i = 0; i < 3; i++)
17471 {
17472 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17473 ops[1] = gen_rtx_REG (SImode, src_start + i);
17474 output_asm_insn ("mov%?\t%0, %1", ops);
17475 }
17476 }
17477 else
17478 {
17479 for (i = 2; i >= 0; i--)
17480 {
17481 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17482 ops[1] = gen_rtx_REG (SImode, src_start + i);
17483 output_asm_insn ("mov%?\t%0, %1", ops);
17484 }
17485 }
17486
17487 return "";
17488 }
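
/* A hedged illustration of the overlap handling: copying the long double
in {r0, r1, r2} into {r1, r2, r3} iterates from the top down,
mov r3, r2
mov r2, r1
mov r1, r0
so that no source register is overwritten before it has been read. */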
17489
17490 void
17491 arm_emit_movpair (rtx dest, rtx src)
17492 {
17493 /* If the src is an immediate, simplify it. */
17494 if (CONST_INT_P (src))
17495 {
17496 HOST_WIDE_INT val = INTVAL (src);
17497 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17498 if ((val >> 16) & 0x0000ffff)
17499 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17500 GEN_INT (16)),
17501 GEN_INT ((val >> 16) & 0x0000ffff));
17502 return;
17503 }
17504 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17505 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17506 }
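
/* A hedged example (constant and register illustrative only): loading
0x12345678 emits a SET of the low half and a ZERO_EXTRACT of the upper
half, which typically assemble to
movw r0, #0x5678
movt r0, #0x1234
If the upper 16 bits are zero, only the first insn is emitted. */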
17507
17508 /* Output a move between double words. It must be REG<-MEM
17509 or MEM<-REG. */
17510 const char *
17511 output_move_double (rtx *operands, bool emit, int *count)
17512 {
17513 enum rtx_code code0 = GET_CODE (operands[0]);
17514 enum rtx_code code1 = GET_CODE (operands[1]);
17515 rtx otherops[3];
17516 if (count)
17517 *count = 1;
17518
17519 /* The only case when this might happen is when
17520 you are looking at the length of a DImode instruction
17521 that has an invalid constant in it. */
17522 if (code0 == REG && code1 != MEM)
17523 {
17524 gcc_assert (!emit);
17525 *count = 2;
17526 return "";
17527 }
17528
17529 if (code0 == REG)
17530 {
17531 unsigned int reg0 = REGNO (operands[0]);
17532
17533 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17534
17535 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17536
17537 switch (GET_CODE (XEXP (operands[1], 0)))
17538 {
17539 case REG:
17540
17541 if (emit)
17542 {
17543 if (TARGET_LDRD
17544 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17545 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17546 else
17547 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17548 }
17549 break;
17550
17551 case PRE_INC:
17552 gcc_assert (TARGET_LDRD);
17553 if (emit)
17554 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17555 break;
17556
17557 case PRE_DEC:
17558 if (emit)
17559 {
17560 if (TARGET_LDRD)
17561 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17562 else
17563 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17564 }
17565 break;
17566
17567 case POST_INC:
17568 if (emit)
17569 {
17570 if (TARGET_LDRD)
17571 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17572 else
17573 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17574 }
17575 break;
17576
17577 case POST_DEC:
17578 gcc_assert (TARGET_LDRD);
17579 if (emit)
17580 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17581 break;
17582
17583 case PRE_MODIFY:
17584 case POST_MODIFY:
17585 /* Autoincrement addressing modes should never have overlapping
17586 base and destination registers, and overlapping index registers
17587 are already prohibited, so this doesn't need to worry about
17588 fix_cm3_ldrd. */
17589 otherops[0] = operands[0];
17590 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17591 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17592
17593 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17594 {
17595 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17596 {
17597 /* Registers overlap so split out the increment. */
17598 if (emit)
17599 {
17600 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17601 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17602 }
17603 if (count)
17604 *count = 2;
17605 }
17606 else
17607 {
17608 /* Use a single insn if we can.
17609 FIXME: IWMMXT allows offsets larger than ldrd can
17610 handle, fix these up with a pair of ldr. */
17611 if (TARGET_THUMB2
17612 || !CONST_INT_P (otherops[2])
17613 || (INTVAL (otherops[2]) > -256
17614 && INTVAL (otherops[2]) < 256))
17615 {
17616 if (emit)
17617 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17618 }
17619 else
17620 {
17621 if (emit)
17622 {
17623 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17624 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17625 }
17626 if (count)
17627 *count = 2;
17628
17629 }
17630 }
17631 }
17632 else
17633 {
17634 /* Use a single insn if we can.
17635 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17636 fix these up with a pair of ldr. */
17637 if (TARGET_THUMB2
17638 || !CONST_INT_P (otherops[2])
17639 || (INTVAL (otherops[2]) > -256
17640 && INTVAL (otherops[2]) < 256))
17641 {
17642 if (emit)
17643 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17644 }
17645 else
17646 {
17647 if (emit)
17648 {
17649 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17650 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17651 }
17652 if (count)
17653 *count = 2;
17654 }
17655 }
17656 break;
17657
17658 case LABEL_REF:
17659 case CONST:
17660 /* We might be able to use ldrd %0, %1 here. However, the range is
17661 different from that of ldr/adr, and it is broken on some ARMv7-M
17662 implementations. */
17663 /* Use the second register of the pair to avoid problematic
17664 overlap. */
17665 otherops[1] = operands[1];
17666 if (emit)
17667 output_asm_insn ("adr%?\t%0, %1", otherops);
17668 operands[1] = otherops[0];
17669 if (emit)
17670 {
17671 if (TARGET_LDRD)
17672 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17673 else
17674 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17675 }
17676
17677 if (count)
17678 *count = 2;
17679 break;
17680
17681 /* ??? This needs checking for thumb2. */
17682 default:
17683 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17684 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17685 {
17686 otherops[0] = operands[0];
17687 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17688 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17689
17690 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17691 {
17692 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17693 {
17694 switch ((int) INTVAL (otherops[2]))
17695 {
17696 case -8:
17697 if (emit)
17698 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17699 return "";
17700 case -4:
17701 if (TARGET_THUMB2)
17702 break;
17703 if (emit)
17704 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17705 return "";
17706 case 4:
17707 if (TARGET_THUMB2)
17708 break;
17709 if (emit)
17710 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17711 return "";
17712 }
17713 }
17714 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
17715 operands[1] = otherops[0];
17716 if (TARGET_LDRD
17717 && (REG_P (otherops[2])
17718 || TARGET_THUMB2
17719 || (CONST_INT_P (otherops[2])
17720 && INTVAL (otherops[2]) > -256
17721 && INTVAL (otherops[2]) < 256)))
17722 {
17723 if (reg_overlap_mentioned_p (operands[0],
17724 otherops[2]))
17725 {
17726 rtx tmp;
17727 /* Swap base and index registers over to
17728 avoid a conflict. */
17729 tmp = otherops[1];
17730 otherops[1] = otherops[2];
17731 otherops[2] = tmp;
17732 }
17733 /* If both registers conflict, it will usually
17734 have been fixed by a splitter. */
17735 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17736 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17737 {
17738 if (emit)
17739 {
17740 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17741 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17742 }
17743 if (count)
17744 *count = 2;
17745 }
17746 else
17747 {
17748 otherops[0] = operands[0];
17749 if (emit)
17750 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17751 }
17752 return "";
17753 }
17754
17755 if (CONST_INT_P (otherops[2]))
17756 {
17757 if (emit)
17758 {
17759 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17760 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17761 else
17762 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17763 }
17764 }
17765 else
17766 {
17767 if (emit)
17768 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17769 }
17770 }
17771 else
17772 {
17773 if (emit)
17774 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
17775 }
17776
17777 if (count)
17778 *count = 2;
17779
17780 if (TARGET_LDRD)
17781 return "ldr%(d%)\t%0, [%1]";
17782
17783 return "ldm%(ia%)\t%1, %M0";
17784 }
17785 else
17786 {
17787 otherops[1] = adjust_address (operands[1], SImode, 4);
17788 /* Take care of overlapping base/data reg. */
17789 if (reg_mentioned_p (operands[0], operands[1]))
17790 {
17791 if (emit)
17792 {
17793 output_asm_insn ("ldr%?\t%0, %1", otherops);
17794 output_asm_insn ("ldr%?\t%0, %1", operands);
17795 }
17796 if (count)
17797 *count = 2;
17798
17799 }
17800 else
17801 {
17802 if (emit)
17803 {
17804 output_asm_insn ("ldr%?\t%0, %1", operands);
17805 output_asm_insn ("ldr%?\t%0, %1", otherops);
17806 }
17807 if (count)
17808 *count = 2;
17809 }
17810 }
17811 }
17812 }
17813 else
17814 {
17815 /* Constraints should ensure this. */
17816 gcc_assert (code0 == MEM && code1 == REG);
17817 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
17818 || (TARGET_ARM && TARGET_LDRD));
17819
17820 switch (GET_CODE (XEXP (operands[0], 0)))
17821 {
17822 case REG:
17823 if (emit)
17824 {
17825 if (TARGET_LDRD)
17826 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
17827 else
17828 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17829 }
17830 break;
17831
17832 case PRE_INC:
17833 gcc_assert (TARGET_LDRD);
17834 if (emit)
17835 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
17836 break;
17837
17838 case PRE_DEC:
17839 if (emit)
17840 {
17841 if (TARGET_LDRD)
17842 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
17843 else
17844 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
17845 }
17846 break;
17847
17848 case POST_INC:
17849 if (emit)
17850 {
17851 if (TARGET_LDRD)
17852 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
17853 else
17854 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
17855 }
17856 break;
17857
17858 case POST_DEC:
17859 gcc_assert (TARGET_LDRD);
17860 if (emit)
17861 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
17862 break;
17863
17864 case PRE_MODIFY:
17865 case POST_MODIFY:
17866 otherops[0] = operands[1];
17867 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
17868 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
17869
17870 /* IWMMXT allows offsets larger than ldrd can handle,
17871 fix these up with a pair of ldr. */
17872 if (!TARGET_THUMB2
17873 && CONST_INT_P (otherops[2])
17874 && (INTVAL (otherops[2]) <= -256
17875 || INTVAL (otherops[2]) >= 256))
17876 {
17877 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17878 {
17879 if (emit)
17880 {
17881 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
17882 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17883 }
17884 if (count)
17885 *count = 2;
17886 }
17887 else
17888 {
17889 if (emit)
17890 {
17891 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17892 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
17893 }
17894 if (count)
17895 *count = 2;
17896 }
17897 }
17898 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17899 {
17900 if (emit)
17901 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
17902 }
17903 else
17904 {
17905 if (emit)
17906 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
17907 }
17908 break;
17909
17910 case PLUS:
17911 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
17912 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17913 {
17914 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
17915 {
17916 case -8:
17917 if (emit)
17918 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
17919 return "";
17920
17921 case -4:
17922 if (TARGET_THUMB2)
17923 break;
17924 if (emit)
17925 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
17926 return "";
17927
17928 case 4:
17929 if (TARGET_THUMB2)
17930 break;
17931 if (emit)
17932 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
17933 return "";
17934 }
17935 }
17936 if (TARGET_LDRD
17937 && (REG_P (otherops[2])
17938 || TARGET_THUMB2
17939 || (CONST_INT_P (otherops[2])
17940 && INTVAL (otherops[2]) > -256
17941 && INTVAL (otherops[2]) < 256)))
17942 {
17943 otherops[0] = operands[1];
17944 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
17945 if (emit)
17946 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
17947 return "";
17948 }
17949 /* Fall through */
17950
17951 default:
17952 otherops[0] = adjust_address (operands[0], SImode, 4);
17953 otherops[1] = operands[1];
17954 if (emit)
17955 {
17956 output_asm_insn ("str%?\t%1, %0", operands);
17957 output_asm_insn ("str%?\t%H1, %0", otherops);
17958 }
17959 if (count)
17960 *count = 2;
17961 }
17962 }
17963
17964 return "";
17965 }
17966
17967 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17968 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17969
17970 const char *
17971 output_move_quad (rtx *operands)
17972 {
17973 if (REG_P (operands[0]))
17974 {
17975 /* Load, or reg->reg move. */
17976
17977 if (MEM_P (operands[1]))
17978 {
17979 switch (GET_CODE (XEXP (operands[1], 0)))
17980 {
17981 case REG:
17982 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17983 break;
17984
17985 case LABEL_REF:
17986 case CONST:
17987 output_asm_insn ("adr%?\t%0, %1", operands);
17988 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
17989 break;
17990
17991 default:
17992 gcc_unreachable ();
17993 }
17994 }
17995 else
17996 {
17997 rtx ops[2];
17998 int dest, src, i;
17999
18000 gcc_assert (REG_P (operands[1]));
18001
18002 dest = REGNO (operands[0]);
18003 src = REGNO (operands[1]);
18004
18005 /* This seems pretty dumb, but hopefully GCC won't try to do it
18006 very often. */
18007 if (dest < src)
18008 for (i = 0; i < 4; i++)
18009 {
18010 ops[0] = gen_rtx_REG (SImode, dest + i);
18011 ops[1] = gen_rtx_REG (SImode, src + i);
18012 output_asm_insn ("mov%?\t%0, %1", ops);
18013 }
18014 else
18015 for (i = 3; i >= 0; i--)
18016 {
18017 ops[0] = gen_rtx_REG (SImode, dest + i);
18018 ops[1] = gen_rtx_REG (SImode, src + i);
18019 output_asm_insn ("mov%?\t%0, %1", ops);
18020 }
18021 }
18022 }
18023 else
18024 {
18025 gcc_assert (MEM_P (operands[0]));
18026 gcc_assert (REG_P (operands[1]));
18027 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18028
18029 switch (GET_CODE (XEXP (operands[0], 0)))
18030 {
18031 case REG:
18032 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18033 break;
18034
18035 default:
18036 gcc_unreachable ();
18037 }
18038 }
18039
18040 return "";
18041 }
18042
18043 /* Output a VFP load or store instruction. */
18044
18045 const char *
18046 output_move_vfp (rtx *operands)
18047 {
18048 rtx reg, mem, addr, ops[2];
18049 int load = REG_P (operands[0]);
18050 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18051 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18052 const char *templ;
18053 char buff[50];
18054 enum machine_mode mode;
18055
18056 reg = operands[!load];
18057 mem = operands[load];
18058
18059 mode = GET_MODE (reg);
18060
18061 gcc_assert (REG_P (reg));
18062 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18063 gcc_assert (mode == SFmode
18064 || mode == DFmode
18065 || mode == SImode
18066 || mode == DImode
18067 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18068 gcc_assert (MEM_P (mem));
18069
18070 addr = XEXP (mem, 0);
18071
18072 switch (GET_CODE (addr))
18073 {
18074 case PRE_DEC:
18075 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18076 ops[0] = XEXP (addr, 0);
18077 ops[1] = reg;
18078 break;
18079
18080 case POST_INC:
18081 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18082 ops[0] = XEXP (addr, 0);
18083 ops[1] = reg;
18084 break;
18085
18086 default:
18087 templ = "f%s%c%%?\t%%%s0, %%1%s";
18088 ops[0] = reg;
18089 ops[1] = mem;
18090 break;
18091 }
18092
18093 sprintf (buff, templ,
18094 load ? "ld" : "st",
18095 dp ? 'd' : 's',
18096 dp ? "P" : "",
18097 integer_p ? "\t%@ int" : "");
18098 output_asm_insn (buff, ops);
18099
18100 return "";
18101 }
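
/* A hedged example of the output (registers illustrative): a DFmode load
from a plain register address prints as
fldd d7, [r4]
while a DImode store through a pre-decrement address prints as an
"fstmdbd" with writeback, followed by an "@ int" comment because the mode
is an integer one. */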
18102
18103 /* Output a Neon double-word or quad-word load or store, or a load
18104 or store for larger structure modes.
18105
18106 WARNING: The ordering of elements is weird in big-endian mode,
18107 because the EABI requires that vectors stored in memory appear
18108 as though they were stored by a VSTM instruction.
18109 GCC RTL defines element ordering based on in-memory order.
18110 This can be different from the architectural ordering of elements
18111 within a NEON register. The intrinsics defined in arm_neon.h use the
18112 NEON register element ordering, not the GCC RTL element ordering.
18113
18114 For example, the in-memory ordering of a big-endian quadword
18115 vector with 16-bit elements when stored from register pair {d0,d1}
18116 will be (lowest address first, d0[N] is NEON register element N):
18117
18118 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18119
18120 When necessary, quadword registers (dN, dN+1) are moved to ARM
18121 registers from rN in the order:
18122
18123 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18124
18125 So that STM/LDM can be used on vectors in ARM registers, and the
18126 same memory layout will result as if VSTM/VLDM were used.
18127
18128 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18129 possible, which allows use of appropriate alignment tags.
18130 Note that the choice of "64" is independent of the actual vector
18131 element size; this size simply ensures that the behavior is
18132 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18133
18134 Due to limitations of those instructions, use of VST1.64/VLD1.64
18135 is not possible if:
18136 - the address contains PRE_DEC, or
18137 - the mode refers to more than 4 double-word registers
18138
18139 In those cases, it would be possible to replace VSTM/VLDM by a
18140 sequence of instructions; this is not currently implemented since
18141 this is not certain to actually improve performance. */
18142
18143 const char *
18144 output_move_neon (rtx *operands)
18145 {
18146 rtx reg, mem, addr, ops[2];
18147 int regno, nregs, load = REG_P (operands[0]);
18148 const char *templ;
18149 char buff[50];
18150 enum machine_mode mode;
18151
18152 reg = operands[!load];
18153 mem = operands[load];
18154
18155 mode = GET_MODE (reg);
18156
18157 gcc_assert (REG_P (reg));
18158 regno = REGNO (reg);
18159 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18160 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18161 || NEON_REGNO_OK_FOR_QUAD (regno));
18162 gcc_assert (VALID_NEON_DREG_MODE (mode)
18163 || VALID_NEON_QREG_MODE (mode)
18164 || VALID_NEON_STRUCT_MODE (mode));
18165 gcc_assert (MEM_P (mem));
18166
18167 addr = XEXP (mem, 0);
18168
18169 /* Strip off const from addresses like (const (plus (...))). */
18170 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18171 addr = XEXP (addr, 0);
18172
18173 switch (GET_CODE (addr))
18174 {
18175 case POST_INC:
18176 /* We have to use vldm / vstm for too-large modes. */
18177 if (nregs > 4)
18178 {
18179 templ = "v%smia%%?\t%%0!, %%h1";
18180 ops[0] = XEXP (addr, 0);
18181 }
18182 else
18183 {
18184 templ = "v%s1.64\t%%h1, %%A0";
18185 ops[0] = mem;
18186 }
18187 ops[1] = reg;
18188 break;
18189
18190 case PRE_DEC:
18191 /* We have to use vldm / vstm in this case, since there is no
18192 pre-decrement form of the vld1 / vst1 instructions. */
18193 templ = "v%smdb%%?\t%%0!, %%h1";
18194 ops[0] = XEXP (addr, 0);
18195 ops[1] = reg;
18196 break;
18197
18198 case POST_MODIFY:
18199 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18200 gcc_unreachable ();
18201
18202 case LABEL_REF:
18203 case PLUS:
18204 {
18205 int i;
18206 int overlap = -1;
18207 for (i = 0; i < nregs; i++)
18208 {
18209 /* We're only using DImode here because it's a convenient size. */
18210 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18211 ops[1] = adjust_address (mem, DImode, 8 * i);
18212 if (reg_overlap_mentioned_p (ops[0], mem))
18213 {
18214 gcc_assert (overlap == -1);
18215 overlap = i;
18216 }
18217 else
18218 {
18219 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18220 output_asm_insn (buff, ops);
18221 }
18222 }
18223 if (overlap != -1)
18224 {
18225 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18226 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18227 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18228 output_asm_insn (buff, ops);
18229 }
18230
18231 return "";
18232 }
18233
18234 default:
18235 /* We have to use vldm / vstm for too-large modes. */
18236 if (nregs > 4)
18237 templ = "v%smia%%?\t%%m0, %%h1";
18238 else
18239 templ = "v%s1.64\t%%h1, %%A0";
18240
18241 ops[0] = mem;
18242 ops[1] = reg;
18243 }
18244
18245 sprintf (buff, templ, load ? "ld" : "st");
18246 output_asm_insn (buff, ops);
18247
18248 return "";
18249 }
18250
18251 /* Compute and return the length of neon_mov<mode>, where <mode> is
18252 one of VSTRUCT modes: EI, OI, CI or XI. */
18253 int
18254 arm_attr_length_move_neon (rtx insn)
18255 {
18256 rtx reg, mem, addr;
18257 int load;
18258 enum machine_mode mode;
18259
18260 extract_insn_cached (insn);
18261
18262 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18263 {
18264 mode = GET_MODE (recog_data.operand[0]);
18265 switch (mode)
18266 {
18267 case EImode:
18268 case OImode:
18269 return 8;
18270 case CImode:
18271 return 12;
18272 case XImode:
18273 return 16;
18274 default:
18275 gcc_unreachable ();
18276 }
18277 }
18278
18279 load = REG_P (recog_data.operand[0]);
18280 reg = recog_data.operand[!load];
18281 mem = recog_data.operand[load];
18282
18283 gcc_assert (MEM_P (mem));
18284
18285 mode = GET_MODE (reg);
18286 addr = XEXP (mem, 0);
18287
18288 /* Strip off const from addresses like (const (plus (...))). */
18289 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18290 addr = XEXP (addr, 0);
18291
18292 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18293 {
18294 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18295 return insns * 4;
18296 }
18297 else
18298 return 4;
18299 }
18300
18301 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18302 return zero. */
18303
18304 int
18305 arm_address_offset_is_imm (rtx insn)
18306 {
18307 rtx mem, addr;
18308
18309 extract_insn_cached (insn);
18310
18311 if (REG_P (recog_data.operand[0]))
18312 return 0;
18313
18314 mem = recog_data.operand[0];
18315
18316 gcc_assert (MEM_P (mem));
18317
18318 addr = XEXP (mem, 0);
18319
18320 if (REG_P (addr)
18321 || (GET_CODE (addr) == PLUS
18322 && REG_P (XEXP (addr, 0))
18323 && CONST_INT_P (XEXP (addr, 1))))
18324 return 1;
18325 else
18326 return 0;
18327 }
18328
18329 /* Output an ADD r, s, #n where n may be too big for one instruction.
18330 If adding zero to one register, output nothing. */
18331 const char *
18332 output_add_immediate (rtx *operands)
18333 {
18334 HOST_WIDE_INT n = INTVAL (operands[2]);
18335
18336 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18337 {
18338 if (n < 0)
18339 output_multi_immediate (operands,
18340 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18341 -n);
18342 else
18343 output_multi_immediate (operands,
18344 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18345 n);
18346 }
18347
18348 return "";
18349 }
18350
18351 /* Output a multiple immediate operation.
18352 OPERANDS is the vector of operands referred to in the output patterns.
18353 INSTR1 is the output pattern to use for the first constant.
18354 INSTR2 is the output pattern to use for subsequent constants.
18355 IMMED_OP is the index of the constant slot in OPERANDS.
18356 N is the constant value. */
18357 static const char *
18358 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18359 int immed_op, HOST_WIDE_INT n)
18360 {
18361 #if HOST_BITS_PER_WIDE_INT > 32
18362 n &= 0xffffffff;
18363 #endif
18364
18365 if (n == 0)
18366 {
18367 /* Quick and easy output. */
18368 operands[immed_op] = const0_rtx;
18369 output_asm_insn (instr1, operands);
18370 }
18371 else
18372 {
18373 int i;
18374 const char * instr = instr1;
18375
18376 /* Note that n is never zero here (which would give no output). */
18377 for (i = 0; i < 32; i += 2)
18378 {
18379 if (n & (3 << i))
18380 {
18381 operands[immed_op] = GEN_INT (n & (255 << i));
18382 output_asm_insn (instr, operands);
18383 instr = instr2;
18384 i += 6;
18385 }
18386 }
18387 }
18388
18389 return "";
18390 }
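
/* A hedged worked example: output_add_immediate with n == 0xFFF splits the
constant into byte-sized chunks at even rotations, emitting roughly
add r0, r1, #255
add r0, r0, #3840
(register numbers illustrative only). */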
18391
18392 /* Return the name of a shifter operation. */
18393 static const char *
18394 arm_shift_nmem (enum rtx_code code)
18395 {
18396 switch (code)
18397 {
18398 case ASHIFT:
18399 return ARM_LSL_NAME;
18400
18401 case ASHIFTRT:
18402 return "asr";
18403
18404 case LSHIFTRT:
18405 return "lsr";
18406
18407 case ROTATERT:
18408 return "ror";
18409
18410 default:
18411 abort ();
18412 }
18413 }
18414
18415 /* Return the appropriate ARM instruction for the operation code.
18416 The returned result should not be overwritten. OP is the rtx of the
18417 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18418 was shifted. */
18419 const char *
18420 arithmetic_instr (rtx op, int shift_first_arg)
18421 {
18422 switch (GET_CODE (op))
18423 {
18424 case PLUS:
18425 return "add";
18426
18427 case MINUS:
18428 return shift_first_arg ? "rsb" : "sub";
18429
18430 case IOR:
18431 return "orr";
18432
18433 case XOR:
18434 return "eor";
18435
18436 case AND:
18437 return "and";
18438
18439 case ASHIFT:
18440 case ASHIFTRT:
18441 case LSHIFTRT:
18442 case ROTATERT:
18443 return arm_shift_nmem (GET_CODE (op));
18444
18445 default:
18446 gcc_unreachable ();
18447 }
18448 }
18449
18450 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18451 for the operation code.  The returned result should not be overwritten.
18452 OP is the rtx of the shift.
18453 On exit, *AMOUNTP will be -1 if the shift is by a register, or the shift
18454 amount if the shift is by a constant. */
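/* For example, (ashift (reg) (const_int 4)) returns ARM_LSL_NAME with
*AMOUNTP set to 4; (rotate (reg) (const_int 8)) is converted to a rotate
right and returns "ror" with *AMOUNTP set to 24; (mult (reg) (const_int 8))
also returns ARM_LSL_NAME, with *AMOUNTP set to int_log2 (8) == 3; and a
shift by a register returns the mnemonic with *AMOUNTP set to -1.  */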
18455 static const char *
18456 shift_op (rtx op, HOST_WIDE_INT *amountp)
18457 {
18458 const char * mnem;
18459 enum rtx_code code = GET_CODE (op);
18460
18461 switch (code)
18462 {
18463 case ROTATE:
18464 if (!CONST_INT_P (XEXP (op, 1)))
18465 {
18466 output_operand_lossage ("invalid shift operand");
18467 return NULL;
18468 }
18469
18470 code = ROTATERT;
18471 *amountp = 32 - INTVAL (XEXP (op, 1));
18472 mnem = "ror";
18473 break;
18474
18475 case ASHIFT:
18476 case ASHIFTRT:
18477 case LSHIFTRT:
18478 case ROTATERT:
18479 mnem = arm_shift_nmem (code);
18480 if (CONST_INT_P (XEXP (op, 1)))
18481 {
18482 *amountp = INTVAL (XEXP (op, 1));
18483 }
18484 else if (REG_P (XEXP (op, 1)))
18485 {
18486 *amountp = -1;
18487 return mnem;
18488 }
18489 else
18490 {
18491 output_operand_lossage ("invalid shift operand");
18492 return NULL;
18493 }
18494 break;
18495
18496 case MULT:
18497 /* We never have to worry about the amount being other than a
18498 power of 2, since this case can never be reloaded from a reg. */
18499 if (!CONST_INT_P (XEXP (op, 1)))
18500 {
18501 output_operand_lossage ("invalid shift operand");
18502 return NULL;
18503 }
18504
18505 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18506
18507 /* Amount must be a power of two. */
18508 if (*amountp & (*amountp - 1))
18509 {
18510 output_operand_lossage ("invalid shift operand");
18511 return NULL;
18512 }
18513
18514 *amountp = int_log2 (*amountp);
18515 return ARM_LSL_NAME;
18516
18517 default:
18518 output_operand_lossage ("invalid shift operand");
18519 return NULL;
18520 }
18521
18522 /* This is not 100% correct, but follows from the desire to merge
18523 multiplication by a power of 2 with the recognizer for a
18524 shift. >=32 is not a valid shift for "lsl", so we must try and
18525 output a shift that produces the correct arithmetical result.
18526 Using lsr #32 is identical except for the fact that the carry bit
18527 is not set correctly if we set the flags; but we never use the
18528 carry bit from such an operation, so we can ignore that. */
18529 if (code == ROTATERT)
18530 /* Rotate is just modulo 32. */
18531 *amountp &= 31;
18532 else if (*amountp != (*amountp & 31))
18533 {
18534 if (code == ASHIFT)
18535 mnem = "lsr";
18536 *amountp = 32;
18537 }
18538
18539 /* Shifts of 0 are no-ops. */
18540 if (*amountp == 0)
18541 return NULL;
18542
18543 return mnem;
18544 }
18545
18546 /* Return the shift count corresponding to POWER, which must be a power of two (e.g. int_log2 (8) == 3). */
18547
18548 static HOST_WIDE_INT
18549 int_log2 (HOST_WIDE_INT power)
18550 {
18551 HOST_WIDE_INT shift = 0;
18552
18553 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18554 {
18555 gcc_assert (shift <= 31);
18556 shift++;
18557 }
18558
18559 return shift;
18560 }
18561
18562 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18563 because /bin/as is horribly restrictive. The judgement about
18564 whether or not each character is 'printable' (and can be output as
18565 is) or not (and must be printed with an octal escape) must be made
18566 with reference to the *host* character set -- the situation is
18567 similar to that discussed in the comments above pp_c_char in
18568 c-pretty-print.c. */
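/* For example, the three input bytes 'A', '"' and 0x07 are emitted as
	.ascii	"A\"\007"
with printable characters copied through (escaping backslash and double
quote) and everything else printed as a three-digit octal escape.  The
output is split into chunks of at most MAX_ASCII_LEN characters per
.ascii directive.  */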
18569
18570 #define MAX_ASCII_LEN 51
18571
18572 void
18573 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18574 {
18575 int i;
18576 int len_so_far = 0;
18577
18578 fputs ("\t.ascii\t\"", stream);
18579
18580 for (i = 0; i < len; i++)
18581 {
18582 int c = p[i];
18583
18584 if (len_so_far >= MAX_ASCII_LEN)
18585 {
18586 fputs ("\"\n\t.ascii\t\"", stream);
18587 len_so_far = 0;
18588 }
18589
18590 if (ISPRINT (c))
18591 {
18592 if (c == '\\' || c == '\"')
18593 {
18594 putc ('\\', stream);
18595 len_so_far++;
18596 }
18597 putc (c, stream);
18598 len_so_far++;
18599 }
18600 else
18601 {
18602 fprintf (stream, "\\%03o", c);
18603 len_so_far += 4;
18604 }
18605 }
18606
18607 fputs ("\"\n", stream);
18608 }
18609 \f
18610 /* Compute the register save mask for registers 0 through 12
18611 inclusive. This code is used by arm_compute_save_reg_mask. */
18612
18613 static unsigned long
18614 arm_compute_save_reg0_reg12_mask (void)
18615 {
18616 unsigned long func_type = arm_current_func_type ();
18617 unsigned long save_reg_mask = 0;
18618 unsigned int reg;
18619
18620 if (IS_INTERRUPT (func_type))
18621 {
18622 unsigned int max_reg;
18623 /* Interrupt functions must not corrupt any registers,
18624 even call clobbered ones. If this is a leaf function
18625 we can just examine the registers used by the RTL, but
18626 otherwise we have to assume that whatever function is
18627 called might clobber anything, and so we have to save
18628 all the call-clobbered registers as well. */
18629 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18630 /* FIQ handlers have registers r8 - r12 banked, so
18631 we only need to check r0 - r7.  Normal ISRs only
18632 bank r13 and r14, so we must check up to r12.
18633 r13 is the stack pointer which is always preserved,
18634 so we do not need to consider it here. */
18635 max_reg = 7;
18636 else
18637 max_reg = 12;
18638
18639 for (reg = 0; reg <= max_reg; reg++)
18640 if (df_regs_ever_live_p (reg)
18641 || (! crtl->is_leaf && call_used_regs[reg]))
18642 save_reg_mask |= (1 << reg);
18643
18644 /* Also save the pic base register if necessary. */
18645 if (flag_pic
18646 && !TARGET_SINGLE_PIC_BASE
18647 && arm_pic_register != INVALID_REGNUM
18648 && crtl->uses_pic_offset_table)
18649 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18650 }
18651 else if (IS_VOLATILE (func_type))
18652 {
18653 /* For noreturn functions we historically omitted register saves
18654 altogether. However this really messes up debugging. As a
18655 compromise save just the frame pointers. Combined with the link
18656 register saved elsewhere this should be sufficient to get
18657 a backtrace. */
18658 if (frame_pointer_needed)
18659 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18660 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18661 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18662 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18663 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18664 }
18665 else
18666 {
18667 /* In the normal case we only need to save those registers
18668 which are call saved and which are used by this function. */
18669 for (reg = 0; reg <= 11; reg++)
18670 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18671 save_reg_mask |= (1 << reg);
18672
18673 /* Handle the frame pointer as a special case. */
18674 if (frame_pointer_needed)
18675 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18676
18677 /* If we aren't loading the PIC register,
18678 don't stack it even though it may be live. */
18679 if (flag_pic
18680 && !TARGET_SINGLE_PIC_BASE
18681 && arm_pic_register != INVALID_REGNUM
18682 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18683 || crtl->uses_pic_offset_table))
18684 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18685
18686 /* The prologue will copy SP into R0, so save it. */
18687 if (IS_STACKALIGN (func_type))
18688 save_reg_mask |= 1;
18689 }
18690
18691 /* Save registers so the exception handler can modify them. */
18692 if (crtl->calls_eh_return)
18693 {
18694 unsigned int i;
18695
18696 for (i = 0; ; i++)
18697 {
18698 reg = EH_RETURN_DATA_REGNO (i);
18699 if (reg == INVALID_REGNUM)
18700 break;
18701 save_reg_mask |= 1 << reg;
18702 }
18703 }
18704
18705 return save_reg_mask;
18706 }
18707
18708 /* Return true if r3 is live at the start of the function. */
18709
18710 static bool
18711 arm_r3_live_at_start_p (void)
18712 {
18713 /* Just look at cfg info, which is still close enough to correct at this
18714 point. This gives false positives for broken functions that might use
18715 uninitialized data that happens to be allocated in r3, but who cares? */
18716 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18717 }
18718
18719 /* Compute the number of bytes used to store the static chain register on the
18720 stack, above the stack frame. We need to know this accurately to get the
18721 alignment of the rest of the stack frame correct. */
18722
18723 static int
18724 arm_compute_static_chain_stack_bytes (void)
18725 {
18726 /* See the defining assertion in arm_expand_prologue. */
18727 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18728 && IS_NESTED (arm_current_func_type ())
18729 && arm_r3_live_at_start_p ()
18730 && crtl->args.pretend_args_size == 0)
18731 return 4;
18732
18733 return 0;
18734 }
18735
18736 /* Compute a bit mask of which registers need to be
18737 saved on the stack for the current function.
18738 This is used by arm_get_frame_offsets, which may add extra registers. */
18739
18740 static unsigned long
18741 arm_compute_save_reg_mask (void)
18742 {
18743 unsigned int save_reg_mask = 0;
18744 unsigned long func_type = arm_current_func_type ();
18745 unsigned int reg;
18746
18747 if (IS_NAKED (func_type))
18748 /* This should never really happen. */
18749 return 0;
18750
18751 /* If we are creating a stack frame, then we must save the frame pointer,
18752 IP (which will hold the old stack pointer), LR and the PC. */
18753 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18754 save_reg_mask |=
18755 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18756 | (1 << IP_REGNUM)
18757 | (1 << LR_REGNUM)
18758 | (1 << PC_REGNUM);
18759
18760 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18761
18762 /* Decide if we need to save the link register.
18763 Interrupt routines have their own banked link register,
18764 so they never need to save it.
18765 Otherwise if we do not use the link register we do not need to save
18766 it. If we are pushing other registers onto the stack however, we
18767 can save an instruction in the epilogue by pushing the link register
18768 now and then popping it back into the PC. This incurs extra memory
18769 accesses though, so we only do it when optimizing for size, and only
18770 if we know that we will not need a fancy return sequence. */
18771 if (df_regs_ever_live_p (LR_REGNUM)
18772 || (save_reg_mask
18773 && optimize_size
18774 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
18775 && !crtl->calls_eh_return))
18776 save_reg_mask |= 1 << LR_REGNUM;
18777
18778 if (cfun->machine->lr_save_eliminated)
18779 save_reg_mask &= ~ (1 << LR_REGNUM);
18780
18781 if (TARGET_REALLY_IWMMXT
18782 && ((bit_count (save_reg_mask)
18783 + ARM_NUM_INTS (crtl->args.pretend_args_size +
18784 arm_compute_static_chain_stack_bytes())
18785 ) % 2) != 0)
18786 {
18787 /* The total number of registers that are going to be pushed
18788 onto the stack is odd. We need to ensure that the stack
18789 is 64-bit aligned before we start to save iWMMXt registers,
18790 and also before we start to create locals. (A local variable
18791 might be a double or long long which we will load/store using
18792 an iWMMXt instruction). Therefore we need to push another
18793 ARM register, so that the stack will be 64-bit aligned. We
18794 try to avoid using the arg registers (r0 - r3) as they might be
18795 used to pass values in a tail call. */
18796 for (reg = 4; reg <= 12; reg++)
18797 if ((save_reg_mask & (1 << reg)) == 0)
18798 break;
18799
18800 if (reg <= 12)
18801 save_reg_mask |= (1 << reg);
18802 else
18803 {
18804 cfun->machine->sibcall_blocked = 1;
18805 save_reg_mask |= (1 << 3);
18806 }
18807 }
18808
18809 /* We may need to push an additional register for use initializing the
18810 PIC base register. */
18811 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
18812 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
18813 {
18814 reg = thumb_find_work_register (1 << 4);
18815 if (!call_used_regs[reg])
18816 save_reg_mask |= (1 << reg);
18817 }
18818
18819 return save_reg_mask;
18820 }
18821
18822
18823 /* Compute a bit mask of which registers need to be
18824 saved on the stack for the current function. */
18825 static unsigned long
18826 thumb1_compute_save_reg_mask (void)
18827 {
18828 unsigned long mask;
18829 unsigned reg;
18830
18831 mask = 0;
18832 for (reg = 0; reg < 12; reg ++)
18833 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
18834 mask |= 1 << reg;
18835
18836 if (flag_pic
18837 && !TARGET_SINGLE_PIC_BASE
18838 && arm_pic_register != INVALID_REGNUM
18839 && crtl->uses_pic_offset_table)
18840 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18841
18842 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18843 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18844 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18845
18846 /* LR will also be pushed if any lo regs are pushed. */
18847 if (mask & 0xff || thumb_force_lr_save ())
18848 mask |= (1 << LR_REGNUM);
18849
18850 /* Make sure we have a low work register if we need one.
18851 We will need one if we are going to push a high register,
18852 but we are not currently intending to push a low register. */
18853 if ((mask & 0xff) == 0
18854 && ((mask & 0x0f00) || TARGET_BACKTRACE))
18855 {
18856 /* Use thumb_find_work_register to choose which register
18857 we will use. If the register is live then we will
18858 have to push it. Use LAST_LO_REGNUM as our fallback
18859 choice for the register to select. */
18860 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
18861 /* Make sure the register returned by thumb_find_work_register is
18862 not part of the return value. */
18863 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
18864 reg = LAST_LO_REGNUM;
18865
18866 if (! call_used_regs[reg])
18867 mask |= 1 << reg;
18868 }
18869
18870 /* The 504 below is 8 bytes less than 512 because there are two possible
18871 alignment words. We can't tell here if they will be present or not so we
18872 have to play it safe and assume that they are. */
18873 if ((CALLER_INTERWORKING_SLOT_SIZE +
18874 ROUND_UP_WORD (get_frame_size ()) +
18875 crtl->outgoing_args_size) >= 504)
18876 {
18877 /* This is the same as the code in thumb1_expand_prologue() which
18878 determines which register to use for stack decrement. */
18879 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
18880 if (mask & (1 << reg))
18881 break;
18882
18883 if (reg > LAST_LO_REGNUM)
18884 {
18885 /* Make sure we have a register available for stack decrement. */
18886 mask |= 1 << LAST_LO_REGNUM;
18887 }
18888 }
18889
18890 return mask;
18891 }
18892
18893
18894 /* Return the number of bytes required to save VFP registers. */
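/* For example, if d8-d11 are the only call-saved VFP registers live in
the function, the run of four registers needs 4 * 8 = 32 bytes.  When not
targeting ARMv6 or later, the ARM10 VFPr1 erratum workaround below rounds
a run of exactly two registers up to three, so a live d8-d9 pair costs
24 bytes rather than 16.  */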
18895 static int
18896 arm_get_vfp_saved_size (void)
18897 {
18898 unsigned int regno;
18899 int count;
18900 int saved;
18901
18902 saved = 0;
18903 /* Space for saved VFP registers. */
18904 if (TARGET_HARD_FLOAT && TARGET_VFP)
18905 {
18906 count = 0;
18907 for (regno = FIRST_VFP_REGNUM;
18908 regno < LAST_VFP_REGNUM;
18909 regno += 2)
18910 {
18911 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
18912 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
18913 {
18914 if (count > 0)
18915 {
18916 /* Workaround ARM10 VFPr1 bug. */
18917 if (count == 2 && !arm_arch6)
18918 count++;
18919 saved += count * 8;
18920 }
18921 count = 0;
18922 }
18923 else
18924 count++;
18925 }
18926 if (count > 0)
18927 {
18928 if (count == 2 && !arm_arch6)
18929 count++;
18930 saved += count * 8;
18931 }
18932 }
18933 return saved;
18934 }
18935
18936
18937 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18938 everything bar the final return instruction.  If SIMPLE_RETURN is true,
18939 then do not output the epilogue, because it has already been emitted in RTL. */
18940 const char *
18941 output_return_instruction (rtx operand, bool really_return, bool reverse,
18942 bool simple_return)
18943 {
18944 char conditional[10];
18945 char instr[100];
18946 unsigned reg;
18947 unsigned long live_regs_mask;
18948 unsigned long func_type;
18949 arm_stack_offsets *offsets;
18950
18951 func_type = arm_current_func_type ();
18952
18953 if (IS_NAKED (func_type))
18954 return "";
18955
18956 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
18957 {
18958 /* If this function was declared non-returning, and we have
18959 found a tail call, then we have to trust that the called
18960 function won't return. */
18961 if (really_return)
18962 {
18963 rtx ops[2];
18964
18965 /* Otherwise, trap an attempted return by aborting. */
18966 ops[0] = operand;
18967 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
18968 : "abort");
18969 assemble_external_libcall (ops[1]);
18970 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
18971 }
18972
18973 return "";
18974 }
18975
18976 gcc_assert (!cfun->calls_alloca || really_return);
18977
18978 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
18979
18980 cfun->machine->return_used_this_function = 1;
18981
18982 offsets = arm_get_frame_offsets ();
18983 live_regs_mask = offsets->saved_regs_mask;
18984
18985 if (!simple_return && live_regs_mask)
18986 {
18987 const char * return_reg;
18988
18989 /* If we do not have any special requirements for function exit
18990 (e.g. interworking) then we can load the return address
18991 directly into the PC. Otherwise we must load it into LR. */
18992 if (really_return
18993 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
18994 return_reg = reg_names[PC_REGNUM];
18995 else
18996 return_reg = reg_names[LR_REGNUM];
18997
18998 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
18999 {
19000 /* There are three possible reasons for the IP register
19001 being saved. 1) a stack frame was created, in which case
19002 IP contains the old stack pointer, or 2) an ISR routine
19003 corrupted it, or 3) it was saved to align the stack on
19004 iWMMXt. In case 1, restore IP into SP, otherwise just
19005 restore IP. */
19006 if (frame_pointer_needed)
19007 {
19008 live_regs_mask &= ~ (1 << IP_REGNUM);
19009 live_regs_mask |= (1 << SP_REGNUM);
19010 }
19011 else
19012 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19013 }
19014
19015 /* On some ARM architectures it is faster to use LDR rather than
19016 LDM to load a single register. On other architectures, the
19017 cost is the same. In 26 bit mode, or for exception handlers,
19018 we have to use LDM to load the PC so that the CPSR is also
19019 restored. */
19020 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19021 if (live_regs_mask == (1U << reg))
19022 break;
19023
19024 if (reg <= LAST_ARM_REGNUM
19025 && (reg != LR_REGNUM
19026 || ! really_return
19027 || ! IS_INTERRUPT (func_type)))
19028 {
19029 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19030 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19031 }
19032 else
19033 {
19034 char *p;
19035 int first = 1;
19036
19037 /* Generate the load multiple instruction to restore the
19038 registers. Note we can get here, even if
19039 frame_pointer_needed is true, but only if sp already
19040 points to the base of the saved core registers. */
19041 if (live_regs_mask & (1 << SP_REGNUM))
19042 {
19043 unsigned HOST_WIDE_INT stack_adjust;
19044
19045 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19046 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19047
19048 if (stack_adjust && arm_arch5 && TARGET_ARM)
19049 if (TARGET_UNIFIED_ASM)
19050 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19051 else
19052 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19053 else
19054 {
19055 /* If we can't use ldmib (SA110 bug),
19056 then try to pop r3 instead. */
19057 if (stack_adjust)
19058 live_regs_mask |= 1 << 3;
19059
19060 if (TARGET_UNIFIED_ASM)
19061 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19062 else
19063 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19064 }
19065 }
19066 else
19067 if (TARGET_UNIFIED_ASM)
19068 sprintf (instr, "pop%s\t{", conditional);
19069 else
19070 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19071
19072 p = instr + strlen (instr);
19073
19074 for (reg = 0; reg <= SP_REGNUM; reg++)
19075 if (live_regs_mask & (1 << reg))
19076 {
19077 int l = strlen (reg_names[reg]);
19078
19079 if (first)
19080 first = 0;
19081 else
19082 {
19083 memcpy (p, ", ", 2);
19084 p += 2;
19085 }
19086
19087 memcpy (p, "%|", 2);
19088 memcpy (p + 2, reg_names[reg], l);
19089 p += l + 2;
19090 }
19091
19092 if (live_regs_mask & (1 << LR_REGNUM))
19093 {
19094 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19095 /* If returning from an interrupt, restore the CPSR. */
19096 if (IS_INTERRUPT (func_type))
19097 strcat (p, "^");
19098 }
19099 else
19100 strcpy (p, "}");
19101 }
19102
19103 output_asm_insn (instr, & operand);
19104
19105 /* See if we need to generate an extra instruction to
19106 perform the actual function return. */
19107 if (really_return
19108 && func_type != ARM_FT_INTERWORKED
19109 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19110 {
19111 /* The return has already been handled
19112 by loading the LR into the PC. */
19113 return "";
19114 }
19115 }
19116
19117 if (really_return)
19118 {
19119 switch ((int) ARM_FUNC_TYPE (func_type))
19120 {
19121 case ARM_FT_ISR:
19122 case ARM_FT_FIQ:
19123 /* ??? This is wrong for unified assembly syntax. */
19124 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19125 break;
19126
19127 case ARM_FT_INTERWORKED:
19128 sprintf (instr, "bx%s\t%%|lr", conditional);
19129 break;
19130
19131 case ARM_FT_EXCEPTION:
19132 /* ??? This is wrong for unified assembly syntax. */
19133 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19134 break;
19135
19136 default:
19137 /* Use bx if it's available. */
19138 if (arm_arch5 || arm_arch4t)
19139 sprintf (instr, "bx%s\t%%|lr", conditional);
19140 else
19141 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19142 break;
19143 }
19144
19145 output_asm_insn (instr, & operand);
19146 }
19147
19148 return "";
19149 }
19150
19151 /* Write the function name into the code section, directly preceding
19152 the function prologue.
19153
19154 Code will be output similar to this:
19155 t0
19156 .ascii "arm_poke_function_name", 0
19157 .align
19158 t1
19159 .word 0xff000000 + (t1 - t0)
19160 arm_poke_function_name
19161 mov ip, sp
19162 stmfd sp!, {fp, ip, lr, pc}
19163 sub fp, ip, #4
19164
19165 When performing a stack backtrace, code can inspect the value
19166 of 'pc' stored at 'fp' + 0. If the trace function then looks
19167 at location pc - 12 and the top 8 bits are set, then we know
19168 that there is a function name embedded immediately preceding this
19169 location, and that its length is ((pc[-3]) & ~0xff000000).
19170
19171 We assume that pc is declared as a pointer to an unsigned long.
19172
19173 It is of no benefit to output the function name if we are assembling
19174 a leaf function. These function types will not contain a stack
19175 backtrace structure, so it is not possible to determine the
19176 function name. */
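/* For example, for the name "main" LENGTH is 5 (including the trailing
NUL) and ALIGNLENGTH is rounded up to 8, so the marker word emitted after
the string is 0xff000008.  */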
19177 void
19178 arm_poke_function_name (FILE *stream, const char *name)
19179 {
19180 unsigned long alignlength;
19181 unsigned long length;
19182 rtx x;
19183
19184 length = strlen (name) + 1;
19185 alignlength = ROUND_UP_WORD (length);
19186
19187 ASM_OUTPUT_ASCII (stream, name, length);
19188 ASM_OUTPUT_ALIGN (stream, 2);
19189 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19190 assemble_aligned_integer (UNITS_PER_WORD, x);
19191 }
19192
19193 /* Place some comments into the assembler stream
19194 describing the current function. */
19195 static void
19196 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19197 {
19198 unsigned long func_type;
19199
19200 /* ??? Do we want to print some of the below anyway? */
19201 if (TARGET_THUMB1)
19202 return;
19203
19204 /* Sanity check. */
19205 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19206
19207 func_type = arm_current_func_type ();
19208
19209 switch ((int) ARM_FUNC_TYPE (func_type))
19210 {
19211 default:
19212 case ARM_FT_NORMAL:
19213 break;
19214 case ARM_FT_INTERWORKED:
19215 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19216 break;
19217 case ARM_FT_ISR:
19218 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19219 break;
19220 case ARM_FT_FIQ:
19221 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19222 break;
19223 case ARM_FT_EXCEPTION:
19224 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19225 break;
19226 }
19227
19228 if (IS_NAKED (func_type))
19229 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19230
19231 if (IS_VOLATILE (func_type))
19232 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19233
19234 if (IS_NESTED (func_type))
19235 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19236 if (IS_STACKALIGN (func_type))
19237 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19238
19239 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19240 crtl->args.size,
19241 crtl->args.pretend_args_size, frame_size);
19242
19243 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19244 frame_pointer_needed,
19245 cfun->machine->uses_anonymous_args);
19246
19247 if (cfun->machine->lr_save_eliminated)
19248 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19249
19250 if (crtl->calls_eh_return)
19251 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19252
19253 }
19254
19255 static void
19256 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19257 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19258 {
19259 arm_stack_offsets *offsets;
19260
19261 if (TARGET_THUMB1)
19262 {
19263 int regno;
19264
19265 /* Emit any call-via-reg trampolines that are needed for v4t support
19266 of call_reg and call_value_reg type insns. */
19267 for (regno = 0; regno < LR_REGNUM; regno++)
19268 {
19269 rtx label = cfun->machine->call_via[regno];
19270
19271 if (label != NULL)
19272 {
19273 switch_to_section (function_section (current_function_decl));
19274 targetm.asm_out.internal_label (asm_out_file, "L",
19275 CODE_LABEL_NUMBER (label));
19276 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19277 }
19278 }
19279
19280 /* ??? Probably not safe to set this here, since it assumes that a
19281 function will be emitted as assembly immediately after we generate
19282 RTL for it. This does not happen for inline functions. */
19283 cfun->machine->return_used_this_function = 0;
19284 }
19285 else /* TARGET_32BIT */
19286 {
19287 /* We need to take into account any stack-frame rounding. */
19288 offsets = arm_get_frame_offsets ();
19289
19290 gcc_assert (!use_return_insn (FALSE, NULL)
19291 || (cfun->machine->return_used_this_function != 0)
19292 || offsets->saved_regs == offsets->outgoing_args
19293 || frame_pointer_needed);
19294
19295 /* Reset the ARM-specific per-function variables. */
19296 after_arm_reorg = 0;
19297 }
19298 }
19299
19300 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19301 STR and STRD.  If an even number of registers is being pushed, one
19302 STRD pattern is created for each register pair.  If an
19303 odd number of registers is pushed, emit an initial STR followed by
19304 as many STRD instructions as are needed. This works best when the
19305 stack is initially 64-bit aligned (the normal case), since it
19306 ensures that each STRD is also 64-bit aligned. */
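/* For example, pushing {r4, r5, r6} (an odd count) first emits a single
str of r4 whose writeback allocates all 12 bytes, followed by one strd
storing r5 and r6 at offsets 4 and 8.  Pushing {r4, r5, r6, r7} emits two
strd instructions, the first of which also performs the 16-byte stack
allocation.  */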
19307 static void
19308 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19309 {
19310 int num_regs = 0;
19311 int i;
19312 int regno;
19313 rtx par = NULL_RTX;
19314 rtx dwarf = NULL_RTX;
19315 rtx tmp;
19316 bool first = true;
19317
19318 num_regs = bit_count (saved_regs_mask);
19319
19320 /* Must be at least one register to save, and can't save SP or PC. */
19321 gcc_assert (num_regs > 0 && num_regs <= 14);
19322 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19323 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19324
19325 /* Create sequence for DWARF info. All the frame-related data for
19326 debugging is held in this wrapper. */
19327 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19328
19329 /* Describe the stack adjustment. */
19330 tmp = gen_rtx_SET (VOIDmode,
19331 stack_pointer_rtx,
19332 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19333 RTX_FRAME_RELATED_P (tmp) = 1;
19334 XVECEXP (dwarf, 0, 0) = tmp;
19335
19336 /* Find the first register. */
19337 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19338 ;
19339
19340 i = 0;
19341
19342 /* If there's an odd number of registers to push, start off by
19343 pushing a single register. This ensures that subsequent strd
19344 operations are dword aligned (assuming that SP was originally
19345 64-bit aligned). */
19346 if ((num_regs & 1) != 0)
19347 {
19348 rtx reg, mem, insn;
19349
19350 reg = gen_rtx_REG (SImode, regno);
19351 if (num_regs == 1)
19352 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19353 stack_pointer_rtx));
19354 else
19355 mem = gen_frame_mem (Pmode,
19356 gen_rtx_PRE_MODIFY
19357 (Pmode, stack_pointer_rtx,
19358 plus_constant (Pmode, stack_pointer_rtx,
19359 -4 * num_regs)));
19360
19361 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19362 RTX_FRAME_RELATED_P (tmp) = 1;
19363 insn = emit_insn (tmp);
19364 RTX_FRAME_RELATED_P (insn) = 1;
19365 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19366 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19367 reg);
19368 RTX_FRAME_RELATED_P (tmp) = 1;
19369 i++;
19370 regno++;
19371 XVECEXP (dwarf, 0, i) = tmp;
19372 first = false;
19373 }
19374
19375 while (i < num_regs)
19376 if (saved_regs_mask & (1 << regno))
19377 {
19378 rtx reg1, reg2, mem1, mem2;
19379 rtx tmp0, tmp1, tmp2;
19380 int regno2;
19381
19382 /* Find the register to pair with this one. */
19383 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19384 regno2++)
19385 ;
19386
19387 reg1 = gen_rtx_REG (SImode, regno);
19388 reg2 = gen_rtx_REG (SImode, regno2);
19389
19390 if (first)
19391 {
19392 rtx insn;
19393
19394 first = false;
19395 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19396 stack_pointer_rtx,
19397 -4 * num_regs));
19398 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19399 stack_pointer_rtx,
19400 -4 * (num_regs - 1)));
19401 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19402 plus_constant (Pmode, stack_pointer_rtx,
19403 -4 * (num_regs)));
19404 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19405 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19406 RTX_FRAME_RELATED_P (tmp0) = 1;
19407 RTX_FRAME_RELATED_P (tmp1) = 1;
19408 RTX_FRAME_RELATED_P (tmp2) = 1;
19409 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19410 XVECEXP (par, 0, 0) = tmp0;
19411 XVECEXP (par, 0, 1) = tmp1;
19412 XVECEXP (par, 0, 2) = tmp2;
19413 insn = emit_insn (par);
19414 RTX_FRAME_RELATED_P (insn) = 1;
19415 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19416 }
19417 else
19418 {
19419 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19420 stack_pointer_rtx,
19421 4 * i));
19422 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19423 stack_pointer_rtx,
19424 4 * (i + 1)));
19425 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19426 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19427 RTX_FRAME_RELATED_P (tmp1) = 1;
19428 RTX_FRAME_RELATED_P (tmp2) = 1;
19429 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19430 XVECEXP (par, 0, 0) = tmp1;
19431 XVECEXP (par, 0, 1) = tmp2;
19432 emit_insn (par);
19433 }
19434
19435 /* Create unwind information. This is an approximation. */
19436 tmp1 = gen_rtx_SET (VOIDmode,
19437 gen_frame_mem (Pmode,
19438 plus_constant (Pmode,
19439 stack_pointer_rtx,
19440 4 * i)),
19441 reg1);
19442 tmp2 = gen_rtx_SET (VOIDmode,
19443 gen_frame_mem (Pmode,
19444 plus_constant (Pmode,
19445 stack_pointer_rtx,
19446 4 * (i + 1))),
19447 reg2);
19448
19449 RTX_FRAME_RELATED_P (tmp1) = 1;
19450 RTX_FRAME_RELATED_P (tmp2) = 1;
19451 XVECEXP (dwarf, 0, i + 1) = tmp1;
19452 XVECEXP (dwarf, 0, i + 2) = tmp2;
19453 i += 2;
19454 regno = regno2 + 1;
19455 }
19456 else
19457 regno++;
19458
19459 return;
19460 }
19461
19462 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19463 whenever possible, otherwise it emits single-word stores. The first store
19464 also allocates stack space for all saved registers, using writeback with
19465 post-addressing mode. All other stores use offset addressing. If no STRD
19466 can be emitted, this function emits a sequence of single-word stores,
19467 and not an STM as before, because single-word stores provide more
19468 scheduling freedom and can be turned into an STM by peephole optimizations. */
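/* For example, with SAVED_REGS_MASK covering {r4, r5, r7} the loop below
emits "strd r4, r5, [sp, #-12]!", allocating all 12 bytes with the first
store, followed by "str r7, [sp, #8]" using plain offset addressing.  */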
19469 static void
19470 arm_emit_strd_push (unsigned long saved_regs_mask)
19471 {
19472 int num_regs = 0;
19473 int i, j, dwarf_index = 0;
19474 int offset = 0;
19475 rtx dwarf = NULL_RTX;
19476 rtx insn = NULL_RTX;
19477 rtx tmp, mem;
19478
19479 /* TODO: More efficient code can be emitted by changing the
19480 layout, e.g., first push all pairs that can use STRD to keep the
19481 stack aligned, and then push all other registers. */
19482 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19483 if (saved_regs_mask & (1 << i))
19484 num_regs++;
19485
19486 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19487 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19488 gcc_assert (num_regs > 0);
19489
19490 /* Create sequence for DWARF info. */
19491 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19492
19493 /* For dwarf info, we generate an explicit stack update. */
19494 tmp = gen_rtx_SET (VOIDmode,
19495 stack_pointer_rtx,
19496 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19497 RTX_FRAME_RELATED_P (tmp) = 1;
19498 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19499
19500 /* Save registers. */
19501 offset = - 4 * num_regs;
19502 j = 0;
19503 while (j <= LAST_ARM_REGNUM)
19504 if (saved_regs_mask & (1 << j))
19505 {
19506 if ((j % 2 == 0)
19507 && (saved_regs_mask & (1 << (j + 1))))
19508 {
19509 /* Current register and previous register form register pair for
19510 which STRD can be generated. */
19511 if (offset < 0)
19512 {
19513 /* Allocate stack space for all saved registers. */
19514 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19515 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19516 mem = gen_frame_mem (DImode, tmp);
19517 offset = 0;
19518 }
19519 else if (offset > 0)
19520 mem = gen_frame_mem (DImode,
19521 plus_constant (Pmode,
19522 stack_pointer_rtx,
19523 offset));
19524 else
19525 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19526
19527 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19528 RTX_FRAME_RELATED_P (tmp) = 1;
19529 tmp = emit_insn (tmp);
19530
19531 /* Record the first store insn. */
19532 if (dwarf_index == 1)
19533 insn = tmp;
19534
19535 /* Generate dwarf info. */
19536 mem = gen_frame_mem (SImode,
19537 plus_constant (Pmode,
19538 stack_pointer_rtx,
19539 offset));
19540 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19541 RTX_FRAME_RELATED_P (tmp) = 1;
19542 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19543
19544 mem = gen_frame_mem (SImode,
19545 plus_constant (Pmode,
19546 stack_pointer_rtx,
19547 offset + 4));
19548 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19549 RTX_FRAME_RELATED_P (tmp) = 1;
19550 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19551
19552 offset += 8;
19553 j += 2;
19554 }
19555 else
19556 {
19557 /* Emit a single word store. */
19558 if (offset < 0)
19559 {
19560 /* Allocate stack space for all saved registers. */
19561 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19562 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19563 mem = gen_frame_mem (SImode, tmp);
19564 offset = 0;
19565 }
19566 else if (offset > 0)
19567 mem = gen_frame_mem (SImode,
19568 plus_constant (Pmode,
19569 stack_pointer_rtx,
19570 offset));
19571 else
19572 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19573
19574 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19575 RTX_FRAME_RELATED_P (tmp) = 1;
19576 tmp = emit_insn (tmp);
19577
19578 /* Record the first store insn. */
19579 if (dwarf_index == 1)
19580 insn = tmp;
19581
19582 /* Generate dwarf info. */
19583 mem = gen_frame_mem (SImode,
19584 plus_constant (Pmode,
19585 stack_pointer_rtx,
19586 offset));
19587 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19588 RTX_FRAME_RELATED_P (tmp) = 1;
19589 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19590
19591 offset += 4;
19592 j += 1;
19593 }
19594 }
19595 else
19596 j++;
19597
19598 /* Attach dwarf info to the first insn we generate. */
19599 gcc_assert (insn != NULL_RTX);
19600 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19601 RTX_FRAME_RELATED_P (insn) = 1;
19602 }
19603
19604 /* Generate and emit an insn that we will recognize as a push_multi.
19605 Unfortunately, since this insn does not reflect very well the actual
19606 semantics of the operation, we need to annotate the insn for the benefit
19607 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19608 MASK for registers that should be annotated for DWARF2 frame unwind
19609 information. */
19610 static rtx
19611 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19612 {
19613 int num_regs = 0;
19614 int num_dwarf_regs = 0;
19615 int i, j;
19616 rtx par;
19617 rtx dwarf;
19618 int dwarf_par_index;
19619 rtx tmp, reg;
19620
19621 /* We don't record the PC in the dwarf frame information. */
19622 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19623
19624 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19625 {
19626 if (mask & (1 << i))
19627 num_regs++;
19628 if (dwarf_regs_mask & (1 << i))
19629 num_dwarf_regs++;
19630 }
19631
19632 gcc_assert (num_regs && num_regs <= 16);
19633 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19634
19635 /* For the body of the insn we are going to generate an UNSPEC in
19636 parallel with several USEs. This allows the insn to be recognized
19637 by the push_multi pattern in the arm.md file.
19638
19639 The body of the insn looks something like this:
19640
19641 (parallel [
19642 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19643 (const_int:SI <num>)))
19644 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19645 (use (reg:SI XX))
19646 (use (reg:SI YY))
19647 ...
19648 ])
19649
19650 For the frame note however, we try to be more explicit and actually
19651 show each register being stored into the stack frame, plus a (single)
19652 decrement of the stack pointer. We do it this way in order to be
19653 friendly to the stack unwinding code, which only wants to see a single
19654 stack decrement per instruction. The RTL we generate for the note looks
19655 something like this:
19656
19657 (sequence [
19658 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19659 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19660 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19661 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19662 ...
19663 ])
19664
19665 FIXME: In an ideal world the PRE_MODIFY would not exist and
19666 instead we'd have a parallel expression detailing all
19667 the stores to the various memory addresses so that debug
19668 information is more up-to-date. Remember however while writing
19669 this to take care of the constraints with the push instruction.
19670
19671 Note also that this has to be taken care of for the VFP registers.
19672
19673 For more see PR43399. */
19674
19675 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19676 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19677 dwarf_par_index = 1;
19678
19679 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19680 {
19681 if (mask & (1 << i))
19682 {
19683 reg = gen_rtx_REG (SImode, i);
19684
19685 XVECEXP (par, 0, 0)
19686 = gen_rtx_SET (VOIDmode,
19687 gen_frame_mem
19688 (BLKmode,
19689 gen_rtx_PRE_MODIFY (Pmode,
19690 stack_pointer_rtx,
19691 plus_constant
19692 (Pmode, stack_pointer_rtx,
19693 -4 * num_regs))
19694 ),
19695 gen_rtx_UNSPEC (BLKmode,
19696 gen_rtvec (1, reg),
19697 UNSPEC_PUSH_MULT));
19698
19699 if (dwarf_regs_mask & (1 << i))
19700 {
19701 tmp = gen_rtx_SET (VOIDmode,
19702 gen_frame_mem (SImode, stack_pointer_rtx),
19703 reg);
19704 RTX_FRAME_RELATED_P (tmp) = 1;
19705 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19706 }
19707
19708 break;
19709 }
19710 }
19711
19712 for (j = 1, i++; j < num_regs; i++)
19713 {
19714 if (mask & (1 << i))
19715 {
19716 reg = gen_rtx_REG (SImode, i);
19717
19718 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19719
19720 if (dwarf_regs_mask & (1 << i))
19721 {
19722 tmp
19723 = gen_rtx_SET (VOIDmode,
19724 gen_frame_mem
19725 (SImode,
19726 plus_constant (Pmode, stack_pointer_rtx,
19727 4 * j)),
19728 reg);
19729 RTX_FRAME_RELATED_P (tmp) = 1;
19730 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19731 }
19732
19733 j++;
19734 }
19735 }
19736
19737 par = emit_insn (par);
19738
19739 tmp = gen_rtx_SET (VOIDmode,
19740 stack_pointer_rtx,
19741 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19742 RTX_FRAME_RELATED_P (tmp) = 1;
19743 XVECEXP (dwarf, 0, 0) = tmp;
19744
19745 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19746
19747 return par;
19748 }
19749
19750 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19751 SIZE is the offset to be adjusted.
19752 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19753 static void
19754 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19755 {
19756 rtx dwarf;
19757
19758 RTX_FRAME_RELATED_P (insn) = 1;
19759 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19760 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19761 }
19762
19763 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19764 SAVED_REGS_MASK shows which registers need to be restored.
19765
19766 Unfortunately, since this insn does not reflect very well the actual
19767 semantics of the operation, we need to annotate the insn for the benefit
19768 of DWARF2 frame unwind information. */
19769 static void
19770 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
19771 {
19772 int num_regs = 0;
19773 int i, j;
19774 rtx par;
19775 rtx dwarf = NULL_RTX;
19776 rtx tmp, reg;
19777 bool return_in_pc;
19778 int offset_adj;
19779 int emit_update;
19780
19781 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19782 offset_adj = return_in_pc ? 1 : 0;
19783 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19784 if (saved_regs_mask & (1 << i))
19785 num_regs++;
19786
19787 gcc_assert (num_regs && num_regs <= 16);
19788
19789 /* If SP is in the reglist, then we don't emit the SP update insn. */
19790 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
19791
19792 /* The parallel needs to hold num_regs SETs
19793 and one SET for the stack update. */
19794 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
19795
19796 if (return_in_pc)
19797 {
19798 tmp = ret_rtx;
19799 XVECEXP (par, 0, 0) = tmp;
19800 }
19801
19802 if (emit_update)
19803 {
19804 /* Increment the stack pointer, based on there being
19805 num_regs 4-byte registers to restore. */
19806 tmp = gen_rtx_SET (VOIDmode,
19807 stack_pointer_rtx,
19808 plus_constant (Pmode,
19809 stack_pointer_rtx,
19810 4 * num_regs));
19811 RTX_FRAME_RELATED_P (tmp) = 1;
19812 XVECEXP (par, 0, offset_adj) = tmp;
19813 }
19814
19815 /* Now restore every reg, which may include PC. */
19816 for (j = 0, i = 0; j < num_regs; i++)
19817 if (saved_regs_mask & (1 << i))
19818 {
19819 reg = gen_rtx_REG (SImode, i);
19820 if ((num_regs == 1) && emit_update && !return_in_pc)
19821 {
19822 /* Emit single load with writeback. */
19823 tmp = gen_frame_mem (SImode,
19824 gen_rtx_POST_INC (Pmode,
19825 stack_pointer_rtx));
19826 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
19827 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19828 return;
19829 }
19830
19831 tmp = gen_rtx_SET (VOIDmode,
19832 reg,
19833 gen_frame_mem
19834 (SImode,
19835 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
19836 RTX_FRAME_RELATED_P (tmp) = 1;
19837 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
19838
19839 /* We need to maintain a sequence for DWARF info too. As dwarf info
19840 should not have PC, skip PC. */
19841 if (i != PC_REGNUM)
19842 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19843
19844 j++;
19845 }
19846
19847 if (return_in_pc)
19848 par = emit_jump_insn (par);
19849 else
19850 par = emit_insn (par);
19851
19852 REG_NOTES (par) = dwarf;
19853 if (!return_in_pc)
19854 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
19855 stack_pointer_rtx, stack_pointer_rtx);
19856 }
19857
19858 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19859 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19860
19861 Unfortunately, since this insn does not reflect very well the actual
19862 semantics of the operation, we need to annotate the insn for the benefit
19863 of DWARF2 frame unwind information. */
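/* For example, restoring eight D-registers (d8-d15) emits a single
pop_multi of eight double-word loads and increments BASE_REG by 64 bytes;
a request for more than 16 D-registers is split into two recursive calls,
each of which emits one pop_multi instruction.  */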
19864 static void
19865 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
19866 {
19867 int i, j;
19868 rtx par;
19869 rtx dwarf = NULL_RTX;
19870 rtx tmp, reg;
19871
19872 gcc_assert (num_regs && num_regs <= 32);
19873
19874 /* Workaround ARM10 VFPr1 bug. */
19875 if (num_regs == 2 && !arm_arch6)
19876 {
19877 if (first_reg == 15)
19878 first_reg--;
19879
19880 num_regs++;
19881 }
19882
19883 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19884 there could be up to 32 D-registers to restore.
19885 If there are more than 16 D-registers, make two recursive calls,
19886 each of which emits one pop_multi instruction. */
19887 if (num_regs > 16)
19888 {
19889 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
19890 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
19891 return;
19892 }
19893
19894 /* The parallel needs to hold num_regs SETs
19895 and one SET for the stack update. */
19896 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
19897
19898 /* Increment the stack pointer, based on there being
19899 num_regs 8-byte registers to restore. */
19900 tmp = gen_rtx_SET (VOIDmode,
19901 base_reg,
19902 plus_constant (Pmode, base_reg, 8 * num_regs));
19903 RTX_FRAME_RELATED_P (tmp) = 1;
19904 XVECEXP (par, 0, 0) = tmp;
19905
19906 /* Now show every reg that will be restored, using a SET for each. */
19907 for (j = 0, i = first_reg; j < num_regs; i += 2)
19908 {
19909 reg = gen_rtx_REG (DFmode, i);
19910
19911 tmp = gen_rtx_SET (VOIDmode,
19912 reg,
19913 gen_frame_mem
19914 (DFmode,
19915 plus_constant (Pmode, base_reg, 8 * j)));
19916 RTX_FRAME_RELATED_P (tmp) = 1;
19917 XVECEXP (par, 0, j + 1) = tmp;
19918
19919 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19920
19921 j++;
19922 }
19923
19924 par = emit_insn (par);
19925 REG_NOTES (par) = dwarf;
19926
19927 /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding from FP. */
19928 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
19929 {
19930 RTX_FRAME_RELATED_P (par) = 1;
19931 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
19932 }
19933 else
19934 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
19935 base_reg, base_reg);
19936 }
19937
19938 /* Generate and emit a pattern that will be recognized as an LDRD pattern.  If an
19939 even number of registers is being popped, multiple LDRD patterns are created for
19940 all register pairs.  If an odd number of registers is popped, the last register is
19941 loaded using an LDR pattern.  */
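/* For example, popping {r4, r5, r6} emits an LDRD loading r4 and r5 from
[sp] and [sp, #4], adjusts the stack pointer by 8 bytes, and finally emits
an LDR of r6 with post-increment.  Popping {r4, r5, pc} loads r4 and r5
the same way and then returns by loading PC with post-increment.  */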
19942 static void
19943 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
19944 {
19945 int num_regs = 0;
19946 int i, j;
19947 rtx par = NULL_RTX;
19948 rtx dwarf = NULL_RTX;
19949 rtx tmp, reg, tmp1;
19950 bool return_in_pc;
19951
19952 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19953 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19954 if (saved_regs_mask & (1 << i))
19955 num_regs++;
19956
19957 gcc_assert (num_regs && num_regs <= 16);
19958
19959 /* We cannot generate an ldrd for PC, so reduce the count if PC is
19960 to be popped.  If num_regs was even it now becomes odd, and we can
19961 generate a pop with PC; if num_regs was odd it becomes even, and an
19962 ldr with return can be generated for PC. */
19963 if (return_in_pc)
19964 num_regs--;
19965
19966 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19967
19968 /* Var j iterates over all the registers in saved_regs_mask, while
19969 var i gives the index of each saved register in the stack frame.
19970 A PARALLEL RTX for a register pair is created here, so that the
19971 pattern for LDRD can be matched.  As PC is always the last register
19972 to be popped, and we have already decremented num_regs if PC is to
19973 be popped, we don't have to worry about PC in this loop. */
19974 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
19975 if (saved_regs_mask & (1 << j))
19976 {
19977 /* Create RTX for memory load. */
19978 reg = gen_rtx_REG (SImode, j);
19979 tmp = gen_rtx_SET (SImode,
19980 reg,
19981 gen_frame_mem (SImode,
19982 plus_constant (Pmode,
19983 stack_pointer_rtx, 4 * i)));
19984 RTX_FRAME_RELATED_P (tmp) = 1;
19985
19986 if (i % 2 == 0)
19987 {
19988 /* When saved-register index (i) is even, the RTX to be emitted is
19989 yet to be created. Hence create it first. The LDRD pattern we
19990 are generating is :
19991 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19992 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19993 where target registers need not be consecutive. */
19994 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19995 dwarf = NULL_RTX;
19996 }
19997
19998 /* The ith register is added to the PARALLEL RTX.  If i is even, reg_i
19999 is added as the 0th element, and if i is odd, reg_i is added as the 1st
20000 element of the LDRD pattern shown above. */
20001 XVECEXP (par, 0, (i % 2)) = tmp;
20002 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20003
20004 if ((i % 2) == 1)
20005 {
20006 /* When saved-register index (i) is odd, RTXs for both the registers
20007 to be loaded are generated in above given LDRD pattern, and the
20008 pattern can be emitted now. */
20009 par = emit_insn (par);
20010 REG_NOTES (par) = dwarf;
20011 RTX_FRAME_RELATED_P (par) = 1;
20012 }
20013
20014 i++;
20015 }
20016
20017 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20018 the number of registers is even AND return_in_pc is true, the last register
20019 is popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
20020 then LDR with post increment. */
20021
20022 /* Increment the stack pointer, based on there being
20023 num_regs 4-byte registers to restore. */
20024 tmp = gen_rtx_SET (VOIDmode,
20025 stack_pointer_rtx,
20026 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20027 RTX_FRAME_RELATED_P (tmp) = 1;
20028 tmp = emit_insn (tmp);
20029 if (!return_in_pc)
20030 {
20031 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20032 stack_pointer_rtx, stack_pointer_rtx);
20033 }
20034
20035 dwarf = NULL_RTX;
20036
20037 if (((num_regs % 2) == 1 && !return_in_pc)
20038 || ((num_regs % 2) == 0 && return_in_pc))
20039 {
20040 /* Scan for the single register to be popped. Skip until the saved
20041 register is found. */
20042 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20043
20044 /* Gen LDR with post increment here. */
20045 tmp1 = gen_rtx_MEM (SImode,
20046 gen_rtx_POST_INC (SImode,
20047 stack_pointer_rtx));
20048 set_mem_alias_set (tmp1, get_frame_alias_set ());
20049
20050 reg = gen_rtx_REG (SImode, j);
20051 tmp = gen_rtx_SET (SImode, reg, tmp1);
20052 RTX_FRAME_RELATED_P (tmp) = 1;
20053 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20054
20055 if (return_in_pc)
20056 {
20057 /* If return_in_pc, j must be PC_REGNUM. */
20058 gcc_assert (j == PC_REGNUM);
20059 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20060 XVECEXP (par, 0, 0) = ret_rtx;
20061 XVECEXP (par, 0, 1) = tmp;
20062 par = emit_jump_insn (par);
20063 }
20064 else
20065 {
20066 par = emit_insn (tmp);
20067 REG_NOTES (par) = dwarf;
20068 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20069 stack_pointer_rtx, stack_pointer_rtx);
20070 }
20071
20072 }
20073 else if ((num_regs % 2) == 1 && return_in_pc)
20074 {
20075 /* There are 2 registers to be popped. So, generate the pattern
20076 pop_multiple_with_stack_update_and_return to pop in PC. */
20077 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20078 }
20079
20080 return;
20081 }
20082
20083 /* LDRD in ARM mode needs consecutive registers as operands. This function
20084 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20085 offset addressing and then generates one separate stack update.  This provides
20086 more scheduling freedom, compared to writeback on every load. However,
20087 if the function returns using load into PC directly
20088 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20089 before the last load. TODO: Add a peephole optimization to recognize
20090 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20091 peephole optimization to merge the load at stack-offset zero
20092 with the stack update instruction using load with writeback
20093 in post-index addressing mode. */
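/* For example, popping {r4, r5, r7, pc} emits an LDRD loading r4 and r5
from [sp] and [sp, #4], an LDR of r7 from [sp, #8], a stack-pointer
adjustment of 12 bytes, and finally a return that loads PC with
post-increment.  */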
20094 static void
20095 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20096 {
20097 int j = 0;
20098 int offset = 0;
20099 rtx par = NULL_RTX;
20100 rtx dwarf = NULL_RTX;
20101 rtx tmp, mem;
20102
20103 /* Restore saved registers. */
20104 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20105 j = 0;
20106 while (j <= LAST_ARM_REGNUM)
20107 if (saved_regs_mask & (1 << j))
20108 {
20109 if ((j % 2) == 0
20110 && (saved_regs_mask & (1 << (j + 1)))
20111 && (j + 1) != PC_REGNUM)
20112 {
20113 /* Current register and next register form register pair for which
20114 LDRD can be generated. PC is always the last register popped, and
20115 we handle it separately. */
20116 if (offset > 0)
20117 mem = gen_frame_mem (DImode,
20118 plus_constant (Pmode,
20119 stack_pointer_rtx,
20120 offset));
20121 else
20122 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20123
20124 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20125 tmp = emit_insn (tmp);
20126 RTX_FRAME_RELATED_P (tmp) = 1;
20127
20128 /* Generate dwarf info. */
20129
20130 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20131 gen_rtx_REG (SImode, j),
20132 NULL_RTX);
20133 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20134 gen_rtx_REG (SImode, j + 1),
20135 dwarf);
20136
20137 REG_NOTES (tmp) = dwarf;
20138
20139 offset += 8;
20140 j += 2;
20141 }
20142 else if (j != PC_REGNUM)
20143 {
20144 /* Emit a single word load. */
20145 if (offset > 0)
20146 mem = gen_frame_mem (SImode,
20147 plus_constant (Pmode,
20148 stack_pointer_rtx,
20149 offset));
20150 else
20151 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20152
20153 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20154 tmp = emit_insn (tmp);
20155 RTX_FRAME_RELATED_P (tmp) = 1;
20156
20157 /* Generate dwarf info. */
20158 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20159 gen_rtx_REG (SImode, j),
20160 NULL_RTX);
20161
20162 offset += 4;
20163 j += 1;
20164 }
20165 else /* j == PC_REGNUM */
20166 j++;
20167 }
20168 else
20169 j++;
20170
20171 /* Update the stack. */
20172 if (offset > 0)
20173 {
20174 tmp = gen_rtx_SET (Pmode,
20175 stack_pointer_rtx,
20176 plus_constant (Pmode,
20177 stack_pointer_rtx,
20178 offset));
20179 tmp = emit_insn (tmp);
20180 arm_add_cfa_adjust_cfa_note (tmp, offset,
20181 stack_pointer_rtx, stack_pointer_rtx);
20182 offset = 0;
20183 }
20184
20185 if (saved_regs_mask & (1 << PC_REGNUM))
20186 {
20187 /* Only PC is to be popped. */
20188 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20189 XVECEXP (par, 0, 0) = ret_rtx;
20190 tmp = gen_rtx_SET (SImode,
20191 gen_rtx_REG (SImode, PC_REGNUM),
20192 gen_frame_mem (SImode,
20193 gen_rtx_POST_INC (SImode,
20194 stack_pointer_rtx)));
20195 RTX_FRAME_RELATED_P (tmp) = 1;
20196 XVECEXP (par, 0, 1) = tmp;
20197 par = emit_jump_insn (par);
20198
20199 /* Generate dwarf info. */
20200 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20201 gen_rtx_REG (SImode, PC_REGNUM),
20202 NULL_RTX);
20203 REG_NOTES (par) = dwarf;
20204 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20205 stack_pointer_rtx, stack_pointer_rtx);
20206 }
20207 }
20208
20209 /* Calculate the size of the return value that is passed in registers. */
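/* For example, a function returning a 64-bit long long (DImode, returned
in r0 and r1) yields 8, while one returning an int (SImode) yields 4.  */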
20210 static unsigned
20211 arm_size_return_regs (void)
20212 {
20213 enum machine_mode mode;
20214
20215 if (crtl->return_rtx != 0)
20216 mode = GET_MODE (crtl->return_rtx);
20217 else
20218 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20219
20220 return GET_MODE_SIZE (mode);
20221 }
20222
20223 /* Return true if the current function needs to save/restore LR. */
20224 static bool
20225 thumb_force_lr_save (void)
20226 {
20227 return !cfun->machine->lr_save_eliminated
20228 && (!leaf_function_p ()
20229 || thumb_far_jump_used_p ()
20230 || df_regs_ever_live_p (LR_REGNUM));
20231 }
20232
20233 /* We do not know if r3 will be available, because
20234 an indirect tailcall happens in this
20235 particular case. */
20236 static bool
20237 is_indirect_tailcall_p (rtx call)
20238 {
20239 rtx pat = PATTERN (call);
20240
20241 /* Indirect tail call. */
20242 pat = XVECEXP (pat, 0, 0);
20243 if (GET_CODE (pat) == SET)
20244 pat = SET_SRC (pat);
20245
20246 pat = XEXP (XEXP (pat, 0), 0);
20247 return REG_P (pat);
20248 }
20249
20250 /* Return true if r3 is used by any of the tail call insns in the
20251 current function. */
20252 static bool
20253 any_sibcall_could_use_r3 (void)
20254 {
20255 edge_iterator ei;
20256 edge e;
20257
20258 if (!crtl->tail_call_emit)
20259 return false;
20260 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20261 if (e->flags & EDGE_SIBCALL)
20262 {
20263 rtx call = BB_END (e->src);
20264 if (!CALL_P (call))
20265 call = prev_nonnote_nondebug_insn (call);
20266 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20267 if (find_regno_fusage (call, USE, 3)
20268 || is_indirect_tailcall_p (call))
20269 return true;
20270 }
20271 return false;
20272 }
20273
20274
20275 /* Compute the distance from register FROM to register TO.
20276 These can be the arg pointer (26), the soft frame pointer (25),
20277 the stack pointer (13) or the hard frame pointer (11).
20278 In thumb mode r7 is used as the soft frame pointer, if needed.
20279 Typical stack layout looks like this:
20280
20281 old stack pointer -> | |
20282 ----
20283 | | \
20284 | | saved arguments for
20285 | | vararg functions
20286 | | /
20287 --
20288 hard FP & arg pointer -> | | \
20289 | | stack
20290 | | frame
20291 | | /
20292 --
20293 | | \
20294 | | call saved
20295 | | registers
20296 soft frame pointer -> | | /
20297 --
20298 | | \
20299 | | local
20300 | | variables
20301 locals base pointer -> | | /
20302 --
20303 | | \
20304 | | outgoing
20305 | | arguments
20306 current stack pointer -> | | /
20307 --
20308
20309 For a given function some or all of these stack components
20310 may not be needed, giving rise to the possibility of
20311 eliminating some of the registers.
20312
20313 The values returned by this function must reflect the behavior
20314 of arm_expand_prologue() and arm_compute_save_reg_mask().
20315
20316 The sign of the number returned reflects the direction of stack
20317 growth, so the values are positive for all eliminations except
20318 from the soft frame pointer to the hard frame pointer.
20319
20320 SFP may point just inside the local variables block to ensure correct
20321 alignment. */
20322
20323
20324 /* Calculate stack offsets. These are used to calculate register elimination
20325 offsets and in prologue/epilogue code. Also calculates which registers
20326 should be saved. */
20327
20328 static arm_stack_offsets *
20329 arm_get_frame_offsets (void)
20330 {
20331 struct arm_stack_offsets *offsets;
20332 unsigned long func_type;
20333 int leaf;
20334 int saved;
20335 int core_saved;
20336 HOST_WIDE_INT frame_size;
20337 int i;
20338
20339 offsets = &cfun->machine->stack_offsets;
20340
20341 /* We need to know if we are a leaf function. Unfortunately, it
20342 is possible to be called after start_sequence has been called,
20343 which causes get_insns to return the insns for the sequence,
20344 not the function, which will cause leaf_function_p to return
20345 the incorrect result.
20346
20347 However, we only need to know about leaf functions once reload has
20348 completed, and the frame size cannot be changed after that time, so
20349 we can safely use the cached value. */
20350
20351 if (reload_completed)
20352 return offsets;
20353
20354 /* Initially this is the size of the local variables. It will be translated
20355 into an offset once we have determined the size of the preceding data. */
20356 frame_size = ROUND_UP_WORD (get_frame_size ());
20357
20358 leaf = leaf_function_p ();
20359
20360 /* Space for variadic functions. */
20361 offsets->saved_args = crtl->args.pretend_args_size;
20362
20363 /* In Thumb mode this is incorrect, but never used. */
20364 offsets->frame
20365 = (offsets->saved_args
20366 + arm_compute_static_chain_stack_bytes ()
20367 + (frame_pointer_needed ? 4 : 0));
20368
20369 if (TARGET_32BIT)
20370 {
20371 unsigned int regno;
20372
20373 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20374 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20375 saved = core_saved;
20376
20377 /* We know that SP will be doubleword aligned on entry, and we must
20378 preserve that condition at any subroutine call. We also require the
20379 soft frame pointer to be doubleword aligned. */
20380
20381 if (TARGET_REALLY_IWMMXT)
20382 {
20383 /* Check for the call-saved iWMMXt registers. */
20384 for (regno = FIRST_IWMMXT_REGNUM;
20385 regno <= LAST_IWMMXT_REGNUM;
20386 regno++)
20387 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20388 saved += 8;
20389 }
20390
20391 func_type = arm_current_func_type ();
20392 /* Space for saved VFP registers. */
20393 if (! IS_VOLATILE (func_type)
20394 && TARGET_HARD_FLOAT && TARGET_VFP)
20395 saved += arm_get_vfp_saved_size ();
20396 }
20397 else /* TARGET_THUMB1 */
20398 {
20399 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20400 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20401 saved = core_saved;
20402 if (TARGET_BACKTRACE)
20403 saved += 16;
20404 }
20405
20406 /* Saved registers include the stack frame. */
20407 offsets->saved_regs
20408 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20409 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20410
20411 /* A leaf function does not need any stack alignment if it has nothing
20412 on the stack. */
20413 if (leaf && frame_size == 0
20414 /* However if it calls alloca(), we have a dynamically allocated
20415 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20416 && ! cfun->calls_alloca)
20417 {
20418 offsets->outgoing_args = offsets->soft_frame;
20419 offsets->locals_base = offsets->soft_frame;
20420 return offsets;
20421 }
20422
20423 /* Ensure SFP has the correct alignment. */
20424 if (ARM_DOUBLEWORD_ALIGN
20425 && (offsets->soft_frame & 7))
20426 {
20427 offsets->soft_frame += 4;
20428 /* Try to align the stack by pushing an extra reg. Don't bother doing this
20429 when there is a stack frame, as the alignment will be rolled into
20430 the normal stack adjustment. */
20431 if (frame_size + crtl->outgoing_args_size == 0)
20432 {
20433 int reg = -1;
20434
20435 /* If it is safe to use r3, then do so. This sometimes
20436 generates better code on Thumb-2 by avoiding the need to
20437 use 32-bit push/pop instructions. */
20438 if (! any_sibcall_could_use_r3 ()
20439 && arm_size_return_regs () <= 12
20440 && (offsets->saved_regs_mask & (1 << 3)) == 0
20441 && (TARGET_THUMB2
20442 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20443 {
20444 reg = 3;
20445 }
20446 else
20447 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20448 {
20449 /* Avoid fixed registers; they may be changed at
20450 arbitrary times so it's unsafe to restore them
20451 during the epilogue. */
20452 if (!fixed_regs[i]
20453 && (offsets->saved_regs_mask & (1 << i)) == 0)
20454 {
20455 reg = i;
20456 break;
20457 }
20458 }
20459
20460 if (reg != -1)
20461 {
20462 offsets->saved_regs += 4;
20463 offsets->saved_regs_mask |= (1 << reg);
20464 }
20465 }
20466 }
20467
20468 offsets->locals_base = offsets->soft_frame + frame_size;
20469 offsets->outgoing_args = (offsets->locals_base
20470 + crtl->outgoing_args_size);
20471
20472 if (ARM_DOUBLEWORD_ALIGN)
20473 {
20474 /* Ensure SP remains doubleword aligned. */
20475 if (offsets->outgoing_args & 7)
20476 offsets->outgoing_args += 4;
20477 gcc_assert (!(offsets->outgoing_args & 7));
20478 }
20479
20480 return offsets;
20481 }
20482
20483
20484 /* Calculate the relative offsets for the different stack pointers. Positive
20485 offsets are in the direction of stack growth. */
20486
20487 HOST_WIDE_INT
20488 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20489 {
20490 arm_stack_offsets *offsets;
20491
20492 offsets = arm_get_frame_offsets ();
20493
20494 /* OK, now we have enough information to compute the distances.
20495 There must be an entry in these switch tables for each pair
20496 of registers in ELIMINABLE_REGS, even if some of the entries
20497 seem to be redundant or useless. */
20498 switch (from)
20499 {
20500 case ARG_POINTER_REGNUM:
20501 switch (to)
20502 {
20503 case THUMB_HARD_FRAME_POINTER_REGNUM:
20504 return 0;
20505
20506 case FRAME_POINTER_REGNUM:
20507 /* This is the reverse of the soft frame pointer
20508 to hard frame pointer elimination below. */
20509 return offsets->soft_frame - offsets->saved_args;
20510
20511 case ARM_HARD_FRAME_POINTER_REGNUM:
20512 /* This is only non-zero in the case where the static chain register
20513 is stored above the frame. */
20514 return offsets->frame - offsets->saved_args - 4;
20515
20516 case STACK_POINTER_REGNUM:
20517 /* If nothing has been pushed on the stack at all
20518 then this will return -4. This *is* correct! */
20519 return offsets->outgoing_args - (offsets->saved_args + 4);
20520
20521 default:
20522 gcc_unreachable ();
20523 }
20524 gcc_unreachable ();
20525
20526 case FRAME_POINTER_REGNUM:
20527 switch (to)
20528 {
20529 case THUMB_HARD_FRAME_POINTER_REGNUM:
20530 return 0;
20531
20532 case ARM_HARD_FRAME_POINTER_REGNUM:
20533 /* The hard frame pointer points to the top entry in the
20534 stack frame. The soft frame pointer points to the bottom entry
20535 in the stack frame. If there is no stack frame at all,
20536 then they are identical. */
20537
20538 return offsets->frame - offsets->soft_frame;
20539
20540 case STACK_POINTER_REGNUM:
20541 return offsets->outgoing_args - offsets->soft_frame;
20542
20543 default:
20544 gcc_unreachable ();
20545 }
20546 gcc_unreachable ();
20547
20548 default:
20549 /* You cannot eliminate from the stack pointer.
20550 In theory you could eliminate from the hard frame
20551 pointer to the stack pointer, but this will never
20552 happen, since if a stack frame is not needed the
20553 hard frame pointer will never be used. */
20554 gcc_unreachable ();
20555 }
20556 }
20557
20558 /* Given FROM and TO register numbers, say whether this elimination is
20559 allowed. Frame pointer elimination is automatically handled.
20560
20561 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20562 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20563 pointer, we must eliminate FRAME_POINTER_REGNUM into
20564 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20565 ARG_POINTER_REGNUM. */
20566
20567 bool
20568 arm_can_eliminate (const int from, const int to)
20569 {
20570 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20571 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20572 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20573 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20574 true);
20575 }
20576
20577 /* Emit RTL to save coprocessor registers on function entry. Returns the
20578 number of bytes pushed. */
20579
20580 static int
20581 arm_save_coproc_regs (void)
20582 {
20583 int saved_size = 0;
20584 unsigned reg;
20585 unsigned start_reg;
20586 rtx insn;
20587
20588 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20589 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20590 {
20591 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20592 insn = gen_rtx_MEM (V2SImode, insn);
20593 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20594 RTX_FRAME_RELATED_P (insn) = 1;
20595 saved_size += 8;
20596 }
20597
20598 if (TARGET_HARD_FLOAT && TARGET_VFP)
20599 {
20600 start_reg = FIRST_VFP_REGNUM;
20601
20602 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20603 {
20604 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20605 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20606 {
20607 if (start_reg != reg)
20608 saved_size += vfp_emit_fstmd (start_reg,
20609 (reg - start_reg) / 2);
20610 start_reg = reg + 2;
20611 }
20612 }
20613 if (start_reg != reg)
20614 saved_size += vfp_emit_fstmd (start_reg,
20615 (reg - start_reg) / 2);
20616 }
20617 return saved_size;
20618 }
20619
20620
20621 /* Set the Thumb frame pointer from the stack pointer. */
20622
20623 static void
20624 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20625 {
20626 HOST_WIDE_INT amount;
20627 rtx insn, dwarf;
20628
20629 amount = offsets->outgoing_args - offsets->locals_base;
20630 if (amount < 1024)
20631 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20632 stack_pointer_rtx, GEN_INT (amount)));
20633 else
20634 {
20635 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20636 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20637 expects the first two operands to be the same. */
20638 if (TARGET_THUMB2)
20639 {
20640 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20641 stack_pointer_rtx,
20642 hard_frame_pointer_rtx));
20643 }
20644 else
20645 {
20646 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20647 hard_frame_pointer_rtx,
20648 stack_pointer_rtx));
20649 }
20650 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20651 plus_constant (Pmode, stack_pointer_rtx, amount));
20652 RTX_FRAME_RELATED_P (dwarf) = 1;
20653 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20654 }
20655
20656 RTX_FRAME_RELATED_P (insn) = 1;
20657 }
20658
20659 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20660 function. */
20661 void
20662 arm_expand_prologue (void)
20663 {
20664 rtx amount;
20665 rtx insn;
20666 rtx ip_rtx;
20667 unsigned long live_regs_mask;
20668 unsigned long func_type;
20669 int fp_offset = 0;
20670 int saved_pretend_args = 0;
20671 int saved_regs = 0;
20672 unsigned HOST_WIDE_INT args_to_push;
20673 arm_stack_offsets *offsets;
20674
20675 func_type = arm_current_func_type ();
20676
20677 /* Naked functions don't have prologues. */
20678 if (IS_NAKED (func_type))
20679 return;
20680
20681 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
20682 args_to_push = crtl->args.pretend_args_size;
20683
20684 /* Compute which registers we will have to save onto the stack. */
20685 offsets = arm_get_frame_offsets ();
20686 live_regs_mask = offsets->saved_regs_mask;
20687
20688 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20689
20690 if (IS_STACKALIGN (func_type))
20691 {
20692 rtx r0, r1;
20693
20694 /* Handle a word-aligned stack pointer. We generate the following:
20695
20696 mov r0, sp
20697 bic r1, r0, #7
20698 mov sp, r1
20699 <save and restore r0 in normal prologue/epilogue>
20700 mov sp, r0
20701 bx lr
20702
20703 The unwinder doesn't need to know about the stack realignment.
20704 Just tell it we saved SP in r0. */
20705 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20706
20707 r0 = gen_rtx_REG (SImode, 0);
20708 r1 = gen_rtx_REG (SImode, 1);
20709
20710 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20711 RTX_FRAME_RELATED_P (insn) = 1;
20712 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20713
20714 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20715
20716 /* ??? The CFA changes here, which may cause GDB to conclude that it
20717 has entered a different function. That said, the unwind info is
20718 correct, individually, before and after this instruction because
20719 we've described the save of SP, which will override the default
20720 handling of SP as restoring from the CFA. */
20721 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20722 }
20723
20724 /* For APCS frames, if the IP register is clobbered
20725 when creating the frame, save that register in a special
20726 way. */
20727 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20728 {
20729 if (IS_INTERRUPT (func_type))
20730 {
20731 /* Interrupt functions must not corrupt any registers.
20732 Creating a frame pointer, however, corrupts the IP
20733 register, so we must push it first. */
20734 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
20735
20736 /* Do not set RTX_FRAME_RELATED_P on this insn.
20737 The dwarf stack unwinding code only wants to see one
20738 stack decrement per function, and this is not it. If
20739 this instruction is labeled as being part of the frame
20740 creation sequence then dwarf2out_frame_debug_expr will
20741 die when it encounters the assignment of IP to FP
20742 later on, since the use of SP here establishes SP as
20743 the CFA register and not IP.
20744
20745 Anyway this instruction is not really part of the stack
20746 frame creation although it is part of the prologue. */
20747 }
20748 else if (IS_NESTED (func_type))
20749 {
20750 /* The static chain register is the same as the IP register
20751 used as a scratch register during stack frame creation.
20752 To get around this we need to find somewhere to store IP
20753 whilst the frame is being created. We try the following
20754 places in order:
20755
20756 1. The last argument register r3 if it is available.
20757 2. A slot on the stack above the frame if there are no
20758 arguments to push onto the stack.
20759 3. Register r3 again, after pushing the argument registers
20760 onto the stack, if this is a varargs function.
20761 4. The last slot on the stack created for the arguments to
20762 push, if this isn't a varargs function.
20763
20764 Note - we only need to tell the dwarf2 backend about the SP
20765 adjustment in the second variant; the static chain register
20766 doesn't need to be unwound, as it doesn't contain a value
20767 inherited from the caller. */
20768
20769 if (!arm_r3_live_at_start_p ())
20770 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20771 else if (args_to_push == 0)
20772 {
20773 rtx addr, dwarf;
20774
20775 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
20776 saved_regs += 4;
20777
20778 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20779 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20780 fp_offset = 4;
20781
20782 /* Just tell the dwarf backend that we adjusted SP. */
20783 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20784 plus_constant (Pmode, stack_pointer_rtx,
20785 -fp_offset));
20786 RTX_FRAME_RELATED_P (insn) = 1;
20787 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20788 }
20789 else
20790 {
20791 /* Store the args on the stack. */
20792 if (cfun->machine->uses_anonymous_args)
20793 {
20794 insn
20795 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
20796 (0xf0 >> (args_to_push / 4)) & 0xf);
20797 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20798 saved_pretend_args = 1;
20799 }
20800 else
20801 {
20802 rtx addr, dwarf;
20803
20804 if (args_to_push == 4)
20805 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20806 else
20807 addr
20808 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
20809 plus_constant (Pmode,
20810 stack_pointer_rtx,
20811 -args_to_push));
20812
20813 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20814
20815 /* Just tell the dwarf backend that we adjusted SP. */
20816 dwarf
20817 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20818 plus_constant (Pmode, stack_pointer_rtx,
20819 -args_to_push));
20820 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20821 }
20822
20823 RTX_FRAME_RELATED_P (insn) = 1;
20824 fp_offset = args_to_push;
20825 args_to_push = 0;
20826 }
20827 }
20828
20829 insn = emit_set_insn (ip_rtx,
20830 plus_constant (Pmode, stack_pointer_rtx,
20831 fp_offset));
20832 RTX_FRAME_RELATED_P (insn) = 1;
20833 }
20834
20835 if (args_to_push)
20836 {
20837 /* Push the argument registers, or reserve space for them. */
20838 if (cfun->machine->uses_anonymous_args)
20839 insn = emit_multi_reg_push
20840 ((0xf0 >> (args_to_push / 4)) & 0xf,
20841 (0xf0 >> (args_to_push / 4)) & 0xf);
20842 else
20843 insn = emit_insn
20844 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20845 GEN_INT (- args_to_push)));
20846 RTX_FRAME_RELATED_P (insn) = 1;
20847 }
20848
20849 /* If this is an interrupt service routine, and the link register
20850 is going to be pushed, and we are not generating the extra
20851 push of IP (needed when a frame is needed and the frame layout is APCS),
20852 then subtracting four from LR now means that the function return
20853 can be done with a single instruction. */
20854 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
20855 && (live_regs_mask & (1 << LR_REGNUM)) != 0
20856 && !(frame_pointer_needed && TARGET_APCS_FRAME)
20857 && TARGET_ARM)
20858 {
20859 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
20860
20861 emit_set_insn (lr, plus_constant (SImode, lr, -4));
20862 }
20863
20864 if (live_regs_mask)
20865 {
20866 unsigned long dwarf_regs_mask = live_regs_mask;
20867
20868 saved_regs += bit_count (live_regs_mask) * 4;
20869 if (optimize_size && !frame_pointer_needed
20870 && saved_regs == offsets->saved_regs - offsets->saved_args)
20871 {
20872 /* If no coprocessor registers are being pushed and we don't have
20873 to worry about a frame pointer then push extra registers to
20874 create the stack frame. This is done in a way that does not
20875 alter the frame layout, so it is independent of the epilogue. */
20876 int n;
20877 int frame;
20878 n = 0;
20879 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
20880 n++;
20881 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
20882 if (frame && n * 4 >= frame)
20883 {
20884 n = frame / 4;
20885 live_regs_mask |= (1 << n) - 1;
20886 saved_regs += frame;
20887 }
20888 }
20889
20890 if (TARGET_LDRD
20891 && current_tune->prefer_ldrd_strd
20892 && !optimize_function_for_size_p (cfun))
20893 {
20894 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
20895 if (TARGET_THUMB2)
20896 thumb2_emit_strd_push (live_regs_mask);
20897 else if (TARGET_ARM
20898 && !TARGET_APCS_FRAME
20899 && !IS_INTERRUPT (func_type))
20900 arm_emit_strd_push (live_regs_mask);
20901 else
20902 {
20903 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
20904 RTX_FRAME_RELATED_P (insn) = 1;
20905 }
20906 }
20907 else
20908 {
20909 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
20910 RTX_FRAME_RELATED_P (insn) = 1;
20911 }
20912 }
20913
20914 if (! IS_VOLATILE (func_type))
20915 saved_regs += arm_save_coproc_regs ();
20916
20917 if (frame_pointer_needed && TARGET_ARM)
20918 {
20919 /* Create the new frame pointer. */
20920 if (TARGET_APCS_FRAME)
20921 {
20922 insn = GEN_INT (-(4 + args_to_push + fp_offset));
20923 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
20924 RTX_FRAME_RELATED_P (insn) = 1;
20925
20926 if (IS_NESTED (func_type))
20927 {
20928 /* Recover the static chain register. */
20929 if (!arm_r3_live_at_start_p () || saved_pretend_args)
20930 insn = gen_rtx_REG (SImode, 3);
20931 else
20932 {
20933 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
20934 insn = gen_frame_mem (SImode, insn);
20935 }
20936 emit_set_insn (ip_rtx, insn);
20937 /* Add a USE to stop propagate_one_insn() from barfing. */
20938 emit_insn (gen_force_register_use (ip_rtx));
20939 }
20940 }
20941 else
20942 {
20943 insn = GEN_INT (saved_regs - 4);
20944 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20945 stack_pointer_rtx, insn));
20946 RTX_FRAME_RELATED_P (insn) = 1;
20947 }
20948 }
20949
20950 if (flag_stack_usage_info)
20951 current_function_static_stack_size
20952 = offsets->outgoing_args - offsets->saved_args;
20953
20954 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
20955 {
20956 /* This add can produce multiple insns for a large constant, so we
20957 need to get tricky. */
20958 rtx last = get_last_insn ();
20959
20960 amount = GEN_INT (offsets->saved_args + saved_regs
20961 - offsets->outgoing_args);
20962
20963 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20964 amount));
20965 do
20966 {
20967 last = last ? NEXT_INSN (last) : get_insns ();
20968 RTX_FRAME_RELATED_P (last) = 1;
20969 }
20970 while (last != insn);
20971
20972 /* If the frame pointer is needed, emit a special barrier that
20973 will prevent the scheduler from moving stores to the frame
20974 before the stack adjustment. */
20975 if (frame_pointer_needed)
20976 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
20977 hard_frame_pointer_rtx));
20978 }
20979
20980
20981 if (frame_pointer_needed && TARGET_THUMB2)
20982 thumb_set_frame_pointer (offsets);
20983
20984 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20985 {
20986 unsigned long mask;
20987
20988 mask = live_regs_mask;
20989 mask &= THUMB2_WORK_REGS;
20990 if (!IS_NESTED (func_type))
20991 mask |= (1 << IP_REGNUM);
20992 arm_load_pic_register (mask);
20993 }
20994
20995 /* If we are profiling, make sure no instructions are scheduled before
20996 the call to mcount. Likewise if the user has requested no
20997 scheduling in the prologue, or if we want non-call exceptions
20998 using the EABI unwinder, to prevent faulting instructions from being
20999 swapped with a stack adjustment. */
21000 if (crtl->profile || !TARGET_SCHED_PROLOG
21001 || (arm_except_unwind_info (&global_options) == UI_TARGET
21002 && cfun->can_throw_non_call_exceptions))
21003 emit_insn (gen_blockage ());
21004
21005 /* If the link register is being kept alive, with the return address in it,
21006 then make sure that it does not get reused by the ce2 pass. */
21007 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21008 cfun->machine->lr_save_eliminated = 1;
21009 }
21010 \f
21011 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21012 static void
21013 arm_print_condition (FILE *stream)
21014 {
21015 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21016 {
21017 /* Branch conversion is not implemented for Thumb-2. */
21018 if (TARGET_THUMB)
21019 {
21020 output_operand_lossage ("predicated Thumb instruction");
21021 return;
21022 }
21023 if (current_insn_predicate != NULL)
21024 {
21025 output_operand_lossage
21026 ("predicated instruction in conditional sequence");
21027 return;
21028 }
21029
21030 fputs (arm_condition_codes[arm_current_cc], stream);
21031 }
21032 else if (current_insn_predicate)
21033 {
21034 enum arm_cond_code code;
21035
21036 if (TARGET_THUMB1)
21037 {
21038 output_operand_lossage ("predicated Thumb instruction");
21039 return;
21040 }
21041
21042 code = get_arm_condition_code (current_insn_predicate);
21043 fputs (arm_condition_codes[code], stream);
21044 }
21045 }
21046
21047
21048 /* If CODE is 'd', then X is a condition operand and the instruction
21049 should only be executed if the condition is true.
21050 If CODE is 'D', then X is a condition operand and the instruction
21051 should only be executed if the condition is false: however, if the mode
21052 of the comparison is CCFPEmode, then always execute the instruction -- we
21053 do this because in these circumstances !GE does not necessarily imply LT;
21054 in these cases the instruction pattern will take care to make sure that
21055 an instruction containing %d will follow, thereby undoing the effects of
21056 doing this instruction unconditionally.
21057 If CODE is 'N' then X is a floating point operand that must be negated
21058 before output.
21059 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21060 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
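/* A couple of illustrative readings of the codes above (assumed examples,
   not an exhaustive list): %B on (const_int 5) prints -6, the bitwise
   inverse; %M on a DImode value held in r4 prints "{r4-r5}". */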
21061 static void
21062 arm_print_operand (FILE *stream, rtx x, int code)
21063 {
21064 switch (code)
21065 {
21066 case '@':
21067 fputs (ASM_COMMENT_START, stream);
21068 return;
21069
21070 case '_':
21071 fputs (user_label_prefix, stream);
21072 return;
21073
21074 case '|':
21075 fputs (REGISTER_PREFIX, stream);
21076 return;
21077
21078 case '?':
21079 arm_print_condition (stream);
21080 return;
21081
21082 case '(':
21083 /* Nothing in unified syntax, otherwise the current condition code. */
21084 if (!TARGET_UNIFIED_ASM)
21085 arm_print_condition (stream);
21086 break;
21087
21088 case ')':
21089 /* The current condition code in unified syntax, otherwise nothing. */
21090 if (TARGET_UNIFIED_ASM)
21091 arm_print_condition (stream);
21092 break;
21093
21094 case '.':
21095 /* The current condition code for a condition code setting instruction.
21096 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21097 if (TARGET_UNIFIED_ASM)
21098 {
21099 fputc('s', stream);
21100 arm_print_condition (stream);
21101 }
21102 else
21103 {
21104 arm_print_condition (stream);
21105 fputc('s', stream);
21106 }
21107 return;
21108
21109 case '!':
21110 /* If the instruction is conditionally executed then print
21111 the current condition code, otherwise print 's'. */
21112 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21113 if (current_insn_predicate)
21114 arm_print_condition (stream);
21115 else
21116 fputc('s', stream);
21117 break;
21118
21119 /* %# is a "break" sequence. It doesn't output anything, but is used to
21120 separate e.g. operand numbers from following text, if that text consists
21121 of further digits which we don't want to be part of the operand
21122 number. */
21123 case '#':
21124 return;
21125
21126 case 'N':
21127 {
21128 REAL_VALUE_TYPE r;
21129 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21130 r = real_value_negate (&r);
21131 fprintf (stream, "%s", fp_const_from_val (&r));
21132 }
21133 return;
21134
21135 /* An integer or symbol address without a preceding # sign. */
21136 case 'c':
21137 switch (GET_CODE (x))
21138 {
21139 case CONST_INT:
21140 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21141 break;
21142
21143 case SYMBOL_REF:
21144 output_addr_const (stream, x);
21145 break;
21146
21147 case CONST:
21148 if (GET_CODE (XEXP (x, 0)) == PLUS
21149 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21150 {
21151 output_addr_const (stream, x);
21152 break;
21153 }
21154 /* Fall through. */
21155
21156 default:
21157 output_operand_lossage ("Unsupported operand for code '%c'", code);
21158 }
21159 return;
21160
21161 /* An integer that we want to print in HEX. */
21162 case 'x':
21163 switch (GET_CODE (x))
21164 {
21165 case CONST_INT:
21166 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21167 break;
21168
21169 default:
21170 output_operand_lossage ("Unsupported operand for code '%c'", code);
21171 }
21172 return;
21173
21174 case 'B':
21175 if (CONST_INT_P (x))
21176 {
21177 HOST_WIDE_INT val;
21178 val = ARM_SIGN_EXTEND (~INTVAL (x));
21179 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21180 }
21181 else
21182 {
21183 putc ('~', stream);
21184 output_addr_const (stream, x);
21185 }
21186 return;
21187
21188 case 'L':
21189 /* The low 16 bits of an immediate constant. */
21190 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21191 return;
21192
21193 case 'i':
21194 fprintf (stream, "%s", arithmetic_instr (x, 1));
21195 return;
21196
21197 case 'I':
21198 fprintf (stream, "%s", arithmetic_instr (x, 0));
21199 return;
21200
21201 case 'S':
21202 {
21203 HOST_WIDE_INT val;
21204 const char *shift;
21205
21206 shift = shift_op (x, &val);
21207
21208 if (shift)
21209 {
21210 fprintf (stream, ", %s ", shift);
21211 if (val == -1)
21212 arm_print_operand (stream, XEXP (x, 1), 0);
21213 else
21214 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21215 }
21216 }
21217 return;
21218
21219 /* An explanation of the 'Q', 'R' and 'H' register operands:
21220
21221 In a pair of registers containing a DI or DF value the 'Q'
21222 operand returns the register number of the register containing
21223 the least significant part of the value. The 'R' operand returns
21224 the register number of the register containing the most
21225 significant part of the value.
21226
21227 The 'H' operand returns the higher of the two register numbers.
21228 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21229 same as the 'Q' operand, since the most significant part of the
21230 value is held in the lower number register. The reverse is true
21231 on systems where WORDS_BIG_ENDIAN is false.
21232
21233 The purpose of these operands is to distinguish between cases
21234 where the endian-ness of the values is important (for example
21235 when they are added together), and cases where the endian-ness
21236 is irrelevant, but the order of register operations is important.
21237 For example when loading a value from memory into a register
21238 pair, the endian-ness does not matter. Provided that the value
21239 from the lower memory address is put into the lower numbered
21240 register, and the value from the higher address is put into the
21241 higher numbered register, the load will work regardless of whether
21242 the value being loaded is big-wordian or little-wordian. The
21243 order of the two register loads can matter however, if the address
21244 of the memory location is actually held in one of the registers
21245 being overwritten by the load.
21246
21247 The 'Q' and 'R' constraints are also available for 64-bit
21248 constants. */
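/* For example (an illustrative reading of the rules above, assuming a
   little-endian target where WORDS_BIG_ENDIAN is false), a DImode value
   held in the pair {r0, r1} gives %Q -> r0 (least significant word),
   %R -> r1 (most significant word) and %H -> r1 (the higher-numbered
   register). */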
21249 case 'Q':
21250 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21251 {
21252 rtx part = gen_lowpart (SImode, x);
21253 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21254 return;
21255 }
21256
21257 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21258 {
21259 output_operand_lossage ("invalid operand for code '%c'", code);
21260 return;
21261 }
21262
21263 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21264 return;
21265
21266 case 'R':
21267 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21268 {
21269 enum machine_mode mode = GET_MODE (x);
21270 rtx part;
21271
21272 if (mode == VOIDmode)
21273 mode = DImode;
21274 part = gen_highpart_mode (SImode, mode, x);
21275 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21276 return;
21277 }
21278
21279 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21280 {
21281 output_operand_lossage ("invalid operand for code '%c'", code);
21282 return;
21283 }
21284
21285 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21286 return;
21287
21288 case 'H':
21289 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21290 {
21291 output_operand_lossage ("invalid operand for code '%c'", code);
21292 return;
21293 }
21294
21295 asm_fprintf (stream, "%r", REGNO (x) + 1);
21296 return;
21297
21298 case 'J':
21299 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21300 {
21301 output_operand_lossage ("invalid operand for code '%c'", code);
21302 return;
21303 }
21304
21305 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21306 return;
21307
21308 case 'K':
21309 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21310 {
21311 output_operand_lossage ("invalid operand for code '%c'", code);
21312 return;
21313 }
21314
21315 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21316 return;
21317
21318 case 'm':
21319 asm_fprintf (stream, "%r",
21320 REG_P (XEXP (x, 0))
21321 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21322 return;
21323
21324 case 'M':
21325 asm_fprintf (stream, "{%r-%r}",
21326 REGNO (x),
21327 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21328 return;
21329
21330 /* Like 'M', but writing doubleword vector registers, for use by Neon
21331 insns. */
21332 case 'h':
21333 {
21334 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21335 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21336 if (numregs == 1)
21337 asm_fprintf (stream, "{d%d}", regno);
21338 else
21339 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21340 }
21341 return;
21342
21343 case 'd':
21344 /* CONST_TRUE_RTX means always -- that's the default. */
21345 if (x == const_true_rtx)
21346 return;
21347
21348 if (!COMPARISON_P (x))
21349 {
21350 output_operand_lossage ("invalid operand for code '%c'", code);
21351 return;
21352 }
21353
21354 fputs (arm_condition_codes[get_arm_condition_code (x)],
21355 stream);
21356 return;
21357
21358 case 'D':
21359 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21360 want to do that. */
21361 if (x == const_true_rtx)
21362 {
21363 output_operand_lossage ("instruction never executed");
21364 return;
21365 }
21366 if (!COMPARISON_P (x))
21367 {
21368 output_operand_lossage ("invalid operand for code '%c'", code);
21369 return;
21370 }
21371
21372 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21373 (get_arm_condition_code (x))],
21374 stream);
21375 return;
21376
21377 case 's':
21378 case 'V':
21379 case 'W':
21380 case 'X':
21381 case 'Y':
21382 case 'Z':
21383 /* Former Maverick support, removed after GCC-4.7. */
21384 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21385 return;
21386
21387 case 'U':
21388 if (!REG_P (x)
21389 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21390 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21391 /* Bad value for wCG register number. */
21392 {
21393 output_operand_lossage ("invalid operand for code '%c'", code);
21394 return;
21395 }
21396
21397 else
21398 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21399 return;
21400
21401 /* Print an iWMMXt control register name. */
21402 case 'w':
21403 if (!CONST_INT_P (x)
21404 || INTVAL (x) < 0
21405 || INTVAL (x) >= 16)
21406 /* Bad value for wC register number. */
21407 {
21408 output_operand_lossage ("invalid operand for code '%c'", code);
21409 return;
21410 }
21411
21412 else
21413 {
21414 static const char * wc_reg_names [16] =
21415 {
21416 "wCID", "wCon", "wCSSF", "wCASF",
21417 "wC4", "wC5", "wC6", "wC7",
21418 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21419 "wC12", "wC13", "wC14", "wC15"
21420 };
21421
21422 fputs (wc_reg_names [INTVAL (x)], stream);
21423 }
21424 return;
21425
21426 /* Print the high single-precision register of a VFP double-precision
21427 register. */
21428 case 'p':
21429 {
21430 int mode = GET_MODE (x);
21431 int regno;
21432
21433 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21434 {
21435 output_operand_lossage ("invalid operand for code '%c'", code);
21436 return;
21437 }
21438
21439 regno = REGNO (x);
21440 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21441 {
21442 output_operand_lossage ("invalid operand for code '%c'", code);
21443 return;
21444 }
21445
21446 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21447 }
21448 return;
21449
21450 /* Print a VFP/Neon double precision or quad precision register name. */
21451 case 'P':
21452 case 'q':
21453 {
21454 int mode = GET_MODE (x);
21455 int is_quad = (code == 'q');
21456 int regno;
21457
21458 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21459 {
21460 output_operand_lossage ("invalid operand for code '%c'", code);
21461 return;
21462 }
21463
21464 if (!REG_P (x)
21465 || !IS_VFP_REGNUM (REGNO (x)))
21466 {
21467 output_operand_lossage ("invalid operand for code '%c'", code);
21468 return;
21469 }
21470
21471 regno = REGNO (x);
21472 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21473 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21474 {
21475 output_operand_lossage ("invalid operand for code '%c'", code);
21476 return;
21477 }
21478
21479 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21480 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21481 }
21482 return;
21483
21484 /* These two codes print the low/high doubleword register of a Neon quad
21485 register, respectively. For pair-structure types, can also print
21486 low/high quadword registers. */
21487 case 'e':
21488 case 'f':
21489 {
21490 int mode = GET_MODE (x);
21491 int regno;
21492
21493 if ((GET_MODE_SIZE (mode) != 16
21494 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21495 {
21496 output_operand_lossage ("invalid operand for code '%c'", code);
21497 return;
21498 }
21499
21500 regno = REGNO (x);
21501 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21502 {
21503 output_operand_lossage ("invalid operand for code '%c'", code);
21504 return;
21505 }
21506
21507 if (GET_MODE_SIZE (mode) == 16)
21508 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21509 + (code == 'f' ? 1 : 0));
21510 else
21511 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21512 + (code == 'f' ? 1 : 0));
21513 }
21514 return;
21515
21516 /* Print a VFPv3 floating-point constant, represented as an integer
21517 index. */
21518 case 'G':
21519 {
21520 int index = vfp3_const_double_index (x);
21521 gcc_assert (index != -1);
21522 fprintf (stream, "%d", index);
21523 }
21524 return;
21525
21526 /* Print bits representing opcode features for Neon.
21527
21528 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21529 and polynomials as unsigned.
21530
21531 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21532
21533 Bit 2 is 1 for rounding functions, 0 otherwise. */
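/* For instance (illustrative, derived from the bit layout above): an
   operand with INTVAL 5 (binary 101) makes %T print 's' and %O print
   'r', while INTVAL 2 makes %T print 'p' and %O print nothing. */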
21534
21535 /* Identify the type as 's', 'u', 'p' or 'f'. */
21536 case 'T':
21537 {
21538 HOST_WIDE_INT bits = INTVAL (x);
21539 fputc ("uspf"[bits & 3], stream);
21540 }
21541 return;
21542
21543 /* Likewise, but signed and unsigned integers are both 'i'. */
21544 case 'F':
21545 {
21546 HOST_WIDE_INT bits = INTVAL (x);
21547 fputc ("iipf"[bits & 3], stream);
21548 }
21549 return;
21550
21551 /* As for 'T', but emit 'u' instead of 'p'. */
21552 case 't':
21553 {
21554 HOST_WIDE_INT bits = INTVAL (x);
21555 fputc ("usuf"[bits & 3], stream);
21556 }
21557 return;
21558
21559 /* Bit 2: rounding (vs none). */
21560 case 'O':
21561 {
21562 HOST_WIDE_INT bits = INTVAL (x);
21563 fputs ((bits & 4) != 0 ? "r" : "", stream);
21564 }
21565 return;
21566
21567 /* Memory operand for vld1/vst1 instruction. */
21568 case 'A':
21569 {
21570 rtx addr;
21571 bool postinc = false;
21572 unsigned align, memsize, align_bits;
21573
21574 gcc_assert (MEM_P (x));
21575 addr = XEXP (x, 0);
21576 if (GET_CODE (addr) == POST_INC)
21577 {
21578 postinc = true;
21579 addr = XEXP (addr, 0);
21580 }
21581 asm_fprintf (stream, "[%r", REGNO (addr));
21582
21583 /* We know the alignment of this access, so we can emit a hint in the
21584 instruction (for some alignments) as an aid to the memory subsystem
21585 of the target. */
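/* For instance (an assumed example), a 16-byte access known to be
   128-bit aligned would be printed with an address like "[r0:128]". */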
21586 align = MEM_ALIGN (x) >> 3;
21587 memsize = MEM_SIZE (x);
21588
21589 /* Only certain alignment specifiers are supported by the hardware. */
21590 if (memsize == 32 && (align % 32) == 0)
21591 align_bits = 256;
21592 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21593 align_bits = 128;
21594 else if (memsize >= 8 && (align % 8) == 0)
21595 align_bits = 64;
21596 else
21597 align_bits = 0;
21598
21599 if (align_bits != 0)
21600 asm_fprintf (stream, ":%d", align_bits);
21601
21602 asm_fprintf (stream, "]");
21603
21604 if (postinc)
21605 fputs("!", stream);
21606 }
21607 return;
21608
21609 case 'C':
21610 {
21611 rtx addr;
21612
21613 gcc_assert (MEM_P (x));
21614 addr = XEXP (x, 0);
21615 gcc_assert (REG_P (addr));
21616 asm_fprintf (stream, "[%r]", REGNO (addr));
21617 }
21618 return;
21619
21620 /* Translate an S register number into a D register number and element index. */
21621 case 'y':
21622 {
21623 int mode = GET_MODE (x);
21624 int regno;
21625
21626 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21627 {
21628 output_operand_lossage ("invalid operand for code '%c'", code);
21629 return;
21630 }
21631
21632 regno = REGNO (x);
21633 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21634 {
21635 output_operand_lossage ("invalid operand for code '%c'", code);
21636 return;
21637 }
21638
21639 regno = regno - FIRST_VFP_REGNUM;
21640 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21641 }
21642 return;
21643
21644 case 'v':
21645 gcc_assert (CONST_DOUBLE_P (x));
21646 int result;
21647 result = vfp3_const_double_for_fract_bits (x);
21648 if (result == 0)
21649 result = vfp3_const_double_for_bits (x);
21650 fprintf (stream, "#%d", result);
21651 return;
21652
21653 /* Register specifier for vld1.16/vst1.16. Translate the S register
21654 number into a D register number and element index. */
21655 case 'z':
21656 {
21657 int mode = GET_MODE (x);
21658 int regno;
21659
21660 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21661 {
21662 output_operand_lossage ("invalid operand for code '%c'", code);
21663 return;
21664 }
21665
21666 regno = REGNO (x);
21667 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21668 {
21669 output_operand_lossage ("invalid operand for code '%c'", code);
21670 return;
21671 }
21672
21673 regno = regno - FIRST_VFP_REGNUM;
21674 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21675 }
21676 return;
21677
21678 default:
21679 if (x == 0)
21680 {
21681 output_operand_lossage ("missing operand");
21682 return;
21683 }
21684
21685 switch (GET_CODE (x))
21686 {
21687 case REG:
21688 asm_fprintf (stream, "%r", REGNO (x));
21689 break;
21690
21691 case MEM:
21692 output_memory_reference_mode = GET_MODE (x);
21693 output_address (XEXP (x, 0));
21694 break;
21695
21696 case CONST_DOUBLE:
21697 if (TARGET_NEON)
21698 {
21699 char fpstr[20];
21700 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21701 sizeof (fpstr), 0, 1);
21702 fprintf (stream, "#%s", fpstr);
21703 }
21704 else
21705 fprintf (stream, "#%s", fp_immediate_constant (x));
21706 break;
21707
21708 default:
21709 gcc_assert (GET_CODE (x) != NEG);
21710 fputc ('#', stream);
21711 if (GET_CODE (x) == HIGH)
21712 {
21713 fputs (":lower16:", stream);
21714 x = XEXP (x, 0);
21715 }
21716
21717 output_addr_const (stream, x);
21718 break;
21719 }
21720 }
21721 }
21722 \f
21723 /* Target hook for printing a memory address. */
21724 static void
21725 arm_print_operand_address (FILE *stream, rtx x)
21726 {
21727 if (TARGET_32BIT)
21728 {
21729 int is_minus = GET_CODE (x) == MINUS;
21730
21731 if (REG_P (x))
21732 asm_fprintf (stream, "[%r]", REGNO (x));
21733 else if (GET_CODE (x) == PLUS || is_minus)
21734 {
21735 rtx base = XEXP (x, 0);
21736 rtx index = XEXP (x, 1);
21737 HOST_WIDE_INT offset = 0;
21738 if (!REG_P (base)
21739 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21740 {
21741 /* Ensure that BASE is a register
21742 (one of them must be). Also ensure
21743 that SP is not used as an index register. */
21744 rtx temp = base;
21745 base = index;
21746 index = temp;
21747 }
21748 switch (GET_CODE (index))
21749 {
21750 case CONST_INT:
21751 offset = INTVAL (index);
21752 if (is_minus)
21753 offset = -offset;
21754 asm_fprintf (stream, "[%r, #%wd]",
21755 REGNO (base), offset);
21756 break;
21757
21758 case REG:
21759 asm_fprintf (stream, "[%r, %s%r]",
21760 REGNO (base), is_minus ? "-" : "",
21761 REGNO (index));
21762 break;
21763
21764 case MULT:
21765 case ASHIFTRT:
21766 case LSHIFTRT:
21767 case ASHIFT:
21768 case ROTATERT:
21769 {
21770 asm_fprintf (stream, "[%r, %s%r",
21771 REGNO (base), is_minus ? "-" : "",
21772 REGNO (XEXP (index, 0)));
21773 arm_print_operand (stream, index, 'S');
21774 fputs ("]", stream);
21775 break;
21776 }
21777
21778 default:
21779 gcc_unreachable ();
21780 }
21781 }
21782 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
21783 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
21784 {
21785 extern enum machine_mode output_memory_reference_mode;
21786
21787 gcc_assert (REG_P (XEXP (x, 0)));
21788
21789 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
21790 asm_fprintf (stream, "[%r, #%s%d]!",
21791 REGNO (XEXP (x, 0)),
21792 GET_CODE (x) == PRE_DEC ? "-" : "",
21793 GET_MODE_SIZE (output_memory_reference_mode));
21794 else
21795 asm_fprintf (stream, "[%r], #%s%d",
21796 REGNO (XEXP (x, 0)),
21797 GET_CODE (x) == POST_DEC ? "-" : "",
21798 GET_MODE_SIZE (output_memory_reference_mode));
21799 }
21800 else if (GET_CODE (x) == PRE_MODIFY)
21801 {
21802 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
21803 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21804 asm_fprintf (stream, "#%wd]!",
21805 INTVAL (XEXP (XEXP (x, 1), 1)));
21806 else
21807 asm_fprintf (stream, "%r]!",
21808 REGNO (XEXP (XEXP (x, 1), 1)));
21809 }
21810 else if (GET_CODE (x) == POST_MODIFY)
21811 {
21812 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
21813 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21814 asm_fprintf (stream, "#%wd",
21815 INTVAL (XEXP (XEXP (x, 1), 1)));
21816 else
21817 asm_fprintf (stream, "%r",
21818 REGNO (XEXP (XEXP (x, 1), 1)));
21819 }
21820 else output_addr_const (stream, x);
21821 }
21822 else
21823 {
21824 if (REG_P (x))
21825 asm_fprintf (stream, "[%r]", REGNO (x));
21826 else if (GET_CODE (x) == POST_INC)
21827 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
21828 else if (GET_CODE (x) == PLUS)
21829 {
21830 gcc_assert (REG_P (XEXP (x, 0)));
21831 if (CONST_INT_P (XEXP (x, 1)))
21832 asm_fprintf (stream, "[%r, #%wd]",
21833 REGNO (XEXP (x, 0)),
21834 INTVAL (XEXP (x, 1)));
21835 else
21836 asm_fprintf (stream, "[%r, %r]",
21837 REGNO (XEXP (x, 0)),
21838 REGNO (XEXP (x, 1)));
21839 }
21840 else
21841 output_addr_const (stream, x);
21842 }
21843 }
21844 \f
21845 /* Target hook for indicating whether a punctuation character for
21846 TARGET_PRINT_OPERAND is valid. */
21847 static bool
21848 arm_print_operand_punct_valid_p (unsigned char code)
21849 {
21850 return (code == '@' || code == '|' || code == '.'
21851 || code == '(' || code == ')' || code == '#'
21852 || (TARGET_32BIT && (code == '?'))
21853 || (TARGET_THUMB2 && (code == '!'))
21854 || (TARGET_THUMB && (code == '_')));
21855 }
21856 \f
21857 /* Target hook for assembling integer objects. The ARM version needs to
21858 handle word-sized values specially. */
21859 static bool
21860 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
21861 {
21862 enum machine_mode mode;
21863
21864 if (size == UNITS_PER_WORD && aligned_p)
21865 {
21866 fputs ("\t.word\t", asm_out_file);
21867 output_addr_const (asm_out_file, x);
21868
21869 /* Mark symbols as position independent. We only do this in the
21870 .text segment, not in the .data segment. */
21871 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
21872 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
21873 {
21874 /* See legitimize_pic_address for an explanation of the
21875 TARGET_VXWORKS_RTP check. */
21876 if (!arm_pic_data_is_text_relative
21877 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
21878 fputs ("(GOT)", asm_out_file);
21879 else
21880 fputs ("(GOTOFF)", asm_out_file);
21881 }
21882 fputc ('\n', asm_out_file);
21883 return true;
21884 }
21885
21886 mode = GET_MODE (x);
21887
21888 if (arm_vector_mode_supported_p (mode))
21889 {
21890 int i, units;
21891
21892 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21893
21894 units = CONST_VECTOR_NUNITS (x);
21895 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
21896
21897 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21898 for (i = 0; i < units; i++)
21899 {
21900 rtx elt = CONST_VECTOR_ELT (x, i);
21901 assemble_integer
21902 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
21903 }
21904 else
21905 for (i = 0; i < units; i++)
21906 {
21907 rtx elt = CONST_VECTOR_ELT (x, i);
21908 REAL_VALUE_TYPE rval;
21909
21910 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
21911
21912 assemble_real
21913 (rval, GET_MODE_INNER (mode),
21914 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
21915 }
21916
21917 return true;
21918 }
21919
21920 return default_assemble_integer (x, size, aligned_p);
21921 }
21922
21923 static void
21924 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
21925 {
21926 section *s;
21927
21928 if (!TARGET_AAPCS_BASED)
21929 {
21930 (is_ctor ?
21931 default_named_section_asm_out_constructor
21932 : default_named_section_asm_out_destructor) (symbol, priority);
21933 return;
21934 }
21935
21936 /* Put these in the .init_array section, using a special relocation. */
21937 if (priority != DEFAULT_INIT_PRIORITY)
21938 {
21939 char buf[18];
21940 sprintf (buf, "%s.%.5u",
21941 is_ctor ? ".init_array" : ".fini_array",
21942 priority);
21943 s = get_section (buf, SECTION_WRITE, NULL_TREE);
21944 }
21945 else if (is_ctor)
21946 s = ctors_section;
21947 else
21948 s = dtors_section;
21949
21950 switch_to_section (s);
21951 assemble_align (POINTER_SIZE);
21952 fputs ("\t.word\t", asm_out_file);
21953 output_addr_const (asm_out_file, symbol);
21954 fputs ("(target1)\n", asm_out_file);
21955 }
21956
21957 /* Add a function to the list of static constructors. */
21958
21959 static void
21960 arm_elf_asm_constructor (rtx symbol, int priority)
21961 {
21962 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
21963 }
21964
21965 /* Add a function to the list of static destructors. */
21966
21967 static void
21968 arm_elf_asm_destructor (rtx symbol, int priority)
21969 {
21970 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
21971 }
21972 \f
21973 /* A finite state machine takes care of noticing whether or not instructions
21974 can be conditionally executed, thus decreasing execution time and code
21975 size by deleting branch instructions. The fsm is controlled by
21976 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21977
21978 /* The states of the fsm controlling condition codes are:
21979 0: normal, do nothing special
21980 1: make ASM_OUTPUT_OPCODE not output this instruction
21981 2: make ASM_OUTPUT_OPCODE not output this instruction
21982 3: make instructions conditional
21983 4: make instructions conditional
21984
21985 State transitions (state->state by whom under condition):
21986 0 -> 1 final_prescan_insn if the `target' is a label
21987 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21988 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21989 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21990 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21991 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21992 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21993 (the target insn is arm_target_insn).
21994
21995 If the jump clobbers the conditions then we use states 2 and 4.
21996
21997 A similar thing can be done with conditional return insns.
21998
21999 XXX In case the `target' is an unconditional branch, this conditionalising
22000 of the instructions always reduces code size, but not always execution
22001 time. But then, I want to reduce the code size to somewhere near what
22002 /bin/cc produces. */
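/* As an illustrative sketch (not verbatim compiler output), the fsm turns
   a sequence such as
   cmp r0, #0
   beq .L1
   add r1, r1, #1
   .L1:
   into
   cmp r0, #0
   addne r1, r1, #1
   by suppressing the branch and predicating the skipped instruction. */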
22003
22004 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22005 instructions. When a COND_EXEC instruction is seen the subsequent
22006 instructions are scanned so that multiple conditional instructions can be
22007 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22008 specify the length and true/false mask for the IT block. These will be
22009 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22010
22011 /* Returns the index of the ARM condition code string in
22012 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22013 COMPARISON should be an rtx like `(eq (...) (...))'. */
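/* For example (illustrative readings of the table below), a comparison
   such as (eq (reg:CC_Z ...) (const_int 0)) maps to ARM_EQ, while an
   unsupported combination on the same mode yields ARM_NV. */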
22014
22015 enum arm_cond_code
22016 maybe_get_arm_condition_code (rtx comparison)
22017 {
22018 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
22019 enum arm_cond_code code;
22020 enum rtx_code comp_code = GET_CODE (comparison);
22021
22022 if (GET_MODE_CLASS (mode) != MODE_CC)
22023 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22024 XEXP (comparison, 1));
22025
22026 switch (mode)
22027 {
22028 case CC_DNEmode: code = ARM_NE; goto dominance;
22029 case CC_DEQmode: code = ARM_EQ; goto dominance;
22030 case CC_DGEmode: code = ARM_GE; goto dominance;
22031 case CC_DGTmode: code = ARM_GT; goto dominance;
22032 case CC_DLEmode: code = ARM_LE; goto dominance;
22033 case CC_DLTmode: code = ARM_LT; goto dominance;
22034 case CC_DGEUmode: code = ARM_CS; goto dominance;
22035 case CC_DGTUmode: code = ARM_HI; goto dominance;
22036 case CC_DLEUmode: code = ARM_LS; goto dominance;
22037 case CC_DLTUmode: code = ARM_CC;
22038
22039 dominance:
22040 if (comp_code == EQ)
22041 return ARM_INVERSE_CONDITION_CODE (code);
22042 if (comp_code == NE)
22043 return code;
22044 return ARM_NV;
22045
22046 case CC_NOOVmode:
22047 switch (comp_code)
22048 {
22049 case NE: return ARM_NE;
22050 case EQ: return ARM_EQ;
22051 case GE: return ARM_PL;
22052 case LT: return ARM_MI;
22053 default: return ARM_NV;
22054 }
22055
22056 case CC_Zmode:
22057 switch (comp_code)
22058 {
22059 case NE: return ARM_NE;
22060 case EQ: return ARM_EQ;
22061 default: return ARM_NV;
22062 }
22063
22064 case CC_Nmode:
22065 switch (comp_code)
22066 {
22067 case NE: return ARM_MI;
22068 case EQ: return ARM_PL;
22069 default: return ARM_NV;
22070 }
22071
22072 case CCFPEmode:
22073 case CCFPmode:
22074 /* We can handle all cases except UNEQ and LTGT. */
22075 switch (comp_code)
22076 {
22077 case GE: return ARM_GE;
22078 case GT: return ARM_GT;
22079 case LE: return ARM_LS;
22080 case LT: return ARM_MI;
22081 case NE: return ARM_NE;
22082 case EQ: return ARM_EQ;
22083 case ORDERED: return ARM_VC;
22084 case UNORDERED: return ARM_VS;
22085 case UNLT: return ARM_LT;
22086 case UNLE: return ARM_LE;
22087 case UNGT: return ARM_HI;
22088 case UNGE: return ARM_PL;
22089 /* UNEQ and LTGT do not have a representation. */
22090 case UNEQ: /* Fall through. */
22091 case LTGT: /* Fall through. */
22092 default: return ARM_NV;
22093 }
22094
22095 case CC_SWPmode:
22096 switch (comp_code)
22097 {
22098 case NE: return ARM_NE;
22099 case EQ: return ARM_EQ;
22100 case GE: return ARM_LE;
22101 case GT: return ARM_LT;
22102 case LE: return ARM_GE;
22103 case LT: return ARM_GT;
22104 case GEU: return ARM_LS;
22105 case GTU: return ARM_CC;
22106 case LEU: return ARM_CS;
22107 case LTU: return ARM_HI;
22108 default: return ARM_NV;
22109 }
22110
22111 case CC_Cmode:
22112 switch (comp_code)
22113 {
22114 case LTU: return ARM_CS;
22115 case GEU: return ARM_CC;
22116 default: return ARM_NV;
22117 }
22118
22119 case CC_CZmode:
22120 switch (comp_code)
22121 {
22122 case NE: return ARM_NE;
22123 case EQ: return ARM_EQ;
22124 case GEU: return ARM_CS;
22125 case GTU: return ARM_HI;
22126 case LEU: return ARM_LS;
22127 case LTU: return ARM_CC;
22128 default: return ARM_NV;
22129 }
22130
22131 case CC_NCVmode:
22132 switch (comp_code)
22133 {
22134 case GE: return ARM_GE;
22135 case LT: return ARM_LT;
22136 case GEU: return ARM_CS;
22137 case LTU: return ARM_CC;
22138 default: return ARM_NV;
22139 }
22140
22141 case CCmode:
22142 switch (comp_code)
22143 {
22144 case NE: return ARM_NE;
22145 case EQ: return ARM_EQ;
22146 case GE: return ARM_GE;
22147 case GT: return ARM_GT;
22148 case LE: return ARM_LE;
22149 case LT: return ARM_LT;
22150 case GEU: return ARM_CS;
22151 case GTU: return ARM_HI;
22152 case LEU: return ARM_LS;
22153 case LTU: return ARM_CC;
22154 default: return ARM_NV;
22155 }
22156
22157 default: gcc_unreachable ();
22158 }
22159 }
22160
22161 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22162 static enum arm_cond_code
22163 get_arm_condition_code (rtx comparison)
22164 {
22165 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22166 gcc_assert (code != ARM_NV);
22167 return code;
22168 }
22169
22170 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22171 instructions. */
22172 void
22173 thumb2_final_prescan_insn (rtx insn)
22174 {
22175 rtx first_insn = insn;
22176 rtx body = PATTERN (insn);
22177 rtx predicate;
22178 enum arm_cond_code code;
22179 int n;
22180 int mask;
22181 int max;
22182
22183 /* max_insns_skipped in the tune was already taken into account in the
22184 cost model of the ifcvt pass when generating COND_EXEC insns. At this
22185 stage just emit the IT blocks as large as we can; it does not make
22186 sense to split the IT blocks here. */
22187 max = MAX_INSN_PER_IT_BLOCK;
22188
22189 /* Remove the previous insn from the count of insns to be output. */
22190 if (arm_condexec_count)
22191 arm_condexec_count--;
22192
22193 /* Nothing to do if we are already inside a conditional block. */
22194 if (arm_condexec_count)
22195 return;
22196
22197 if (GET_CODE (body) != COND_EXEC)
22198 return;
22199
22200 /* Conditional jumps are implemented directly. */
22201 if (JUMP_P (insn))
22202 return;
22203
22204 predicate = COND_EXEC_TEST (body);
22205 arm_current_cc = get_arm_condition_code (predicate);
22206
22207 n = get_attr_ce_count (insn);
22208 arm_condexec_count = 1;
22209 arm_condexec_mask = (1 << n) - 1;
22210 arm_condexec_masklen = n;
22211 /* See if subsequent instructions can be combined into the same block. */
22212 for (;;)
22213 {
22214 insn = next_nonnote_insn (insn);
22215
22216 /* Jumping into the middle of an IT block is illegal, so a label or
22217 barrier terminates the block. */
22218 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22219 break;
22220
22221 body = PATTERN (insn);
22222 /* USE and CLOBBER aren't really insns, so just skip them. */
22223 if (GET_CODE (body) == USE
22224 || GET_CODE (body) == CLOBBER)
22225 continue;
22226
22227 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22228 if (GET_CODE (body) != COND_EXEC)
22229 break;
22230 /* Maximum number of conditionally executed instructions in a block. */
22231 n = get_attr_ce_count (insn);
22232 if (arm_condexec_masklen + n > max)
22233 break;
22234
22235 predicate = COND_EXEC_TEST (body);
22236 code = get_arm_condition_code (predicate);
22237 mask = (1 << n) - 1;
22238 if (arm_current_cc == code)
22239 arm_condexec_mask |= (mask << arm_condexec_masklen);
22240 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22241 break;
22242
22243 arm_condexec_count++;
22244 arm_condexec_masklen += n;
22245
22246 /* A jump must be the last instruction in a conditional block. */
22247 if (JUMP_P (insn))
22248 break;
22249 }
22250 /* Restore recog_data (getting the attributes of other insns can
22251 destroy this array, but final.c assumes that it remains intact
22252 across this call). */
22253 extract_constrain_insn_cached (first_insn);
22254 }
22255
22256 void
22257 arm_final_prescan_insn (rtx insn)
22258 {
22259 /* BODY will hold the body of INSN. */
22260 rtx body = PATTERN (insn);
22261
22262 /* This will be 1 if trying to repeat the trick, and things need to be
22263 reversed if it appears to fail. */
22264 int reverse = 0;
22265
22266 /* If we start with a return insn, we only succeed if we find another one. */
22267 int seeking_return = 0;
22268 enum rtx_code return_code = UNKNOWN;
22269
22270 /* START_INSN will hold the insn from where we start looking. This is the
22271 first insn after the following code_label if REVERSE is true. */
22272 rtx start_insn = insn;
22273
22274 /* If in state 4, check if the target branch is reached, in order to
22275 change back to state 0. */
22276 if (arm_ccfsm_state == 4)
22277 {
22278 if (insn == arm_target_insn)
22279 {
22280 arm_target_insn = NULL;
22281 arm_ccfsm_state = 0;
22282 }
22283 return;
22284 }
22285
22286 /* If in state 3, it is possible to repeat the trick, if this insn is an
22287 unconditional branch to a label, and immediately following this branch
22288 is the previous target label which is only used once, and the label this
22289 branch jumps to is not too far off. */
22290 if (arm_ccfsm_state == 3)
22291 {
22292 if (simplejump_p (insn))
22293 {
22294 start_insn = next_nonnote_insn (start_insn);
22295 if (BARRIER_P (start_insn))
22296 {
22297 /* XXX Isn't this always a barrier? */
22298 start_insn = next_nonnote_insn (start_insn);
22299 }
22300 if (LABEL_P (start_insn)
22301 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22302 && LABEL_NUSES (start_insn) == 1)
22303 reverse = TRUE;
22304 else
22305 return;
22306 }
22307 else if (ANY_RETURN_P (body))
22308 {
22309 start_insn = next_nonnote_insn (start_insn);
22310 if (BARRIER_P (start_insn))
22311 start_insn = next_nonnote_insn (start_insn);
22312 if (LABEL_P (start_insn)
22313 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22314 && LABEL_NUSES (start_insn) == 1)
22315 {
22316 reverse = TRUE;
22317 seeking_return = 1;
22318 return_code = GET_CODE (body);
22319 }
22320 else
22321 return;
22322 }
22323 else
22324 return;
22325 }
22326
22327 gcc_assert (!arm_ccfsm_state || reverse);
22328 if (!JUMP_P (insn))
22329 return;
22330
22331 /* This jump might be paralleled with a clobber of the condition codes;
22332 the jump should always come first. */
22333 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22334 body = XVECEXP (body, 0, 0);
22335
22336 if (reverse
22337 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22338 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22339 {
22340 int insns_skipped;
22341 int fail = FALSE, succeed = FALSE;
22342 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22343 int then_not_else = TRUE;
22344 rtx this_insn = start_insn, label = 0;
22345
22346 /* Register the insn jumped to. */
22347 if (reverse)
22348 {
22349 if (!seeking_return)
22350 label = XEXP (SET_SRC (body), 0);
22351 }
22352 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22353 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22354 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22355 {
22356 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22357 then_not_else = FALSE;
22358 }
22359 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22360 {
22361 seeking_return = 1;
22362 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22363 }
22364 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22365 {
22366 seeking_return = 1;
22367 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22368 then_not_else = FALSE;
22369 }
22370 else
22371 gcc_unreachable ();
22372
22373 /* See how many insns this branch skips, and what kind of insns. If all
22374 insns are okay, and the label or unconditional branch to the same
22375 label is not too far away, succeed. */
22376 for (insns_skipped = 0;
22377 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22378 {
22379 rtx scanbody;
22380
22381 this_insn = next_nonnote_insn (this_insn);
22382 if (!this_insn)
22383 break;
22384
22385 switch (GET_CODE (this_insn))
22386 {
22387 case CODE_LABEL:
22388 /* Succeed if it is the target label, otherwise fail since
22389 control falls in from somewhere else. */
22390 if (this_insn == label)
22391 {
22392 arm_ccfsm_state = 1;
22393 succeed = TRUE;
22394 }
22395 else
22396 fail = TRUE;
22397 break;
22398
22399 case BARRIER:
22400 /* Succeed if the following insn is the target label.
22401 Otherwise fail.
22402 If return insns are used then the last insn in a function
22403 will be a barrier. */
22404 this_insn = next_nonnote_insn (this_insn);
22405 if (this_insn && this_insn == label)
22406 {
22407 arm_ccfsm_state = 1;
22408 succeed = TRUE;
22409 }
22410 else
22411 fail = TRUE;
22412 break;
22413
22414 case CALL_INSN:
22415 /* The AAPCS says that conditional calls should not be
22416 used since they make interworking inefficient (the
22417 linker can't transform BL<cond> into BLX). That's
22418 only a problem if the machine has BLX. */
22419 if (arm_arch5)
22420 {
22421 fail = TRUE;
22422 break;
22423 }
22424
22425 /* Succeed if the following insn is the target label, or
22426 if the following two insns are a barrier and the
22427 target label. */
22428 this_insn = next_nonnote_insn (this_insn);
22429 if (this_insn && BARRIER_P (this_insn))
22430 this_insn = next_nonnote_insn (this_insn);
22431
22432 if (this_insn && this_insn == label
22433 && insns_skipped < max_insns_skipped)
22434 {
22435 arm_ccfsm_state = 1;
22436 succeed = TRUE;
22437 }
22438 else
22439 fail = TRUE;
22440 break;
22441
22442 case JUMP_INSN:
22443 /* If this is an unconditional branch to the same label, succeed.
22444 If it is to another label, do nothing. If it is conditional,
22445 fail. */
22446 /* XXX Probably, the tests for SET and the PC are
22447 unnecessary. */
22448
22449 scanbody = PATTERN (this_insn);
22450 if (GET_CODE (scanbody) == SET
22451 && GET_CODE (SET_DEST (scanbody)) == PC)
22452 {
22453 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22454 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22455 {
22456 arm_ccfsm_state = 2;
22457 succeed = TRUE;
22458 }
22459 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22460 fail = TRUE;
22461 }
22462 /* Fail if a conditional return is undesirable (e.g. on a
22463 StrongARM), but still allow this if optimizing for size. */
22464 else if (GET_CODE (scanbody) == return_code
22465 && !use_return_insn (TRUE, NULL)
22466 && !optimize_size)
22467 fail = TRUE;
22468 else if (GET_CODE (scanbody) == return_code)
22469 {
22470 arm_ccfsm_state = 2;
22471 succeed = TRUE;
22472 }
22473 else if (GET_CODE (scanbody) == PARALLEL)
22474 {
22475 switch (get_attr_conds (this_insn))
22476 {
22477 case CONDS_NOCOND:
22478 break;
22479 default:
22480 fail = TRUE;
22481 break;
22482 }
22483 }
22484 else
22485 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22486
22487 break;
22488
22489 case INSN:
22490 /* Instructions using or affecting the condition codes make it
22491 fail. */
22492 scanbody = PATTERN (this_insn);
22493 if (!(GET_CODE (scanbody) == SET
22494 || GET_CODE (scanbody) == PARALLEL)
22495 || get_attr_conds (this_insn) != CONDS_NOCOND)
22496 fail = TRUE;
22497 break;
22498
22499 default:
22500 break;
22501 }
22502 }
22503 if (succeed)
22504 {
22505 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22506 arm_target_label = CODE_LABEL_NUMBER (label);
22507 else
22508 {
22509 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22510
22511 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22512 {
22513 this_insn = next_nonnote_insn (this_insn);
22514 gcc_assert (!this_insn
22515 || (!BARRIER_P (this_insn)
22516 && !LABEL_P (this_insn)));
22517 }
22518 if (!this_insn)
22519 {
22520 /* Oh, dear! We ran off the end; give up. */
22521 extract_constrain_insn_cached (insn);
22522 arm_ccfsm_state = 0;
22523 arm_target_insn = NULL;
22524 return;
22525 }
22526 arm_target_insn = this_insn;
22527 }
22528
22529 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22530 what it was. */
22531 if (!reverse)
22532 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22533
22534 if (reverse || then_not_else)
22535 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22536 }
22537
22538 /* Restore recog_data (getting the attributes of other insns can
22539 destroy this array, but final.c assumes that it remains intact
22540 across this call). */
22541 extract_constrain_insn_cached (insn);
22542 }
22543 }
22544
22545 /* Output IT instructions. */
22546 void
22547 thumb2_asm_output_opcode (FILE * stream)
22548 {
22549 char buff[5];
22550 int n;
22551
22552 if (arm_condexec_mask)
22553 {
22554 for (n = 0; n < arm_condexec_masklen; n++)
22555 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22556 buff[n] = 0;
22557 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22558 arm_condition_codes[arm_current_cc]);
22559 arm_condexec_mask = 0;
22560 }
22561 }
22562
22563 /* Returns true if REGNO is a valid register
22564 for holding a quantity of type MODE. */
22565 int
22566 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22567 {
22568 if (GET_MODE_CLASS (mode) == MODE_CC)
22569 return (regno == CC_REGNUM
22570 || (TARGET_HARD_FLOAT && TARGET_VFP
22571 && regno == VFPCC_REGNUM));
22572
22573 if (TARGET_THUMB1)
22574 /* For the Thumb we only allow values bigger than SImode in
22575 registers 0 - 6, so that there is always a second low
22576 register available to hold the upper part of the value.
22577 We probably ought to ensure that the register is the
22578 start of an even numbered register pair. */
22579 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22580
22581 if (TARGET_HARD_FLOAT && TARGET_VFP
22582 && IS_VFP_REGNUM (regno))
22583 {
22584 if (mode == SFmode || mode == SImode)
22585 return VFP_REGNO_OK_FOR_SINGLE (regno);
22586
22587 if (mode == DFmode)
22588 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22589
22590 /* VFP registers can hold HFmode values, but there is no point in
22591 putting them there unless we have hardware conversion insns. */
22592 if (mode == HFmode)
22593 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22594
22595 if (TARGET_NEON)
22596 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22597 || (VALID_NEON_QREG_MODE (mode)
22598 && NEON_REGNO_OK_FOR_QUAD (regno))
22599 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22600 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22601 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22602 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22603 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22604
22605 return FALSE;
22606 }
22607
22608 if (TARGET_REALLY_IWMMXT)
22609 {
22610 if (IS_IWMMXT_GR_REGNUM (regno))
22611 return mode == SImode;
22612
22613 if (IS_IWMMXT_REGNUM (regno))
22614 return VALID_IWMMXT_REG_MODE (mode);
22615 }
22616
22617 /* We allow almost any value to be stored in the general registers.
22618 Restrict doubleword quantities to even register pairs so that we can
22619 use ldrd. Do not allow very large Neon structure opaque modes in
22620 general registers; they would use too many. */
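/* For instance (illustration only), with TARGET_LDRD a DImode value may be
   placed in {r0,r1} but not in {r1,r2}, because the latter pair starts at an
   odd register number and so cannot be accessed with ldrd/strd.  */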
22621 if (regno <= LAST_ARM_REGNUM)
22622 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
22623 && ARM_NUM_REGS (mode) <= 4;
22624
22625 if (regno == FRAME_POINTER_REGNUM
22626 || regno == ARG_POINTER_REGNUM)
22627 /* We only allow integers in the fake hard registers. */
22628 return GET_MODE_CLASS (mode) == MODE_INT;
22629
22630 return FALSE;
22631 }
22632
22633 /* Implement MODES_TIEABLE_P. */
22634
22635 bool
22636 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22637 {
22638 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22639 return true;
22640
22641 /* We specifically want to allow elements of "structure" modes to
22642 be tieable to the structure. This more general condition allows
22643 other rarer situations too. */
22644 if (TARGET_NEON
22645 && (VALID_NEON_DREG_MODE (mode1)
22646 || VALID_NEON_QREG_MODE (mode1)
22647 || VALID_NEON_STRUCT_MODE (mode1))
22648 && (VALID_NEON_DREG_MODE (mode2)
22649 || VALID_NEON_QREG_MODE (mode2)
22650 || VALID_NEON_STRUCT_MODE (mode2)))
22651 return true;
22652
22653 return false;
22654 }
22655
22656 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
22657 not used in ARM mode. */
22658
22659 enum reg_class
22660 arm_regno_class (int regno)
22661 {
22662 if (TARGET_THUMB1)
22663 {
22664 if (regno == STACK_POINTER_REGNUM)
22665 return STACK_REG;
22666 if (regno == CC_REGNUM)
22667 return CC_REG;
22668 if (regno < 8)
22669 return LO_REGS;
22670 return HI_REGS;
22671 }
22672
22673 if (TARGET_THUMB2 && regno < 8)
22674 return LO_REGS;
22675
22676 if ( regno <= LAST_ARM_REGNUM
22677 || regno == FRAME_POINTER_REGNUM
22678 || regno == ARG_POINTER_REGNUM)
22679 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22680
22681 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22682 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22683
22684 if (IS_VFP_REGNUM (regno))
22685 {
22686 if (regno <= D7_VFP_REGNUM)
22687 return VFP_D0_D7_REGS;
22688 else if (regno <= LAST_LO_VFP_REGNUM)
22689 return VFP_LO_REGS;
22690 else
22691 return VFP_HI_REGS;
22692 }
22693
22694 if (IS_IWMMXT_REGNUM (regno))
22695 return IWMMXT_REGS;
22696
22697 if (IS_IWMMXT_GR_REGNUM (regno))
22698 return IWMMXT_GR_REGS;
22699
22700 return NO_REGS;
22701 }
22702
22703 /* Handle a special case when computing the offset
22704 of an argument from the frame pointer. */
22705 int
22706 arm_debugger_arg_offset (int value, rtx addr)
22707 {
22708 rtx insn;
22709
22710 /* We are only interested if dbxout_parms() failed to compute the offset. */
22711 if (value != 0)
22712 return 0;
22713
22714 /* We can only cope with the case where the address is held in a register. */
22715 if (!REG_P (addr))
22716 return 0;
22717
22718 /* If we are using the frame pointer to point at the argument, then
22719 an offset of 0 is correct. */
22720 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22721 return 0;
22722
22723 /* If we are using the stack pointer to point at the
22724 argument, then an offset of 0 is correct. */
22725 /* ??? Check this is consistent with thumb2 frame layout. */
22726 if ((TARGET_THUMB || !frame_pointer_needed)
22727 && REGNO (addr) == SP_REGNUM)
22728 return 0;
22729
22730 /* Oh dear. The argument is pointed to by a register rather
22731 than being held in a register, or being stored at a known
22732 offset from the frame pointer. Since GDB only understands
22733 those two kinds of argument we must translate the address
22734 held in the register into an offset from the frame pointer.
22735 We do this by searching through the insns for the function
22736 looking to see where this register gets its value. If the
22737 register is initialized from the frame pointer plus an offset
22738 then we are in luck and we can continue, otherwise we give up.
22739
22740 This code is exercised by producing debugging information
22741 for a function with arguments like this:
22742
22743 double func (double a, double b, int c, double d) {return d;}
22744
22745 Without this code the stab for parameter 'd' will be set to
22746 an offset of 0 from the frame pointer, rather than 8. */
22747
22748 /* The if() statement says:
22749
22750 If the insn is a normal instruction
22751 and if the insn is setting the value in a register
22752 and if the register being set is the register holding the address of the argument
22753 and if the address is computed by an addition
22754 that involves adding to a register
22755 which is the frame pointer
22756 a constant integer
22757
22758 then... */
22759
22760 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22761 {
22762 if ( NONJUMP_INSN_P (insn)
22763 && GET_CODE (PATTERN (insn)) == SET
22764 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
22765 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
22766 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
22767 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22768 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
22769 )
22770 {
22771 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
22772
22773 break;
22774 }
22775 }
22776
22777 if (value == 0)
22778 {
22779 debug_rtx (addr);
22780 warning (0, "unable to compute real location of stacked parameter");
22781 value = 8; /* XXX magic hack */
22782 }
22783
22784 return value;
22785 }
22786 \f
22787 typedef enum {
22788 T_V8QI,
22789 T_V4HI,
22790 T_V4HF,
22791 T_V2SI,
22792 T_V2SF,
22793 T_DI,
22794 T_V16QI,
22795 T_V8HI,
22796 T_V4SI,
22797 T_V4SF,
22798 T_V2DI,
22799 T_TI,
22800 T_EI,
22801 T_OI,
22802 T_MAX /* Size of enum. Keep last. */
22803 } neon_builtin_type_mode;
22804
22805 #define TYPE_MODE_BIT(X) (1 << (X))
22806
22807 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
22808 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
22809 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
22810 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
22811 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
22812 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
22813
22814 #define v8qi_UP T_V8QI
22815 #define v4hi_UP T_V4HI
22816 #define v4hf_UP T_V4HF
22817 #define v2si_UP T_V2SI
22818 #define v2sf_UP T_V2SF
22819 #define di_UP T_DI
22820 #define v16qi_UP T_V16QI
22821 #define v8hi_UP T_V8HI
22822 #define v4si_UP T_V4SI
22823 #define v4sf_UP T_V4SF
22824 #define v2di_UP T_V2DI
22825 #define ti_UP T_TI
22826 #define ei_UP T_EI
22827 #define oi_UP T_OI
22828
22829 #define UP(X) X##_UP
22830
22831 typedef enum {
22832 NEON_BINOP,
22833 NEON_TERNOP,
22834 NEON_UNOP,
22835 NEON_GETLANE,
22836 NEON_SETLANE,
22837 NEON_CREATE,
22838 NEON_RINT,
22839 NEON_DUP,
22840 NEON_DUPLANE,
22841 NEON_COMBINE,
22842 NEON_SPLIT,
22843 NEON_LANEMUL,
22844 NEON_LANEMULL,
22845 NEON_LANEMULH,
22846 NEON_LANEMAC,
22847 NEON_SCALARMUL,
22848 NEON_SCALARMULL,
22849 NEON_SCALARMULH,
22850 NEON_SCALARMAC,
22851 NEON_CONVERT,
22852 NEON_FLOAT_WIDEN,
22853 NEON_FLOAT_NARROW,
22854 NEON_FIXCONV,
22855 NEON_SELECT,
22856 NEON_RESULTPAIR,
22857 NEON_REINTERP,
22858 NEON_VTBL,
22859 NEON_VTBX,
22860 NEON_LOAD1,
22861 NEON_LOAD1LANE,
22862 NEON_STORE1,
22863 NEON_STORE1LANE,
22864 NEON_LOADSTRUCT,
22865 NEON_LOADSTRUCTLANE,
22866 NEON_STORESTRUCT,
22867 NEON_STORESTRUCTLANE,
22868 NEON_LOGICBINOP,
22869 NEON_SHIFTINSERT,
22870 NEON_SHIFTIMM,
22871 NEON_SHIFTACC
22872 } neon_itype;
22873
22874 typedef struct {
22875 const char *name;
22876 const neon_itype itype;
22877 const neon_builtin_type_mode mode;
22878 const enum insn_code code;
22879 unsigned int fcode;
22880 } neon_builtin_datum;
22881
22882 #define CF(N,X) CODE_FOR_neon_##N##X
22883
22884 #define VAR1(T, N, A) \
22885 {#N, NEON_##T, UP (A), CF (N, A), 0}
22886 #define VAR2(T, N, A, B) \
22887 VAR1 (T, N, A), \
22888 {#N, NEON_##T, UP (B), CF (N, B), 0}
22889 #define VAR3(T, N, A, B, C) \
22890 VAR2 (T, N, A, B), \
22891 {#N, NEON_##T, UP (C), CF (N, C), 0}
22892 #define VAR4(T, N, A, B, C, D) \
22893 VAR3 (T, N, A, B, C), \
22894 {#N, NEON_##T, UP (D), CF (N, D), 0}
22895 #define VAR5(T, N, A, B, C, D, E) \
22896 VAR4 (T, N, A, B, C, D), \
22897 {#N, NEON_##T, UP (E), CF (N, E), 0}
22898 #define VAR6(T, N, A, B, C, D, E, F) \
22899 VAR5 (T, N, A, B, C, D, E), \
22900 {#N, NEON_##T, UP (F), CF (N, F), 0}
22901 #define VAR7(T, N, A, B, C, D, E, F, G) \
22902 VAR6 (T, N, A, B, C, D, E, F), \
22903 {#N, NEON_##T, UP (G), CF (N, G), 0}
22904 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22905 VAR7 (T, N, A, B, C, D, E, F, G), \
22906 {#N, NEON_##T, UP (H), CF (N, H), 0}
22907 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22908 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22909 {#N, NEON_##T, UP (I), CF (N, I), 0}
22910 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22911 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22912 {#N, NEON_##T, UP (J), CF (N, J), 0}
22913
22914 /* The NEON builtin data can be found in arm_neon_builtins.def.
22915 The mode entries in the following table correspond to the "key" type of the
22916 instruction variant, i.e. equivalent to that which would be specified after
22917 the assembler mnemonic, which usually refers to the last vector operand.
22918 (Signed/unsigned/polynomial types are not differentiated, though; they are
22919 all mapped onto the same mode for a given element size.) The modes
22920 listed per instruction should be the same as those defined for that
22921 instruction's pattern in neon.md. */
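
/* As an illustration of the table layout (using a hypothetical entry, not
   necessarily one present in arm_neon_builtins.def):

	VAR2 (BINOP, vadd, v8qi, v16qi)

   would expand via the VARn/UP/CF macros above into two neon_builtin_datum
   records:

	{ "vadd", NEON_BINOP, T_V8QI,  CODE_FOR_neon_vaddv8qi,  0 },
	{ "vadd", NEON_BINOP, T_V16QI, CODE_FOR_neon_vaddv16qi, 0 }

   with the fcode field filled in later by arm_init_neon_builtins.  */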
22922
22923 static neon_builtin_datum neon_builtin_data[] =
22924 {
22925 #include "arm_neon_builtins.def"
22926 };
22927
22928 #undef CF
22929 #undef VAR1
22930 #undef VAR2
22931 #undef VAR3
22932 #undef VAR4
22933 #undef VAR5
22934 #undef VAR6
22935 #undef VAR7
22936 #undef VAR8
22937 #undef VAR9
22938 #undef VAR10
22939
22940 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
22941 #define VAR1(T, N, A) \
22942 CF (N, A)
22943 #define VAR2(T, N, A, B) \
22944 VAR1 (T, N, A), \
22945 CF (N, B)
22946 #define VAR3(T, N, A, B, C) \
22947 VAR2 (T, N, A, B), \
22948 CF (N, C)
22949 #define VAR4(T, N, A, B, C, D) \
22950 VAR3 (T, N, A, B, C), \
22951 CF (N, D)
22952 #define VAR5(T, N, A, B, C, D, E) \
22953 VAR4 (T, N, A, B, C, D), \
22954 CF (N, E)
22955 #define VAR6(T, N, A, B, C, D, E, F) \
22956 VAR5 (T, N, A, B, C, D, E), \
22957 CF (N, F)
22958 #define VAR7(T, N, A, B, C, D, E, F, G) \
22959 VAR6 (T, N, A, B, C, D, E, F), \
22960 CF (N, G)
22961 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22962 VAR7 (T, N, A, B, C, D, E, F, G), \
22963 CF (N, H)
22964 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22965 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22966 CF (N, I)
22967 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22968 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22969 CF (N, J)
22970 enum arm_builtins
22971 {
22972 ARM_BUILTIN_GETWCGR0,
22973 ARM_BUILTIN_GETWCGR1,
22974 ARM_BUILTIN_GETWCGR2,
22975 ARM_BUILTIN_GETWCGR3,
22976
22977 ARM_BUILTIN_SETWCGR0,
22978 ARM_BUILTIN_SETWCGR1,
22979 ARM_BUILTIN_SETWCGR2,
22980 ARM_BUILTIN_SETWCGR3,
22981
22982 ARM_BUILTIN_WZERO,
22983
22984 ARM_BUILTIN_WAVG2BR,
22985 ARM_BUILTIN_WAVG2HR,
22986 ARM_BUILTIN_WAVG2B,
22987 ARM_BUILTIN_WAVG2H,
22988
22989 ARM_BUILTIN_WACCB,
22990 ARM_BUILTIN_WACCH,
22991 ARM_BUILTIN_WACCW,
22992
22993 ARM_BUILTIN_WMACS,
22994 ARM_BUILTIN_WMACSZ,
22995 ARM_BUILTIN_WMACU,
22996 ARM_BUILTIN_WMACUZ,
22997
22998 ARM_BUILTIN_WSADB,
22999 ARM_BUILTIN_WSADBZ,
23000 ARM_BUILTIN_WSADH,
23001 ARM_BUILTIN_WSADHZ,
23002
23003 ARM_BUILTIN_WALIGNI,
23004 ARM_BUILTIN_WALIGNR0,
23005 ARM_BUILTIN_WALIGNR1,
23006 ARM_BUILTIN_WALIGNR2,
23007 ARM_BUILTIN_WALIGNR3,
23008
23009 ARM_BUILTIN_TMIA,
23010 ARM_BUILTIN_TMIAPH,
23011 ARM_BUILTIN_TMIABB,
23012 ARM_BUILTIN_TMIABT,
23013 ARM_BUILTIN_TMIATB,
23014 ARM_BUILTIN_TMIATT,
23015
23016 ARM_BUILTIN_TMOVMSKB,
23017 ARM_BUILTIN_TMOVMSKH,
23018 ARM_BUILTIN_TMOVMSKW,
23019
23020 ARM_BUILTIN_TBCSTB,
23021 ARM_BUILTIN_TBCSTH,
23022 ARM_BUILTIN_TBCSTW,
23023
23024 ARM_BUILTIN_WMADDS,
23025 ARM_BUILTIN_WMADDU,
23026
23027 ARM_BUILTIN_WPACKHSS,
23028 ARM_BUILTIN_WPACKWSS,
23029 ARM_BUILTIN_WPACKDSS,
23030 ARM_BUILTIN_WPACKHUS,
23031 ARM_BUILTIN_WPACKWUS,
23032 ARM_BUILTIN_WPACKDUS,
23033
23034 ARM_BUILTIN_WADDB,
23035 ARM_BUILTIN_WADDH,
23036 ARM_BUILTIN_WADDW,
23037 ARM_BUILTIN_WADDSSB,
23038 ARM_BUILTIN_WADDSSH,
23039 ARM_BUILTIN_WADDSSW,
23040 ARM_BUILTIN_WADDUSB,
23041 ARM_BUILTIN_WADDUSH,
23042 ARM_BUILTIN_WADDUSW,
23043 ARM_BUILTIN_WSUBB,
23044 ARM_BUILTIN_WSUBH,
23045 ARM_BUILTIN_WSUBW,
23046 ARM_BUILTIN_WSUBSSB,
23047 ARM_BUILTIN_WSUBSSH,
23048 ARM_BUILTIN_WSUBSSW,
23049 ARM_BUILTIN_WSUBUSB,
23050 ARM_BUILTIN_WSUBUSH,
23051 ARM_BUILTIN_WSUBUSW,
23052
23053 ARM_BUILTIN_WAND,
23054 ARM_BUILTIN_WANDN,
23055 ARM_BUILTIN_WOR,
23056 ARM_BUILTIN_WXOR,
23057
23058 ARM_BUILTIN_WCMPEQB,
23059 ARM_BUILTIN_WCMPEQH,
23060 ARM_BUILTIN_WCMPEQW,
23061 ARM_BUILTIN_WCMPGTUB,
23062 ARM_BUILTIN_WCMPGTUH,
23063 ARM_BUILTIN_WCMPGTUW,
23064 ARM_BUILTIN_WCMPGTSB,
23065 ARM_BUILTIN_WCMPGTSH,
23066 ARM_BUILTIN_WCMPGTSW,
23067
23068 ARM_BUILTIN_TEXTRMSB,
23069 ARM_BUILTIN_TEXTRMSH,
23070 ARM_BUILTIN_TEXTRMSW,
23071 ARM_BUILTIN_TEXTRMUB,
23072 ARM_BUILTIN_TEXTRMUH,
23073 ARM_BUILTIN_TEXTRMUW,
23074 ARM_BUILTIN_TINSRB,
23075 ARM_BUILTIN_TINSRH,
23076 ARM_BUILTIN_TINSRW,
23077
23078 ARM_BUILTIN_WMAXSW,
23079 ARM_BUILTIN_WMAXSH,
23080 ARM_BUILTIN_WMAXSB,
23081 ARM_BUILTIN_WMAXUW,
23082 ARM_BUILTIN_WMAXUH,
23083 ARM_BUILTIN_WMAXUB,
23084 ARM_BUILTIN_WMINSW,
23085 ARM_BUILTIN_WMINSH,
23086 ARM_BUILTIN_WMINSB,
23087 ARM_BUILTIN_WMINUW,
23088 ARM_BUILTIN_WMINUH,
23089 ARM_BUILTIN_WMINUB,
23090
23091 ARM_BUILTIN_WMULUM,
23092 ARM_BUILTIN_WMULSM,
23093 ARM_BUILTIN_WMULUL,
23094
23095 ARM_BUILTIN_PSADBH,
23096 ARM_BUILTIN_WSHUFH,
23097
23098 ARM_BUILTIN_WSLLH,
23099 ARM_BUILTIN_WSLLW,
23100 ARM_BUILTIN_WSLLD,
23101 ARM_BUILTIN_WSRAH,
23102 ARM_BUILTIN_WSRAW,
23103 ARM_BUILTIN_WSRAD,
23104 ARM_BUILTIN_WSRLH,
23105 ARM_BUILTIN_WSRLW,
23106 ARM_BUILTIN_WSRLD,
23107 ARM_BUILTIN_WRORH,
23108 ARM_BUILTIN_WRORW,
23109 ARM_BUILTIN_WRORD,
23110 ARM_BUILTIN_WSLLHI,
23111 ARM_BUILTIN_WSLLWI,
23112 ARM_BUILTIN_WSLLDI,
23113 ARM_BUILTIN_WSRAHI,
23114 ARM_BUILTIN_WSRAWI,
23115 ARM_BUILTIN_WSRADI,
23116 ARM_BUILTIN_WSRLHI,
23117 ARM_BUILTIN_WSRLWI,
23118 ARM_BUILTIN_WSRLDI,
23119 ARM_BUILTIN_WRORHI,
23120 ARM_BUILTIN_WRORWI,
23121 ARM_BUILTIN_WRORDI,
23122
23123 ARM_BUILTIN_WUNPCKIHB,
23124 ARM_BUILTIN_WUNPCKIHH,
23125 ARM_BUILTIN_WUNPCKIHW,
23126 ARM_BUILTIN_WUNPCKILB,
23127 ARM_BUILTIN_WUNPCKILH,
23128 ARM_BUILTIN_WUNPCKILW,
23129
23130 ARM_BUILTIN_WUNPCKEHSB,
23131 ARM_BUILTIN_WUNPCKEHSH,
23132 ARM_BUILTIN_WUNPCKEHSW,
23133 ARM_BUILTIN_WUNPCKEHUB,
23134 ARM_BUILTIN_WUNPCKEHUH,
23135 ARM_BUILTIN_WUNPCKEHUW,
23136 ARM_BUILTIN_WUNPCKELSB,
23137 ARM_BUILTIN_WUNPCKELSH,
23138 ARM_BUILTIN_WUNPCKELSW,
23139 ARM_BUILTIN_WUNPCKELUB,
23140 ARM_BUILTIN_WUNPCKELUH,
23141 ARM_BUILTIN_WUNPCKELUW,
23142
23143 ARM_BUILTIN_WABSB,
23144 ARM_BUILTIN_WABSH,
23145 ARM_BUILTIN_WABSW,
23146
23147 ARM_BUILTIN_WADDSUBHX,
23148 ARM_BUILTIN_WSUBADDHX,
23149
23150 ARM_BUILTIN_WABSDIFFB,
23151 ARM_BUILTIN_WABSDIFFH,
23152 ARM_BUILTIN_WABSDIFFW,
23153
23154 ARM_BUILTIN_WADDCH,
23155 ARM_BUILTIN_WADDCW,
23156
23157 ARM_BUILTIN_WAVG4,
23158 ARM_BUILTIN_WAVG4R,
23159
23160 ARM_BUILTIN_WMADDSX,
23161 ARM_BUILTIN_WMADDUX,
23162
23163 ARM_BUILTIN_WMADDSN,
23164 ARM_BUILTIN_WMADDUN,
23165
23166 ARM_BUILTIN_WMULWSM,
23167 ARM_BUILTIN_WMULWUM,
23168
23169 ARM_BUILTIN_WMULWSMR,
23170 ARM_BUILTIN_WMULWUMR,
23171
23172 ARM_BUILTIN_WMULWL,
23173
23174 ARM_BUILTIN_WMULSMR,
23175 ARM_BUILTIN_WMULUMR,
23176
23177 ARM_BUILTIN_WQMULM,
23178 ARM_BUILTIN_WQMULMR,
23179
23180 ARM_BUILTIN_WQMULWM,
23181 ARM_BUILTIN_WQMULWMR,
23182
23183 ARM_BUILTIN_WADDBHUSM,
23184 ARM_BUILTIN_WADDBHUSL,
23185
23186 ARM_BUILTIN_WQMIABB,
23187 ARM_BUILTIN_WQMIABT,
23188 ARM_BUILTIN_WQMIATB,
23189 ARM_BUILTIN_WQMIATT,
23190
23191 ARM_BUILTIN_WQMIABBN,
23192 ARM_BUILTIN_WQMIABTN,
23193 ARM_BUILTIN_WQMIATBN,
23194 ARM_BUILTIN_WQMIATTN,
23195
23196 ARM_BUILTIN_WMIABB,
23197 ARM_BUILTIN_WMIABT,
23198 ARM_BUILTIN_WMIATB,
23199 ARM_BUILTIN_WMIATT,
23200
23201 ARM_BUILTIN_WMIABBN,
23202 ARM_BUILTIN_WMIABTN,
23203 ARM_BUILTIN_WMIATBN,
23204 ARM_BUILTIN_WMIATTN,
23205
23206 ARM_BUILTIN_WMIAWBB,
23207 ARM_BUILTIN_WMIAWBT,
23208 ARM_BUILTIN_WMIAWTB,
23209 ARM_BUILTIN_WMIAWTT,
23210
23211 ARM_BUILTIN_WMIAWBBN,
23212 ARM_BUILTIN_WMIAWBTN,
23213 ARM_BUILTIN_WMIAWTBN,
23214 ARM_BUILTIN_WMIAWTTN,
23215
23216 ARM_BUILTIN_WMERGE,
23217
23218 ARM_BUILTIN_CRC32B,
23219 ARM_BUILTIN_CRC32H,
23220 ARM_BUILTIN_CRC32W,
23221 ARM_BUILTIN_CRC32CB,
23222 ARM_BUILTIN_CRC32CH,
23223 ARM_BUILTIN_CRC32CW,
23224
23225 #undef CRYPTO1
23226 #undef CRYPTO2
23227 #undef CRYPTO3
23228
23229 #define CRYPTO1(L, U, M1, M2) \
23230 ARM_BUILTIN_CRYPTO_##U,
23231 #define CRYPTO2(L, U, M1, M2, M3) \
23232 ARM_BUILTIN_CRYPTO_##U,
23233 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23234 ARM_BUILTIN_CRYPTO_##U,
23235
23236 #include "crypto.def"
23237
23238 #undef CRYPTO1
23239 #undef CRYPTO2
23240 #undef CRYPTO3
23241
23242 #include "arm_neon_builtins.def"
23243
23244 ,ARM_BUILTIN_MAX
23245 };
23246
23247 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23248
23249 #undef CF
23250 #undef VAR1
23251 #undef VAR2
23252 #undef VAR3
23253 #undef VAR4
23254 #undef VAR5
23255 #undef VAR6
23256 #undef VAR7
23257 #undef VAR8
23258 #undef VAR9
23259 #undef VAR10
23260
23261 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23262
23263 #define NUM_DREG_TYPES 5
23264 #define NUM_QREG_TYPES 6
23265
23266 static void
23267 arm_init_neon_builtins (void)
23268 {
23269 unsigned int i, fcode;
23270 tree decl;
23271
23272 tree neon_intQI_type_node;
23273 tree neon_intHI_type_node;
23274 tree neon_floatHF_type_node;
23275 tree neon_polyQI_type_node;
23276 tree neon_polyHI_type_node;
23277 tree neon_intSI_type_node;
23278 tree neon_intDI_type_node;
23279 tree neon_intUTI_type_node;
23280 tree neon_float_type_node;
23281
23282 tree intQI_pointer_node;
23283 tree intHI_pointer_node;
23284 tree intSI_pointer_node;
23285 tree intDI_pointer_node;
23286 tree float_pointer_node;
23287
23288 tree const_intQI_node;
23289 tree const_intHI_node;
23290 tree const_intSI_node;
23291 tree const_intDI_node;
23292 tree const_float_node;
23293
23294 tree const_intQI_pointer_node;
23295 tree const_intHI_pointer_node;
23296 tree const_intSI_pointer_node;
23297 tree const_intDI_pointer_node;
23298 tree const_float_pointer_node;
23299
23300 tree V8QI_type_node;
23301 tree V4HI_type_node;
23302 tree V4HF_type_node;
23303 tree V2SI_type_node;
23304 tree V2SF_type_node;
23305 tree V16QI_type_node;
23306 tree V8HI_type_node;
23307 tree V4SI_type_node;
23308 tree V4SF_type_node;
23309 tree V2DI_type_node;
23310
23311 tree intUQI_type_node;
23312 tree intUHI_type_node;
23313 tree intUSI_type_node;
23314 tree intUDI_type_node;
23315
23316 tree intEI_type_node;
23317 tree intOI_type_node;
23318 tree intCI_type_node;
23319 tree intXI_type_node;
23320
23321 tree V8QI_pointer_node;
23322 tree V4HI_pointer_node;
23323 tree V2SI_pointer_node;
23324 tree V2SF_pointer_node;
23325 tree V16QI_pointer_node;
23326 tree V8HI_pointer_node;
23327 tree V4SI_pointer_node;
23328 tree V4SF_pointer_node;
23329 tree V2DI_pointer_node;
23330
23331 tree void_ftype_pv8qi_v8qi_v8qi;
23332 tree void_ftype_pv4hi_v4hi_v4hi;
23333 tree void_ftype_pv2si_v2si_v2si;
23334 tree void_ftype_pv2sf_v2sf_v2sf;
23335 tree void_ftype_pdi_di_di;
23336 tree void_ftype_pv16qi_v16qi_v16qi;
23337 tree void_ftype_pv8hi_v8hi_v8hi;
23338 tree void_ftype_pv4si_v4si_v4si;
23339 tree void_ftype_pv4sf_v4sf_v4sf;
23340 tree void_ftype_pv2di_v2di_v2di;
23341
23342 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23343 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23344 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23345
23346 /* Create distinguished type nodes for NEON vector element types,
23347 and pointers to values of such types, so we can detect them later. */
23348 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23349 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23350 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23351 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23352 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23353 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23354 neon_float_type_node = make_node (REAL_TYPE);
23355 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23356 layout_type (neon_float_type_node);
23357 neon_floatHF_type_node = make_node (REAL_TYPE);
23358 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23359 layout_type (neon_floatHF_type_node);
23360
23361 /* Define typedefs which exactly correspond to the modes we are basing vector
23362 types on. If you change these names you'll need to change
23363 the table used by arm_mangle_type too. */
23364 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23365 "__builtin_neon_qi");
23366 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23367 "__builtin_neon_hi");
23368 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23369 "__builtin_neon_hf");
23370 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23371 "__builtin_neon_si");
23372 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23373 "__builtin_neon_sf");
23374 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23375 "__builtin_neon_di");
23376 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23377 "__builtin_neon_poly8");
23378 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23379 "__builtin_neon_poly16");
23380
23381 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23382 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23383 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23384 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23385 float_pointer_node = build_pointer_type (neon_float_type_node);
23386
23387 /* Next create constant-qualified versions of the above types. */
23388 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23389 TYPE_QUAL_CONST);
23390 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23391 TYPE_QUAL_CONST);
23392 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23393 TYPE_QUAL_CONST);
23394 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23395 TYPE_QUAL_CONST);
23396 const_float_node = build_qualified_type (neon_float_type_node,
23397 TYPE_QUAL_CONST);
23398
23399 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23400 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23401 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23402 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23403 const_float_pointer_node = build_pointer_type (const_float_node);
23404
23405 /* Now create vector types based on our NEON element types. */
23406 /* 64-bit vectors. */
23407 V8QI_type_node =
23408 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23409 V4HI_type_node =
23410 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23411 V4HF_type_node =
23412 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23413 V2SI_type_node =
23414 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23415 V2SF_type_node =
23416 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23417 /* 128-bit vectors. */
23418 V16QI_type_node =
23419 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23420 V8HI_type_node =
23421 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23422 V4SI_type_node =
23423 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23424 V4SF_type_node =
23425 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23426 V2DI_type_node =
23427 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23428
23429 /* Unsigned integer types for various mode sizes. */
23430 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23431 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23432 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23433 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23434 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23435
23436
23437 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23438 "__builtin_neon_uqi");
23439 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23440 "__builtin_neon_uhi");
23441 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23442 "__builtin_neon_usi");
23443 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23444 "__builtin_neon_udi");
23445 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23446 "__builtin_neon_poly64");
23447 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23448 "__builtin_neon_poly128");
23449
23450 /* Opaque integer types for structures of vectors. */
23451 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23452 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23453 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23454 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23455
23456 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23457 "__builtin_neon_ti");
23458 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23459 "__builtin_neon_ei");
23460 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23461 "__builtin_neon_oi");
23462 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23463 "__builtin_neon_ci");
23464 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23465 "__builtin_neon_xi");
23466
23467 /* Pointers to vector types. */
23468 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
23469 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
23470 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
23471 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
23472 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
23473 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
23474 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
23475 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
23476 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
23477
23478 /* Operations which return results as pairs. */
23479 void_ftype_pv8qi_v8qi_v8qi =
23480 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
23481 V8QI_type_node, NULL);
23482 void_ftype_pv4hi_v4hi_v4hi =
23483 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
23484 V4HI_type_node, NULL);
23485 void_ftype_pv2si_v2si_v2si =
23486 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
23487 V2SI_type_node, NULL);
23488 void_ftype_pv2sf_v2sf_v2sf =
23489 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
23490 V2SF_type_node, NULL);
23491 void_ftype_pdi_di_di =
23492 build_function_type_list (void_type_node, intDI_pointer_node,
23493 neon_intDI_type_node, neon_intDI_type_node, NULL);
23494 void_ftype_pv16qi_v16qi_v16qi =
23495 build_function_type_list (void_type_node, V16QI_pointer_node,
23496 V16QI_type_node, V16QI_type_node, NULL);
23497 void_ftype_pv8hi_v8hi_v8hi =
23498 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
23499 V8HI_type_node, NULL);
23500 void_ftype_pv4si_v4si_v4si =
23501 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
23502 V4SI_type_node, NULL);
23503 void_ftype_pv4sf_v4sf_v4sf =
23504 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
23505 V4SF_type_node, NULL);
23506 void_ftype_pv2di_v2di_v2di =
23507 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
23508 V2DI_type_node, NULL);
23509
23510 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23511 {
23512 tree V4USI_type_node =
23513 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23514
23515 tree V16UQI_type_node =
23516 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23517
23518 tree v16uqi_ftype_v16uqi
23519 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23520
23521 tree v16uqi_ftype_v16uqi_v16uqi
23522 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23523 V16UQI_type_node, NULL_TREE);
23524
23525 tree v4usi_ftype_v4usi
23526 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23527
23528 tree v4usi_ftype_v4usi_v4usi
23529 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23530 V4USI_type_node, NULL_TREE);
23531
23532 tree v4usi_ftype_v4usi_v4usi_v4usi
23533 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23534 V4USI_type_node, V4USI_type_node, NULL_TREE);
23535
23536 tree uti_ftype_udi_udi
23537 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23538 intUDI_type_node, NULL_TREE);
23539
23540 #undef CRYPTO1
23541 #undef CRYPTO2
23542 #undef CRYPTO3
23543 #undef C
23544 #undef N
23545 #undef CF
23546 #undef FT1
23547 #undef FT2
23548 #undef FT3
23549
23550 #define C(U) \
23551 ARM_BUILTIN_CRYPTO_##U
23552 #define N(L) \
23553 "__builtin_arm_crypto_"#L
23554 #define FT1(R, A) \
23555 R##_ftype_##A
23556 #define FT2(R, A1, A2) \
23557 R##_ftype_##A1##_##A2
23558 #define FT3(R, A1, A2, A3) \
23559 R##_ftype_##A1##_##A2##_##A3
23560 #define CRYPTO1(L, U, R, A) \
23561 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23562 C (U), BUILT_IN_MD, \
23563 NULL, NULL_TREE);
23564 #define CRYPTO2(L, U, R, A1, A2) \
23565 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23566 C (U), BUILT_IN_MD, \
23567 NULL, NULL_TREE);
23568
23569 #define CRYPTO3(L, U, R, A1, A2, A3) \
23570 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23571 C (U), BUILT_IN_MD, \
23572 NULL, NULL_TREE);
23573 #include "crypto.def"
23574
23575 #undef CRYPTO1
23576 #undef CRYPTO2
23577 #undef CRYPTO3
23578 #undef C
23579 #undef N
23580 #undef FT1
23581 #undef FT2
23582 #undef FT3
23583 }
23584 dreg_types[0] = V8QI_type_node;
23585 dreg_types[1] = V4HI_type_node;
23586 dreg_types[2] = V2SI_type_node;
23587 dreg_types[3] = V2SF_type_node;
23588 dreg_types[4] = neon_intDI_type_node;
23589
23590 qreg_types[0] = V16QI_type_node;
23591 qreg_types[1] = V8HI_type_node;
23592 qreg_types[2] = V4SI_type_node;
23593 qreg_types[3] = V4SF_type_node;
23594 qreg_types[4] = V2DI_type_node;
23595 qreg_types[5] = neon_intUTI_type_node;
23596
23597 for (i = 0; i < NUM_QREG_TYPES; i++)
23598 {
23599 int j;
23600 for (j = 0; j < NUM_QREG_TYPES; j++)
23601 {
23602 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23603 reinterp_ftype_dreg[i][j]
23604 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23605
23606 reinterp_ftype_qreg[i][j]
23607 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23608 }
23609 }
23610
23611 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23612 i < ARRAY_SIZE (neon_builtin_data);
23613 i++, fcode++)
23614 {
23615 neon_builtin_datum *d = &neon_builtin_data[i];
23616
23617 const char* const modenames[] = {
23618 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23619 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23620 "ti", "ei", "oi"
23621 };
23622 char namebuf[60];
23623 tree ftype = NULL;
23624 int is_load = 0, is_store = 0;
23625
23626 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23627
23628 d->fcode = fcode;
23629
23630 switch (d->itype)
23631 {
23632 case NEON_LOAD1:
23633 case NEON_LOAD1LANE:
23634 case NEON_LOADSTRUCT:
23635 case NEON_LOADSTRUCTLANE:
23636 is_load = 1;
23637 /* Fall through. */
23638 case NEON_STORE1:
23639 case NEON_STORE1LANE:
23640 case NEON_STORESTRUCT:
23641 case NEON_STORESTRUCTLANE:
23642 if (!is_load)
23643 is_store = 1;
23644 /* Fall through. */
23645 case NEON_UNOP:
23646 case NEON_RINT:
23647 case NEON_BINOP:
23648 case NEON_LOGICBINOP:
23649 case NEON_SHIFTINSERT:
23650 case NEON_TERNOP:
23651 case NEON_GETLANE:
23652 case NEON_SETLANE:
23653 case NEON_CREATE:
23654 case NEON_DUP:
23655 case NEON_DUPLANE:
23656 case NEON_SHIFTIMM:
23657 case NEON_SHIFTACC:
23658 case NEON_COMBINE:
23659 case NEON_SPLIT:
23660 case NEON_CONVERT:
23661 case NEON_FIXCONV:
23662 case NEON_LANEMUL:
23663 case NEON_LANEMULL:
23664 case NEON_LANEMULH:
23665 case NEON_LANEMAC:
23666 case NEON_SCALARMUL:
23667 case NEON_SCALARMULL:
23668 case NEON_SCALARMULH:
23669 case NEON_SCALARMAC:
23670 case NEON_SELECT:
23671 case NEON_VTBL:
23672 case NEON_VTBX:
23673 {
23674 int k;
23675 tree return_type = void_type_node, args = void_list_node;
23676
23677 /* Build a function type directly from the insn_data for
23678 this builtin. The build_function_type() function takes
23679 care of removing duplicates for us. */
23680 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23681 {
23682 tree eltype;
23683
23684 if (is_load && k == 1)
23685 {
23686 /* Neon load patterns always have the memory
23687 operand in the operand 1 position. */
23688 gcc_assert (insn_data[d->code].operand[k].predicate
23689 == neon_struct_operand);
23690
23691 switch (d->mode)
23692 {
23693 case T_V8QI:
23694 case T_V16QI:
23695 eltype = const_intQI_pointer_node;
23696 break;
23697
23698 case T_V4HI:
23699 case T_V8HI:
23700 eltype = const_intHI_pointer_node;
23701 break;
23702
23703 case T_V2SI:
23704 case T_V4SI:
23705 eltype = const_intSI_pointer_node;
23706 break;
23707
23708 case T_V2SF:
23709 case T_V4SF:
23710 eltype = const_float_pointer_node;
23711 break;
23712
23713 case T_DI:
23714 case T_V2DI:
23715 eltype = const_intDI_pointer_node;
23716 break;
23717
23718 default: gcc_unreachable ();
23719 }
23720 }
23721 else if (is_store && k == 0)
23722 {
23723 /* Similarly, Neon store patterns use operand 0 as
23724 the memory location to store to. */
23725 gcc_assert (insn_data[d->code].operand[k].predicate
23726 == neon_struct_operand);
23727
23728 switch (d->mode)
23729 {
23730 case T_V8QI:
23731 case T_V16QI:
23732 eltype = intQI_pointer_node;
23733 break;
23734
23735 case T_V4HI:
23736 case T_V8HI:
23737 eltype = intHI_pointer_node;
23738 break;
23739
23740 case T_V2SI:
23741 case T_V4SI:
23742 eltype = intSI_pointer_node;
23743 break;
23744
23745 case T_V2SF:
23746 case T_V4SF:
23747 eltype = float_pointer_node;
23748 break;
23749
23750 case T_DI:
23751 case T_V2DI:
23752 eltype = intDI_pointer_node;
23753 break;
23754
23755 default: gcc_unreachable ();
23756 }
23757 }
23758 else
23759 {
23760 switch (insn_data[d->code].operand[k].mode)
23761 {
23762 case VOIDmode: eltype = void_type_node; break;
23763 /* Scalars. */
23764 case QImode: eltype = neon_intQI_type_node; break;
23765 case HImode: eltype = neon_intHI_type_node; break;
23766 case SImode: eltype = neon_intSI_type_node; break;
23767 case SFmode: eltype = neon_float_type_node; break;
23768 case DImode: eltype = neon_intDI_type_node; break;
23769 case TImode: eltype = intTI_type_node; break;
23770 case EImode: eltype = intEI_type_node; break;
23771 case OImode: eltype = intOI_type_node; break;
23772 case CImode: eltype = intCI_type_node; break;
23773 case XImode: eltype = intXI_type_node; break;
23774 /* 64-bit vectors. */
23775 case V8QImode: eltype = V8QI_type_node; break;
23776 case V4HImode: eltype = V4HI_type_node; break;
23777 case V2SImode: eltype = V2SI_type_node; break;
23778 case V2SFmode: eltype = V2SF_type_node; break;
23779 /* 128-bit vectors. */
23780 case V16QImode: eltype = V16QI_type_node; break;
23781 case V8HImode: eltype = V8HI_type_node; break;
23782 case V4SImode: eltype = V4SI_type_node; break;
23783 case V4SFmode: eltype = V4SF_type_node; break;
23784 case V2DImode: eltype = V2DI_type_node; break;
23785 default: gcc_unreachable ();
23786 }
23787 }
23788
23789 if (k == 0 && !is_store)
23790 return_type = eltype;
23791 else
23792 args = tree_cons (NULL_TREE, eltype, args);
23793 }
23794
23795 ftype = build_function_type (return_type, args);
23796 }
23797 break;
23798
23799 case NEON_RESULTPAIR:
23800 {
23801 switch (insn_data[d->code].operand[1].mode)
23802 {
23803 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
23804 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
23805 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
23806 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
23807 case DImode: ftype = void_ftype_pdi_di_di; break;
23808 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
23809 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
23810 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
23811 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
23812 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
23813 default: gcc_unreachable ();
23814 }
23815 }
23816 break;
23817
23818 case NEON_REINTERP:
23819 {
23820 /* We iterate over NUM_DREG_TYPES doubleword types,
23821 then NUM_QREG_TYPES quadword types.
23822 V4HF is not a type used in reinterpret, so we translate
23823 d->mode to the correct index in reinterp_ftype_dreg. */
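/* Worked example (indices taken from the neon_builtin_type_mode enum
   above): in the doubleword case (qreg_p false) with d->mode == T_V2SI,
   which comes after T_V4HF in the enum, rhs = (T_V2SI - 1) % NUM_QREG_TYPES
   = 2, selecting the V2SI entry of dreg_types/reinterp_ftype_dreg.  */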
23824 bool qreg_p
23825 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
23826 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
23827 % NUM_QREG_TYPES;
23828 switch (insn_data[d->code].operand[0].mode)
23829 {
23830 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
23831 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
23832 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
23833 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
23834 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
23835 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
23836 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
23837 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
23838 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
23839 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
23840 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
23841 default: gcc_unreachable ();
23842 }
23843 }
23844 break;
23845 case NEON_FLOAT_WIDEN:
23846 {
23847 tree eltype = NULL_TREE;
23848 tree return_type = NULL_TREE;
23849
23850 switch (insn_data[d->code].operand[1].mode)
23851 {
23852 case V4HFmode:
23853 eltype = V4HF_type_node;
23854 return_type = V4SF_type_node;
23855 break;
23856 default: gcc_unreachable ();
23857 }
23858 ftype = build_function_type_list (return_type, eltype, NULL);
23859 break;
23860 }
23861 case NEON_FLOAT_NARROW:
23862 {
23863 tree eltype = NULL_TREE;
23864 tree return_type = NULL_TREE;
23865
23866 switch (insn_data[d->code].operand[1].mode)
23867 {
23868 case V4SFmode:
23869 eltype = V4SF_type_node;
23870 return_type = V4HF_type_node;
23871 break;
23872 default: gcc_unreachable ();
23873 }
23874 ftype = build_function_type_list (return_type, eltype, NULL);
23875 break;
23876 }
23877 default:
23878 gcc_unreachable ();
23879 }
23880
23881 gcc_assert (ftype != NULL);
23882
23883 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
23884
23885 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
23886 NULL_TREE);
23887 arm_builtin_decls[fcode] = decl;
23888 }
23889 }
23890
23891 #undef NUM_DREG_TYPES
23892 #undef NUM_QREG_TYPES
23893
23894 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
23895 do \
23896 { \
23897 if ((MASK) & insn_flags) \
23898 { \
23899 tree bdecl; \
23900 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
23901 BUILT_IN_MD, NULL, NULL_TREE); \
23902 arm_builtin_decls[CODE] = bdecl; \
23903 } \
23904 } \
23905 while (0)
23906
23907 struct builtin_description
23908 {
23909 const unsigned int mask;
23910 const enum insn_code icode;
23911 const char * const name;
23912 const enum arm_builtins code;
23913 const enum rtx_code comparison;
23914 const unsigned int flag;
23915 };
23916
23917 static const struct builtin_description bdesc_2arg[] =
23918 {
23919 #define IWMMXT_BUILTIN(code, string, builtin) \
23920 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
23921 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23922
23923 #define IWMMXT2_BUILTIN(code, string, builtin) \
23924 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
23925 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23926
23927 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
23928 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
23929 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
23930 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
23931 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
23932 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
23933 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
23934 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
23935 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
23936 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
23937 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
23938 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
23939 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
23940 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
23941 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
23942 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
23943 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
23944 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
23945 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
23946 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
23947 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
23948 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
23949 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
23950 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
23951 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
23952 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
23953 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
23954 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
23955 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
23956 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
23957 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
23958 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
23959 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
23960 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
23961 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
23962 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
23963 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
23964 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
23965 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
23966 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
23967 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
23968 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
23969 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
23970 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
23971 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
23972 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
23973 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
23974 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
23975 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
23976 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
23977 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
23978 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
23979 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
23980 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
23981 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
23982 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
23983 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
23984 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
23985 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
23986 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
23987 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
23988 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
23989 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
23990 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
23991 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
23992 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
23993 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
23994 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
23995 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
23996 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
23997 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
23998 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
23999 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
24000 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
24001 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
24002 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
24003 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
24004 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
24005
24006 #define IWMMXT_BUILTIN2(code, builtin) \
24007 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24008
24009 #define IWMMXT2_BUILTIN2(code, builtin) \
24010 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24011
24012 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24013 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24014 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24015 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24016 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24017 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24018 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24019 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24020 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24021 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24022
24023 #define CRC32_BUILTIN(L, U) \
24024 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24025 UNKNOWN, 0},
24026 CRC32_BUILTIN (crc32b, CRC32B)
24027 CRC32_BUILTIN (crc32h, CRC32H)
24028 CRC32_BUILTIN (crc32w, CRC32W)
24029 CRC32_BUILTIN (crc32cb, CRC32CB)
24030 CRC32_BUILTIN (crc32ch, CRC32CH)
24031 CRC32_BUILTIN (crc32cw, CRC32CW)
24032 #undef CRC32_BUILTIN
24033
24034
24035 #define CRYPTO_BUILTIN(L, U) \
24036 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24037 UNKNOWN, 0},
24038 #undef CRYPTO1
24039 #undef CRYPTO2
24040 #undef CRYPTO3
24041 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24042 #define CRYPTO1(L, U, R, A)
24043 #define CRYPTO3(L, U, R, A1, A2, A3)
24044 #include "crypto.def"
24045 #undef CRYPTO1
24046 #undef CRYPTO2
24047 #undef CRYPTO3
24048
24049 };
24050
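/* Builtins that take a single argument: the iWMMXt move-mask, accumulate,
   unpack, absolute-value and broadcast operations, plus the one-operand
   crypto builtins from crypto.def.  */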
24051 static const struct builtin_description bdesc_1arg[] =
24052 {
24053 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24054 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24055 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24056 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24057 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24058 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24059 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24060 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24061 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24062 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24063 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24064 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24065 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24066 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24067 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24068 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24069 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24070 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24071 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24072 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24073 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24074 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24075 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24076 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24077
24078 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24079 #define CRYPTO2(L, U, R, A1, A2)
24080 #define CRYPTO3(L, U, R, A1, A2, A3)
24081 #include "crypto.def"
24082 #undef CRYPTO1
24083 #undef CRYPTO2
24084 #undef CRYPTO3
24085 };
24086
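/* Builtins that take three arguments: currently only the three-operand
   crypto builtins from crypto.def.  */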
24087 static const struct builtin_description bdesc_3arg[] =
24088 {
24089 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24090 #define CRYPTO1(L, U, R, A)
24091 #define CRYPTO2(L, U, R, A1, A2)
24092 #include "crypto.def"
24093 #undef CRYPTO1
24094 #undef CRYPTO2
24095 #undef CRYPTO3
24096 };
24097 #undef CRYPTO_BUILTIN
24098
24099 /* Set up all the iWMMXt builtins. This is not called if
24100 TARGET_IWMMXT is zero. */
24101
24102 static void
24103 arm_init_iwmmxt_builtins (void)
24104 {
24105 const struct builtin_description * d;
24106 size_t i;
24107
24108 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24109 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24110 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24111
24112 tree v8qi_ftype_v8qi_v8qi_int
24113 = build_function_type_list (V8QI_type_node,
24114 V8QI_type_node, V8QI_type_node,
24115 integer_type_node, NULL_TREE);
24116 tree v4hi_ftype_v4hi_int
24117 = build_function_type_list (V4HI_type_node,
24118 V4HI_type_node, integer_type_node, NULL_TREE);
24119 tree v2si_ftype_v2si_int
24120 = build_function_type_list (V2SI_type_node,
24121 V2SI_type_node, integer_type_node, NULL_TREE);
24122 tree v2si_ftype_di_di
24123 = build_function_type_list (V2SI_type_node,
24124 long_long_integer_type_node,
24125 long_long_integer_type_node,
24126 NULL_TREE);
24127 tree di_ftype_di_int
24128 = build_function_type_list (long_long_integer_type_node,
24129 long_long_integer_type_node,
24130 integer_type_node, NULL_TREE);
24131 tree di_ftype_di_int_int
24132 = build_function_type_list (long_long_integer_type_node,
24133 long_long_integer_type_node,
24134 integer_type_node,
24135 integer_type_node, NULL_TREE);
24136 tree int_ftype_v8qi
24137 = build_function_type_list (integer_type_node,
24138 V8QI_type_node, NULL_TREE);
24139 tree int_ftype_v4hi
24140 = build_function_type_list (integer_type_node,
24141 V4HI_type_node, NULL_TREE);
24142 tree int_ftype_v2si
24143 = build_function_type_list (integer_type_node,
24144 V2SI_type_node, NULL_TREE);
24145 tree int_ftype_v8qi_int
24146 = build_function_type_list (integer_type_node,
24147 V8QI_type_node, integer_type_node, NULL_TREE);
24148 tree int_ftype_v4hi_int
24149 = build_function_type_list (integer_type_node,
24150 V4HI_type_node, integer_type_node, NULL_TREE);
24151 tree int_ftype_v2si_int
24152 = build_function_type_list (integer_type_node,
24153 V2SI_type_node, integer_type_node, NULL_TREE);
24154 tree v8qi_ftype_v8qi_int_int
24155 = build_function_type_list (V8QI_type_node,
24156 V8QI_type_node, integer_type_node,
24157 integer_type_node, NULL_TREE);
24158 tree v4hi_ftype_v4hi_int_int
24159 = build_function_type_list (V4HI_type_node,
24160 V4HI_type_node, integer_type_node,
24161 integer_type_node, NULL_TREE);
24162 tree v2si_ftype_v2si_int_int
24163 = build_function_type_list (V2SI_type_node,
24164 V2SI_type_node, integer_type_node,
24165 integer_type_node, NULL_TREE);
24166 /* Miscellaneous. */
24167 tree v8qi_ftype_v4hi_v4hi
24168 = build_function_type_list (V8QI_type_node,
24169 V4HI_type_node, V4HI_type_node, NULL_TREE);
24170 tree v4hi_ftype_v2si_v2si
24171 = build_function_type_list (V4HI_type_node,
24172 V2SI_type_node, V2SI_type_node, NULL_TREE);
24173 tree v8qi_ftype_v4hi_v8qi
24174 = build_function_type_list (V8QI_type_node,
24175 V4HI_type_node, V8QI_type_node, NULL_TREE);
24176 tree v2si_ftype_v4hi_v4hi
24177 = build_function_type_list (V2SI_type_node,
24178 V4HI_type_node, V4HI_type_node, NULL_TREE);
24179 tree v2si_ftype_v8qi_v8qi
24180 = build_function_type_list (V2SI_type_node,
24181 V8QI_type_node, V8QI_type_node, NULL_TREE);
24182 tree v4hi_ftype_v4hi_di
24183 = build_function_type_list (V4HI_type_node,
24184 V4HI_type_node, long_long_integer_type_node,
24185 NULL_TREE);
24186 tree v2si_ftype_v2si_di
24187 = build_function_type_list (V2SI_type_node,
24188 V2SI_type_node, long_long_integer_type_node,
24189 NULL_TREE);
24190 tree di_ftype_void
24191 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24192 tree int_ftype_void
24193 = build_function_type_list (integer_type_node, NULL_TREE);
24194 tree di_ftype_v8qi
24195 = build_function_type_list (long_long_integer_type_node,
24196 V8QI_type_node, NULL_TREE);
24197 tree di_ftype_v4hi
24198 = build_function_type_list (long_long_integer_type_node,
24199 V4HI_type_node, NULL_TREE);
24200 tree di_ftype_v2si
24201 = build_function_type_list (long_long_integer_type_node,
24202 V2SI_type_node, NULL_TREE);
24203 tree v2si_ftype_v4hi
24204 = build_function_type_list (V2SI_type_node,
24205 V4HI_type_node, NULL_TREE);
24206 tree v4hi_ftype_v8qi
24207 = build_function_type_list (V4HI_type_node,
24208 V8QI_type_node, NULL_TREE);
24209 tree v8qi_ftype_v8qi
24210 = build_function_type_list (V8QI_type_node,
24211 V8QI_type_node, NULL_TREE);
24212 tree v4hi_ftype_v4hi
24213 = build_function_type_list (V4HI_type_node,
24214 V4HI_type_node, NULL_TREE);
24215 tree v2si_ftype_v2si
24216 = build_function_type_list (V2SI_type_node,
24217 V2SI_type_node, NULL_TREE);
24218
24219 tree di_ftype_di_v4hi_v4hi
24220 = build_function_type_list (long_long_unsigned_type_node,
24221 long_long_unsigned_type_node,
24222 V4HI_type_node, V4HI_type_node,
24223 NULL_TREE);
24224
24225 tree di_ftype_v4hi_v4hi
24226 = build_function_type_list (long_long_unsigned_type_node,
24227 V4HI_type_node, V4HI_type_node,
24228 NULL_TREE);
24229
24230 tree v2si_ftype_v2si_v4hi_v4hi
24231 = build_function_type_list (V2SI_type_node,
24232 V2SI_type_node, V4HI_type_node,
24233 V4HI_type_node, NULL_TREE);
24234
24235 tree v2si_ftype_v2si_v8qi_v8qi
24236 = build_function_type_list (V2SI_type_node,
24237 V2SI_type_node, V8QI_type_node,
24238 V8QI_type_node, NULL_TREE);
24239
24240 tree di_ftype_di_v2si_v2si
24241 = build_function_type_list (long_long_unsigned_type_node,
24242 long_long_unsigned_type_node,
24243 V2SI_type_node, V2SI_type_node,
24244 NULL_TREE);
24245
24246 tree di_ftype_di_di_int
24247 = build_function_type_list (long_long_unsigned_type_node,
24248 long_long_unsigned_type_node,
24249 long_long_unsigned_type_node,
24250 integer_type_node, NULL_TREE);
24251
24252 tree void_ftype_int
24253 = build_function_type_list (void_type_node,
24254 integer_type_node, NULL_TREE);
24255
24256 tree v8qi_ftype_char
24257 = build_function_type_list (V8QI_type_node,
24258 signed_char_type_node, NULL_TREE);
24259
24260 tree v4hi_ftype_short
24261 = build_function_type_list (V4HI_type_node,
24262 short_integer_type_node, NULL_TREE);
24263
24264 tree v2si_ftype_int
24265 = build_function_type_list (V2SI_type_node,
24266 integer_type_node, NULL_TREE);
24267
24268 /* Normal vector binops. */
24269 tree v8qi_ftype_v8qi_v8qi
24270 = build_function_type_list (V8QI_type_node,
24271 V8QI_type_node, V8QI_type_node, NULL_TREE);
24272 tree v4hi_ftype_v4hi_v4hi
24273 = build_function_type_list (V4HI_type_node,
24274 V4HI_type_node, V4HI_type_node, NULL_TREE);
24275 tree v2si_ftype_v2si_v2si
24276 = build_function_type_list (V2SI_type_node,
24277 V2SI_type_node, V2SI_type_node, NULL_TREE);
24278 tree di_ftype_di_di
24279 = build_function_type_list (long_long_unsigned_type_node,
24280 long_long_unsigned_type_node,
24281 long_long_unsigned_type_node,
24282 NULL_TREE);
24283
24284 /* Add all builtins that are more or less simple operations on two
24285 operands. */
24286 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24287 {
24288 /* Use one of the operands; the target can have a different mode for
24289 mask-generating compares. */
24290 enum machine_mode mode;
24291 tree type;
24292
24293 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24294 continue;
24295
24296 mode = insn_data[d->icode].operand[1].mode;
24297
24298 switch (mode)
24299 {
24300 case V8QImode:
24301 type = v8qi_ftype_v8qi_v8qi;
24302 break;
24303 case V4HImode:
24304 type = v4hi_ftype_v4hi_v4hi;
24305 break;
24306 case V2SImode:
24307 type = v2si_ftype_v2si_v2si;
24308 break;
24309 case DImode:
24310 type = di_ftype_di_di;
24311 break;
24312
24313 default:
24314 gcc_unreachable ();
24315 }
24316
24317 def_mbuiltin (d->mask, d->name, type, d->code);
24318 }
24319
24320 /* Add the remaining MMX insns with somewhat more complicated types. */
24321 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24322 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24323 ARM_BUILTIN_ ## CODE)
24324
24325 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24326 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24327 ARM_BUILTIN_ ## CODE)
24328
24329 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24330 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24331 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24332 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24333 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24334 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24335 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24336 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24337 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24338
24339 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24340 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24341 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24342 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24343 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24344 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24345
24346 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24347 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24348 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24349 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24350 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24351 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24352
24353 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24354 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24355 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24356 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24357 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24358 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24359
24360 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24361 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24362 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24363 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24364 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24365 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24366
24367 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24368
24369 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24370 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24371 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24372 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24373 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24374 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24375 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24376 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24377 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24378 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24379
24380 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24381 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24382 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24383 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24384 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24385 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24386 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24387 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24388 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24389
24390 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24391 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24392 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24393
24394 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24395 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24396 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24397
24398 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24399 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24400
24401 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24402 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24403 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24404 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24405 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24406 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24407
24408 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24409 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24410 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24411 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24412 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24413 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24414 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24415 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24416 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24417 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24418 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24419 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24420
24421 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24422 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24423 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24424 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24425
24426 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24427 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24428 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24429 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24430 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24431 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24432 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24433
24434 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24435 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24436 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24437
24438 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24439 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24440 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24441 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24442
24443 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24444 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24445 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24446 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24447
24448 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24449 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24450 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24451 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24452
24453 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24454 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24455 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24456 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24457
24458 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24459 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24460 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24461 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24462
24463 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24464 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24465 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24466 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24467
24468 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24469
24470 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24471 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24472 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24473
24474 #undef iwmmx_mbuiltin
24475 #undef iwmmx2_mbuiltin
24476 }
24477
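/* Register the __fp16 half-precision floating-point type with the
   front end.  */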
24478 static void
24479 arm_init_fp16_builtins (void)
24480 {
24481 tree fp16_type = make_node (REAL_TYPE);
24482 TYPE_PRECISION (fp16_type) = 16;
24483 layout_type (fp16_type);
24484 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
24485 }
24486
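/* Set up the CRC32 builtins (__builtin_arm_crc32{b,h,w} and the crc32c
   variants).  Each takes the running CRC in its first argument and the new
   data in its second, e.g. crc = __builtin_arm_crc32w (crc, word);
   (illustrative usage; requires a target with the CRC32 extension).  */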
24487 static void
24488 arm_init_crc32_builtins ()
24489 {
24490 tree si_ftype_si_qi
24491 = build_function_type_list (unsigned_intSI_type_node,
24492 unsigned_intSI_type_node,
24493 unsigned_intQI_type_node, NULL_TREE);
24494 tree si_ftype_si_hi
24495 = build_function_type_list (unsigned_intSI_type_node,
24496 unsigned_intSI_type_node,
24497 unsigned_intHI_type_node, NULL_TREE);
24498 tree si_ftype_si_si
24499 = build_function_type_list (unsigned_intSI_type_node,
24500 unsigned_intSI_type_node,
24501 unsigned_intSI_type_node, NULL_TREE);
24502
24503 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24504 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24505 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24506 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24507 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24508 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24509 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24510 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24511 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24512 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24513 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24514 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24515 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24516 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24517 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24518 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24519 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24520 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
24521 }
24522
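/* Set up all the target-specific builtins (iWMMXt, Neon, __fp16 and CRC32)
   according to the features enabled for this compilation; this implements
   the TARGET_INIT_BUILTINS hook.  */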
24523 static void
24524 arm_init_builtins (void)
24525 {
24526 if (TARGET_REALLY_IWMMXT)
24527 arm_init_iwmmxt_builtins ();
24528
24529 if (TARGET_NEON)
24530 arm_init_neon_builtins ();
24531
24532 if (arm_fp16_format)
24533 arm_init_fp16_builtins ();
24534
24535 if (TARGET_CRC32)
24536 arm_init_crc32_builtins ();
24537 }
24538
24539 /* Return the ARM builtin for CODE. */
24540
24541 static tree
24542 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24543 {
24544 if (code >= ARM_BUILTIN_MAX)
24545 return error_mark_node;
24546
24547 return arm_builtin_decls[code];
24548 }
24549
24550 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24551
24552 static const char *
24553 arm_invalid_parameter_type (const_tree t)
24554 {
24555 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24556 return N_("function parameters cannot have __fp16 type");
24557 return NULL;
24558 }
24559
24560 /* Implement TARGET_INVALID_RETURN_TYPE. */
24561
24562 static const char *
24563 arm_invalid_return_type (const_tree t)
24564 {
24565 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24566 return N_("functions cannot return __fp16 type");
24567 return NULL;
24568 }
24569
24570 /* Implement TARGET_PROMOTED_TYPE. */
24571
24572 static tree
24573 arm_promoted_type (const_tree t)
24574 {
24575 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24576 return float_type_node;
24577 return NULL_TREE;
24578 }
24579
24580 /* Implement TARGET_CONVERT_TO_TYPE.
24581 Specifically, this hook implements the peculiarity of the ARM
24582 half-precision floating-point C semantics that requires conversions between
24583 __fp16 and double to go through an intermediate conversion to float. */
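/* For example, (double) x where x has type __fp16 is evaluated as
   (double) (float) x, and a conversion from double to __fp16 likewise goes
   through float first.  */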
24584
24585 static tree
24586 arm_convert_to_type (tree type, tree expr)
24587 {
24588 tree fromtype = TREE_TYPE (expr);
24589 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24590 return NULL_TREE;
24591 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24592 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24593 return convert (type, convert (float_type_node, expr));
24594 return NULL_TREE;
24595 }
24596
24597 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24598 This simply adds HFmode as a supported mode; even though we don't
24599 implement arithmetic on this type directly, it's supported by
24600 optabs conversions, much the way the double-word arithmetic is
24601 special-cased in the default hook. */
24602
24603 static bool
24604 arm_scalar_mode_supported_p (enum machine_mode mode)
24605 {
24606 if (mode == HFmode)
24607 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24608 else if (ALL_FIXED_POINT_MODE_P (mode))
24609 return true;
24610 else
24611 return default_scalar_mode_supported_p (mode);
24612 }
24613
24614 /* Errors in the source file can cause expand_expr to return const0_rtx
24615 where we expect a vector. To avoid crashing, use one of the vector
24616 clear instructions. */
24617
24618 static rtx
24619 safe_vector_operand (rtx x, enum machine_mode mode)
24620 {
24621 if (x != const0_rtx)
24622 return x;
24623 x = gen_reg_rtx (mode);
24624
24625 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24626 : gen_rtx_SUBREG (DImode, x, 0)));
24627 return x;
24628 }
24629
24630 /* Function to expand ternary builtins. */
24631 static rtx
24632 arm_expand_ternop_builtin (enum insn_code icode,
24633 tree exp, rtx target)
24634 {
24635 rtx pat;
24636 tree arg0 = CALL_EXPR_ARG (exp, 0);
24637 tree arg1 = CALL_EXPR_ARG (exp, 1);
24638 tree arg2 = CALL_EXPR_ARG (exp, 2);
24639
24640 rtx op0 = expand_normal (arg0);
24641 rtx op1 = expand_normal (arg1);
24642 rtx op2 = expand_normal (arg2);
24643 rtx op3 = NULL_RTX;
24644
24645 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24646 lane operand depending on endianness. */
24647 bool builtin_sha1cpm_p = false;
24648
24649 if (insn_data[icode].n_operands == 5)
24650 {
24651 gcc_assert (icode == CODE_FOR_crypto_sha1c
24652 || icode == CODE_FOR_crypto_sha1p
24653 || icode == CODE_FOR_crypto_sha1m);
24654 builtin_sha1cpm_p = true;
24655 }
24656 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24657 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24658 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24659 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
24660
24661
24662 if (VECTOR_MODE_P (mode0))
24663 op0 = safe_vector_operand (op0, mode0);
24664 if (VECTOR_MODE_P (mode1))
24665 op1 = safe_vector_operand (op1, mode1);
24666 if (VECTOR_MODE_P (mode2))
24667 op2 = safe_vector_operand (op2, mode2);
24668
24669 if (! target
24670 || GET_MODE (target) != tmode
24671 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24672 target = gen_reg_rtx (tmode);
24673
24674 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24675 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
24676 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
24677
24678 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24679 op0 = copy_to_mode_reg (mode0, op0);
24680 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24681 op1 = copy_to_mode_reg (mode1, op1);
24682 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24683 op2 = copy_to_mode_reg (mode2, op2);
24684 if (builtin_sha1cpm_p)
24685 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24686
24687 if (builtin_sha1cpm_p)
24688 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
24689 else
24690 pat = GEN_FCN (icode) (target, op0, op1, op2);
24691 if (! pat)
24692 return 0;
24693 emit_insn (pat);
24694 return target;
24695 }
24696
24697 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24698
24699 static rtx
24700 arm_expand_binop_builtin (enum insn_code icode,
24701 tree exp, rtx target)
24702 {
24703 rtx pat;
24704 tree arg0 = CALL_EXPR_ARG (exp, 0);
24705 tree arg1 = CALL_EXPR_ARG (exp, 1);
24706 rtx op0 = expand_normal (arg0);
24707 rtx op1 = expand_normal (arg1);
24708 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24709 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24710 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24711
24712 if (VECTOR_MODE_P (mode0))
24713 op0 = safe_vector_operand (op0, mode0);
24714 if (VECTOR_MODE_P (mode1))
24715 op1 = safe_vector_operand (op1, mode1);
24716
24717 if (! target
24718 || GET_MODE (target) != tmode
24719 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24720 target = gen_reg_rtx (tmode);
24721
24722 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24723 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24724
24725 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24726 op0 = copy_to_mode_reg (mode0, op0);
24727 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24728 op1 = copy_to_mode_reg (mode1, op1);
24729
24730 pat = GEN_FCN (icode) (target, op0, op1);
24731 if (! pat)
24732 return 0;
24733 emit_insn (pat);
24734 return target;
24735 }
24736
24737 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24738
24739 static rtx
24740 arm_expand_unop_builtin (enum insn_code icode,
24741 tree exp, rtx target, int do_load)
24742 {
24743 rtx pat;
24744 tree arg0 = CALL_EXPR_ARG (exp, 0);
24745 rtx op0 = expand_normal (arg0);
24746 rtx op1 = NULL_RTX;
24747 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24748 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24749 bool builtin_sha1h_p = false;
24750
24751 if (insn_data[icode].n_operands == 3)
24752 {
24753 gcc_assert (icode == CODE_FOR_crypto_sha1h);
24754 builtin_sha1h_p = true;
24755 }
24756
24757 if (! target
24758 || GET_MODE (target) != tmode
24759 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24760 target = gen_reg_rtx (tmode);
24761 if (do_load)
24762 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
24763 else
24764 {
24765 if (VECTOR_MODE_P (mode0))
24766 op0 = safe_vector_operand (op0, mode0);
24767
24768 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24769 op0 = copy_to_mode_reg (mode0, op0);
24770 }
24771 if (builtin_sha1h_p)
24772 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24773
24774 if (builtin_sha1h_p)
24775 pat = GEN_FCN (icode) (target, op0, op1);
24776 else
24777 pat = GEN_FCN (icode) (target, op0);
24778 if (! pat)
24779 return 0;
24780 emit_insn (pat);
24781 return target;
24782 }
24783
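/* How arm_expand_neon_args should handle each argument of a Neon builtin:
   copy it into a register, require it to be a constant, treat it as a
   memory reference, or mark the end of the argument list.  */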
24784 typedef enum {
24785 NEON_ARG_COPY_TO_REG,
24786 NEON_ARG_CONSTANT,
24787 NEON_ARG_MEMORY,
24788 NEON_ARG_STOP
24789 } builtin_arg;
24790
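/* The maximum number of arguments a Neon builtin can take.  */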
24791 #define NEON_MAX_BUILTIN_ARGS 5
24792
24793 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
24794 and return an expression for the accessed memory.
24795
24796 The intrinsic function operates on a block of registers that has
24797 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
24798 function references the memory at EXP of type TYPE and in mode
24799 MEM_MODE; this mode may be BLKmode if no more suitable mode is
24800 available. */
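/* For instance, for a full-width load or store MEM_MODE equals REG_MODE and
   the access covers every element of the register block (a 16-byte block of
   floats is described as float[4]); for a lane load or store only one
   element per vector is accessed, so the array has NVECTORS elements.  */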
24801
24802 static tree
24803 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
24804 enum machine_mode reg_mode,
24805 neon_builtin_type_mode type_mode)
24806 {
24807 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
24808 tree elem_type, upper_bound, array_type;
24809
24810 /* Work out the size of the register block in bytes. */
24811 reg_size = GET_MODE_SIZE (reg_mode);
24812
24813 /* Work out the size of each vector in bytes. */
24814 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
24815 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
24816
24817 /* Work out how many vectors there are. */
24818 gcc_assert (reg_size % vector_size == 0);
24819 nvectors = reg_size / vector_size;
24820
24821 /* Work out the type of each element. */
24822 gcc_assert (POINTER_TYPE_P (type));
24823 elem_type = TREE_TYPE (type);
24824
24825 /* Work out how many elements are being loaded or stored.
24826 MEM_MODE == REG_MODE implies a one-to-one mapping between register
24827 and memory elements; anything else implies a lane load or store. */
24828 if (mem_mode == reg_mode)
24829 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
24830 else
24831 nelems = nvectors;
24832
24833 /* Create a type that describes the full access. */
24834 upper_bound = build_int_cst (size_type_node, nelems - 1);
24835 array_type = build_array_type (elem_type, build_index_type (upper_bound));
24836
24837 /* Dereference EXP using that type. */
24838 return fold_build2 (MEM_REF, array_type, exp,
24839 build_int_cst (build_pointer_type (array_type), 0));
24840 }
24841
24842 /* Expand a Neon builtin. */
24843 static rtx
24844 arm_expand_neon_args (rtx target, int icode, int have_retval,
24845 neon_builtin_type_mode type_mode,
24846 tree exp, int fcode, ...)
24847 {
24848 va_list ap;
24849 rtx pat;
24850 tree arg[NEON_MAX_BUILTIN_ARGS];
24851 rtx op[NEON_MAX_BUILTIN_ARGS];
24852 tree arg_type;
24853 tree formals;
24854 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24855 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
24856 enum machine_mode other_mode;
24857 int argc = 0;
24858 int opno;
24859
24860 if (have_retval
24861 && (!target
24862 || GET_MODE (target) != tmode
24863 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
24864 target = gen_reg_rtx (tmode);
24865
24866 va_start (ap, fcode);
24867
24868 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
24869
24870 for (;;)
24871 {
24872 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
24873
24874 if (thisarg == NEON_ARG_STOP)
24875 break;
24876 else
24877 {
24878 opno = argc + have_retval;
24879 mode[argc] = insn_data[icode].operand[opno].mode;
24880 arg[argc] = CALL_EXPR_ARG (exp, argc);
24881 arg_type = TREE_VALUE (formals);
24882 if (thisarg == NEON_ARG_MEMORY)
24883 {
24884 other_mode = insn_data[icode].operand[1 - opno].mode;
24885 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
24886 mode[argc], other_mode,
24887 type_mode);
24888 }
24889
24890 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a MEM_P
24891 is returned. */
24892 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
24893 (thisarg == NEON_ARG_MEMORY
24894 ? EXPAND_MEMORY : EXPAND_NORMAL));
24895
24896 switch (thisarg)
24897 {
24898 case NEON_ARG_COPY_TO_REG:
24899 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
24900 if (!(*insn_data[icode].operand[opno].predicate)
24901 (op[argc], mode[argc]))
24902 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
24903 break;
24904
24905 case NEON_ARG_CONSTANT:
24906 /* FIXME: This error message is somewhat unhelpful. */
24907 if (!(*insn_data[icode].operand[opno].predicate)
24908 (op[argc], mode[argc]))
24909 error ("argument must be a constant");
24910 break;
24911
24912 case NEON_ARG_MEMORY:
24913 /* Check if expand failed. */
24914 if (op[argc] == const0_rtx)
24915 return 0;
24916 gcc_assert (MEM_P (op[argc]));
24917 PUT_MODE (op[argc], mode[argc]);
24918 /* ??? arm_neon.h uses the same built-in functions for signed
24919 and unsigned accesses, casting where necessary. This isn't
24920 alias safe. */
24921 set_mem_alias_set (op[argc], 0);
24922 if (!(*insn_data[icode].operand[opno].predicate)
24923 (op[argc], mode[argc]))
24924 op[argc] = (replace_equiv_address
24925 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
24926 break;
24927
24928 case NEON_ARG_STOP:
24929 gcc_unreachable ();
24930 }
24931
24932 argc++;
24933 formals = TREE_CHAIN (formals);
24934 }
24935 }
24936
24937 va_end (ap);
24938
24939 if (have_retval)
24940 switch (argc)
24941 {
24942 case 1:
24943 pat = GEN_FCN (icode) (target, op[0]);
24944 break;
24945
24946 case 2:
24947 pat = GEN_FCN (icode) (target, op[0], op[1]);
24948 break;
24949
24950 case 3:
24951 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
24952 break;
24953
24954 case 4:
24955 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
24956 break;
24957
24958 case 5:
24959 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
24960 break;
24961
24962 default:
24963 gcc_unreachable ();
24964 }
24965 else
24966 switch (argc)
24967 {
24968 case 1:
24969 pat = GEN_FCN (icode) (op[0]);
24970 break;
24971
24972 case 2:
24973 pat = GEN_FCN (icode) (op[0], op[1]);
24974 break;
24975
24976 case 3:
24977 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
24978 break;
24979
24980 case 4:
24981 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
24982 break;
24983
24984 case 5:
24985 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
24986 break;
24987
24988 default:
24989 gcc_unreachable ();
24990 }
24991
24992 if (!pat)
24993 return 0;
24994
24995 emit_insn (pat);
24996
24997 return target;
24998 }
24999
25000 /* Expand a Neon builtin. These are "special" because they don't have symbolic
25001 constants defined per-instruction or per instruction-variant. Instead, the
25002 required info is looked up in the table neon_builtin_data. */
25003 static rtx
25004 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
25005 {
25006 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
25007 neon_itype itype = d->itype;
25008 enum insn_code icode = d->code;
25009 neon_builtin_type_mode type_mode = d->mode;
25010
25011 switch (itype)
25012 {
25013 case NEON_UNOP:
25014 case NEON_CONVERT:
25015 case NEON_DUPLANE:
25016 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25017 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25018
25019 case NEON_BINOP:
25020 case NEON_SETLANE:
25021 case NEON_SCALARMUL:
25022 case NEON_SCALARMULL:
25023 case NEON_SCALARMULH:
25024 case NEON_SHIFTINSERT:
25025 case NEON_LOGICBINOP:
25026 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25027 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25028 NEON_ARG_STOP);
25029
25030 case NEON_TERNOP:
25031 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25032 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25033 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25034
25035 case NEON_GETLANE:
25036 case NEON_FIXCONV:
25037 case NEON_SHIFTIMM:
25038 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25039 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25040 NEON_ARG_STOP);
25041
25042 case NEON_CREATE:
25043 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25044 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25045
25046 case NEON_DUP:
25047 case NEON_RINT:
25048 case NEON_SPLIT:
25049 case NEON_FLOAT_WIDEN:
25050 case NEON_FLOAT_NARROW:
25051 case NEON_REINTERP:
25052 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25053 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25054
25055 case NEON_COMBINE:
25056 case NEON_VTBL:
25057 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25058 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25059
25060 case NEON_RESULTPAIR:
25061 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25062 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25063 NEON_ARG_STOP);
25064
25065 case NEON_LANEMUL:
25066 case NEON_LANEMULL:
25067 case NEON_LANEMULH:
25068 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25069 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25070 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25071
25072 case NEON_LANEMAC:
25073 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25074 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25075 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25076
25077 case NEON_SHIFTACC:
25078 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25079 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25080 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25081
25082 case NEON_SCALARMAC:
25083 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25084 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25085 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25086
25087 case NEON_SELECT:
25088 case NEON_VTBX:
25089 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25090 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25091 NEON_ARG_STOP);
25092
25093 case NEON_LOAD1:
25094 case NEON_LOADSTRUCT:
25095 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25096 NEON_ARG_MEMORY, NEON_ARG_STOP);
25097
25098 case NEON_LOAD1LANE:
25099 case NEON_LOADSTRUCTLANE:
25100 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25101 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25102 NEON_ARG_STOP);
25103
25104 case NEON_STORE1:
25105 case NEON_STORESTRUCT:
25106 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25107 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25108
25109 case NEON_STORE1LANE:
25110 case NEON_STORESTRUCTLANE:
25111 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25112 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25113 NEON_ARG_STOP);
25114 }
25115
25116 gcc_unreachable ();
25117 }
25118
25119 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25120 void
25121 neon_reinterpret (rtx dest, rtx src)
25122 {
25123 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25124 }
25125
25126 /* Emit code to place a Neon pair result in memory locations (with equal
25127 registers). */
25128 void
25129 neon_emit_pair_result_insn (enum machine_mode mode,
25130 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
25131 rtx op1, rtx op2)
25132 {
25133 rtx mem = gen_rtx_MEM (mode, destaddr);
25134 rtx tmp1 = gen_reg_rtx (mode);
25135 rtx tmp2 = gen_reg_rtx (mode);
25136
25137 emit_insn (intfn (tmp1, op1, op2, tmp2));
25138
25139 emit_move_insn (mem, tmp1);
25140 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
25141 emit_move_insn (mem, tmp2);
25142 }
25143
25144 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25145 not to early-clobber SRC registers in the process.
25146
25147 We assume that the operands described by SRC and DEST represent a
25148 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25149 number of components into which the copy has been decomposed. */
25150 void
25151 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25152 {
25153 unsigned int i;
25154
25155 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25156 || REGNO (operands[0]) < REGNO (operands[1]))
25157 {
25158 for (i = 0; i < count; i++)
25159 {
25160 operands[2 * i] = dest[i];
25161 operands[2 * i + 1] = src[i];
25162 }
25163 }
25164 else
25165 {
25166 for (i = 0; i < count; i++)
25167 {
25168 operands[2 * i] = dest[count - i - 1];
25169 operands[2 * i + 1] = src[count - i - 1];
25170 }
25171 }
25172 }
25173
25174 /* Split operands into moves from op[1] + op[2] into op[0]. */
25175
25176 void
25177 neon_split_vcombine (rtx operands[3])
25178 {
25179 unsigned int dest = REGNO (operands[0]);
25180 unsigned int src1 = REGNO (operands[1]);
25181 unsigned int src2 = REGNO (operands[2]);
25182 enum machine_mode halfmode = GET_MODE (operands[1]);
25183 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25184 rtx destlo, desthi;
25185
25186 if (src1 == dest && src2 == dest + halfregs)
25187 {
25188 /* No-op move. Can't split to nothing; emit something. */
25189 emit_note (NOTE_INSN_DELETED);
25190 return;
25191 }
25192
25193 /* Preserve register attributes for variable tracking. */
25194 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25195 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25196 GET_MODE_SIZE (halfmode));
25197
25198 /* Special case of reversed high/low parts. Use VSWP. */
25199 if (src2 == dest && src1 == dest + halfregs)
25200 {
25201 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25202 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25203 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25204 return;
25205 }
25206
25207 if (!reg_overlap_mentioned_p (operands[2], destlo))
25208 {
25209 /* Try to avoid unnecessary moves if part of the result
25210 is in the right place already. */
25211 if (src1 != dest)
25212 emit_move_insn (destlo, operands[1]);
25213 if (src2 != dest + halfregs)
25214 emit_move_insn (desthi, operands[2]);
25215 }
25216 else
25217 {
25218 if (src2 != dest + halfregs)
25219 emit_move_insn (desthi, operands[2]);
25220 if (src1 != dest)
25221 emit_move_insn (destlo, operands[1]);
25222 }
25223 }
25224
25225 /* Expand an expression EXP that calls a built-in function,
25226 with result going to TARGET if that's convenient
25227 (and in mode MODE if that's convenient).
25228 SUBTARGET may be used as the target for computing one of EXP's operands.
25229 IGNORE is nonzero if the value is to be ignored. */
25230
25231 static rtx
25232 arm_expand_builtin (tree exp,
25233 rtx target,
25234 rtx subtarget ATTRIBUTE_UNUSED,
25235 enum machine_mode mode ATTRIBUTE_UNUSED,
25236 int ignore ATTRIBUTE_UNUSED)
25237 {
25238 const struct builtin_description * d;
25239 enum insn_code icode;
25240 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25241 tree arg0;
25242 tree arg1;
25243 tree arg2;
25244 rtx op0;
25245 rtx op1;
25246 rtx op2;
25247 rtx pat;
25248 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25249 size_t i;
25250 enum machine_mode tmode;
25251 enum machine_mode mode0;
25252 enum machine_mode mode1;
25253 enum machine_mode mode2;
25254 int opint;
25255 int selector;
25256 int mask;
25257 int imm;
25258
25259 if (fcode >= ARM_BUILTIN_NEON_BASE)
25260 return arm_expand_neon_builtin (fcode, exp, target);
25261
25262 switch (fcode)
25263 {
25264 case ARM_BUILTIN_TEXTRMSB:
25265 case ARM_BUILTIN_TEXTRMUB:
25266 case ARM_BUILTIN_TEXTRMSH:
25267 case ARM_BUILTIN_TEXTRMUH:
25268 case ARM_BUILTIN_TEXTRMSW:
25269 case ARM_BUILTIN_TEXTRMUW:
25270 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25271 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25272 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25273 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25274 : CODE_FOR_iwmmxt_textrmw);
25275
25276 arg0 = CALL_EXPR_ARG (exp, 0);
25277 arg1 = CALL_EXPR_ARG (exp, 1);
25278 op0 = expand_normal (arg0);
25279 op1 = expand_normal (arg1);
25280 tmode = insn_data[icode].operand[0].mode;
25281 mode0 = insn_data[icode].operand[1].mode;
25282 mode1 = insn_data[icode].operand[2].mode;
25283
25284 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25285 op0 = copy_to_mode_reg (mode0, op0);
25286 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25287 {
25288 /* @@@ better error message */
25289 error ("selector must be an immediate");
25290 return gen_reg_rtx (tmode);
25291 }
25292
25293 opint = INTVAL (op1);
25294 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25295 {
25296 if (opint > 7 || opint < 0)
25297 error ("the range of selector should be in 0 to 7");
25298 }
25299 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25300 {
25301 if (opint > 3 || opint < 0)
25302 error ("the range of selector should be in 0 to 3");
25303 }
25304 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25305 {
25306 if (opint > 1 || opint < 0)
25307 error ("the range of selector should be in 0 to 1");
25308 }
25309
25310 if (target == 0
25311 || GET_MODE (target) != tmode
25312 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25313 target = gen_reg_rtx (tmode);
25314 pat = GEN_FCN (icode) (target, op0, op1);
25315 if (! pat)
25316 return 0;
25317 emit_insn (pat);
25318 return target;
25319
25320 case ARM_BUILTIN_WALIGNI:
25321 /* If op2 is an immediate, call waligni, else call walignr. */
25322 arg0 = CALL_EXPR_ARG (exp, 0);
25323 arg1 = CALL_EXPR_ARG (exp, 1);
25324 arg2 = CALL_EXPR_ARG (exp, 2);
25325 op0 = expand_normal (arg0);
25326 op1 = expand_normal (arg1);
25327 op2 = expand_normal (arg2);
25328 if (CONST_INT_P (op2))
25329 {
25330 icode = CODE_FOR_iwmmxt_waligni;
25331 tmode = insn_data[icode].operand[0].mode;
25332 mode0 = insn_data[icode].operand[1].mode;
25333 mode1 = insn_data[icode].operand[2].mode;
25334 mode2 = insn_data[icode].operand[3].mode;
25335 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25336 op0 = copy_to_mode_reg (mode0, op0);
25337 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25338 op1 = copy_to_mode_reg (mode1, op1);
25339 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25340 selector = INTVAL (op2);
25341 if (selector > 7 || selector < 0)
25342 error ("the range of selector should be in 0 to 7");
25343 }
25344 else
25345 {
25346 icode = CODE_FOR_iwmmxt_walignr;
25347 tmode = insn_data[icode].operand[0].mode;
25348 mode0 = insn_data[icode].operand[1].mode;
25349 mode1 = insn_data[icode].operand[2].mode;
25350 mode2 = insn_data[icode].operand[3].mode;
25351 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25352 op0 = copy_to_mode_reg (mode0, op0);
25353 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25354 op1 = copy_to_mode_reg (mode1, op1);
25355 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25356 op2 = copy_to_mode_reg (mode2, op2);
25357 }
25358 if (target == 0
25359 || GET_MODE (target) != tmode
25360 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25361 target = gen_reg_rtx (tmode);
25362 pat = GEN_FCN (icode) (target, op0, op1, op2);
25363 if (!pat)
25364 return 0;
25365 emit_insn (pat);
25366 return target;
25367
25368 case ARM_BUILTIN_TINSRB:
25369 case ARM_BUILTIN_TINSRH:
25370 case ARM_BUILTIN_TINSRW:
25371 case ARM_BUILTIN_WMERGE:
25372 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25373 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25374 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25375 : CODE_FOR_iwmmxt_tinsrw);
25376 arg0 = CALL_EXPR_ARG (exp, 0);
25377 arg1 = CALL_EXPR_ARG (exp, 1);
25378 arg2 = CALL_EXPR_ARG (exp, 2);
25379 op0 = expand_normal (arg0);
25380 op1 = expand_normal (arg1);
25381 op2 = expand_normal (arg2);
25382 tmode = insn_data[icode].operand[0].mode;
25383 mode0 = insn_data[icode].operand[1].mode;
25384 mode1 = insn_data[icode].operand[2].mode;
25385 mode2 = insn_data[icode].operand[3].mode;
25386
25387 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25388 op0 = copy_to_mode_reg (mode0, op0);
25389 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25390 op1 = copy_to_mode_reg (mode1, op1);
25391 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25392 {
25393 error ("selector must be an immediate");
25394 return const0_rtx;
25395 }
25396 if (icode == CODE_FOR_iwmmxt_wmerge)
25397 {
25398 selector = INTVAL (op2);
25399 if (selector > 7 || selector < 0)
25400 error ("the range of selector should be in 0 to 7");
25401 }
25402 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25403 || (icode == CODE_FOR_iwmmxt_tinsrh)
25404 || (icode == CODE_FOR_iwmmxt_tinsrw))
25405 {
25406 mask = 0x01;
25407 selector = INTVAL (op2);
25408 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25409 error ("the selector should be in the range 0 to 7");
25410 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25411 error ("the selector should be in the range 0 to 3");
25412 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25413 error ("the selector should be in the range 0 to 1");
25414 mask <<= selector;
25415 op2 = GEN_INT (mask);
25416 }
25417 if (target == 0
25418 || GET_MODE (target) != tmode
25419 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25420 target = gen_reg_rtx (tmode);
25421 pat = GEN_FCN (icode) (target, op0, op1, op2);
25422 if (! pat)
25423 return 0;
25424 emit_insn (pat);
25425 return target;
25426
25427 case ARM_BUILTIN_SETWCGR0:
25428 case ARM_BUILTIN_SETWCGR1:
25429 case ARM_BUILTIN_SETWCGR2:
25430 case ARM_BUILTIN_SETWCGR3:
25431 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25432 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25433 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25434 : CODE_FOR_iwmmxt_setwcgr3);
25435 arg0 = CALL_EXPR_ARG (exp, 0);
25436 op0 = expand_normal (arg0);
25437 mode0 = insn_data[icode].operand[0].mode;
25438 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25439 op0 = copy_to_mode_reg (mode0, op0);
25440 pat = GEN_FCN (icode) (op0);
25441 if (!pat)
25442 return 0;
25443 emit_insn (pat);
25444 return 0;
25445
25446 case ARM_BUILTIN_GETWCGR0:
25447 case ARM_BUILTIN_GETWCGR1:
25448 case ARM_BUILTIN_GETWCGR2:
25449 case ARM_BUILTIN_GETWCGR3:
25450 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25451 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25452 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25453 : CODE_FOR_iwmmxt_getwcgr3);
25454 tmode = insn_data[icode].operand[0].mode;
25455 if (target == 0
25456 || GET_MODE (target) != tmode
25457 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25458 target = gen_reg_rtx (tmode);
25459 pat = GEN_FCN (icode) (target);
25460 if (!pat)
25461 return 0;
25462 emit_insn (pat);
25463 return target;
25464
25465 case ARM_BUILTIN_WSHUFH:
25466 icode = CODE_FOR_iwmmxt_wshufh;
25467 arg0 = CALL_EXPR_ARG (exp, 0);
25468 arg1 = CALL_EXPR_ARG (exp, 1);
25469 op0 = expand_normal (arg0);
25470 op1 = expand_normal (arg1);
25471 tmode = insn_data[icode].operand[0].mode;
25472 mode1 = insn_data[icode].operand[1].mode;
25473 mode2 = insn_data[icode].operand[2].mode;
25474
25475 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25476 op0 = copy_to_mode_reg (mode1, op0);
25477 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25478 {
25479 error ("mask must be an immediate");
25480 return const0_rtx;
25481 }
25482 selector = INTVAL (op1);
25483 if (selector < 0 || selector > 255)
25484 error ("the range of mask should be 0 to 255");
25485 if (target == 0
25486 || GET_MODE (target) != tmode
25487 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25488 target = gen_reg_rtx (tmode);
25489 pat = GEN_FCN (icode) (target, op0, op1);
25490 if (! pat)
25491 return 0;
25492 emit_insn (pat);
25493 return target;
25494
25495 case ARM_BUILTIN_WMADDS:
25496 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25497 case ARM_BUILTIN_WMADDSX:
25498 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25499 case ARM_BUILTIN_WMADDSN:
25500 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25501 case ARM_BUILTIN_WMADDU:
25502 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25503 case ARM_BUILTIN_WMADDUX:
25504 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25505 case ARM_BUILTIN_WMADDUN:
25506 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25507 case ARM_BUILTIN_WSADBZ:
25508 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25509 case ARM_BUILTIN_WSADHZ:
25510 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25511
25512 /* Several three-argument builtins. */
25513 case ARM_BUILTIN_WMACS:
25514 case ARM_BUILTIN_WMACU:
25515 case ARM_BUILTIN_TMIA:
25516 case ARM_BUILTIN_TMIAPH:
25517 case ARM_BUILTIN_TMIATT:
25518 case ARM_BUILTIN_TMIATB:
25519 case ARM_BUILTIN_TMIABT:
25520 case ARM_BUILTIN_TMIABB:
25521 case ARM_BUILTIN_WQMIABB:
25522 case ARM_BUILTIN_WQMIABT:
25523 case ARM_BUILTIN_WQMIATB:
25524 case ARM_BUILTIN_WQMIATT:
25525 case ARM_BUILTIN_WQMIABBN:
25526 case ARM_BUILTIN_WQMIABTN:
25527 case ARM_BUILTIN_WQMIATBN:
25528 case ARM_BUILTIN_WQMIATTN:
25529 case ARM_BUILTIN_WMIABB:
25530 case ARM_BUILTIN_WMIABT:
25531 case ARM_BUILTIN_WMIATB:
25532 case ARM_BUILTIN_WMIATT:
25533 case ARM_BUILTIN_WMIABBN:
25534 case ARM_BUILTIN_WMIABTN:
25535 case ARM_BUILTIN_WMIATBN:
25536 case ARM_BUILTIN_WMIATTN:
25537 case ARM_BUILTIN_WMIAWBB:
25538 case ARM_BUILTIN_WMIAWBT:
25539 case ARM_BUILTIN_WMIAWTB:
25540 case ARM_BUILTIN_WMIAWTT:
25541 case ARM_BUILTIN_WMIAWBBN:
25542 case ARM_BUILTIN_WMIAWBTN:
25543 case ARM_BUILTIN_WMIAWTBN:
25544 case ARM_BUILTIN_WMIAWTTN:
25545 case ARM_BUILTIN_WSADB:
25546 case ARM_BUILTIN_WSADH:
25547 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25548 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25549 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25550 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25551 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25552 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25553 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25554 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25555 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25556 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25557 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25558 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25559 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25560 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25561 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25562 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25563 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25564 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25565 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25566 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25567 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25568 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25569 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25570 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25571 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25572 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25573 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25574 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25575 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25576 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25577 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25578 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25579 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25580 : CODE_FOR_iwmmxt_wsadh);
25581 arg0 = CALL_EXPR_ARG (exp, 0);
25582 arg1 = CALL_EXPR_ARG (exp, 1);
25583 arg2 = CALL_EXPR_ARG (exp, 2);
25584 op0 = expand_normal (arg0);
25585 op1 = expand_normal (arg1);
25586 op2 = expand_normal (arg2);
25587 tmode = insn_data[icode].operand[0].mode;
25588 mode0 = insn_data[icode].operand[1].mode;
25589 mode1 = insn_data[icode].operand[2].mode;
25590 mode2 = insn_data[icode].operand[3].mode;
25591
25592 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25593 op0 = copy_to_mode_reg (mode0, op0);
25594 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25595 op1 = copy_to_mode_reg (mode1, op1);
25596 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25597 op2 = copy_to_mode_reg (mode2, op2);
25598 if (target == 0
25599 || GET_MODE (target) != tmode
25600 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25601 target = gen_reg_rtx (tmode);
25602 pat = GEN_FCN (icode) (target, op0, op1, op2);
25603 if (! pat)
25604 return 0;
25605 emit_insn (pat);
25606 return target;
25607
25608 case ARM_BUILTIN_WZERO:
25609 target = gen_reg_rtx (DImode);
25610 emit_insn (gen_iwmmxt_clrdi (target));
25611 return target;
25612
25613 case ARM_BUILTIN_WSRLHI:
25614 case ARM_BUILTIN_WSRLWI:
25615 case ARM_BUILTIN_WSRLDI:
25616 case ARM_BUILTIN_WSLLHI:
25617 case ARM_BUILTIN_WSLLWI:
25618 case ARM_BUILTIN_WSLLDI:
25619 case ARM_BUILTIN_WSRAHI:
25620 case ARM_BUILTIN_WSRAWI:
25621 case ARM_BUILTIN_WSRADI:
25622 case ARM_BUILTIN_WRORHI:
25623 case ARM_BUILTIN_WRORWI:
25624 case ARM_BUILTIN_WRORDI:
25625 case ARM_BUILTIN_WSRLH:
25626 case ARM_BUILTIN_WSRLW:
25627 case ARM_BUILTIN_WSRLD:
25628 case ARM_BUILTIN_WSLLH:
25629 case ARM_BUILTIN_WSLLW:
25630 case ARM_BUILTIN_WSLLD:
25631 case ARM_BUILTIN_WSRAH:
25632 case ARM_BUILTIN_WSRAW:
25633 case ARM_BUILTIN_WSRAD:
25634 case ARM_BUILTIN_WRORH:
25635 case ARM_BUILTIN_WRORW:
25636 case ARM_BUILTIN_WRORD:
25637 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
25638 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
25639 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
25640 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
25641 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
25642 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
25643 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
25644 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
25645 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
25646 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
25647 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
25648 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
25649 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
25650 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
25651 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
25652 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
25653 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
25654 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25655 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25656 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25657 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25658 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25659 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25660 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25661 : CODE_FOR_nothing);
25662 arg1 = CALL_EXPR_ARG (exp, 1);
25663 op1 = expand_normal (arg1);
25664 if (GET_MODE (op1) == VOIDmode)
25665 {
25666 imm = INTVAL (op1);
25667 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25668 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25669 && (imm < 0 || imm > 32))
25670 {
25671 if (fcode == ARM_BUILTIN_WRORHI)
25672 error ("the range of count should be 0 to 32; please check the intrinsic _mm_rori_pi16 in your code");
25673 else if (fcode == ARM_BUILTIN_WRORWI)
25674 error ("the range of count should be 0 to 32; please check the intrinsic _mm_rori_pi32 in your code");
25675 else if (fcode == ARM_BUILTIN_WRORH)
25676 error ("the range of count should be 0 to 32; please check the intrinsic _mm_ror_pi16 in your code");
25677 else
25678 error ("the range of count should be 0 to 32; please check the intrinsic _mm_ror_pi32 in your code");
25679 }
25680 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25681 && (imm < 0 || imm > 64))
25682 {
25683 if (fcode == ARM_BUILTIN_WRORDI)
25684 error ("the range of count should be 0 to 64; please check the intrinsic _mm_rori_si64 in your code");
25685 else
25686 error ("the range of count should be 0 to 64; please check the intrinsic _mm_ror_si64 in your code");
25687 }
25688 else if (imm < 0)
25689 {
25690 if (fcode == ARM_BUILTIN_WSRLHI)
25691 error ("the count should be no less than 0; please check the intrinsic _mm_srli_pi16 in your code");
25692 else if (fcode == ARM_BUILTIN_WSRLWI)
25693 error ("the count should be no less than 0; please check the intrinsic _mm_srli_pi32 in your code");
25694 else if (fcode == ARM_BUILTIN_WSRLDI)
25695 error ("the count should be no less than 0; please check the intrinsic _mm_srli_si64 in your code");
25696 else if (fcode == ARM_BUILTIN_WSLLHI)
25697 error ("the count should be no less than 0; please check the intrinsic _mm_slli_pi16 in your code");
25698 else if (fcode == ARM_BUILTIN_WSLLWI)
25699 error ("the count should be no less than 0; please check the intrinsic _mm_slli_pi32 in your code");
25700 else if (fcode == ARM_BUILTIN_WSLLDI)
25701 error ("the count should be no less than 0; please check the intrinsic _mm_slli_si64 in your code");
25702 else if (fcode == ARM_BUILTIN_WSRAHI)
25703 error ("the count should be no less than 0; please check the intrinsic _mm_srai_pi16 in your code");
25704 else if (fcode == ARM_BUILTIN_WSRAWI)
25705 error ("the count should be no less than 0; please check the intrinsic _mm_srai_pi32 in your code");
25706 else if (fcode == ARM_BUILTIN_WSRADI)
25707 error ("the count should be no less than 0; please check the intrinsic _mm_srai_si64 in your code");
25708 else if (fcode == ARM_BUILTIN_WSRLH)
25709 error ("the count should be no less than 0; please check the intrinsic _mm_srl_pi16 in your code");
25710 else if (fcode == ARM_BUILTIN_WSRLW)
25711 error ("the count should be no less than 0; please check the intrinsic _mm_srl_pi32 in your code");
25712 else if (fcode == ARM_BUILTIN_WSRLD)
25713 error ("the count should be no less than 0; please check the intrinsic _mm_srl_si64 in your code");
25714 else if (fcode == ARM_BUILTIN_WSLLH)
25715 error ("the count should be no less than 0; please check the intrinsic _mm_sll_pi16 in your code");
25716 else if (fcode == ARM_BUILTIN_WSLLW)
25717 error ("the count should be no less than 0; please check the intrinsic _mm_sll_pi32 in your code");
25718 else if (fcode == ARM_BUILTIN_WSLLD)
25719 error ("the count should be no less than 0; please check the intrinsic _mm_sll_si64 in your code");
25720 else if (fcode == ARM_BUILTIN_WSRAH)
25721 error ("the count should be no less than 0; please check the intrinsic _mm_sra_pi16 in your code");
25722 else if (fcode == ARM_BUILTIN_WSRAW)
25723 error ("the count should be no less than 0; please check the intrinsic _mm_sra_pi32 in your code");
25724 else
25725 error ("the count should be no less than 0; please check the intrinsic _mm_sra_si64 in your code");
25726 }
25727 }
25728 return arm_expand_binop_builtin (icode, exp, target);
25729
25730 default:
25731 break;
25732 }
25733
25734 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25735 if (d->code == (const enum arm_builtins) fcode)
25736 return arm_expand_binop_builtin (d->icode, exp, target);
25737
25738 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
25739 if (d->code == (const enum arm_builtins) fcode)
25740 return arm_expand_unop_builtin (d->icode, exp, target, 0);
25741
25742 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
25743 if (d->code == (const enum arm_builtins) fcode)
25744 return arm_expand_ternop_builtin (d->icode, exp, target);
25745
25746 /* @@@ Should really do something sensible here. */
25747 return NULL_RTX;
25748 }
25749 \f
25750 /* Return the number (counting from 0) of
25751 the least significant set bit in MASK. */
25752
25753 inline static int
25754 number_of_first_bit_set (unsigned mask)
25755 {
25756 return ctz_hwi (mask);
25757 }
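/* Illustrative example (not from the original source): for a MASK of
   0x28 (binary 101000) the least significant set bit is bit 3, so
   number_of_first_bit_set (0x28) returns 3; thumb_exit uses this to
   discover which register a value has just been popped into.  */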
25758
25759 /* Like emit_multi_reg_push, but allowing for a different set of
25760 registers to be described as saved. MASK is the set of registers
25761 to be saved; REAL_REGS is the set of registers to be described as
25762 saved. If REAL_REGS is 0, only describe the stack adjustment. */
25763
25764 static rtx
25765 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25766 {
25767 unsigned long regno;
25768 rtx par[10], tmp, reg, insn;
25769 int i, j;
25770
25771 /* Build the parallel of the registers actually being stored. */
25772 for (i = 0; mask; ++i, mask &= mask - 1)
25773 {
25774 regno = ctz_hwi (mask);
25775 reg = gen_rtx_REG (SImode, regno);
25776
25777 if (i == 0)
25778 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25779 else
25780 tmp = gen_rtx_USE (VOIDmode, reg);
25781
25782 par[i] = tmp;
25783 }
25784
25785 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25786 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25787 tmp = gen_frame_mem (BLKmode, tmp);
25788 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
25789 par[0] = tmp;
25790
25791 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25792 insn = emit_insn (tmp);
25793
25794 /* Always build the stack adjustment note for unwind info. */
25795 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25796 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
25797 par[0] = tmp;
25798
25799 /* Build the parallel of the registers recorded as saved for unwind. */
25800 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25801 {
25802 regno = ctz_hwi (real_regs);
25803 reg = gen_rtx_REG (SImode, regno);
25804
25805 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25806 tmp = gen_frame_mem (SImode, tmp);
25807 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
25808 RTX_FRAME_RELATED_P (tmp) = 1;
25809 par[j + 1] = tmp;
25810 }
25811
25812 if (j == 0)
25813 tmp = par[0];
25814 else
25815 {
25816 RTX_FRAME_RELATED_P (par[0]) = 1;
25817 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25818 }
25819
25820 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25821
25822 return insn;
25823 }
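/* Usage sketch (illustrative only): when the Thumb-1 prologue stashes a
   high register such as r8 through a low register such as r4, it calls
   thumb1_emit_multi_reg_push (0x0010, 0x0100); the emitted insn is a
   "push {r4}", while the unwind information describes r8 as the register
   saved in that stack slot.  */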
25824
25825 /* Emit code to pop registers from the stack.  F is the
25826 assembly file.  MASK is the registers to pop.  */
25827 static void
25828 thumb_pop (FILE *f, unsigned long mask)
25829 {
25830 int regno;
25831 int lo_mask = mask & 0xFF;
25832 int pushed_words = 0;
25833
25834 gcc_assert (mask);
25835
25836 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25837 {
25838 /* Special case.  Do not generate a POP PC statement here; do it in
25839 thumb_exit ().  */
25840 thumb_exit (f, -1);
25841 return;
25842 }
25843
25844 fprintf (f, "\tpop\t{");
25845
25846 /* Look at the low registers first. */
25847 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25848 {
25849 if (lo_mask & 1)
25850 {
25851 asm_fprintf (f, "%r", regno);
25852
25853 if ((lo_mask & ~1) != 0)
25854 fprintf (f, ", ");
25855
25856 pushed_words++;
25857 }
25858 }
25859
25860 if (mask & (1 << PC_REGNUM))
25861 {
25862 /* Catch popping the PC. */
25863 if (TARGET_INTERWORK || TARGET_BACKTRACE
25864 || crtl->calls_eh_return)
25865 {
25866 /* The PC is never popped directly; instead
25867 it is popped into r3 and then BX is used. */
25868 fprintf (f, "}\n");
25869
25870 thumb_exit (f, -1);
25871
25872 return;
25873 }
25874 else
25875 {
25876 if (mask & 0xFF)
25877 fprintf (f, ", ");
25878
25879 asm_fprintf (f, "%r", PC_REGNUM);
25880 }
25881 }
25882
25883 fprintf (f, "}\n");
25884 }
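/* For illustration (not part of the original source): a MASK of 0x90
   (bits 4 and 7) makes thumb_pop emit "pop {r4, r7}"; if the PC bit were
   also set and no interworking, backtrace or EH return were involved,
   the PC would simply be appended to the register list.  */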
25885
25886 /* Generate code to return from a thumb function.
25887 If 'reg_containing_return_addr' is -1, then the return address is
25888 actually on the stack, at the stack pointer. */
25889 static void
25890 thumb_exit (FILE *f, int reg_containing_return_addr)
25891 {
25892 unsigned regs_available_for_popping;
25893 unsigned regs_to_pop;
25894 int pops_needed;
25895 unsigned available;
25896 unsigned required;
25897 int mode;
25898 int size;
25899 int restore_a4 = FALSE;
25900
25901 /* Compute the registers we need to pop. */
25902 regs_to_pop = 0;
25903 pops_needed = 0;
25904
25905 if (reg_containing_return_addr == -1)
25906 {
25907 regs_to_pop |= 1 << LR_REGNUM;
25908 ++pops_needed;
25909 }
25910
25911 if (TARGET_BACKTRACE)
25912 {
25913 /* Restore the (ARM) frame pointer and stack pointer. */
25914 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25915 pops_needed += 2;
25916 }
25917
25918 /* If there is nothing to pop then just emit the BX instruction and
25919 return. */
25920 if (pops_needed == 0)
25921 {
25922 if (crtl->calls_eh_return)
25923 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25924
25925 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25926 return;
25927 }
25928 /* Otherwise if we are not supporting interworking and we have not created
25929 a backtrace structure and the function was not entered in ARM mode then
25930 just pop the return address straight into the PC. */
25931 else if (!TARGET_INTERWORK
25932 && !TARGET_BACKTRACE
25933 && !is_called_in_ARM_mode (current_function_decl)
25934 && !crtl->calls_eh_return)
25935 {
25936 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25937 return;
25938 }
25939
25940 /* Find out how many of the (return) argument registers we can corrupt. */
25941 regs_available_for_popping = 0;
25942
25943 /* If returning via __builtin_eh_return, the bottom three registers
25944 all contain information needed for the return. */
25945 if (crtl->calls_eh_return)
25946 size = 12;
25947 else
25948 {
25949 /* We can deduce the registers used from the function's
25950 return value.  This is more reliable than examining
25951 df_regs_ever_live_p () because that will be set if the register is
25952 ever used in the function, not just if the register is used
25953 to hold a return value. */
25954
25955 if (crtl->return_rtx != 0)
25956 mode = GET_MODE (crtl->return_rtx);
25957 else
25958 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25959
25960 size = GET_MODE_SIZE (mode);
25961
25962 if (size == 0)
25963 {
25964 /* In a void function we can use any argument register.
25965 In a function that returns a structure on the stack
25966 we can use the second and third argument registers. */
25967 if (mode == VOIDmode)
25968 regs_available_for_popping =
25969 (1 << ARG_REGISTER (1))
25970 | (1 << ARG_REGISTER (2))
25971 | (1 << ARG_REGISTER (3));
25972 else
25973 regs_available_for_popping =
25974 (1 << ARG_REGISTER (2))
25975 | (1 << ARG_REGISTER (3));
25976 }
25977 else if (size <= 4)
25978 regs_available_for_popping =
25979 (1 << ARG_REGISTER (2))
25980 | (1 << ARG_REGISTER (3));
25981 else if (size <= 8)
25982 regs_available_for_popping =
25983 (1 << ARG_REGISTER (3));
25984 }
25985
25986 /* Match registers to be popped with registers into which we pop them. */
25987 for (available = regs_available_for_popping,
25988 required = regs_to_pop;
25989 required != 0 && available != 0;
25990 available &= ~(available & - available),
25991 required &= ~(required & - required))
25992 -- pops_needed;
25993
25994 /* If we have any popping registers left over, remove them. */
25995 if (available > 0)
25996 regs_available_for_popping &= ~available;
25997
25998 /* Otherwise if we need another popping register we can use
25999 the fourth argument register. */
26000 else if (pops_needed)
26001 {
26002 /* If we have not found any free argument registers and
26003 reg a4 contains the return address, we must move it. */
26004 if (regs_available_for_popping == 0
26005 && reg_containing_return_addr == LAST_ARG_REGNUM)
26006 {
26007 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26008 reg_containing_return_addr = LR_REGNUM;
26009 }
26010 else if (size > 12)
26011 {
26012 /* Register a4 is being used to hold part of the return value,
26013 but we have dire need of a free, low register. */
26014 restore_a4 = TRUE;
26015
26016 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26017 }
26018
26019 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26020 {
26021 /* The fourth argument register is available. */
26022 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26023
26024 --pops_needed;
26025 }
26026 }
26027
26028 /* Pop as many registers as we can. */
26029 thumb_pop (f, regs_available_for_popping);
26030
26031 /* Process the registers we popped. */
26032 if (reg_containing_return_addr == -1)
26033 {
26034 /* The return address was popped into the lowest numbered register. */
26035 regs_to_pop &= ~(1 << LR_REGNUM);
26036
26037 reg_containing_return_addr =
26038 number_of_first_bit_set (regs_available_for_popping);
26039
26040 /* Remove this register from the mask of available registers, so that
26041 the return address will not be corrupted by further pops. */
26042 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26043 }
26044
26045 /* If we popped other registers then handle them here. */
26046 if (regs_available_for_popping)
26047 {
26048 int frame_pointer;
26049
26050 /* Work out which register currently contains the frame pointer. */
26051 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26052
26053 /* Move it into the correct place. */
26054 asm_fprintf (f, "\tmov\t%r, %r\n",
26055 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26056
26057 /* (Temporarily) remove it from the mask of popped registers. */
26058 regs_available_for_popping &= ~(1 << frame_pointer);
26059 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26060
26061 if (regs_available_for_popping)
26062 {
26063 int stack_pointer;
26064
26065 /* We popped the stack pointer as well,
26066 find the register that contains it. */
26067 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26068
26069 /* Move it into the stack register. */
26070 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26071
26072 /* At this point we have popped all necessary registers, so
26073 do not worry about restoring regs_available_for_popping
26074 to its correct value:
26075
26076 assert (pops_needed == 0)
26077 assert (regs_available_for_popping == (1 << frame_pointer))
26078 assert (regs_to_pop == (1 << STACK_POINTER)) */
26079 }
26080 else
26081 {
26082 /* Since we have just moved the popped value into the frame
26083 pointer, the popping register is available for reuse, and
26084 we know that we still have the stack pointer left to pop. */
26085 regs_available_for_popping |= (1 << frame_pointer);
26086 }
26087 }
26088
26089 /* If we still have registers left on the stack, but we no longer have
26090 any registers into which we can pop them, then we must move the return
26091 address into the link register and make available the register that
26092 contained it. */
26093 if (regs_available_for_popping == 0 && pops_needed > 0)
26094 {
26095 regs_available_for_popping |= 1 << reg_containing_return_addr;
26096
26097 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26098 reg_containing_return_addr);
26099
26100 reg_containing_return_addr = LR_REGNUM;
26101 }
26102
26103 /* If we have registers left on the stack then pop some more.
26104 We know that at most we will want to pop FP and SP. */
26105 if (pops_needed > 0)
26106 {
26107 int popped_into;
26108 int move_to;
26109
26110 thumb_pop (f, regs_available_for_popping);
26111
26112 /* We have popped either FP or SP.
26113 Move whichever one it is into the correct register. */
26114 popped_into = number_of_first_bit_set (regs_available_for_popping);
26115 move_to = number_of_first_bit_set (regs_to_pop);
26116
26117 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26118
26119 regs_to_pop &= ~(1 << move_to);
26120
26121 --pops_needed;
26122 }
26123
26124 /* If we still have not popped everything then we must have only
26125 had one register available to us and we are now popping the SP. */
26126 if (pops_needed > 0)
26127 {
26128 int popped_into;
26129
26130 thumb_pop (f, regs_available_for_popping);
26131
26132 popped_into = number_of_first_bit_set (regs_available_for_popping);
26133
26134 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26135 /*
26136 assert (regs_to_pop == (1 << STACK_POINTER))
26137 assert (pops_needed == 1)
26138 */
26139 }
26140
26141 /* If necessary restore the a4 register. */
26142 if (restore_a4)
26143 {
26144 if (reg_containing_return_addr != LR_REGNUM)
26145 {
26146 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26147 reg_containing_return_addr = LR_REGNUM;
26148 }
26149
26150 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26151 }
26152
26153 if (crtl->calls_eh_return)
26154 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26155
26156 /* Return to caller. */
26157 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26158 }
26159 \f
26160 /* Scan INSN just before assembler is output for it.
26161 For Thumb-1, we track the status of the condition codes; this
26162 information is used in the cbranchsi4_insn pattern. */
26163 void
26164 thumb1_final_prescan_insn (rtx insn)
26165 {
26166 if (flag_print_asm_name)
26167 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26168 INSN_ADDRESSES (INSN_UID (insn)));
26169 /* Don't overwrite the previous setter when we get to a cbranch. */
26170 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26171 {
26172 enum attr_conds conds;
26173
26174 if (cfun->machine->thumb1_cc_insn)
26175 {
26176 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26177 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26178 CC_STATUS_INIT;
26179 }
26180 conds = get_attr_conds (insn);
26181 if (conds == CONDS_SET)
26182 {
26183 rtx set = single_set (insn);
26184 cfun->machine->thumb1_cc_insn = insn;
26185 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26186 cfun->machine->thumb1_cc_op1 = const0_rtx;
26187 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26188 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26189 {
26190 rtx src1 = XEXP (SET_SRC (set), 1);
26191 if (src1 == const0_rtx)
26192 cfun->machine->thumb1_cc_mode = CCmode;
26193 }
26194 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26195 {
26196 /* Record the src register operand instead of dest because
26197 the cprop_hardreg pass propagates src.  */
26198 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26199 }
26200 }
26201 else if (conds != CONDS_NOCOND)
26202 cfun->machine->thumb1_cc_insn = NULL_RTX;
26203 }
26204
26205 /* Check whether an unexpected far jump is used.  */
26206 if (cfun->machine->lr_save_eliminated
26207 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26208 internal_error ("unexpected thumb1 far jump");
26209 }
26210
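/* A descriptive sketch of the function below (a reading of the code, not
   original text): it returns nonzero if VAL, truncated to 32 bits, is an
   8-bit value shifted left by 0 to 24 bits; for example 0x00ff0000
   (0xff << 16) qualifies and returns 1, while 0x101 does not and
   returns 0.  */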
26211 int
26212 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26213 {
26214 unsigned HOST_WIDE_INT mask = 0xff;
26215 int i;
26216
26217 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26218 if (val == 0) /* XXX */
26219 return 0;
26220
26221 for (i = 0; i < 25; i++)
26222 if ((val & (mask << i)) == val)
26223 return 1;
26224
26225 return 0;
26226 }
26227
26228 /* Returns nonzero if the current function contains,
26229 or might contain, a far jump. */
26230 static int
26231 thumb_far_jump_used_p (void)
26232 {
26233 rtx insn;
26234 bool far_jump = false;
26235 unsigned int func_size = 0;
26236
26237 /* This test is only important for leaf functions. */
26238 /* assert (!leaf_function_p ()); */
26239
26240 /* If we have already decided that far jumps may be used,
26241 do not bother checking again, and always return true even if
26242 it turns out that they are not being used. Once we have made
26243 the decision that far jumps are present (and that hence the link
26244 register will be pushed onto the stack) we cannot go back on it. */
26245 if (cfun->machine->far_jump_used)
26246 return 1;
26247
26248 /* If this function is not being called from the prologue/epilogue
26249 generation code then it must be being called from the
26250 INITIAL_ELIMINATION_OFFSET macro. */
26251 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26252 {
26253 /* In this case we know that we are being asked about the elimination
26254 of the arg pointer register. If that register is not being used,
26255 then there are no arguments on the stack, and we do not have to
26256 worry that a far jump might force the prologue to push the link
26257 register, changing the stack offsets. In this case we can just
26258 return false, since the presence of far jumps in the function will
26259 not affect stack offsets.
26260
26261 If the arg pointer is live (or if it was live, but has now been
26262 eliminated and so set to dead) then we do have to test to see if
26263 the function might contain a far jump. This test can lead to some
26264 false negatives, since before reload is completed the length of
26265 branch instructions is not known, so gcc defaults to returning their
26266 longest length, which in turn sets the far jump attribute to true.
26267
26268 A false negative will not result in bad code being generated, but it
26269 will result in a needless push and pop of the link register. We
26270 hope that this does not occur too often.
26271
26272 If we need doubleword stack alignment this could affect the other
26273 elimination offsets so we can't risk getting it wrong. */
26274 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26275 cfun->machine->arg_pointer_live = 1;
26276 else if (!cfun->machine->arg_pointer_live)
26277 return 0;
26278 }
26279
26280 /* We should not change far_jump_used during or after reload, as there is
26281 no chance to change stack frame layout. */
26282 if (reload_in_progress || reload_completed)
26283 return 0;
26284
26285 /* Check to see if the function contains a branch
26286 insn with the far jump attribute set. */
26287 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26288 {
26289 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26290 {
26291 far_jump = true;
26292 }
26293 func_size += get_attr_length (insn);
26294 }
26295
26296 /* The far_jump attribute is always true for thumb1 before the
26297 shorten_branch pass, so checking the far_jump attribute before
26298 shorten_branch is not very useful.
26299
26300 The following heuristic tries to estimate more accurately whether
26301 a far jump will finally be used.  It is very conservative, as there
26302 is no chance to roll back a decision not to use far jumps.
26303
26304 A Thumb-1 long branch offset is -2048 to 2046.  The worst case is
26305 each 2-byte insn being associated with a 4-byte constant pool entry.
26306 Using function size 2048/3 as the threshold is conservative enough. */
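/* Worked example of the threshold above (illustrative): a function with
   700 bytes of insns is 350 2-byte insns; in the worst case they drag in
   350 * 4 == 1400 bytes of constant pool, a span of 2100 bytes, which is
   beyond the 2046-byte branch reach, and indeed 700 * 3 >= 2048, so far
   jumps are assumed.  */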
26307 if (far_jump)
26308 {
26309 if ((func_size * 3) >= 2048)
26310 {
26311 /* Record the fact that we have decided that
26312 the function does use far jumps. */
26313 cfun->machine->far_jump_used = 1;
26314 return 1;
26315 }
26316 }
26317
26318 return 0;
26319 }
26320
26321 /* Return nonzero if FUNC must be entered in ARM mode. */
26322 int
26323 is_called_in_ARM_mode (tree func)
26324 {
26325 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26326
26327 /* Ignore the problem about functions whose address is taken. */
26328 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26329 return TRUE;
26330
26331 #ifdef ARM_PE
26332 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26333 #else
26334 return FALSE;
26335 #endif
26336 }
26337
26338 /* Given the stack offsets and register mask in OFFSETS, decide how
26339 many additional registers to push instead of subtracting a constant
26340 from SP. For epilogues the principle is the same except we use pop.
26341 FOR_PROLOGUE indicates which we're generating. */
26342 static int
26343 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26344 {
26345 HOST_WIDE_INT amount;
26346 unsigned long live_regs_mask = offsets->saved_regs_mask;
26347 /* Extract a mask of the ones we can give to the Thumb's push/pop
26348 instruction. */
26349 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26350 /* Then count how many other high registers will need to be pushed. */
26351 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26352 int n_free, reg_base, size;
26353
26354 if (!for_prologue && frame_pointer_needed)
26355 amount = offsets->locals_base - offsets->saved_regs;
26356 else
26357 amount = offsets->outgoing_args - offsets->saved_regs;
26358
26359 /* If the stack frame size is 512 exactly, we can save one load
26360 instruction, which should make this a win even when optimizing
26361 for speed. */
26362 if (!optimize_size && amount != 512)
26363 return 0;
26364
26365 /* Can't do this if there are high registers to push. */
26366 if (high_regs_pushed != 0)
26367 return 0;
26368
26369 /* Shouldn't do it in the prologue if no registers would normally
26370 be pushed at all. In the epilogue, also allow it if we'll have
26371 a pop insn for the PC. */
26372 if (l_mask == 0
26373 && (for_prologue
26374 || TARGET_BACKTRACE
26375 || (live_regs_mask & 1 << LR_REGNUM) == 0
26376 || TARGET_INTERWORK
26377 || crtl->args.pretend_args_size != 0))
26378 return 0;
26379
26380 /* Don't do this if thumb_expand_prologue wants to emit instructions
26381 between the push and the stack frame allocation. */
26382 if (for_prologue
26383 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26384 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26385 return 0;
26386
26387 reg_base = 0;
26388 n_free = 0;
26389 if (!for_prologue)
26390 {
26391 size = arm_size_return_regs ();
26392 reg_base = ARM_NUM_INTS (size);
26393 live_regs_mask >>= reg_base;
26394 }
26395
26396 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26397 && (for_prologue || call_used_regs[reg_base + n_free]))
26398 {
26399 live_regs_mask >>= 1;
26400 n_free++;
26401 }
26402
26403 if (n_free == 0)
26404 return 0;
26405 gcc_assert (amount / 4 * 4 == amount);
26406
26407 if (amount >= 512 && (amount - n_free * 4) < 512)
26408 return (amount - 508) / 4;
26409 if (amount <= n_free * 4)
26410 return amount / 4;
26411 return 0;
26412 }
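/* Illustrative example: with a 512-byte frame and two free low registers
   (n_free == 2), amount >= 512 and amount - 8 < 512, so the function
   returns (512 - 508) / 4 == 1; pushing one extra register shrinks the
   remaining adjustment to 508 bytes, which fits a single immediate
   "sub sp" (or "add sp" in the epilogue).  */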
26413
26414 /* The bits which aren't usefully expanded as rtl. */
26415 const char *
26416 thumb1_unexpanded_epilogue (void)
26417 {
26418 arm_stack_offsets *offsets;
26419 int regno;
26420 unsigned long live_regs_mask = 0;
26421 int high_regs_pushed = 0;
26422 int extra_pop;
26423 int had_to_push_lr;
26424 int size;
26425
26426 if (cfun->machine->return_used_this_function != 0)
26427 return "";
26428
26429 if (IS_NAKED (arm_current_func_type ()))
26430 return "";
26431
26432 offsets = arm_get_frame_offsets ();
26433 live_regs_mask = offsets->saved_regs_mask;
26434 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26435
26436 /* We can deduce the registers used from the function's return value.
26437 This is more reliable than examining df_regs_ever_live_p () because that
26438 will be set if the register is ever used in the function, not just if
26439 the register is used to hold a return value. */
26440 size = arm_size_return_regs ();
26441
26442 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26443 if (extra_pop > 0)
26444 {
26445 unsigned long extra_mask = (1 << extra_pop) - 1;
26446 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26447 }
26448
26449 /* The prologue may have pushed some high registers to use as
26450 work registers.  E.g. the testsuite file:
26451 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26452 compiles to produce:
26453 push {r4, r5, r6, r7, lr}
26454 mov r7, r9
26455 mov r6, r8
26456 push {r6, r7}
26457 as part of the prologue.  We have to undo that pushing here. */
26458
26459 if (high_regs_pushed)
26460 {
26461 unsigned long mask = live_regs_mask & 0xff;
26462 int next_hi_reg;
26463
26464 /* The available low registers depend on the size of the value we are
26465 returning. */
26466 if (size <= 12)
26467 mask |= 1 << 3;
26468 if (size <= 8)
26469 mask |= 1 << 2;
26470
26471 if (mask == 0)
26472 /* Oh dear! We have no low registers into which we can pop
26473 high registers! */
26474 internal_error
26475 ("no low registers available for popping high registers");
26476
26477 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26478 if (live_regs_mask & (1 << next_hi_reg))
26479 break;
26480
26481 while (high_regs_pushed)
26482 {
26483 /* Find lo register(s) into which the high register(s) can
26484 be popped. */
26485 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26486 {
26487 if (mask & (1 << regno))
26488 high_regs_pushed--;
26489 if (high_regs_pushed == 0)
26490 break;
26491 }
26492
26493 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26494
26495 /* Pop the values into the low register(s). */
26496 thumb_pop (asm_out_file, mask);
26497
26498 /* Move the value(s) into the high registers. */
26499 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26500 {
26501 if (mask & (1 << regno))
26502 {
26503 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26504 regno);
26505
26506 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26507 if (live_regs_mask & (1 << next_hi_reg))
26508 break;
26509 }
26510 }
26511 }
26512 live_regs_mask &= ~0x0f00;
26513 }
26514
26515 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26516 live_regs_mask &= 0xff;
26517
26518 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26519 {
26520 /* Pop the return address into the PC. */
26521 if (had_to_push_lr)
26522 live_regs_mask |= 1 << PC_REGNUM;
26523
26524 /* Either no argument registers were pushed or a backtrace
26525 structure was created which includes an adjusted stack
26526 pointer, so just pop everything. */
26527 if (live_regs_mask)
26528 thumb_pop (asm_out_file, live_regs_mask);
26529
26530 /* We have either just popped the return address into the
26531 PC or it was kept in LR for the entire function.
26532 Note that thumb_pop has already called thumb_exit if the
26533 PC was in the list. */
26534 if (!had_to_push_lr)
26535 thumb_exit (asm_out_file, LR_REGNUM);
26536 }
26537 else
26538 {
26539 /* Pop everything but the return address. */
26540 if (live_regs_mask)
26541 thumb_pop (asm_out_file, live_regs_mask);
26542
26543 if (had_to_push_lr)
26544 {
26545 if (size > 12)
26546 {
26547 /* We have no free low regs, so save one. */
26548 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26549 LAST_ARG_REGNUM);
26550 }
26551
26552 /* Get the return address into a temporary register. */
26553 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26554
26555 if (size > 12)
26556 {
26557 /* Move the return address to lr. */
26558 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26559 LAST_ARG_REGNUM);
26560 /* Restore the low register. */
26561 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26562 IP_REGNUM);
26563 regno = LR_REGNUM;
26564 }
26565 else
26566 regno = LAST_ARG_REGNUM;
26567 }
26568 else
26569 regno = LR_REGNUM;
26570
26571 /* Remove the argument registers that were pushed onto the stack. */
26572 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26573 SP_REGNUM, SP_REGNUM,
26574 crtl->args.pretend_args_size);
26575
26576 thumb_exit (asm_out_file, regno);
26577 }
26578
26579 return "";
26580 }
26581
26582 /* Functions to save and restore machine-specific function data. */
26583 static struct machine_function *
26584 arm_init_machine_status (void)
26585 {
26586 struct machine_function *machine;
26587 machine = ggc_alloc_cleared_machine_function ();
26588
26589 #if ARM_FT_UNKNOWN != 0
26590 machine->func_type = ARM_FT_UNKNOWN;
26591 #endif
26592 return machine;
26593 }
26594
26595 /* Return an RTX indicating where the return address to the
26596 calling function can be found. */
26597 rtx
26598 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26599 {
26600 if (count != 0)
26601 return NULL_RTX;
26602
26603 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26604 }
26605
26606 /* Do anything needed before RTL is emitted for each function. */
26607 void
26608 arm_init_expanders (void)
26609 {
26610 /* Arrange to initialize and mark the machine per-function status. */
26611 init_machine_status = arm_init_machine_status;
26612
26613 /* This is to stop the combine pass optimizing away the alignment
26614 adjustment of va_arg. */
26615 /* ??? It is claimed that this should not be necessary. */
26616 if (cfun)
26617 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26618 }
26619
26620
26621 /* Like arm_compute_initial_elimination_offset.  Simpler because there
26622 isn't an ABI-specified frame pointer for Thumb.  Instead, we set it
26623 to point at the base of the local variables after static stack
26624 space for a function has been allocated. */
26625
26626 HOST_WIDE_INT
26627 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26628 {
26629 arm_stack_offsets *offsets;
26630
26631 offsets = arm_get_frame_offsets ();
26632
26633 switch (from)
26634 {
26635 case ARG_POINTER_REGNUM:
26636 switch (to)
26637 {
26638 case STACK_POINTER_REGNUM:
26639 return offsets->outgoing_args - offsets->saved_args;
26640
26641 case FRAME_POINTER_REGNUM:
26642 return offsets->soft_frame - offsets->saved_args;
26643
26644 case ARM_HARD_FRAME_POINTER_REGNUM:
26645 return offsets->saved_regs - offsets->saved_args;
26646
26647 case THUMB_HARD_FRAME_POINTER_REGNUM:
26648 return offsets->locals_base - offsets->saved_args;
26649
26650 default:
26651 gcc_unreachable ();
26652 }
26653 break;
26654
26655 case FRAME_POINTER_REGNUM:
26656 switch (to)
26657 {
26658 case STACK_POINTER_REGNUM:
26659 return offsets->outgoing_args - offsets->soft_frame;
26660
26661 case ARM_HARD_FRAME_POINTER_REGNUM:
26662 return offsets->saved_regs - offsets->soft_frame;
26663
26664 case THUMB_HARD_FRAME_POINTER_REGNUM:
26665 return offsets->locals_base - offsets->soft_frame;
26666
26667 default:
26668 gcc_unreachable ();
26669 }
26670 break;
26671
26672 default:
26673 gcc_unreachable ();
26674 }
26675 }
26676
26677 /* Generate the function's prologue. */
26678
26679 void
26680 thumb1_expand_prologue (void)
26681 {
26682 rtx insn;
26683
26684 HOST_WIDE_INT amount;
26685 arm_stack_offsets *offsets;
26686 unsigned long func_type;
26687 int regno;
26688 unsigned long live_regs_mask;
26689 unsigned long l_mask;
26690 unsigned high_regs_pushed = 0;
26691
26692 func_type = arm_current_func_type ();
26693
26694 /* Naked functions don't have prologues. */
26695 if (IS_NAKED (func_type))
26696 return;
26697
26698 if (IS_INTERRUPT (func_type))
26699 {
26700 error ("interrupt Service Routines cannot be coded in Thumb mode");
26701 return;
26702 }
26703
26704 if (is_called_in_ARM_mode (current_function_decl))
26705 emit_insn (gen_prologue_thumb1_interwork ());
26706
26707 offsets = arm_get_frame_offsets ();
26708 live_regs_mask = offsets->saved_regs_mask;
26709
26710 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26711 l_mask = live_regs_mask & 0x40ff;
26712 /* Then count how many other high registers will need to be pushed. */
26713 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26714
26715 if (crtl->args.pretend_args_size)
26716 {
26717 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26718
26719 if (cfun->machine->uses_anonymous_args)
26720 {
26721 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26722 unsigned long mask;
26723
26724 mask = 1ul << (LAST_ARG_REGNUM + 1);
26725 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26726
26727 insn = thumb1_emit_multi_reg_push (mask, 0);
26728 }
26729 else
26730 {
26731 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26732 stack_pointer_rtx, x));
26733 }
26734 RTX_FRAME_RELATED_P (insn) = 1;
26735 }
26736
26737 if (TARGET_BACKTRACE)
26738 {
26739 HOST_WIDE_INT offset = 0;
26740 unsigned work_register;
26741 rtx work_reg, x, arm_hfp_rtx;
26742
26743 /* We have been asked to create a stack backtrace structure.
26744 The code looks like this:
26745
26746 0 .align 2
26747 0 func:
26748 0 sub SP, #16 Reserve space for 4 registers.
26749 2 push {R7} Push low registers.
26750 4 add R7, SP, #20 Get the stack pointer before the push.
26751 6 str R7, [SP, #8] Store the stack pointer
26752 (before reserving the space).
26753 8 mov R7, PC Get hold of the start of this code + 12.
26754 10 str R7, [SP, #16] Store it.
26755 12 mov R7, FP Get hold of the current frame pointer.
26756 14 str R7, [SP, #4] Store it.
26757 16 mov R7, LR Get hold of the current return address.
26758 18 str R7, [SP, #12] Store it.
26759 20 add R7, SP, #16 Point at the start of the
26760 backtrace structure.
26761 22 mov FP, R7 Put this value into the frame pointer. */
26762
26763 work_register = thumb_find_work_register (live_regs_mask);
26764 work_reg = gen_rtx_REG (SImode, work_register);
26765 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26766
26767 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26768 stack_pointer_rtx, GEN_INT (-16)));
26769 RTX_FRAME_RELATED_P (insn) = 1;
26770
26771 if (l_mask)
26772 {
26773 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26774 RTX_FRAME_RELATED_P (insn) = 1;
26775
26776 offset = bit_count (l_mask) * UNITS_PER_WORD;
26777 }
26778
26779 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26780 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26781
26782 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26783 x = gen_frame_mem (SImode, x);
26784 emit_move_insn (x, work_reg);
26785
26786 /* Make sure that the instruction fetching the PC is in the right place
26787 to calculate "start of backtrace creation code + 12". */
26788 /* ??? The stores using the common WORK_REG ought to be enough to
26789 prevent the scheduler from doing anything weird. Failing that
26790 we could always move all of the following into an UNSPEC_VOLATILE. */
26791 if (l_mask)
26792 {
26793 x = gen_rtx_REG (SImode, PC_REGNUM);
26794 emit_move_insn (work_reg, x);
26795
26796 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26797 x = gen_frame_mem (SImode, x);
26798 emit_move_insn (x, work_reg);
26799
26800 emit_move_insn (work_reg, arm_hfp_rtx);
26801
26802 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26803 x = gen_frame_mem (SImode, x);
26804 emit_move_insn (x, work_reg);
26805 }
26806 else
26807 {
26808 emit_move_insn (work_reg, arm_hfp_rtx);
26809
26810 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26811 x = gen_frame_mem (SImode, x);
26812 emit_move_insn (x, work_reg);
26813
26814 x = gen_rtx_REG (SImode, PC_REGNUM);
26815 emit_move_insn (work_reg, x);
26816
26817 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26818 x = gen_frame_mem (SImode, x);
26819 emit_move_insn (x, work_reg);
26820 }
26821
26822 x = gen_rtx_REG (SImode, LR_REGNUM);
26823 emit_move_insn (work_reg, x);
26824
26825 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26826 x = gen_frame_mem (SImode, x);
26827 emit_move_insn (x, work_reg);
26828
26829 x = GEN_INT (offset + 12);
26830 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26831
26832 emit_move_insn (arm_hfp_rtx, work_reg);
26833 }
26834 /* Optimization: If we are not pushing any low registers but we are going
26835 to push some high registers then delay our first push. This will just
26836 be a push of LR and we can combine it with the push of the first high
26837 register. */
26838 else if ((l_mask & 0xff) != 0
26839 || (high_regs_pushed == 0 && l_mask))
26840 {
26841 unsigned long mask = l_mask;
26842 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
26843 insn = thumb1_emit_multi_reg_push (mask, mask);
26844 RTX_FRAME_RELATED_P (insn) = 1;
26845 }
26846
26847 if (high_regs_pushed)
26848 {
26849 unsigned pushable_regs;
26850 unsigned next_hi_reg;
26851 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26852 : crtl->args.info.nregs;
26853 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26854
26855 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26856 if (live_regs_mask & (1 << next_hi_reg))
26857 break;
26858
26859 /* Here we need to mask out registers used for passing arguments,
26860 even if they could be pushed.  This avoids using them to stash the
26861 high registers, since such stores could clobber argument values that are still needed.  */
26862 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
26863
26864 if (pushable_regs == 0)
26865 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26866
26867 while (high_regs_pushed > 0)
26868 {
26869 unsigned long real_regs_mask = 0;
26870
26871 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
26872 {
26873 if (pushable_regs & (1 << regno))
26874 {
26875 emit_move_insn (gen_rtx_REG (SImode, regno),
26876 gen_rtx_REG (SImode, next_hi_reg));
26877
26878 high_regs_pushed --;
26879 real_regs_mask |= (1 << next_hi_reg);
26880
26881 if (high_regs_pushed)
26882 {
26883 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26884 next_hi_reg --)
26885 if (live_regs_mask & (1 << next_hi_reg))
26886 break;
26887 }
26888 else
26889 {
26890 pushable_regs &= ~((1 << regno) - 1);
26891 break;
26892 }
26893 }
26894 }
26895
26896 /* If we had to find a work register and we have not yet
26897 saved the LR then add it to the list of regs to push. */
26898 if (l_mask == (1 << LR_REGNUM))
26899 {
26900 pushable_regs |= l_mask;
26901 real_regs_mask |= l_mask;
26902 l_mask = 0;
26903 }
26904
26905 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
26906 RTX_FRAME_RELATED_P (insn) = 1;
26907 }
26908 }
26909
26910 /* Load the pic register before setting the frame pointer,
26911 so we can use r7 as a temporary work register. */
26912 if (flag_pic && arm_pic_register != INVALID_REGNUM)
26913 arm_load_pic_register (live_regs_mask);
26914
26915 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26916 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26917 stack_pointer_rtx);
26918
26919 if (flag_stack_usage_info)
26920 current_function_static_stack_size
26921 = offsets->outgoing_args - offsets->saved_args;
26922
26923 amount = offsets->outgoing_args - offsets->saved_regs;
26924 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26925 if (amount)
26926 {
26927 if (amount < 512)
26928 {
26929 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26930 GEN_INT (- amount)));
26931 RTX_FRAME_RELATED_P (insn) = 1;
26932 }
26933 else
26934 {
26935 rtx reg, dwarf;
26936
26937 /* The stack decrement is too big for an immediate value in a single
26938 insn. In theory we could issue multiple subtracts, but after
26939 three of them it becomes more space efficient to place the full
26940 value in the constant pool and load into a register. (Also the
26941 ARM debugger really likes to see only one stack decrement per
26942 function). So instead we look for a scratch register into which
26943 we can load the decrement, and then we subtract this from the
26944 stack pointer. Unfortunately on the thumb the only available
26945 scratch registers are the argument registers, and we cannot use
26946 these as they may hold arguments to the function. Instead we
26947 attempt to locate a call preserved register which is used by this
26948 function. If we can find one, then we know that it will have
26949 been pushed at the start of the prologue and so we can corrupt
26950 it now. */
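/* Sketch of the resulting sequence (illustrative; the register chosen
   depends on which call-saved low register is live), for a 1024-byte
   frame using r4:
       ldr   r4, .Lconst   @ .Lconst holds -1024 in the literal pool
       add   sp, r4
   plus a REG_FRAME_RELATED_EXPR note recording sp = sp - 1024 for the
   unwinder.  */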
26951 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26952 if (live_regs_mask & (1 << regno))
26953 break;
26954
26955 gcc_assert (regno <= LAST_LO_REGNUM);
26956
26957 reg = gen_rtx_REG (SImode, regno);
26958
26959 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26960
26961 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26962 stack_pointer_rtx, reg));
26963
26964 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26965 plus_constant (Pmode, stack_pointer_rtx,
26966 -amount));
26967 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26968 RTX_FRAME_RELATED_P (insn) = 1;
26969 }
26970 }
26971
26972 if (frame_pointer_needed)
26973 thumb_set_frame_pointer (offsets);
26974
26975 /* If we are profiling, make sure no instructions are scheduled before
26976 the call to mcount. Similarly if the user has requested no
26977 scheduling in the prolog. Similarly if we want non-call exceptions
26978 using the EABI unwinder, to prevent faulting instructions from being
26979 swapped with a stack adjustment. */
26980 if (crtl->profile || !TARGET_SCHED_PROLOG
26981 || (arm_except_unwind_info (&global_options) == UI_TARGET
26982 && cfun->can_throw_non_call_exceptions))
26983 emit_insn (gen_blockage ());
26984
26985 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26986 if (live_regs_mask & 0xff)
26987 cfun->machine->lr_save_eliminated = 0;
26988 }
26989
26990 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
26991 single POP instruction can be generated.  LR should be replaced by PC.
26992 All the checks required are already done by USE_RETURN_INSN ().  Hence,
26993 all we really need to check here is whether a single register or
26994 multiple registers are to be returned.  */
26995 void
26996 thumb2_expand_return (bool simple_return)
26997 {
26998 int i, num_regs;
26999 unsigned long saved_regs_mask;
27000 arm_stack_offsets *offsets;
27001
27002 offsets = arm_get_frame_offsets ();
27003 saved_regs_mask = offsets->saved_regs_mask;
27004
27005 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
27006 if (saved_regs_mask & (1 << i))
27007 num_regs++;
27008
27009 if (!simple_return && saved_regs_mask)
27010 {
27011 if (num_regs == 1)
27012 {
27013 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27014 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27015 rtx addr = gen_rtx_MEM (SImode,
27016 gen_rtx_POST_INC (SImode,
27017 stack_pointer_rtx));
27018 set_mem_alias_set (addr, get_frame_alias_set ());
27019 XVECEXP (par, 0, 0) = ret_rtx;
27020 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27021 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27022 emit_jump_insn (par);
27023 }
27024 else
27025 {
27026 saved_regs_mask &= ~ (1 << LR_REGNUM);
27027 saved_regs_mask |= (1 << PC_REGNUM);
27028 arm_emit_multi_reg_pop (saved_regs_mask);
27029 }
27030 }
27031 else
27032 {
27033 emit_jump_insn (simple_return_rtx);
27034 }
27035 }
27036
27037 void
27038 thumb1_expand_epilogue (void)
27039 {
27040 HOST_WIDE_INT amount;
27041 arm_stack_offsets *offsets;
27042 int regno;
27043
27044 /* Naked functions don't have epilogues.  */
27045 if (IS_NAKED (arm_current_func_type ()))
27046 return;
27047
27048 offsets = arm_get_frame_offsets ();
27049 amount = offsets->outgoing_args - offsets->saved_regs;
27050
27051 if (frame_pointer_needed)
27052 {
27053 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27054 amount = offsets->locals_base - offsets->saved_regs;
27055 }
27056 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27057
27058 gcc_assert (amount >= 0);
27059 if (amount)
27060 {
27061 emit_insn (gen_blockage ());
27062
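/* A Thumb-1 "add sp, #imm" only encodes a word-aligned immediate up to 508,
   so any word-aligned amount below 512 fits in a single instruction;
   larger amounts are built in a scratch register first (sketch of the
   rationale, assuming the usual Thumb-1 encodings).  */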
27063 if (amount < 512)
27064 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27065 GEN_INT (amount)));
27066 else
27067 {
27068 /* r3 is always free in the epilogue. */
27069 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27070
27071 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27072 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27073 }
27074 }
27075
27076 /* Emit a USE (stack_pointer_rtx), so that
27077 the stack adjustment will not be deleted. */
27078 emit_insn (gen_force_register_use (stack_pointer_rtx));
27079
27080 if (crtl->profile || !TARGET_SCHED_PROLOG)
27081 emit_insn (gen_blockage ());
27082
27083 /* Emit a clobber for each register that will be restored in the epilogue,
27084 so that flow2 will get register lifetimes correct. */
27085 for (regno = 0; regno < 13; regno++)
27086 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27087 emit_clobber (gen_rtx_REG (SImode, regno));
27088
27089 if (! df_regs_ever_live_p (LR_REGNUM))
27090 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27091 }
27092
27093 /* Epilogue code for APCS frame. */
27094 static void
27095 arm_expand_epilogue_apcs_frame (bool really_return)
27096 {
27097 unsigned long func_type;
27098 unsigned long saved_regs_mask;
27099 int num_regs = 0;
27100 int i;
27101 int floats_from_frame = 0;
27102 arm_stack_offsets *offsets;
27103
27104 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27105 func_type = arm_current_func_type ();
27106
27107 /* Get frame offsets for ARM. */
27108 offsets = arm_get_frame_offsets ();
27109 saved_regs_mask = offsets->saved_regs_mask;
27110
27111 /* Find the offset of the floating-point save area in the frame. */
27112 floats_from_frame
27113 = (offsets->saved_args
27114 + arm_compute_static_chain_stack_bytes ()
27115 - offsets->frame);
27116
27117 /* Compute how many core registers are saved and how far away the floats are. */
27118 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27119 if (saved_regs_mask & (1 << i))
27120 {
27121 num_regs++;
27122 floats_from_frame += 4;
27123 }
27124
27125 if (TARGET_HARD_FLOAT && TARGET_VFP)
27126 {
27127 int start_reg;
27128 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27129
27130 /* The offset is from IP_REGNUM. */
27131 int saved_size = arm_get_vfp_saved_size ();
27132 if (saved_size > 0)
27133 {
27134 rtx insn;
27135 floats_from_frame += saved_size;
27136 insn = emit_insn (gen_addsi3 (ip_rtx,
27137 hard_frame_pointer_rtx,
27138 GEN_INT (-floats_from_frame)));
27139 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27140 ip_rtx, hard_frame_pointer_rtx);
27141 }
27142
27143 /* Generate VFP register multi-pop. */
27144 start_reg = FIRST_VFP_REGNUM;
27145
27146 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27147 /* Look for a case where a reg does not need restoring. */
27148 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27149 && (!df_regs_ever_live_p (i + 1)
27150 || call_used_regs[i + 1]))
27151 {
27152 if (start_reg != i)
27153 arm_emit_vfp_multi_reg_pop (start_reg,
27154 (i - start_reg) / 2,
27155 gen_rtx_REG (SImode,
27156 IP_REGNUM));
27157 start_reg = i + 2;
27158 }
27159
27160 /* Restore the remaining regs that we have discovered (or possibly
27161 even all of them, if the conditional in the for loop never
27162 fired). */
27163 if (start_reg != i)
27164 arm_emit_vfp_multi_reg_pop (start_reg,
27165 (i - start_reg) / 2,
27166 gen_rtx_REG (SImode, IP_REGNUM));
27167 }
27168
27169 if (TARGET_IWMMXT)
27170 {
27171 /* The frame pointer is guaranteed to be non-double-word aligned, as
27172 it is set to double-word-aligned old_stack_pointer - 4. */
27173 rtx insn;
27174 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27175
27176 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27177 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27178 {
27179 rtx addr = gen_frame_mem (V2SImode,
27180 plus_constant (Pmode, hard_frame_pointer_rtx,
27181 - lrm_count * 4));
27182 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27183 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27184 gen_rtx_REG (V2SImode, i),
27185 NULL_RTX);
27186 lrm_count += 2;
27187 }
27188 }
27189
27190 /* saved_regs_mask should contain IP, which holds the old stack pointer
27191 saved when the frame was created. Since SP and IP are adjacent registers,
27192 we can restore the value directly into SP. */
27193 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27194 saved_regs_mask &= ~(1 << IP_REGNUM);
27195 saved_regs_mask |= (1 << SP_REGNUM);
27196
27197 /* There are two registers left in saved_regs_mask - LR and PC. We
27198 only need to restore LR (the return address), but to
27199 save time we can load it directly into PC, unless we need a
27200 special function exit sequence, or we are not really returning. */
27201 if (really_return
27202 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27203 && !crtl->calls_eh_return)
27204 /* Delete LR from the register mask, so that LR on
27205 the stack is loaded into the PC in the register mask. */
27206 saved_regs_mask &= ~(1 << LR_REGNUM);
27207 else
27208 saved_regs_mask &= ~(1 << PC_REGNUM);
27209
27210 num_regs = bit_count (saved_regs_mask);
27211 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27212 {
27213 rtx insn;
27214 emit_insn (gen_blockage ());
27215 /* Unwind the stack to just below the saved registers. */
27216 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27217 hard_frame_pointer_rtx,
27218 GEN_INT (- 4 * num_regs)));
27219
27220 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27221 stack_pointer_rtx, hard_frame_pointer_rtx);
27222 }
27223
27224 arm_emit_multi_reg_pop (saved_regs_mask);
27225
27226 if (IS_INTERRUPT (func_type))
27227 {
27228 /* Interrupt handlers will have pushed the
27229 IP onto the stack, so restore it now. */
27230 rtx insn;
27231 rtx addr = gen_rtx_MEM (SImode,
27232 gen_rtx_POST_INC (SImode,
27233 stack_pointer_rtx));
27234 set_mem_alias_set (addr, get_frame_alias_set ());
27235 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27236 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27237 gen_rtx_REG (SImode, IP_REGNUM),
27238 NULL_RTX);
27239 }
27240
27241 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27242 return;
27243
27244 if (crtl->calls_eh_return)
27245 emit_insn (gen_addsi3 (stack_pointer_rtx,
27246 stack_pointer_rtx,
27247 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27248
27249 if (IS_STACKALIGN (func_type))
27250 /* Restore the original stack pointer. Before prologue, the stack was
27251 realigned and the original stack pointer saved in r0. For details,
27252 see comment in arm_expand_prologue. */
27253 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27254
27255 emit_jump_insn (simple_return_rtx);
27256 }
27257
27258 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27259 function is not a sibcall. */
27260 void
27261 arm_expand_epilogue (bool really_return)
27262 {
27263 unsigned long func_type;
27264 unsigned long saved_regs_mask;
27265 int num_regs = 0;
27266 int i;
27267 int amount;
27268 arm_stack_offsets *offsets;
27269
27270 func_type = arm_current_func_type ();
27271
27272 /* Naked functions don't have an epilogue. Hence, generate the return pattern
27273 and let output_return_instruction take care of any instruction emission. */
27274 if (IS_NAKED (func_type)
27275 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27276 {
27277 if (really_return)
27278 emit_jump_insn (simple_return_rtx);
27279 return;
27280 }
27281
27282 /* If we are throwing an exception, then we really must be doing a
27283 return, so we can't tail-call. */
27284 gcc_assert (!crtl->calls_eh_return || really_return);
27285
27286 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27287 {
27288 arm_expand_epilogue_apcs_frame (really_return);
27289 return;
27290 }
27291
27292 /* Get frame offsets for ARM. */
27293 offsets = arm_get_frame_offsets ();
27294 saved_regs_mask = offsets->saved_regs_mask;
27295 num_regs = bit_count (saved_regs_mask);
27296
27297 if (frame_pointer_needed)
27298 {
27299 rtx insn;
27300 /* Restore stack pointer if necessary. */
27301 if (TARGET_ARM)
27302 {
27303 /* In ARM mode, the frame pointer points to the first saved register.
27304 Restore the stack pointer to the last saved register. */
27305 amount = offsets->frame - offsets->saved_regs;
27306
27307 /* Force out any pending memory operations that reference stacked data
27308 before stack de-allocation occurs. */
27309 emit_insn (gen_blockage ());
27310 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27311 hard_frame_pointer_rtx,
27312 GEN_INT (amount)));
27313 arm_add_cfa_adjust_cfa_note (insn, amount,
27314 stack_pointer_rtx,
27315 hard_frame_pointer_rtx);
27316
27317 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27318 deleted. */
27319 emit_insn (gen_force_register_use (stack_pointer_rtx));
27320 }
27321 else
27322 {
27323 /* In Thumb-2 mode, the frame pointer points to the last saved
27324 register. */
27325 amount = offsets->locals_base - offsets->saved_regs;
27326 if (amount)
27327 {
27328 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27329 hard_frame_pointer_rtx,
27330 GEN_INT (amount)));
27331 arm_add_cfa_adjust_cfa_note (insn, amount,
27332 hard_frame_pointer_rtx,
27333 hard_frame_pointer_rtx);
27334 }
27335
27336 /* Force out any pending memory operations that reference stacked data
27337 before stack de-allocation occurs. */
27338 emit_insn (gen_blockage ());
27339 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27340 hard_frame_pointer_rtx));
27341 arm_add_cfa_adjust_cfa_note (insn, 0,
27342 stack_pointer_rtx,
27343 hard_frame_pointer_rtx);
27344 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27345 deleted. */
27346 emit_insn (gen_force_register_use (stack_pointer_rtx));
27347 }
27348 }
27349 else
27350 {
27351 /* Pop off outgoing args and local frame to adjust stack pointer to
27352 last saved register. */
27353 amount = offsets->outgoing_args - offsets->saved_regs;
27354 if (amount)
27355 {
27356 rtx tmp;
27357 /* Force out any pending memory operations that reference stacked data
27358 before stack de-allocation occurs. */
27359 emit_insn (gen_blockage ());
27360 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27361 stack_pointer_rtx,
27362 GEN_INT (amount)));
27363 arm_add_cfa_adjust_cfa_note (tmp, amount,
27364 stack_pointer_rtx, stack_pointer_rtx);
27365 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27366 not deleted. */
27367 emit_insn (gen_force_register_use (stack_pointer_rtx));
27368 }
27369 }
27370
27371 if (TARGET_HARD_FLOAT && TARGET_VFP)
27372 {
27373 /* Generate VFP register multi-pop. */
27374 int end_reg = LAST_VFP_REGNUM + 1;
27375
27376 /* Scan the registers in reverse order. We need to match
27377 any groupings made in the prologue and generate matching
27378 vldm operations. We need to match the groups because,
27379 unlike pop, vldm can only restore consecutive registers. */
27380 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27381 /* Look for a case where a reg does not need restoring. */
27382 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27383 && (!df_regs_ever_live_p (i + 1)
27384 || call_used_regs[i + 1]))
27385 {
27386 /* Restore the regs discovered so far (from reg+2 to
27387 end_reg). */
27388 if (end_reg > i + 2)
27389 arm_emit_vfp_multi_reg_pop (i + 2,
27390 (end_reg - (i + 2)) / 2,
27391 stack_pointer_rtx);
27392 end_reg = i;
27393 }
27394
27395 /* Restore the remaining regs that we have discovered (or possibly
27396 even all of them, if the conditional in the for loop never
27397 fired). */
27398 if (end_reg > i + 2)
27399 arm_emit_vfp_multi_reg_pop (i + 2,
27400 (end_reg - (i + 2)) / 2,
27401 stack_pointer_rtx);
27402 }
27403
27404 if (TARGET_IWMMXT)
27405 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27406 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27407 {
27408 rtx insn;
27409 rtx addr = gen_rtx_MEM (V2SImode,
27410 gen_rtx_POST_INC (SImode,
27411 stack_pointer_rtx));
27412 set_mem_alias_set (addr, get_frame_alias_set ());
27413 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27414 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27415 gen_rtx_REG (V2SImode, i),
27416 NULL_RTX);
27417 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27418 stack_pointer_rtx, stack_pointer_rtx);
27419 }
27420
27421 if (saved_regs_mask)
27422 {
27423 rtx insn;
27424 bool return_in_pc = false;
27425
27426 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27427 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27428 && !IS_STACKALIGN (func_type)
27429 && really_return
27430 && crtl->args.pretend_args_size == 0
27431 && saved_regs_mask & (1 << LR_REGNUM)
27432 && !crtl->calls_eh_return)
27433 {
27434 saved_regs_mask &= ~(1 << LR_REGNUM);
27435 saved_regs_mask |= (1 << PC_REGNUM);
27436 return_in_pc = true;
27437 }
27438
27439 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27440 {
27441 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27442 if (saved_regs_mask & (1 << i))
27443 {
27444 rtx addr = gen_rtx_MEM (SImode,
27445 gen_rtx_POST_INC (SImode,
27446 stack_pointer_rtx));
27447 set_mem_alias_set (addr, get_frame_alias_set ());
27448
27449 if (i == PC_REGNUM)
27450 {
27451 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27452 XVECEXP (insn, 0, 0) = ret_rtx;
27453 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27454 gen_rtx_REG (SImode, i),
27455 addr);
27456 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27457 insn = emit_jump_insn (insn);
27458 }
27459 else
27460 {
27461 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27462 addr));
27463 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27464 gen_rtx_REG (SImode, i),
27465 NULL_RTX);
27466 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27467 stack_pointer_rtx,
27468 stack_pointer_rtx);
27469 }
27470 }
27471 }
27472 else
27473 {
27474 if (TARGET_LDRD
27475 && current_tune->prefer_ldrd_strd
27476 && !optimize_function_for_size_p (cfun))
27477 {
27478 if (TARGET_THUMB2)
27479 thumb2_emit_ldrd_pop (saved_regs_mask);
27480 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27481 arm_emit_ldrd_pop (saved_regs_mask);
27482 else
27483 arm_emit_multi_reg_pop (saved_regs_mask);
27484 }
27485 else
27486 arm_emit_multi_reg_pop (saved_regs_mask);
27487 }
27488
27489 if (return_in_pc == true)
27490 return;
27491 }
27492
27493 if (crtl->args.pretend_args_size)
27494 {
27495 int i, j;
27496 rtx dwarf = NULL_RTX;
27497 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27498 stack_pointer_rtx,
27499 GEN_INT (crtl->args.pretend_args_size)));
27500
27501 RTX_FRAME_RELATED_P (tmp) = 1;
27502
27503 if (cfun->machine->uses_anonymous_args)
27504 {
27505 /* Restore pretend args. Refer to arm_expand_prologue for how the
27506 pretend args are saved on the stack. */
27507 int num_regs = crtl->args.pretend_args_size / 4;
27508 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27509 for (j = 0, i = 0; j < num_regs; i++)
27510 if (saved_regs_mask & (1 << i))
27511 {
27512 rtx reg = gen_rtx_REG (SImode, i);
27513 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27514 j++;
27515 }
27516 REG_NOTES (tmp) = dwarf;
27517 }
27518 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27519 stack_pointer_rtx, stack_pointer_rtx);
27520 }
27521
27522 if (!really_return)
27523 return;
27524
27525 if (crtl->calls_eh_return)
27526 emit_insn (gen_addsi3 (stack_pointer_rtx,
27527 stack_pointer_rtx,
27528 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27529
27530 if (IS_STACKALIGN (func_type))
27531 /* Restore the original stack pointer. Before prologue, the stack was
27532 realigned and the original stack pointer saved in r0. For details,
27533 see comment in arm_expand_prologue. */
27534 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27535
27536 emit_jump_insn (simple_return_rtx);
27537 }
27538
27539 /* Implementation of insn prologue_thumb1_interwork. This is the first
27540 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27541
27542 const char *
27543 thumb1_output_interwork (void)
27544 {
27545 const char * name;
27546 FILE *f = asm_out_file;
27547
27548 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27549 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27550 == SYMBOL_REF);
27551 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27552
27553 /* Generate code sequence to switch us into Thumb mode. */
27554 /* The .code 32 directive has already been emitted by
27555 ASM_DECLARE_FUNCTION_NAME. */
27556 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27557 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
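/* Reading PC in the "orr" above yields the address of that instruction
   plus 8, i.e. the first Thumb instruction just past the "bx"; setting
   bit zero makes the "bx" switch the core into Thumb state.  */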
27558
27559 /* Generate a label, so that the debugger will notice the
27560 change in instruction sets. This label is also used by
27561 the assembler to bypass the ARM code when this function
27562 is called from a Thumb encoded function elsewhere in the
27563 same file. Hence the definition of STUB_NAME here must
27564 agree with the definition in gas/config/tc-arm.c. */
27565
27566 #define STUB_NAME ".real_start_of"
27567
27568 fprintf (f, "\t.code\t16\n");
27569 #ifdef ARM_PE
27570 if (arm_dllexport_name_p (name))
27571 name = arm_strip_name_encoding (name);
27572 #endif
27573 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27574 fprintf (f, "\t.thumb_func\n");
27575 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27576
27577 return "";
27578 }
27579
27580 /* Handle the case of a double word load into a low register from
27581 a computed memory address. The computed address may involve a
27582 register which is overwritten by the load. */
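/* For example, a DImode load whose destination overlaps the base register,
   say r0:r1 = [r0], is emitted as "ldr r1, [r0, #4]" followed by
   "ldr r0, [r0]", so the base is still intact for the second load
   (an illustrative case; see the REG handling below).  */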
27583 const char *
27584 thumb_load_double_from_address (rtx *operands)
27585 {
27586 rtx addr;
27587 rtx base;
27588 rtx offset;
27589 rtx arg1;
27590 rtx arg2;
27591
27592 gcc_assert (REG_P (operands[0]));
27593 gcc_assert (MEM_P (operands[1]));
27594
27595 /* Get the memory address. */
27596 addr = XEXP (operands[1], 0);
27597
27598 /* Work out how the memory address is computed. */
27599 switch (GET_CODE (addr))
27600 {
27601 case REG:
27602 operands[2] = adjust_address (operands[1], SImode, 4);
27603
27604 if (REGNO (operands[0]) == REGNO (addr))
27605 {
27606 output_asm_insn ("ldr\t%H0, %2", operands);
27607 output_asm_insn ("ldr\t%0, %1", operands);
27608 }
27609 else
27610 {
27611 output_asm_insn ("ldr\t%0, %1", operands);
27612 output_asm_insn ("ldr\t%H0, %2", operands);
27613 }
27614 break;
27615
27616 case CONST:
27617 /* Compute <address> + 4 for the high order load. */
27618 operands[2] = adjust_address (operands[1], SImode, 4);
27619
27620 output_asm_insn ("ldr\t%0, %1", operands);
27621 output_asm_insn ("ldr\t%H0, %2", operands);
27622 break;
27623
27624 case PLUS:
27625 arg1 = XEXP (addr, 0);
27626 arg2 = XEXP (addr, 1);
27627
27628 if (CONSTANT_P (arg1))
27629 base = arg2, offset = arg1;
27630 else
27631 base = arg1, offset = arg2;
27632
27633 gcc_assert (REG_P (base));
27634
27635 /* Catch the case of <address> = <reg> + <reg> */
27636 if (REG_P (offset))
27637 {
27638 int reg_offset = REGNO (offset);
27639 int reg_base = REGNO (base);
27640 int reg_dest = REGNO (operands[0]);
27641
27642 /* Add the base and offset registers together into the
27643 higher destination register. */
27644 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27645 reg_dest + 1, reg_base, reg_offset);
27646
27647 /* Load the lower destination register from the address in
27648 the higher destination register. */
27649 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27650 reg_dest, reg_dest + 1);
27651
27652 /* Load the higher destination register from its own address
27653 plus 4. */
27654 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27655 reg_dest + 1, reg_dest + 1);
27656 }
27657 else
27658 {
27659 /* Compute <address> + 4 for the high order load. */
27660 operands[2] = adjust_address (operands[1], SImode, 4);
27661
27662 /* If the computed address is held in the low order register
27663 then load the high order register first, otherwise always
27664 load the low order register first. */
27665 if (REGNO (operands[0]) == REGNO (base))
27666 {
27667 output_asm_insn ("ldr\t%H0, %2", operands);
27668 output_asm_insn ("ldr\t%0, %1", operands);
27669 }
27670 else
27671 {
27672 output_asm_insn ("ldr\t%0, %1", operands);
27673 output_asm_insn ("ldr\t%H0, %2", operands);
27674 }
27675 }
27676 break;
27677
27678 case LABEL_REF:
27679 /* With no registers to worry about we can just load the value
27680 directly. */
27681 operands[2] = adjust_address (operands[1], SImode, 4);
27682
27683 output_asm_insn ("ldr\t%H0, %2", operands);
27684 output_asm_insn ("ldr\t%0, %1", operands);
27685 break;
27686
27687 default:
27688 gcc_unreachable ();
27689 }
27690
27691 return "";
27692 }
27693
27694 const char *
27695 thumb_output_move_mem_multiple (int n, rtx *operands)
27696 {
27697 rtx tmp;
27698
27699 switch (n)
27700 {
27701 case 2:
27702 if (REGNO (operands[4]) > REGNO (operands[5]))
27703 {
27704 tmp = operands[4];
27705 operands[4] = operands[5];
27706 operands[5] = tmp;
27707 }
27708 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27709 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27710 break;
27711
27712 case 3:
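/* Bubble the three registers into ascending order so that the ldmia/stmia
   register lists below are emitted lowest-first (a small three-element
   sorting network).  */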
27713 if (REGNO (operands[4]) > REGNO (operands[5]))
27714 {
27715 tmp = operands[4];
27716 operands[4] = operands[5];
27717 operands[5] = tmp;
27718 }
27719 if (REGNO (operands[5]) > REGNO (operands[6]))
27720 {
27721 tmp = operands[5];
27722 operands[5] = operands[6];
27723 operands[6] = tmp;
27724 }
27725 if (REGNO (operands[4]) > REGNO (operands[5]))
27726 {
27727 tmp = operands[4];
27728 operands[4] = operands[5];
27729 operands[5] = tmp;
27730 }
27731
27732 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27733 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27734 break;
27735
27736 default:
27737 gcc_unreachable ();
27738 }
27739
27740 return "";
27741 }
27742
27743 /* Output a call-via instruction for thumb state. */
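/* Pre-ARMv5 Thumb has no "blx <reg>", so an indirect call is made as
   "bl .Lcall_via_rN" to a small per-register stub that simply does
   "bx rN".  The shared stubs for the text section are emitted later by
   arm_file_end; with function sections a per-function label is used, as
   the comment below explains (sketch of the mechanism).  */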
27744 const char *
27745 thumb_call_via_reg (rtx reg)
27746 {
27747 int regno = REGNO (reg);
27748 rtx *labelp;
27749
27750 gcc_assert (regno < LR_REGNUM);
27751
27752 /* If we are in the normal text section we can use a single instance
27753 per compilation unit. If we are doing function sections, then we need
27754 an entry per section, since we can't rely on reachability. */
27755 if (in_section == text_section)
27756 {
27757 thumb_call_reg_needed = 1;
27758
27759 if (thumb_call_via_label[regno] == NULL)
27760 thumb_call_via_label[regno] = gen_label_rtx ();
27761 labelp = thumb_call_via_label + regno;
27762 }
27763 else
27764 {
27765 if (cfun->machine->call_via[regno] == NULL)
27766 cfun->machine->call_via[regno] = gen_label_rtx ();
27767 labelp = cfun->machine->call_via + regno;
27768 }
27769
27770 output_asm_insn ("bl\t%a0", labelp);
27771 return "";
27772 }
27773
27774 /* Routines for generating rtl. */
27775 void
27776 thumb_expand_movmemqi (rtx *operands)
27777 {
27778 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27779 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27780 HOST_WIDE_INT len = INTVAL (operands[2]);
27781 HOST_WIDE_INT offset = 0;
27782
27783 while (len >= 12)
27784 {
27785 emit_insn (gen_movmem12b (out, in, out, in));
27786 len -= 12;
27787 }
27788
27789 if (len >= 8)
27790 {
27791 emit_insn (gen_movmem8b (out, in, out, in));
27792 len -= 8;
27793 }
27794
27795 if (len >= 4)
27796 {
27797 rtx reg = gen_reg_rtx (SImode);
27798 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27799 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27800 len -= 4;
27801 offset += 4;
27802 }
27803
27804 if (len >= 2)
27805 {
27806 rtx reg = gen_reg_rtx (HImode);
27807 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27808 plus_constant (Pmode, in,
27809 offset))));
27810 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27811 offset)),
27812 reg));
27813 len -= 2;
27814 offset += 2;
27815 }
27816
27817 if (len)
27818 {
27819 rtx reg = gen_reg_rtx (QImode);
27820 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27821 plus_constant (Pmode, in,
27822 offset))));
27823 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27824 offset)),
27825 reg));
27826 }
27827 }
27828
27829 void
27830 thumb_reload_out_hi (rtx *operands)
27831 {
27832 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27833 }
27834
27835 /* Handle reading a half-word from memory during reload. */
27836 void
27837 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
27838 {
27839 gcc_unreachable ();
27840 }
27841
27842 /* Return the length of a function name prefix
27843 that starts with the character 'c'. */
27844 static int
27845 arm_get_strip_length (int c)
27846 {
27847 switch (c)
27848 {
27849 ARM_NAME_ENCODING_LENGTHS
27850 default: return 0;
27851 }
27852 }
27853
27854 /* Return a pointer to a function's name with any
27855 and all prefix encodings stripped from it. */
27856 const char *
27857 arm_strip_name_encoding (const char *name)
27858 {
27859 int skip;
27860
27861 while ((skip = arm_get_strip_length (* name)))
27862 name += skip;
27863
27864 return name;
27865 }
27866
27867 /* If there is a '*' anywhere in the name's prefix, then
27868 emit the stripped name verbatim, otherwise prepend an
27869 underscore if leading underscores are being used. */
27870 void
27871 arm_asm_output_labelref (FILE *stream, const char *name)
27872 {
27873 int skip;
27874 int verbatim = 0;
27875
27876 while ((skip = arm_get_strip_length (* name)))
27877 {
27878 verbatim |= (*name == '*');
27879 name += skip;
27880 }
27881
27882 if (verbatim)
27883 fputs (name, stream);
27884 else
27885 asm_fprintf (stream, "%U%s", name);
27886 }
27887
27888 /* This function is used to emit an EABI tag and its associated value.
27889 We emit the numerical value of the tag in case the assembler does not
27890 support textual tags (e.g. gas prior to 2.20). If requested we include
27891 the tag name in a comment so that anyone reading the assembler output
27892 will know which tag is being set.
27893
27894 This function is not static because arm-c.c needs it too. */
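/* For instance, arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 1) would
   produce "\t.eabi_attribute 26, 1" followed, under -fverbose-asm, by an
   "@ Tag_ABI_enum_size" comment (sketch of the resulting assembly).  */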
27895
27896 void
27897 arm_emit_eabi_attribute (const char *name, int num, int val)
27898 {
27899 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
27900 if (flag_verbose_asm || flag_debug_asm)
27901 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
27902 asm_fprintf (asm_out_file, "\n");
27903 }
27904
27905 static void
27906 arm_file_start (void)
27907 {
27908 int val;
27909
27910 if (TARGET_UNIFIED_ASM)
27911 asm_fprintf (asm_out_file, "\t.syntax unified\n");
27912
27913 if (TARGET_BPABI)
27914 {
27915 const char *fpu_name;
27916 if (arm_selected_arch)
27917 {
27918 /* armv7ve doesn't support any extensions. */
27919 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
27920 {
27921 /* Keep backward compatibility for assemblers
27922 which don't support armv7ve. */
27923 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
27924 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
27925 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
27926 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
27927 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
27928 }
27929 else
27930 {
27931 const char* pos = strchr (arm_selected_arch->name, '+');
27932 if (pos)
27933 {
27934 char buf[15];
27935 gcc_assert (strlen (arm_selected_arch->name)
27936 <= sizeof (buf) / sizeof (*pos));
27937 strncpy (buf, arm_selected_arch->name,
27938 (pos - arm_selected_arch->name) * sizeof (*pos));
27939 buf[pos - arm_selected_arch->name] = '\0';
27940 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
27941 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
27942 }
27943 else
27944 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
27945 }
27946 }
27947 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
27948 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
27949 else
27950 {
27951 const char* truncated_name
27952 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
27953 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
27954 }
27955
27956 if (TARGET_SOFT_FLOAT)
27957 {
27958 fpu_name = "softvfp";
27959 }
27960 else
27961 {
27962 fpu_name = arm_fpu_desc->name;
27963 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
27964 {
27965 if (TARGET_HARD_FLOAT)
27966 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
27967 if (TARGET_HARD_FLOAT_ABI)
27968 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27969 }
27970 }
27971 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
27972
27973 /* Some of these attributes only apply when the corresponding features
27974 are used. However we don't have any easy way of figuring this out.
27975 Conservatively record the setting that would have been used. */
27976
27977 if (flag_rounding_math)
27978 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27979
27980 if (!flag_unsafe_math_optimizations)
27981 {
27982 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27983 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27984 }
27985 if (flag_signaling_nans)
27986 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27987
27988 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27989 flag_finite_math_only ? 1 : 3);
27990
27991 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27992 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27993 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27994 flag_short_enums ? 1 : 2);
27995
27996 /* Tag_ABI_optimization_goals. */
27997 if (optimize_size)
27998 val = 4;
27999 else if (optimize >= 2)
28000 val = 2;
28001 else if (optimize)
28002 val = 1;
28003 else
28004 val = 6;
28005 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
28006
28007 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
28008 unaligned_access);
28009
28010 if (arm_fp16_format)
28011 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28012 (int) arm_fp16_format);
28013
28014 if (arm_lang_output_object_attributes_hook)
28015 arm_lang_output_object_attributes_hook();
28016 }
28017
28018 default_file_start ();
28019 }
28020
28021 static void
28022 arm_file_end (void)
28023 {
28024 int regno;
28025
28026 if (NEED_INDICATE_EXEC_STACK)
28027 /* Add .note.GNU-stack. */
28028 file_end_indicate_exec_stack ();
28029
28030 if (! thumb_call_reg_needed)
28031 return;
28032
28033 switch_to_section (text_section);
28034 asm_fprintf (asm_out_file, "\t.code 16\n");
28035 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28036
28037 for (regno = 0; regno < LR_REGNUM; regno++)
28038 {
28039 rtx label = thumb_call_via_label[regno];
28040
28041 if (label != 0)
28042 {
28043 targetm.asm_out.internal_label (asm_out_file, "L",
28044 CODE_LABEL_NUMBER (label));
28045 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28046 }
28047 }
28048 }
28049
28050 #ifndef ARM_PE
28051 /* Symbols in the text segment can be accessed without indirecting via the
28052 constant pool; it may take an extra binary operation, but this is still
28053 faster than indirecting via memory. Don't do this when not optimizing,
28054 since we won't be calculating all of the offsets necessary to do this
28055 simplification. */
28056
28057 static void
28058 arm_encode_section_info (tree decl, rtx rtl, int first)
28059 {
28060 if (optimize > 0 && TREE_CONSTANT (decl))
28061 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28062
28063 default_encode_section_info (decl, rtl, first);
28064 }
28065 #endif /* !ARM_PE */
28066
28067 static void
28068 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28069 {
28070 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28071 && !strcmp (prefix, "L"))
28072 {
28073 arm_ccfsm_state = 0;
28074 arm_target_insn = NULL;
28075 }
28076 default_internal_label (stream, prefix, labelno);
28077 }
28078
28079 /* Output code to add DELTA to the first argument, and then jump
28080 to FUNCTION. Used for C++ multiple inheritance. */
28081 static void
28082 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28083 HOST_WIDE_INT delta,
28084 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28085 tree function)
28086 {
28087 static int thunk_label = 0;
28088 char label[256];
28089 char labelpc[256];
28090 int mi_delta = delta;
28091 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28092 int shift = 0;
28093 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28094 ? 1 : 0);
28095 if (mi_delta < 0)
28096 mi_delta = - mi_delta;
28097
28098 final_start_function (emit_barrier (), file, 1);
28099
28100 if (TARGET_THUMB1)
28101 {
28102 int labelno = thunk_label++;
28103 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28104 /* Thunks are entered in ARM mode when available. */
28105 if (TARGET_THUMB1_ONLY)
28106 {
28107 /* push r3 so we can use it as a temporary. */
28108 /* TODO: Omit this save if r3 is not used. */
28109 fputs ("\tpush {r3}\n", file);
28110 fputs ("\tldr\tr3, ", file);
28111 }
28112 else
28113 {
28114 fputs ("\tldr\tr12, ", file);
28115 }
28116 assemble_name (file, label);
28117 fputc ('\n', file);
28118 if (flag_pic)
28119 {
28120 /* If we are generating PIC, the ldr instruction below loads
28121 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28122 the address of the add + 8, so we have:
28123
28124 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28125 = target + 1.
28126
28127 Note that we have "+ 1" because some versions of GNU ld
28128 don't set the low bit of the result for R_ARM_REL32
28129 relocations against thumb function symbols.
28130 On ARMv6M this is +4, not +8. */
28131 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28132 assemble_name (file, labelpc);
28133 fputs (":\n", file);
28134 if (TARGET_THUMB1_ONLY)
28135 {
28136 /* This is 2 insns after the start of the thunk, so we know it
28137 is 4-byte aligned. */
28138 fputs ("\tadd\tr3, pc, r3\n", file);
28139 fputs ("\tmov r12, r3\n", file);
28140 }
28141 else
28142 fputs ("\tadd\tr12, pc, r12\n", file);
28143 }
28144 else if (TARGET_THUMB1_ONLY)
28145 fputs ("\tmov r12, r3\n", file);
28146 }
28147 if (TARGET_THUMB1_ONLY)
28148 {
28149 if (mi_delta > 255)
28150 {
28151 fputs ("\tldr\tr3, ", file);
28152 assemble_name (file, label);
28153 fputs ("+4\n", file);
28154 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28155 mi_op, this_regno, this_regno);
28156 }
28157 else if (mi_delta != 0)
28158 {
28159 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28160 mi_op, this_regno, this_regno,
28161 mi_delta);
28162 }
28163 }
28164 else
28165 {
28166 /* TODO: Use movw/movt for large constants when available. */
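/* Split the displacement into a series of add/sub instructions whose
   immediates each fit the 8-bit rotated immediate form, consuming the
   value one byte-aligned chunk at a time.  */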
28167 while (mi_delta != 0)
28168 {
28169 if ((mi_delta & (3 << shift)) == 0)
28170 shift += 2;
28171 else
28172 {
28173 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28174 mi_op, this_regno, this_regno,
28175 mi_delta & (0xff << shift));
28176 mi_delta &= ~(0xff << shift);
28177 shift += 8;
28178 }
28179 }
28180 }
28181 if (TARGET_THUMB1)
28182 {
28183 if (TARGET_THUMB1_ONLY)
28184 fputs ("\tpop\t{r3}\n", file);
28185
28186 fprintf (file, "\tbx\tr12\n");
28187 ASM_OUTPUT_ALIGN (file, 2);
28188 assemble_name (file, label);
28189 fputs (":\n", file);
28190 if (flag_pic)
28191 {
28192 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28193 rtx tem = XEXP (DECL_RTL (function), 0);
28194 tem = plus_constant (GET_MODE (tem), tem, -7);
28195 tem = gen_rtx_MINUS (GET_MODE (tem),
28196 tem,
28197 gen_rtx_SYMBOL_REF (Pmode,
28198 ggc_strdup (labelpc)));
28199 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28200 }
28201 else
28202 /* Output ".word .LTHUNKn". */
28203 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28204
28205 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28206 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28207 }
28208 else
28209 {
28210 fputs ("\tb\t", file);
28211 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28212 if (NEED_PLT_RELOC)
28213 fputs ("(PLT)", file);
28214 fputc ('\n', file);
28215 }
28216
28217 final_end_function ();
28218 }
28219
28220 int
28221 arm_emit_vector_const (FILE *file, rtx x)
28222 {
28223 int i;
28224 const char * pattern;
28225
28226 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28227
28228 switch (GET_MODE (x))
28229 {
28230 case V2SImode: pattern = "%08x"; break;
28231 case V4HImode: pattern = "%04x"; break;
28232 case V8QImode: pattern = "%02x"; break;
28233 default: gcc_unreachable ();
28234 }
28235
28236 fprintf (file, "0x");
28237 for (i = CONST_VECTOR_NUNITS (x); i--;)
28238 {
28239 rtx element;
28240
28241 element = CONST_VECTOR_ELT (x, i);
28242 fprintf (file, pattern, INTVAL (element));
28243 }
28244
28245 return 1;
28246 }
28247
28248 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28249 HFmode constant pool entries are actually loaded with ldr. */
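/* E.g. the HFmode constant 1.0 (bit pattern 0x3c00) is emitted as the bytes
   00 3c 00 00 on a little-endian target, i.e. the half-float sits in the low
   half of the word; for big-endian the two zero bytes come first
   (illustrative values).  */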
28250 void
28251 arm_emit_fp16_const (rtx c)
28252 {
28253 REAL_VALUE_TYPE r;
28254 long bits;
28255
28256 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28257 bits = real_to_target (NULL, &r, HFmode);
28258 if (WORDS_BIG_ENDIAN)
28259 assemble_zeros (2);
28260 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28261 if (!WORDS_BIG_ENDIAN)
28262 assemble_zeros (2);
28263 }
28264
28265 const char *
28266 arm_output_load_gr (rtx *operands)
28267 {
28268 rtx reg;
28269 rtx offset;
28270 rtx wcgr;
28271 rtx sum;
28272
28273 if (!MEM_P (operands [1])
28274 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28275 || !REG_P (reg = XEXP (sum, 0))
28276 || !CONST_INT_P (offset = XEXP (sum, 1))
28277 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28278 return "wldrw%?\t%0, %1";
28279
28280 /* Fix up an out-of-range load of a GR register. */
28281 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28282 wcgr = operands[0];
28283 operands[0] = reg;
28284 output_asm_insn ("ldr%?\t%0, %1", operands);
28285
28286 operands[0] = wcgr;
28287 operands[1] = reg;
28288 output_asm_insn ("tmcr%?\t%0, %1", operands);
28289 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28290
28291 return "";
28292 }
28293
28294 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28295
28296 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28297 named arg and all anonymous args onto the stack.
28298 XXX I know the prologue shouldn't be pushing registers, but it is faster
28299 that way. */
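/* E.g. for "int f (int a, ...)" the single named argument occupies r0, so
   pretend_size becomes 3 * UNITS_PER_WORD and the prologue pushes r1-r3,
   making the anonymous arguments contiguous with any stack-passed ones
   (a sketch of the common AAPCS case).  */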
28300
28301 static void
28302 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28303 enum machine_mode mode,
28304 tree type,
28305 int *pretend_size,
28306 int second_time ATTRIBUTE_UNUSED)
28307 {
28308 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28309 int nregs;
28310
28311 cfun->machine->uses_anonymous_args = 1;
28312 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28313 {
28314 nregs = pcum->aapcs_ncrn;
28315 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28316 nregs++;
28317 }
28318 else
28319 nregs = pcum->nregs;
28320
28321 if (nregs < NUM_ARG_REGS)
28322 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28323 }
28324
28325 /* We can't rely on the caller doing the proper promotion when
28326 using APCS or ATPCS. */
28327
28328 static bool
28329 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28330 {
28331 return !TARGET_AAPCS_BASED;
28332 }
28333
28334 static enum machine_mode
28335 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28336 enum machine_mode mode,
28337 int *punsignedp ATTRIBUTE_UNUSED,
28338 const_tree fntype ATTRIBUTE_UNUSED,
28339 int for_return ATTRIBUTE_UNUSED)
28340 {
28341 if (GET_MODE_CLASS (mode) == MODE_INT
28342 && GET_MODE_SIZE (mode) < 4)
28343 return SImode;
28344
28345 return mode;
28346 }
28347
28348 /* AAPCS based ABIs use short enums by default. */
28349
28350 static bool
28351 arm_default_short_enums (void)
28352 {
28353 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28354 }
28355
28356
28357 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28358
28359 static bool
28360 arm_align_anon_bitfield (void)
28361 {
28362 return TARGET_AAPCS_BASED;
28363 }
28364
28365
28366 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28367
28368 static tree
28369 arm_cxx_guard_type (void)
28370 {
28371 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28372 }
28373
28374
28375 /* The EABI says test the least significant bit of a guard variable. */
28376
28377 static bool
28378 arm_cxx_guard_mask_bit (void)
28379 {
28380 return TARGET_AAPCS_BASED;
28381 }
28382
28383
28384 /* The EABI specifies that all array cookies are 8 bytes long. */
28385
28386 static tree
28387 arm_get_cookie_size (tree type)
28388 {
28389 tree size;
28390
28391 if (!TARGET_AAPCS_BASED)
28392 return default_cxx_get_cookie_size (type);
28393
28394 size = build_int_cst (sizetype, 8);
28395 return size;
28396 }
28397
28398
28399 /* The EABI says that array cookies should also contain the element size. */
28400
28401 static bool
28402 arm_cookie_has_size (void)
28403 {
28404 return TARGET_AAPCS_BASED;
28405 }
28406
28407
28408 /* The EABI says constructors and destructors should return a pointer to
28409 the object constructed/destroyed. */
28410
28411 static bool
28412 arm_cxx_cdtor_returns_this (void)
28413 {
28414 return TARGET_AAPCS_BASED;
28415 }
28416
28417 /* The EABI says that an inline function may never be the key
28418 method. */
28419
28420 static bool
28421 arm_cxx_key_method_may_be_inline (void)
28422 {
28423 return !TARGET_AAPCS_BASED;
28424 }
28425
28426 static void
28427 arm_cxx_determine_class_data_visibility (tree decl)
28428 {
28429 if (!TARGET_AAPCS_BASED
28430 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28431 return;
28432
28433 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28434 is exported. However, on systems without dynamic vague linkage,
28435 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28436 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28437 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28438 else
28439 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28440 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28441 }
28442
28443 static bool
28444 arm_cxx_class_data_always_comdat (void)
28445 {
28446 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28447 vague linkage if the class has no key function. */
28448 return !TARGET_AAPCS_BASED;
28449 }
28450
28451
28452 /* The EABI says __aeabi_atexit should be used to register static
28453 destructors. */
28454
28455 static bool
28456 arm_cxx_use_aeabi_atexit (void)
28457 {
28458 return TARGET_AAPCS_BASED;
28459 }
28460
28461
28462 void
28463 arm_set_return_address (rtx source, rtx scratch)
28464 {
28465 arm_stack_offsets *offsets;
28466 HOST_WIDE_INT delta;
28467 rtx addr;
28468 unsigned long saved_regs;
28469
28470 offsets = arm_get_frame_offsets ();
28471 saved_regs = offsets->saved_regs_mask;
28472
28473 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28474 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28475 else
28476 {
28477 if (frame_pointer_needed)
28478 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28479 else
28480 {
28481 /* LR will be the first saved register. */
28482 delta = offsets->outgoing_args - (offsets->frame + 4);
28483
28484
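/* An ARM ldr/str immediate offset is limited to +/-4095, so for a larger
   delta materialize the upper bits in SCRATCH first and keep only the low
   12 bits as the addressing offset.  */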
28485 if (delta >= 4096)
28486 {
28487 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28488 GEN_INT (delta & ~4095)));
28489 addr = scratch;
28490 delta &= 4095;
28491 }
28492 else
28493 addr = stack_pointer_rtx;
28494
28495 addr = plus_constant (Pmode, addr, delta);
28496 }
28497 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28498 }
28499 }
28500
28501
28502 void
28503 thumb_set_return_address (rtx source, rtx scratch)
28504 {
28505 arm_stack_offsets *offsets;
28506 HOST_WIDE_INT delta;
28507 HOST_WIDE_INT limit;
28508 int reg;
28509 rtx addr;
28510 unsigned long mask;
28511
28512 emit_use (source);
28513
28514 offsets = arm_get_frame_offsets ();
28515 mask = offsets->saved_regs_mask;
28516 if (mask & (1 << LR_REGNUM))
28517 {
28518 limit = 1024;
28519 /* Find the saved regs. */
28520 if (frame_pointer_needed)
28521 {
28522 delta = offsets->soft_frame - offsets->saved_args;
28523 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28524 if (TARGET_THUMB1)
28525 limit = 128;
28526 }
28527 else
28528 {
28529 delta = offsets->outgoing_args - offsets->saved_args;
28530 reg = SP_REGNUM;
28531 }
28532 /* Allow for the stack frame. */
28533 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28534 delta -= 16;
28535 /* The link register is always the first saved register. */
28536 delta -= 4;
28537
28538 /* Construct the address. */
28539 addr = gen_rtx_REG (SImode, reg);
28540 if (delta > limit)
28541 {
28542 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28543 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28544 addr = scratch;
28545 }
28546 else
28547 addr = plus_constant (Pmode, addr, delta);
28548
28549 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28550 }
28551 else
28552 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28553 }
28554
28555 /* Implements target hook vector_mode_supported_p. */
28556 bool
28557 arm_vector_mode_supported_p (enum machine_mode mode)
28558 {
28559 /* Neon also supports V2SImode, etc. listed in the clause below. */
28560 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28561 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28562 return true;
28563
28564 if ((TARGET_NEON || TARGET_IWMMXT)
28565 && ((mode == V2SImode)
28566 || (mode == V4HImode)
28567 || (mode == V8QImode)))
28568 return true;
28569
28570 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28571 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28572 || mode == V2HAmode))
28573 return true;
28574
28575 return false;
28576 }
28577
28578 /* Implements target hook array_mode_supported_p. */
28579
28580 static bool
28581 arm_array_mode_supported_p (enum machine_mode mode,
28582 unsigned HOST_WIDE_INT nelems)
28583 {
28584 if (TARGET_NEON
28585 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28586 && (nelems >= 2 && nelems <= 4))
28587 return true;
28588
28589 return false;
28590 }
28591
28592 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28593 registers when autovectorizing for Neon, at least until multiple vector
28594 widths are supported properly by the middle-end. */
28595
28596 static enum machine_mode
28597 arm_preferred_simd_mode (enum machine_mode mode)
28598 {
28599 if (TARGET_NEON)
28600 switch (mode)
28601 {
28602 case SFmode:
28603 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28604 case SImode:
28605 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28606 case HImode:
28607 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28608 case QImode:
28609 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28610 case DImode:
28611 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28612 return V2DImode;
28613 break;
28614
28615 default:;
28616 }
28617
28618 if (TARGET_REALLY_IWMMXT)
28619 switch (mode)
28620 {
28621 case SImode:
28622 return V2SImode;
28623 case HImode:
28624 return V4HImode;
28625 case QImode:
28626 return V8QImode;
28627
28628 default:;
28629 }
28630
28631 return word_mode;
28632 }
28633
28634 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28635
28636 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28637 using r0-r4 for function arguments, r7 for the stack frame, and not have
28638 enough left over to do doubleword arithmetic. For Thumb-2 all the
28639 potentially problematic instructions accept high registers so this is not
28640 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28641 that require many low registers. */
28642 static bool
28643 arm_class_likely_spilled_p (reg_class_t rclass)
28644 {
28645 if ((TARGET_THUMB1 && rclass == LO_REGS)
28646 || rclass == CC_REG)
28647 return true;
28648
28649 return false;
28650 }
28651
28652 /* Implements target hook small_register_classes_for_mode_p. */
28653 bool
28654 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
28655 {
28656 return TARGET_THUMB1;
28657 }
28658
28659 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28660 ARM insns and therefore guarantee that the shift count is modulo 256.
28661 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28662 guarantee no particular behavior for out-of-range counts. */
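/* Example: for SImode a variable shift by 256 behaves like a shift by 0,
   since only the least-significant byte of the count register is used by
   the register-specified shift forms (hence the mask of 255 below).  */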
28663
28664 static unsigned HOST_WIDE_INT
28665 arm_shift_truncation_mask (enum machine_mode mode)
28666 {
28667 return mode == SImode ? 255 : 0;
28668 }
28669
28670
28671 /* Map internal gcc register numbers to DWARF2 register numbers. */
28672
28673 unsigned int
28674 arm_dbx_register_number (unsigned int regno)
28675 {
28676 if (regno < 16)
28677 return regno;
28678
28679 if (IS_VFP_REGNUM (regno))
28680 {
28681 /* See comment in arm_dwarf_register_span. */
28682 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28683 return 64 + regno - FIRST_VFP_REGNUM;
28684 else
28685 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28686 }
28687
28688 if (IS_IWMMXT_GR_REGNUM (regno))
28689 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28690
28691 if (IS_IWMMXT_REGNUM (regno))
28692 return 112 + regno - FIRST_IWMMXT_REGNUM;
28693
28694 gcc_unreachable ();
28695 }
28696
28697 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28698 GCC models them as 64 32-bit registers, so we need to describe this to
28699 the DWARF generation code. Other registers can use the default. */
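/* For example a DFmode value in d5 occupies GCC registers s10/s11; with the
   legacy encoding it is described as the two single-precision pieces
   (DWARF numbers 74 and 75) rather than as the single VFPv3 number 261
   (an illustrative mapping).  */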
28700 static rtx
28701 arm_dwarf_register_span (rtx rtl)
28702 {
28703 enum machine_mode mode;
28704 unsigned regno;
28705 rtx parts[16];
28706 int nregs;
28707 int i;
28708
28709 regno = REGNO (rtl);
28710 if (!IS_VFP_REGNUM (regno))
28711 return NULL_RTX;
28712
28713 /* XXX FIXME: The EABI defines two VFP register ranges:
28714 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28715 256-287: D0-D31
28716 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28717 corresponding D register. Until GDB supports this, we shall use the
28718 legacy encodings. We also use these encodings for D0-D15 for
28719 compatibility with older debuggers. */
28720 mode = GET_MODE (rtl);
28721 if (GET_MODE_SIZE (mode) < 8)
28722 return NULL_RTX;
28723
28724 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28725 {
28726 nregs = GET_MODE_SIZE (mode) / 4;
28727 for (i = 0; i < nregs; i += 2)
28728 if (TARGET_BIG_END)
28729 {
28730 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28731 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28732 }
28733 else
28734 {
28735 parts[i] = gen_rtx_REG (SImode, regno + i);
28736 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28737 }
28738 }
28739 else
28740 {
28741 nregs = GET_MODE_SIZE (mode) / 8;
28742 for (i = 0; i < nregs; i++)
28743 parts[i] = gen_rtx_REG (DImode, regno + i);
28744 }
28745
28746 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
28747 }
28748
28749 #if ARM_UNWIND_INFO
28750 /* Emit unwind directives for a store-multiple instruction or stack pointer
28751 push during alignment.
28752 These should only ever be generated by the function prologue code, so
28753 expect them to have a particular form.
28754 The store-multiple instruction sometimes pushes pc as the last register,
28755 although it should not be tracked into unwind information, or for -Os
28756 sometimes pushes some dummy registers before the first register that needs
28757 to be tracked in unwind information; such dummy registers are there just
28758 to avoid a separate stack adjustment, and will not be restored in the
28759 epilogue. */
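/* For instance a prologue "push {r4, r5, lr}" is annotated as
   "\t.save {r4, r5, lr}", and a store-multiple of d8-d9 as
   "\t.vsave {d8, d9}" (sketch of the directives this routine prints).  */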
28760
28761 static void
28762 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
28763 {
28764 int i;
28765 HOST_WIDE_INT offset;
28766 HOST_WIDE_INT nregs;
28767 int reg_size;
28768 unsigned reg;
28769 unsigned lastreg;
28770 unsigned padfirst = 0, padlast = 0;
28771 rtx e;
28772
28773 e = XVECEXP (p, 0, 0);
28774 gcc_assert (GET_CODE (e) == SET);
28775
28776 /* First insn will adjust the stack pointer. */
28777 gcc_assert (GET_CODE (e) == SET
28778 && REG_P (SET_DEST (e))
28779 && REGNO (SET_DEST (e)) == SP_REGNUM
28780 && GET_CODE (SET_SRC (e)) == PLUS);
28781
28782 offset = -INTVAL (XEXP (SET_SRC (e), 1));
28783 nregs = XVECLEN (p, 0) - 1;
28784 gcc_assert (nregs);
28785
28786 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
28787 if (reg < 16)
28788 {
28789 /* For -Os dummy registers can be pushed at the beginning to
28790 avoid separate stack pointer adjustment. */
28791 e = XVECEXP (p, 0, 1);
28792 e = XEXP (SET_DEST (e), 0);
28793 if (GET_CODE (e) == PLUS)
28794 padfirst = INTVAL (XEXP (e, 1));
28795 gcc_assert (padfirst == 0 || optimize_size);
28796 /* The function prologue may also push pc, but not annotate it as it is
28797 never restored. We turn this into a stack pointer adjustment. */
28798 e = XVECEXP (p, 0, nregs);
28799 e = XEXP (SET_DEST (e), 0);
28800 if (GET_CODE (e) == PLUS)
28801 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
28802 else
28803 padlast = offset - 4;
28804 gcc_assert (padlast == 0 || padlast == 4);
28805 if (padlast == 4)
28806 fprintf (asm_out_file, "\t.pad #4\n");
28807 reg_size = 4;
28808 fprintf (asm_out_file, "\t.save {");
28809 }
28810 else if (IS_VFP_REGNUM (reg))
28811 {
28812 reg_size = 8;
28813 fprintf (asm_out_file, "\t.vsave {");
28814 }
28815 else
28816 /* Unknown register type. */
28817 gcc_unreachable ();
28818
28819 /* If the stack increment doesn't match the size of the saved registers,
28820 something has gone horribly wrong. */
28821 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
28822
28823 offset = padfirst;
28824 lastreg = 0;
28825 /* The remaining insns will describe the stores. */
28826 for (i = 1; i <= nregs; i++)
28827 {
28828 /* Expect (set (mem <addr>) (reg)).
28829 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28830 e = XVECEXP (p, 0, i);
28831 gcc_assert (GET_CODE (e) == SET
28832 && MEM_P (SET_DEST (e))
28833 && REG_P (SET_SRC (e)));
28834
28835 reg = REGNO (SET_SRC (e));
28836 gcc_assert (reg >= lastreg);
28837
28838 if (i != 1)
28839 fprintf (asm_out_file, ", ");
28840 /* We can't use %r for vfp because we need to use the
28841 double precision register names. */
28842 if (IS_VFP_REGNUM (reg))
28843 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
28844 else
28845 asm_fprintf (asm_out_file, "%r", reg);
28846
28847 #ifdef ENABLE_CHECKING
28848 /* Check that the addresses are consecutive. */
28849 e = XEXP (SET_DEST (e), 0);
28850 if (GET_CODE (e) == PLUS)
28851 gcc_assert (REG_P (XEXP (e, 0))
28852 && REGNO (XEXP (e, 0)) == SP_REGNUM
28853 && CONST_INT_P (XEXP (e, 1))
28854 && offset == INTVAL (XEXP (e, 1)));
28855 else
28856 gcc_assert (i == 1
28857 && REG_P (e)
28858 && REGNO (e) == SP_REGNUM);
28859 offset += reg_size;
28860 #endif
28861 }
28862 fprintf (asm_out_file, "}\n");
28863 if (padfirst)
28864 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
28865 }
28866
28867 /* Emit unwind directives for a SET. */
28868
28869 static void
28870 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
28871 {
28872 rtx e0;
28873 rtx e1;
28874 unsigned reg;
28875
28876 e0 = XEXP (p, 0);
28877 e1 = XEXP (p, 1);
28878 switch (GET_CODE (e0))
28879 {
28880 case MEM:
28881 /* Pushing a single register. */
28882 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
28883 || !REG_P (XEXP (XEXP (e0, 0), 0))
28884 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
28885 abort ();
28886
28887 asm_fprintf (asm_out_file, "\t.save ");
28888 if (IS_VFP_REGNUM (REGNO (e1)))
28889 asm_fprintf(asm_out_file, "{d%d}\n",
28890 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
28891 else
28892 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
28893 break;
28894
28895 case REG:
28896 if (REGNO (e0) == SP_REGNUM)
28897 {
28898 /* A stack increment. */
28899 if (GET_CODE (e1) != PLUS
28900 || !REG_P (XEXP (e1, 0))
28901 || REGNO (XEXP (e1, 0)) != SP_REGNUM
28902 || !CONST_INT_P (XEXP (e1, 1)))
28903 abort ();
28904
28905 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
28906 -INTVAL (XEXP (e1, 1)));
28907 }
28908 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
28909 {
28910 HOST_WIDE_INT offset;
28911
28912 if (GET_CODE (e1) == PLUS)
28913 {
28914 if (!REG_P (XEXP (e1, 0))
28915 || !CONST_INT_P (XEXP (e1, 1)))
28916 abort ();
28917 reg = REGNO (XEXP (e1, 0));
28918 offset = INTVAL (XEXP (e1, 1));
28919 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
28920 HARD_FRAME_POINTER_REGNUM, reg,
28921 offset);
28922 }
28923 else if (REG_P (e1))
28924 {
28925 reg = REGNO (e1);
28926 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
28927 HARD_FRAME_POINTER_REGNUM, reg);
28928 }
28929 else
28930 abort ();
28931 }
28932 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
28933 {
28934 /* Move from sp to reg. */
28935 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
28936 }
28937 else if (GET_CODE (e1) == PLUS
28938 && REG_P (XEXP (e1, 0))
28939 && REGNO (XEXP (e1, 0)) == SP_REGNUM
28940 && CONST_INT_P (XEXP (e1, 1)))
28941 {
28942 /* Set reg to offset from sp. */
28943 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
28944 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
28945 }
28946 else
28947 abort ();
28948 break;
28949
28950 default:
28951 abort ();
28952 }
28953 }
28954
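/* Hypothetical examples of the directives emitted by arm_unwind_emit_set:
   pushing a single register gives ".save {r4}" (or ".save {d8}" for a VFP
   register), a stack decrement of 16 bytes gives ".pad #16", and setting up
   the frame pointer from sp plus an offset gives something like
   ".setfp fp, sp, #8".  */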
28955
28956 /* Emit unwind directives for the given insn. */
28957
28958 static void
28959 arm_unwind_emit (FILE * asm_out_file, rtx insn)
28960 {
28961 rtx note, pat;
28962 bool handled_one = false;
28963
28964 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28965 return;
28966
28967 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28968 && (TREE_NOTHROW (current_function_decl)
28969 || crtl->all_throwers_are_sibcalls))
28970 return;
28971
28972 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
28973 return;
28974
28975 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
28976 {
28977 switch (REG_NOTE_KIND (note))
28978 {
28979 case REG_FRAME_RELATED_EXPR:
28980 pat = XEXP (note, 0);
28981 goto found;
28982
28983 case REG_CFA_REGISTER:
28984 pat = XEXP (note, 0);
28985 if (pat == NULL)
28986 {
28987 pat = PATTERN (insn);
28988 if (GET_CODE (pat) == PARALLEL)
28989 pat = XVECEXP (pat, 0, 0);
28990 }
28991
28992 /* Only emitted for IS_STACKALIGN re-alignment. */
28993 {
28994 rtx dest, src;
28995 unsigned reg;
28996
28997 src = SET_SRC (pat);
28998 dest = SET_DEST (pat);
28999
29000 gcc_assert (src == stack_pointer_rtx);
29001 reg = REGNO (dest);
29002 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
29003 reg + 0x90, reg);
29004 }
29005 handled_one = true;
29006 break;
29007
29008 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
29009 to get correct DWARF information for shrink-wrapping. We should not
29010 emit unwind information for it because it is used either for pretend
29011 arguments or for notes that adjust sp and restore registers from the
29012 stack. */
29013 case REG_CFA_DEF_CFA:
29014 case REG_CFA_ADJUST_CFA:
29015 case REG_CFA_RESTORE:
29016 return;
29017
29018 case REG_CFA_EXPRESSION:
29019 case REG_CFA_OFFSET:
29020 /* ??? Only handling here what we actually emit. */
29021 gcc_unreachable ();
29022
29023 default:
29024 break;
29025 }
29026 }
29027 if (handled_one)
29028 return;
29029 pat = PATTERN (insn);
29030 found:
29031
29032 switch (GET_CODE (pat))
29033 {
29034 case SET:
29035 arm_unwind_emit_set (asm_out_file, pat);
29036 break;
29037
29038 case SEQUENCE:
29039 /* Store multiple. */
29040 arm_unwind_emit_sequence (asm_out_file, pat);
29041 break;
29042
29043 default:
29044 abort();
29045 }
29046 }
29047
29048
29049 /* Output a reference from a function exception table to the type_info
29050 object X. The EABI specifies that the symbol should be relocated by
29051 an R_ARM_TARGET2 relocation. */
29052
29053 static bool
29054 arm_output_ttype (rtx x)
29055 {
29056 fputs ("\t.word\t", asm_out_file);
29057 output_addr_const (asm_out_file, x);
29058 /* Use special relocations for symbol references. */
29059 if (!CONST_INT_P (x))
29060 fputs ("(TARGET2)", asm_out_file);
29061 fputc ('\n', asm_out_file);
29062
29063 return TRUE;
29064 }
29065
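/* For example, a reference to the typeinfo object for "int" would be emitted
   as (assuming the usual C++ mangling)

       .word   _ZTIi(TARGET2)

   leaving the R_ARM_TARGET2 relocation to be resolved at link/load time.  */
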
29066 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29067
29068 static void
29069 arm_asm_emit_except_personality (rtx personality)
29070 {
29071 fputs ("\t.personality\t", asm_out_file);
29072 output_addr_const (asm_out_file, personality);
29073 fputc ('\n', asm_out_file);
29074 }
29075
29076 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29077
29078 static void
29079 arm_asm_init_sections (void)
29080 {
29081 exception_section = get_unnamed_section (0, output_section_asm_op,
29082 "\t.handlerdata");
29083 }
29084 #endif /* ARM_UNWIND_INFO */
29085
29086 /* Output unwind directives for the start/end of a function. */
29087
29088 void
29089 arm_output_fn_unwind (FILE * f, bool prologue)
29090 {
29091 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29092 return;
29093
29094 if (prologue)
29095 fputs ("\t.fnstart\n", f);
29096 else
29097 {
29098 /* If this function will never be unwound, then mark it as such.
29100 The same condition is used in arm_unwind_emit to suppress
29100 the frame annotations. */
29101 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29102 && (TREE_NOTHROW (current_function_decl)
29103 || crtl->all_throwers_are_sibcalls))
29104 fputs("\t.cantunwind\n", f);
29105
29106 fputs ("\t.fnend\n", f);
29107 }
29108 }
29109
29110 static bool
29111 arm_emit_tls_decoration (FILE *fp, rtx x)
29112 {
29113 enum tls_reloc reloc;
29114 rtx val;
29115
29116 val = XVECEXP (x, 0, 0);
29117 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29118
29119 output_addr_const (fp, val);
29120
29121 switch (reloc)
29122 {
29123 case TLS_GD32:
29124 fputs ("(tlsgd)", fp);
29125 break;
29126 case TLS_LDM32:
29127 fputs ("(tlsldm)", fp);
29128 break;
29129 case TLS_LDO32:
29130 fputs ("(tlsldo)", fp);
29131 break;
29132 case TLS_IE32:
29133 fputs ("(gottpoff)", fp);
29134 break;
29135 case TLS_LE32:
29136 fputs ("(tpoff)", fp);
29137 break;
29138 case TLS_DESCSEQ:
29139 fputs ("(tlsdesc)", fp);
29140 break;
29141 default:
29142 gcc_unreachable ();
29143 }
29144
29145 switch (reloc)
29146 {
29147 case TLS_GD32:
29148 case TLS_LDM32:
29149 case TLS_IE32:
29150 case TLS_DESCSEQ:
29151 fputs (" + (. - ", fp);
29152 output_addr_const (fp, XVECEXP (x, 0, 2));
29153 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
29154 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29155 output_addr_const (fp, XVECEXP (x, 0, 3));
29156 fputc (')', fp);
29157 break;
29158 default:
29159 break;
29160 }
29161
29162 return TRUE;
29163 }
29164
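/* A sketch of the output for a global-dynamic access, assuming a symbol "x"
   and an internal PIC label .LPIC0:

       x(tlsgd) + (. - .LPIC0 - 8)

   where the trailing constant is the third vector operand, accounting for
   the pc offset of the instruction that consumes the value.  */
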
29165 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29166
29167 static void
29168 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29169 {
29170 gcc_assert (size == 4);
29171 fputs ("\t.word\t", file);
29172 output_addr_const (file, x);
29173 fputs ("(tlsldo)", file);
29174 }
29175
29176 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29177
29178 static bool
29179 arm_output_addr_const_extra (FILE *fp, rtx x)
29180 {
29181 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29182 return arm_emit_tls_decoration (fp, x);
29183 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29184 {
29185 char label[256];
29186 int labelno = INTVAL (XVECEXP (x, 0, 0));
29187
29188 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29189 assemble_name_raw (fp, label);
29190
29191 return TRUE;
29192 }
29193 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29194 {
29195 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29196 if (GOT_PCREL)
29197 fputs ("+.", fp);
29198 fputs ("-(", fp);
29199 output_addr_const (fp, XVECEXP (x, 0, 0));
29200 fputc (')', fp);
29201 return TRUE;
29202 }
29203 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29204 {
29205 output_addr_const (fp, XVECEXP (x, 0, 0));
29206 if (GOT_PCREL)
29207 fputs ("+.", fp);
29208 fputs ("-(", fp);
29209 output_addr_const (fp, XVECEXP (x, 0, 1));
29210 fputc (')', fp);
29211 return TRUE;
29212 }
29213 else if (GET_CODE (x) == CONST_VECTOR)
29214 return arm_emit_vector_const (fp, x);
29215
29216 return FALSE;
29217 }
29218
29219 /* Output assembly for a shift instruction.
29220 SET_FLAGS determines how the instruction modifies the condition codes.
29221 0 - Do not set condition codes.
29222 1 - Set condition codes.
29223 2 - Use smallest instruction. */
29224 const char *
29225 arm_output_shift(rtx * operands, int set_flags)
29226 {
29227 char pattern[100];
29228 static const char flag_chars[3] = {'?', '.', '!'};
29229 const char *shift;
29230 HOST_WIDE_INT val;
29231 char c;
29232
29233 c = flag_chars[set_flags];
29234 if (TARGET_UNIFIED_ASM)
29235 {
29236 shift = shift_op(operands[3], &val);
29237 if (shift)
29238 {
29239 if (val != -1)
29240 operands[2] = GEN_INT(val);
29241 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29242 }
29243 else
29244 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29245 }
29246 else
29247 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29248 output_asm_insn (pattern, operands);
29249 return "";
29250 }
29251
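/* For instance, with unified assembly and operands (r0, r1, ASHIFT by 2),
   the pattern built above emits roughly "lsl r0, r1, #2"; with SET_FLAGS == 1
   the flag character instead requests the flag-setting form, e.g.
   "lsls r0, r1, #2".  */
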
29252 /* Output assembly for a WMMX immediate shift instruction. */
29253 const char *
29254 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29255 {
29256 int shift = INTVAL (operands[2]);
29257 char templ[50];
29258 enum machine_mode opmode = GET_MODE (operands[0]);
29259
29260 gcc_assert (shift >= 0);
29261
29262 /* Handle shift values that exceed the register versions' limits: > 63 (for the
29263 D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
29264 if (((opmode == V4HImode) && (shift > 15))
29265 || ((opmode == V2SImode) && (shift > 31))
29266 || ((opmode == DImode) && (shift > 63)))
29267 {
29268 if (wror_or_wsra)
29269 {
29270 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29271 output_asm_insn (templ, operands);
29272 if (opmode == DImode)
29273 {
29274 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29275 output_asm_insn (templ, operands);
29276 }
29277 }
29278 else
29279 {
29280 /* The destination register will contain all zeros. */
29281 sprintf (templ, "wzero\t%%0");
29282 output_asm_insn (templ, operands);
29283 }
29284 return "";
29285 }
29286
29287 if ((opmode == DImode) && (shift > 32))
29288 {
29289 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29290 output_asm_insn (templ, operands);
29291 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29292 output_asm_insn (templ, operands);
29293 }
29294 else
29295 {
29296 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29297 output_asm_insn (templ, operands);
29298 }
29299 return "";
29300 }
29301
29302 /* Output assembly for a WMMX tinsr instruction. */
29303 const char *
29304 arm_output_iwmmxt_tinsr (rtx *operands)
29305 {
29306 int mask = INTVAL (operands[3]);
29307 int i;
29308 char templ[50];
29309 int units = mode_nunits[GET_MODE (operands[0])];
29310 gcc_assert ((mask & (mask - 1)) == 0);
29311 for (i = 0; i < units; ++i)
29312 {
29313 if ((mask & 0x01) == 1)
29314 {
29315 break;
29316 }
29317 mask >>= 1;
29318 }
29319 gcc_assert (i < units);
29320 {
29321 switch (GET_MODE (operands[0]))
29322 {
29323 case V8QImode:
29324 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29325 break;
29326 case V4HImode:
29327 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29328 break;
29329 case V2SImode:
29330 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29331 break;
29332 default:
29333 gcc_unreachable ();
29334 break;
29335 }
29336 output_asm_insn (templ, operands);
29337 }
29338 return "";
29339 }
29340
29341 /* Output a Thumb-1 casesi dispatch sequence. */
29342 const char *
29343 thumb1_output_casesi (rtx *operands)
29344 {
29345 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29346
29347 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29348
29349 switch (GET_MODE(diff_vec))
29350 {
29351 case QImode:
29352 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29353 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29354 case HImode:
29355 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29356 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29357 case SImode:
29358 return "bl\t%___gnu_thumb1_case_si";
29359 default:
29360 gcc_unreachable ();
29361 }
29362 }
29363
29364 /* Output a Thumb-2 casesi instruction. */
29365 const char *
29366 thumb2_output_casesi (rtx *operands)
29367 {
29368 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29369
29370 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29371
29372 output_asm_insn ("cmp\t%0, %1", operands);
29373 output_asm_insn ("bhi\t%l3", operands);
29374 switch (GET_MODE(diff_vec))
29375 {
29376 case QImode:
29377 return "tbb\t[%|pc, %0]";
29378 case HImode:
29379 return "tbh\t[%|pc, %0, lsl #1]";
29380 case SImode:
29381 if (flag_pic)
29382 {
29383 output_asm_insn ("adr\t%4, %l2", operands);
29384 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29385 output_asm_insn ("add\t%4, %4, %5", operands);
29386 return "bx\t%4";
29387 }
29388 else
29389 {
29390 output_asm_insn ("adr\t%4, %l2", operands);
29391 return "ldr\t%|pc, [%4, %0, lsl #2]";
29392 }
29393 default:
29394 gcc_unreachable ();
29395 }
29396 }
29397
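/* A sketch of the code this produces for a HImode dispatch table:

       cmp   r0, #<max>
       bhi   <default label>
       tbh   [pc, r0, lsl #1]

   where r0 is the (already adjusted) case index.  */
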
29398 /* Most ARM cores are single issue, but some newer ones can issue more than
29399 one instruction per cycle. The scheduler descriptions rely on this being correct. */
29400 static int
29401 arm_issue_rate (void)
29402 {
29403 switch (arm_tune)
29404 {
29405 case cortexa15:
29406 case cortexa57:
29407 return 3;
29408
29409 case cortexr4:
29410 case cortexr4f:
29411 case cortexr5:
29412 case genericv7a:
29413 case cortexa5:
29414 case cortexa7:
29415 case cortexa8:
29416 case cortexa9:
29417 case cortexa12:
29418 case cortexa53:
29419 case fa726te:
29420 case marvell_pj4:
29421 return 2;
29422
29423 default:
29424 return 1;
29425 }
29426 }
29427
29428 /* A table and a function to perform ARM-specific name mangling for
29429 NEON vector types in order to conform to the AAPCS (see "Procedure
29430 Call Standard for the ARM Architecture", Appendix A). To qualify
29431 for emission with the mangled names defined in that document, a
29432 vector type must not only be of the correct mode but also be
29433 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29434 typedef struct
29435 {
29436 enum machine_mode mode;
29437 const char *element_type_name;
29438 const char *aapcs_name;
29439 } arm_mangle_map_entry;
29440
29441 static arm_mangle_map_entry arm_mangle_map[] = {
29442 /* 64-bit containerized types. */
29443 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29444 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29445 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29446 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29447 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29448 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29449 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29450 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29451 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29452 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29453
29454 /* 128-bit containerized types. */
29455 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29456 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29457 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29458 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29459 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29460 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29461 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29462 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29463 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29464 { VOIDmode, NULL, NULL }
29465 };
29466
29467 const char *
29468 arm_mangle_type (const_tree type)
29469 {
29470 arm_mangle_map_entry *pos = arm_mangle_map;
29471
29472 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29473 has to be mangled as if it were in the "std" namespace. */
29474 if (TARGET_AAPCS_BASED
29475 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29476 return "St9__va_list";
29477
29478 /* Half-precision float. */
29479 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29480 return "Dh";
29481
29482 if (TREE_CODE (type) != VECTOR_TYPE)
29483 return NULL;
29484
29485 /* Check the mode of the vector type, and the name of the vector
29486 element type, against the table. */
29487 while (pos->mode != VOIDmode)
29488 {
29489 tree elt_type = TREE_TYPE (type);
29490
29491 if (pos->mode == TYPE_MODE (type)
29492 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29493 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29494 pos->element_type_name))
29495 return pos->aapcs_name;
29496
29497 pos++;
29498 }
29499
29500 /* Use the default mangling for unrecognized (possibly user-defined)
29501 vector types. */
29502 return NULL;
29503 }
29504
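/* For example, the 64-bit NEON type int8x8_t (V8QImode with element type
   __builtin_neon_qi) is mangled as "15__simd64_int8_t", so a function
   "void f (int8x8_t)" would, under the usual Itanium rules, mangle to
   something like _Z1f15__simd64_int8_t.  */
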
29505 /* Order of allocation of core registers for Thumb: this allocation is
29506 written over the corresponding initial entries of the array
29507 initialized with REG_ALLOC_ORDER. We allocate all low registers
29508 first. Saving and restoring a low register is usually cheaper than
29509 using a call-clobbered high register. */
29510
29511 static const int thumb_core_reg_alloc_order[] =
29512 {
29513 3, 2, 1, 0, 4, 5, 6, 7,
29514 14, 12, 8, 9, 10, 11
29515 };
29516
29517 /* Adjust register allocation order when compiling for Thumb. */
29518
29519 void
29520 arm_order_regs_for_local_alloc (void)
29521 {
29522 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29523 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29524 if (TARGET_THUMB)
29525 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29526 sizeof (thumb_core_reg_alloc_order));
29527 }
29528
29529 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29530
29531 bool
29532 arm_frame_pointer_required (void)
29533 {
29534 return (cfun->has_nonlocal_label
29535 || SUBTARGET_FRAME_POINTER_REQUIRED
29536 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29537 }
29538
29539 /* Thumb-1 is the only target that lacks conditional execution, so return
29540 true if the target is not Thumb-1. */
29541 static bool
29542 arm_have_conditional_execution (void)
29543 {
29544 return !TARGET_THUMB1;
29545 }
29546
29547 tree
29548 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29549 {
29550 enum machine_mode in_mode, out_mode;
29551 int in_n, out_n;
29552
29553 if (TREE_CODE (type_out) != VECTOR_TYPE
29554 || TREE_CODE (type_in) != VECTOR_TYPE
29555 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
29556 return NULL_TREE;
29557
29558 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29559 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29560 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29561 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29562
29563 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29564 decl of the vectorized builtin for the appropriate vector mode.
29565 NULL_TREE is returned if no such builtin is available. */
29566 #undef ARM_CHECK_BUILTIN_MODE
29567 #define ARM_CHECK_BUILTIN_MODE(C) \
29568 (out_mode == SFmode && out_n == C \
29569 && in_mode == SFmode && in_n == C)
29570
29571 #undef ARM_FIND_VRINT_VARIANT
29572 #define ARM_FIND_VRINT_VARIANT(N) \
29573 (ARM_CHECK_BUILTIN_MODE (2) \
29574 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29575 : (ARM_CHECK_BUILTIN_MODE (4) \
29576 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29577 : NULL_TREE))
29578
29579 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29580 {
29581 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29582 switch (fn)
29583 {
29584 case BUILT_IN_FLOORF:
29585 return ARM_FIND_VRINT_VARIANT (vrintm);
29586 case BUILT_IN_CEILF:
29587 return ARM_FIND_VRINT_VARIANT (vrintp);
29588 case BUILT_IN_TRUNCF:
29589 return ARM_FIND_VRINT_VARIANT (vrintz);
29590 case BUILT_IN_ROUNDF:
29591 return ARM_FIND_VRINT_VARIANT (vrinta);
29592 default:
29593 return NULL_TREE;
29594 }
29595 }
29596 return NULL_TREE;
29597 }
29598 #undef ARM_CHECK_BUILTIN_MODE
29599 #undef ARM_FIND_VRINT_VARIANT
29600
29601 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29602 static HOST_WIDE_INT
29603 arm_vector_alignment (const_tree type)
29604 {
29605 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29606
29607 if (TARGET_AAPCS_BASED)
29608 align = MIN (align, 64);
29609
29610 return align;
29611 }
29612
29613 static unsigned int
29614 arm_autovectorize_vector_sizes (void)
29615 {
29616 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
29617 }
29618
29619 static bool
29620 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29621 {
29622 /* Vectors which aren't in packed structures will not be less aligned than
29623 the natural alignment of their element type, so this is safe. */
29624 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29625 return !is_packed;
29626
29627 return default_builtin_vector_alignment_reachable (type, is_packed);
29628 }
29629
29630 static bool
29631 arm_builtin_support_vector_misalignment (enum machine_mode mode,
29632 const_tree type, int misalignment,
29633 bool is_packed)
29634 {
29635 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29636 {
29637 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29638
29639 if (is_packed)
29640 return align == 1;
29641
29642 /* If the misalignment is unknown, we should be able to handle the access
29643 so long as it is not to a member of a packed data structure. */
29644 if (misalignment == -1)
29645 return true;
29646
29647 /* Return true if the misalignment is a multiple of the natural alignment
29648 of the vector's element type. This is probably always going to be
29649 true in practice, since we've already established that this isn't a
29650 packed access. */
29651 return ((misalignment % align) == 0);
29652 }
29653
29654 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29655 is_packed);
29656 }
29657
29658 static void
29659 arm_conditional_register_usage (void)
29660 {
29661 int regno;
29662
29663 if (TARGET_THUMB1 && optimize_size)
29664 {
29665 /* When optimizing for size on Thumb-1, it's better not
29666 to use the HI regs, because of the overhead of
29667 stacking them. */
29668 for (regno = FIRST_HI_REGNUM;
29669 regno <= LAST_HI_REGNUM; ++regno)
29670 fixed_regs[regno] = call_used_regs[regno] = 1;
29671 }
29672
29673 /* The link register can be clobbered by any branch insn,
29674 but we have no way to track that at present, so mark
29675 it as unavailable. */
29676 if (TARGET_THUMB1)
29677 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29678
29679 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
29680 {
29681 /* VFPv3 registers are disabled when earlier VFP
29682 versions are selected due to the definition of
29683 LAST_VFP_REGNUM. */
29684 for (regno = FIRST_VFP_REGNUM;
29685 regno <= LAST_VFP_REGNUM; ++ regno)
29686 {
29687 fixed_regs[regno] = 0;
29688 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29689 || regno >= FIRST_VFP_REGNUM + 32;
29690 }
29691 }
29692
29693 if (TARGET_REALLY_IWMMXT)
29694 {
29695 regno = FIRST_IWMMXT_GR_REGNUM;
29696 /* The 2002/10/09 revision of the XScale ABI has wCG0
29697 and wCG1 as call-preserved registers. The 2002/11/21
29698 revision changed this so that all wCG registers are
29699 scratch registers. */
29700 for (regno = FIRST_IWMMXT_GR_REGNUM;
29701 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29702 fixed_regs[regno] = 0;
29703 /* The XScale ABI has wR0 - wR9 as scratch registers,
29704 the rest as call-preserved registers. */
29705 for (regno = FIRST_IWMMXT_REGNUM;
29706 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29707 {
29708 fixed_regs[regno] = 0;
29709 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29710 }
29711 }
29712
29713 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29714 {
29715 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29716 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29717 }
29718 else if (TARGET_APCS_STACK)
29719 {
29720 fixed_regs[10] = 1;
29721 call_used_regs[10] = 1;
29722 }
29723 /* -mcaller-super-interworking reserves r11 for calls to
29724 _interwork_r11_call_via_rN(). Making the register global
29725 is an easy way of ensuring that it remains valid for all
29726 calls. */
29727 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29728 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29729 {
29730 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29731 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29732 if (TARGET_CALLER_INTERWORKING)
29733 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29734 }
29735 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29736 }
29737
29738 static reg_class_t
29739 arm_preferred_rename_class (reg_class_t rclass)
29740 {
29741 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29742 using GENERAL_REGS. During the register rename pass we therefore prefer
29743 LO_REGS, so that code size can be reduced. */
29744 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
29745 return LO_REGS;
29746 else
29747 return NO_REGS;
29748 }
29749
29750 /* Compute the attribute "length" of insn "*push_multi".
29751 This function MUST therefore be kept in sync with that insn pattern. */
29752 int
29753 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
29754 {
29755 int i, regno, hi_reg;
29756 int num_saves = XVECLEN (parallel_op, 0);
29757
29758 /* ARM mode. */
29759 if (TARGET_ARM)
29760 return 4;
29761 /* Thumb1 mode. */
29762 if (TARGET_THUMB1)
29763 return 2;
29764
29765 /* Thumb2 mode. */
29766 regno = REGNO (first_op);
29767 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29768 for (i = 1; i < num_saves && !hi_reg; i++)
29769 {
29770 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
29771 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29772 }
29773
29774 if (!hi_reg)
29775 return 2;
29776 return 4;
29777 }
29778
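/* For example, in Thumb-2 "push {r4, r5, lr}" uses only low registers plus LR
   and so fits the 16-bit encoding (length 2), whereas including a high
   register such as r8 forces the 32-bit encoding (length 4).  */
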
29779 /* Compute the number of instructions emitted by output_move_double. */
29780 int
29781 arm_count_output_move_double_insns (rtx *operands)
29782 {
29783 int count;
29784 rtx ops[2];
29785 /* output_move_double may modify the operands array, so call it
29786 here on a copy of the array. */
29787 ops[0] = operands[0];
29788 ops[1] = operands[1];
29789 output_move_double (ops, false, &count);
29790 return count;
29791 }
29792
29793 int
29794 vfp3_const_double_for_fract_bits (rtx operand)
29795 {
29796 REAL_VALUE_TYPE r0;
29797
29798 if (!CONST_DOUBLE_P (operand))
29799 return 0;
29800
29801 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29802 if (exact_real_inverse (DFmode, &r0))
29803 {
29804 if (exact_real_truncate (DFmode, &r0))
29805 {
29806 HOST_WIDE_INT value = real_to_integer (&r0);
29807 value = value & 0xffffffff;
29808 if ((value != 0) && ( (value & (value - 1)) == 0))
29809 return int_log2 (value);
29810 }
29811 }
29812 return 0;
29813 }
29814
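/* For example, the constant 0.25 has the exact inverse 4.0, which truncates
   exactly and is a power of two, so vfp3_const_double_for_fract_bits returns
   2 (log2 of 4); constants whose reciprocal is not an exact power of two
   yield 0.  */
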
29815 int
29816 vfp3_const_double_for_bits (rtx operand)
29817 {
29818 REAL_VALUE_TYPE r0;
29819
29820 if (!CONST_DOUBLE_P (operand))
29821 return 0;
29822
29823 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29824 if (exact_real_truncate (DFmode, &r0))
29825 {
29826 HOST_WIDE_INT value = real_to_integer (&r0);
29827 value = value & 0xffffffff;
29828 if ((value != 0) && ( (value & (value - 1)) == 0))
29829 return int_log2 (value);
29830 }
29831
29832 return 0;
29833 }
29834 \f
29835 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29836
29837 static void
29838 arm_pre_atomic_barrier (enum memmodel model)
29839 {
29840 if (need_atomic_barrier_p (model, true))
29841 emit_insn (gen_memory_barrier ());
29842 }
29843
29844 static void
29845 arm_post_atomic_barrier (enum memmodel model)
29846 {
29847 if (need_atomic_barrier_p (model, false))
29848 emit_insn (gen_memory_barrier ());
29849 }
29850
29851 /* Emit the load-exclusive and store-exclusive instructions.
29852 Use acquire and release versions if necessary. */
29853
29854 static void
29855 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
29856 {
29857 rtx (*gen) (rtx, rtx);
29858
29859 if (acq)
29860 {
29861 switch (mode)
29862 {
29863 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
29864 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
29865 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
29866 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
29867 default:
29868 gcc_unreachable ();
29869 }
29870 }
29871 else
29872 {
29873 switch (mode)
29874 {
29875 case QImode: gen = gen_arm_load_exclusiveqi; break;
29876 case HImode: gen = gen_arm_load_exclusivehi; break;
29877 case SImode: gen = gen_arm_load_exclusivesi; break;
29878 case DImode: gen = gen_arm_load_exclusivedi; break;
29879 default:
29880 gcc_unreachable ();
29881 }
29882 }
29883
29884 emit_insn (gen (rval, mem));
29885 }
29886
29887 static void
29888 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
29889 rtx mem, bool rel)
29890 {
29891 rtx (*gen) (rtx, rtx, rtx);
29892
29893 if (rel)
29894 {
29895 switch (mode)
29896 {
29897 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
29898 case HImode: gen = gen_arm_store_release_exclusivehi; break;
29899 case SImode: gen = gen_arm_store_release_exclusivesi; break;
29900 case DImode: gen = gen_arm_store_release_exclusivedi; break;
29901 default:
29902 gcc_unreachable ();
29903 }
29904 }
29905 else
29906 {
29907 switch (mode)
29908 {
29909 case QImode: gen = gen_arm_store_exclusiveqi; break;
29910 case HImode: gen = gen_arm_store_exclusivehi; break;
29911 case SImode: gen = gen_arm_store_exclusivesi; break;
29912 case DImode: gen = gen_arm_store_exclusivedi; break;
29913 default:
29914 gcc_unreachable ();
29915 }
29916 }
29917
29918 emit_insn (gen (bval, rval, mem));
29919 }
29920
29921 /* Mark the previous jump instruction as unlikely. */
29922
29923 static void
29924 emit_unlikely_jump (rtx insn)
29925 {
29926 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
29927
29928 insn = emit_jump_insn (insn);
29929 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
29930 }
29931
29932 /* Expand a compare and swap pattern. */
29933
29934 void
29935 arm_expand_compare_and_swap (rtx operands[])
29936 {
29937 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
29938 enum machine_mode mode;
29939 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
29940
29941 bval = operands[0];
29942 rval = operands[1];
29943 mem = operands[2];
29944 oldval = operands[3];
29945 newval = operands[4];
29946 is_weak = operands[5];
29947 mod_s = operands[6];
29948 mod_f = operands[7];
29949 mode = GET_MODE (mem);
29950
29951 /* Normally the succ memory model must be stronger than fail, but in the
29952 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29953 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
29954
29955 if (TARGET_HAVE_LDACQ
29956 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
29957 && INTVAL (mod_s) == MEMMODEL_RELEASE)
29958 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
29959
29960 switch (mode)
29961 {
29962 case QImode:
29963 case HImode:
29964 /* For narrow modes, we're going to perform the comparison in SImode,
29965 so do the zero-extension now. */
29966 rval = gen_reg_rtx (SImode);
29967 oldval = convert_modes (SImode, mode, oldval, true);
29968 /* FALLTHRU */
29969
29970 case SImode:
29971 /* Force the value into a register if needed. We waited until after
29972 the zero-extension above to do this properly. */
29973 if (!arm_add_operand (oldval, SImode))
29974 oldval = force_reg (SImode, oldval);
29975 break;
29976
29977 case DImode:
29978 if (!cmpdi_operand (oldval, mode))
29979 oldval = force_reg (mode, oldval);
29980 break;
29981
29982 default:
29983 gcc_unreachable ();
29984 }
29985
29986 switch (mode)
29987 {
29988 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
29989 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
29990 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
29991 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
29992 default:
29993 gcc_unreachable ();
29994 }
29995
29996 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
29997
29998 if (mode == QImode || mode == HImode)
29999 emit_move_insn (operands[1], gen_lowpart (mode, rval));
30000
30001 /* In all cases, we arrange for success to be signaled by Z set.
30002 This arrangement allows for the boolean result to be used directly
30003 in a subsequent branch, post optimization. */
30004 x = gen_rtx_REG (CCmode, CC_REGNUM);
30005 x = gen_rtx_EQ (SImode, x, const0_rtx);
30006 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
30007 }
30008
30009 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30010 another memory store between the load-exclusive and store-exclusive can
30011 reset the monitor from Exclusive to Open state. This means we must wait
30012 until after reload to split the pattern, lest we get a register spill in
30013 the middle of the atomic sequence. */
30014
30015 void
30016 arm_split_compare_and_swap (rtx operands[])
30017 {
30018 rtx rval, mem, oldval, newval, scratch;
30019 enum machine_mode mode;
30020 enum memmodel mod_s, mod_f;
30021 bool is_weak;
30022 rtx label1, label2, x, cond;
30023
30024 rval = operands[0];
30025 mem = operands[1];
30026 oldval = operands[2];
30027 newval = operands[3];
30028 is_weak = (operands[4] != const0_rtx);
30029 mod_s = (enum memmodel) INTVAL (operands[5]);
30030 mod_f = (enum memmodel) INTVAL (operands[6]);
30031 scratch = operands[7];
30032 mode = GET_MODE (mem);
30033
30034 bool use_acquire = TARGET_HAVE_LDACQ
30035 && !(mod_s == MEMMODEL_RELAXED
30036 || mod_s == MEMMODEL_CONSUME
30037 || mod_s == MEMMODEL_RELEASE);
30038
30039 bool use_release = TARGET_HAVE_LDACQ
30040 && !(mod_s == MEMMODEL_RELAXED
30041 || mod_s == MEMMODEL_CONSUME
30042 || mod_s == MEMMODEL_ACQUIRE);
30043
30044 /* Checks whether a barrier is needed and emits one accordingly. */
30045 if (!(use_acquire || use_release))
30046 arm_pre_atomic_barrier (mod_s);
30047
30048 label1 = NULL_RTX;
30049 if (!is_weak)
30050 {
30051 label1 = gen_label_rtx ();
30052 emit_label (label1);
30053 }
30054 label2 = gen_label_rtx ();
30055
30056 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30057
30058 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30059 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30060 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30061 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30062 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30063
30064 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30065
30066 /* Weak or strong, we want EQ to be true for success, so that we
30067 match the flags that we got from the compare above. */
30068 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30069 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30070 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30071
30072 if (!is_weak)
30073 {
30074 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30075 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30076 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30077 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30078 }
30079
30080 if (mod_f != MEMMODEL_RELAXED)
30081 emit_label (label2);
30082
30083 /* Checks whether a barrier is needed and emits one accordingly. */
30084 if (!(use_acquire || use_release))
30085 arm_post_atomic_barrier (mod_s);
30086
30087 if (mod_f == MEMMODEL_RELAXED)
30088 emit_label (label2);
30089 }
30090
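/* Roughly, for a strong SImode compare-and-swap the split above produces a
   sequence of the shape

       1:  ldrex   rval, [mem]
           cmp     rval, oldval          @ via arm_gen_compare_reg
           bne     2f                    @ comparison failed: skip the store
           strex   scratch, newval, [mem]
           cmp     scratch, #0
           bne     1b                    @ reservation lost: retry
       2:

   bracketed by barriers (or using the acquire/release forms) as the memory
   model requires.  */
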
30091 void
30092 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30093 rtx value, rtx model_rtx, rtx cond)
30094 {
30095 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30096 enum machine_mode mode = GET_MODE (mem);
30097 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30098 rtx label, x;
30099
30100 bool use_acquire = TARGET_HAVE_LDACQ
30101 && !(model == MEMMODEL_RELAXED
30102 || model == MEMMODEL_CONSUME
30103 || model == MEMMODEL_RELEASE);
30104
30105 bool use_release = TARGET_HAVE_LDACQ
30106 && !(model == MEMMODEL_RELAXED
30107 || model == MEMMODEL_CONSUME
30108 || model == MEMMODEL_ACQUIRE);
30109
30110 /* Checks whether a barrier is needed and emits one accordingly. */
30111 if (!(use_acquire || use_release))
30112 arm_pre_atomic_barrier (model);
30113
30114 label = gen_label_rtx ();
30115 emit_label (label);
30116
30117 if (new_out)
30118 new_out = gen_lowpart (wmode, new_out);
30119 if (old_out)
30120 old_out = gen_lowpart (wmode, old_out);
30121 else
30122 old_out = new_out;
30123 value = simplify_gen_subreg (wmode, value, mode, 0);
30124
30125 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30126
30127 switch (code)
30128 {
30129 case SET:
30130 new_out = value;
30131 break;
30132
30133 case NOT:
30134 x = gen_rtx_AND (wmode, old_out, value);
30135 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30136 x = gen_rtx_NOT (wmode, new_out);
30137 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30138 break;
30139
30140 case MINUS:
30141 if (CONST_INT_P (value))
30142 {
30143 value = GEN_INT (-INTVAL (value));
30144 code = PLUS;
30145 }
30146 /* FALLTHRU */
30147
30148 case PLUS:
30149 if (mode == DImode)
30150 {
30151 /* DImode plus/minus need to clobber flags. */
30152 /* The adddi3 and subdi3 patterns are incorrectly written so that
30153 they require matching operands, even when we could easily support
30154 three operands. Thankfully, this can be fixed up post-splitting,
30155 as the individual add+adc patterns do accept three operands and
30156 post-reload cprop can make these moves go away. */
30157 emit_move_insn (new_out, old_out);
30158 if (code == PLUS)
30159 x = gen_adddi3 (new_out, new_out, value);
30160 else
30161 x = gen_subdi3 (new_out, new_out, value);
30162 emit_insn (x);
30163 break;
30164 }
30165 /* FALLTHRU */
30166
30167 default:
30168 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30169 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30170 break;
30171 }
30172
30173 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30174 use_release);
30175
30176 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30177 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30178
30179 /* Checks whether a barrier is needed and emits one accordingly. */
30180 if (!(use_acquire || use_release))
30181 arm_post_atomic_barrier (model);
30182 }
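
/* The resulting loop is, schematically:

       1:  ldrex   old, [mem]
           <op>    new, old, value
           strex   cond, new, [mem]
           cmp     cond, #0
           bne     1b

   again with barriers or acquire/release variants chosen from MODEL.  */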
30183 \f
30184 #define MAX_VECT_LEN 16
30185
30186 struct expand_vec_perm_d
30187 {
30188 rtx target, op0, op1;
30189 unsigned char perm[MAX_VECT_LEN];
30190 enum machine_mode vmode;
30191 unsigned char nelt;
30192 bool one_vector_p;
30193 bool testing_p;
30194 };
30195
30196 /* Generate a variable permutation. */
30197
30198 static void
30199 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30200 {
30201 enum machine_mode vmode = GET_MODE (target);
30202 bool one_vector_p = rtx_equal_p (op0, op1);
30203
30204 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30205 gcc_checking_assert (GET_MODE (op0) == vmode);
30206 gcc_checking_assert (GET_MODE (op1) == vmode);
30207 gcc_checking_assert (GET_MODE (sel) == vmode);
30208 gcc_checking_assert (TARGET_NEON);
30209
30210 if (one_vector_p)
30211 {
30212 if (vmode == V8QImode)
30213 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30214 else
30215 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30216 }
30217 else
30218 {
30219 rtx pair;
30220
30221 if (vmode == V8QImode)
30222 {
30223 pair = gen_reg_rtx (V16QImode);
30224 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30225 pair = gen_lowpart (TImode, pair);
30226 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30227 }
30228 else
30229 {
30230 pair = gen_reg_rtx (OImode);
30231 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30232 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30233 }
30234 }
30235 }
30236
30237 void
30238 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30239 {
30240 enum machine_mode vmode = GET_MODE (target);
30241 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30242 bool one_vector_p = rtx_equal_p (op0, op1);
30243 rtx rmask[MAX_VECT_LEN], mask;
30244
30245 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30246 numbering of elements for big-endian, we must reverse the order. */
30247 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30248
30249 /* The VTBL instruction does not use a modulo index, so we must take care
30250 of that ourselves. */
30251 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30252 for (i = 0; i < nelt; ++i)
30253 rmask[i] = mask;
30254 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30255 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30256
30257 arm_expand_vec_perm_1 (target, op0, op1, sel);
30258 }
30259
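/* For example, for a two-operand V8QImode permutation nelt is 8, so the
   selector is ANDed with 15 before the VTBL: an index of 17 behaves like
   index 1, giving the modulo semantics VEC_PERM_EXPR requires even though
   VTBL itself would have produced zero for the out-of-range lane.  */
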
30260 /* Generate or test for an insn that supports a constant permutation. */
30261
30262 /* Recognize patterns for the VUZP insns. */
30263
30264 static bool
30265 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30266 {
30267 unsigned int i, odd, mask, nelt = d->nelt;
30268 rtx out0, out1, in0, in1, x;
30269 rtx (*gen)(rtx, rtx, rtx, rtx);
30270
30271 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30272 return false;
30273
30274 /* Note that these are little-endian tests. Adjust for big-endian later. */
30275 if (d->perm[0] == 0)
30276 odd = 0;
30277 else if (d->perm[0] == 1)
30278 odd = 1;
30279 else
30280 return false;
30281 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30282
30283 for (i = 0; i < nelt; i++)
30284 {
30285 unsigned elt = (i * 2 + odd) & mask;
30286 if (d->perm[i] != elt)
30287 return false;
30288 }
30289
30290 /* Success! */
30291 if (d->testing_p)
30292 return true;
30293
30294 switch (d->vmode)
30295 {
30296 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30297 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30298 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30299 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30300 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30301 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30302 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30303 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30304 default:
30305 gcc_unreachable ();
30306 }
30307
30308 in0 = d->op0;
30309 in1 = d->op1;
30310 if (BYTES_BIG_ENDIAN)
30311 {
30312 x = in0, in0 = in1, in1 = x;
30313 odd = !odd;
30314 }
30315
30316 out0 = d->target;
30317 out1 = gen_reg_rtx (d->vmode);
30318 if (odd)
30319 x = out0, out0 = out1, out1 = x;
30320
30321 emit_insn (gen (out0, in0, in1, out1));
30322 return true;
30323 }
30324
30325 /* Recognize patterns for the VZIP insns. */
30326
30327 static bool
30328 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30329 {
30330 unsigned int i, high, mask, nelt = d->nelt;
30331 rtx out0, out1, in0, in1, x;
30332 rtx (*gen)(rtx, rtx, rtx, rtx);
30333
30334 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30335 return false;
30336
30337 /* Note that these are little-endian tests. Adjust for big-endian later. */
30338 high = nelt / 2;
30339 if (d->perm[0] == high)
30340 ;
30341 else if (d->perm[0] == 0)
30342 high = 0;
30343 else
30344 return false;
30345 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30346
30347 for (i = 0; i < nelt / 2; i++)
30348 {
30349 unsigned elt = (i + high) & mask;
30350 if (d->perm[i * 2] != elt)
30351 return false;
30352 elt = (elt + nelt) & mask;
30353 if (d->perm[i * 2 + 1] != elt)
30354 return false;
30355 }
30356
30357 /* Success! */
30358 if (d->testing_p)
30359 return true;
30360
30361 switch (d->vmode)
30362 {
30363 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30364 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30365 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30366 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30367 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30368 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30369 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30370 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30371 default:
30372 gcc_unreachable ();
30373 }
30374
30375 in0 = d->op0;
30376 in1 = d->op1;
30377 if (BYTES_BIG_ENDIAN)
30378 {
30379 x = in0, in0 = in1, in1 = x;
30380 high = !high;
30381 }
30382
30383 out0 = d->target;
30384 out1 = gen_reg_rtx (d->vmode);
30385 if (high)
30386 x = out0, out0 = out1, out1 = x;
30387
30388 emit_insn (gen (out0, in0, in1, out1));
30389 return true;
30390 }
30391
30392 /* Recognize patterns for the VREV insns. */
30393
30394 static bool
30395 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30396 {
30397 unsigned int i, j, diff, nelt = d->nelt;
30398 rtx (*gen)(rtx, rtx, rtx);
30399
30400 if (!d->one_vector_p)
30401 return false;
30402
30403 diff = d->perm[0];
30404 switch (diff)
30405 {
30406 case 7:
30407 switch (d->vmode)
30408 {
30409 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30410 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30411 default:
30412 return false;
30413 }
30414 break;
30415 case 3:
30416 switch (d->vmode)
30417 {
30418 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30419 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30420 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30421 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30422 default:
30423 return false;
30424 }
30425 break;
30426 case 1:
30427 switch (d->vmode)
30428 {
30429 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30430 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30431 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30432 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30433 case V4SImode: gen = gen_neon_vrev64v4si; break;
30434 case V2SImode: gen = gen_neon_vrev64v2si; break;
30435 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30436 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30437 default:
30438 return false;
30439 }
30440 break;
30441 default:
30442 return false;
30443 }
30444
30445 for (i = 0; i < nelt ; i += diff + 1)
30446 for (j = 0; j <= diff; j += 1)
30447 {
30448 /* This is guaranteed to be true, as the value of diff
30449 is 7, 3 or 1 and we should have enough elements in the
30450 queue to generate this. Getting a vector mask with a
30451 value of diff other than these implies that
30452 something has gone wrong by the time we get here. */
30453 gcc_assert (i + j < nelt);
30454 if (d->perm[i + j] != i + diff - j)
30455 return false;
30456 }
30457
30458 /* Success! */
30459 if (d->testing_p)
30460 return true;
30461
30462 /* ??? The third operand is an artifact of the builtin infrastructure
30463 and is ignored by the actual instruction. */
30464 emit_insn (gen (d->target, d->op0, const0_rtx));
30465 return true;
30466 }
30467
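/* For instance, with d->perm == { 7, 6, 5, 4, 3, 2, 1, 0 } on V8QImode,
   diff is 7 and the checks above succeed, so the whole permutation is
   emitted as a single vrev64.8 instruction.  */
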
30468 /* Recognize patterns for the VTRN insns. */
30469
30470 static bool
30471 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30472 {
30473 unsigned int i, odd, mask, nelt = d->nelt;
30474 rtx out0, out1, in0, in1, x;
30475 rtx (*gen)(rtx, rtx, rtx, rtx);
30476
30477 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30478 return false;
30479
30480 /* Note that these are little-endian tests. Adjust for big-endian later. */
30481 if (d->perm[0] == 0)
30482 odd = 0;
30483 else if (d->perm[0] == 1)
30484 odd = 1;
30485 else
30486 return false;
30487 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30488
30489 for (i = 0; i < nelt; i += 2)
30490 {
30491 if (d->perm[i] != i + odd)
30492 return false;
30493 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30494 return false;
30495 }
30496
30497 /* Success! */
30498 if (d->testing_p)
30499 return true;
30500
30501 switch (d->vmode)
30502 {
30503 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30504 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30505 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30506 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30507 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30508 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30509 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30510 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30511 default:
30512 gcc_unreachable ();
30513 }
30514
30515 in0 = d->op0;
30516 in1 = d->op1;
30517 if (BYTES_BIG_ENDIAN)
30518 {
30519 x = in0, in0 = in1, in1 = x;
30520 odd = !odd;
30521 }
30522
30523 out0 = d->target;
30524 out1 = gen_reg_rtx (d->vmode);
30525 if (odd)
30526 x = out0, out0 = out1, out1 = x;
30527
30528 emit_insn (gen (out0, in0, in1, out1));
30529 return true;
30530 }
30531
30532 /* Recognize patterns for the VEXT insns. */
30533
30534 static bool
30535 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30536 {
30537 unsigned int i, nelt = d->nelt;
30538 rtx (*gen) (rtx, rtx, rtx, rtx);
30539 rtx offset;
30540
30541 unsigned int location;
30542
30543 unsigned int next = d->perm[0] + 1;
30544
30545 /* TODO: Handle GCC's numbering of elements for big-endian. */
30546 if (BYTES_BIG_ENDIAN)
30547 return false;
30548
30549 /* Check if the extracted indexes are increasing by one. */
30550 for (i = 1; i < nelt; next++, i++)
30551 {
30552 /* If we hit the most significant element of the 2nd vector in
30553 the previous iteration, no need to test further. */
30554 if (next == 2 * nelt)
30555 return false;
30556
30557 /* If we are operating on only one vector: it could be a
30558 rotation. If there are only two elements of size < 64, let
30559 arm_evpc_neon_vrev catch it. */
30560 if (d->one_vector_p && (next == nelt))
30561 {
30562 if ((nelt == 2) && (d->vmode != V2DImode))
30563 return false;
30564 else
30565 next = 0;
30566 }
30567
30568 if (d->perm[i] != next)
30569 return false;
30570 }
30571
30572 location = d->perm[0];
30573
30574 switch (d->vmode)
30575 {
30576 case V16QImode: gen = gen_neon_vextv16qi; break;
30577 case V8QImode: gen = gen_neon_vextv8qi; break;
30578 case V4HImode: gen = gen_neon_vextv4hi; break;
30579 case V8HImode: gen = gen_neon_vextv8hi; break;
30580 case V2SImode: gen = gen_neon_vextv2si; break;
30581 case V4SImode: gen = gen_neon_vextv4si; break;
30582 case V2SFmode: gen = gen_neon_vextv2sf; break;
30583 case V4SFmode: gen = gen_neon_vextv4sf; break;
30584 case V2DImode: gen = gen_neon_vextv2di; break;
30585 default:
30586 return false;
30587 }
30588
30589 /* Success! */
30590 if (d->testing_p)
30591 return true;
30592
30593 offset = GEN_INT (location);
30594 emit_insn (gen (d->target, d->op0, d->op1, offset));
30595 return true;
30596 }
30597
30598 /* The NEON VTBL instruction is a fully variable permutation that's even
30599 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30600 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30601 can do slightly better by expanding this as a constant where we don't
30602 have to apply a mask. */
30603
30604 static bool
30605 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30606 {
30607 rtx rperm[MAX_VECT_LEN], sel;
30608 enum machine_mode vmode = d->vmode;
30609 unsigned int i, nelt = d->nelt;
30610
30611 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30612 numbering of elements for big-endian, we must reverse the order. */
30613 if (BYTES_BIG_ENDIAN)
30614 return false;
30615
30616 if (d->testing_p)
30617 return true;
30618
30619 /* Generic code will try constant permutation twice: once with the
30620 original mode and again with the elements lowered to QImode.
30621 So wait and don't do the selector expansion ourselves. */
30622 if (vmode != V8QImode && vmode != V16QImode)
30623 return false;
30624
30625 for (i = 0; i < nelt; ++i)
30626 rperm[i] = GEN_INT (d->perm[i]);
30627 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30628 sel = force_reg (vmode, sel);
30629
30630 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30631 return true;
30632 }
30633
30634 static bool
30635 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30636 {
30637 /* Check if the input mask matches vext before reordering the
30638 operands. */
30639 if (TARGET_NEON)
30640 if (arm_evpc_neon_vext (d))
30641 return true;
30642
30643 /* The pattern matching functions above are written to look for a small
30644 number to begin the sequence (0, 1, N/2). If we begin with an index
30645 from the second operand, we can swap the operands. */
30646 if (d->perm[0] >= d->nelt)
30647 {
30648 unsigned i, nelt = d->nelt;
30649 rtx x;
30650
30651 for (i = 0; i < nelt; ++i)
30652 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
30653
30654 x = d->op0;
30655 d->op0 = d->op1;
30656 d->op1 = x;
30657 }
30658
30659 if (TARGET_NEON)
30660 {
30661 if (arm_evpc_neon_vuzp (d))
30662 return true;
30663 if (arm_evpc_neon_vzip (d))
30664 return true;
30665 if (arm_evpc_neon_vrev (d))
30666 return true;
30667 if (arm_evpc_neon_vtrn (d))
30668 return true;
30669 return arm_evpc_neon_vtbl (d);
30670 }
30671 return false;
30672 }
30673
30674 /* Expand a vec_perm_const pattern. */
30675
30676 bool
30677 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
30678 {
30679 struct expand_vec_perm_d d;
30680 int i, nelt, which;
30681
30682 d.target = target;
30683 d.op0 = op0;
30684 d.op1 = op1;
30685
30686 d.vmode = GET_MODE (target);
30687 gcc_assert (VECTOR_MODE_P (d.vmode));
30688 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30689 d.testing_p = false;
30690
30691 for (i = which = 0; i < nelt; ++i)
30692 {
30693 rtx e = XVECEXP (sel, 0, i);
30694 int ei = INTVAL (e) & (2 * nelt - 1);
30695 which |= (ei < nelt ? 1 : 2);
30696 d.perm[i] = ei;
30697 }
30698
30699 switch (which)
30700 {
30701 default:
30702 gcc_unreachable();
30703
30704 case 3:
30705 d.one_vector_p = false;
30706 if (!rtx_equal_p (op0, op1))
30707 break;
30708
30709 /* The elements of PERM do not suggest that only the first operand
30710 is used, but both operands are identical. Allow easier matching
30711 of the permutation by folding the permutation into the single
30712 input vector. */
30713 /* FALLTHRU */
30714 case 2:
30715 for (i = 0; i < nelt; ++i)
30716 d.perm[i] &= nelt - 1;
30717 d.op0 = op1;
30718 d.one_vector_p = true;
30719 break;
30720
30721 case 1:
30722 d.op1 = op0;
30723 d.one_vector_p = true;
30724 break;
30725 }
30726
30727 return arm_expand_vec_perm_const_1 (&d);
30728 }
30729
30730 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30731
30732 static bool
30733 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
30734 const unsigned char *sel)
30735 {
30736 struct expand_vec_perm_d d;
30737 unsigned int i, nelt, which;
30738 bool ret;
30739
30740 d.vmode = vmode;
30741 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30742 d.testing_p = true;
30743 memcpy (d.perm, sel, nelt);
30744
30745 /* Categorize the set of elements in the selector. */
30746 for (i = which = 0; i < nelt; ++i)
30747 {
30748 unsigned char e = d.perm[i];
30749 gcc_assert (e < 2 * nelt);
30750 which |= (e < nelt ? 1 : 2);
30751 }
30752
30753 /* If all elements are from the second vector, fold them onto the first. */
30754 if (which == 2)
30755 for (i = 0; i < nelt; ++i)
30756 d.perm[i] -= nelt;
30757
30758 /* Check whether the mask can be applied to the vector type. */
30759 d.one_vector_p = (which != 3);
30760
30761 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30762 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
30763 if (!d.one_vector_p)
30764 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
30765
30766 start_sequence ();
30767 ret = arm_expand_vec_perm_const_1 (&d);
30768 end_sequence ();
30769
30770 return ret;
30771 }
30772
30773 bool
30774 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
30775 {
30776 /* If we are soft float and either have ldrd or the mode fits in a single
30777 word, then all auto increment forms are ok. */
30778 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
30779 return true;
30780
30781 switch (code)
30782 {
30783 /* Post increment and pre decrement are supported for all
30784 instruction forms; for vector forms only post increment is supported. */
30785 case ARM_POST_INC:
30786 case ARM_PRE_DEC:
30787 if (VECTOR_MODE_P (mode))
30788 {
30789 if (code != ARM_PRE_DEC)
30790 return true;
30791 else
30792 return false;
30793 }
30794
30795 return true;
30796
30797 case ARM_POST_DEC:
30798 case ARM_PRE_INC:
30799 /* Without LDRD, and with a mode size greater than
30800 the word size, there is no point in auto-incrementing
30801 because ldm and stm do not have these forms. */
30802 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
30803 return false;
30804
30805 /* Vector and floating point modes do not support
30806 these auto increment forms. */
30807 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
30808 return false;
30809
30810 return true;
30811
30812 default:
30813 return false;
30814
30815 }
30816
30817 return false;
30818 }
30819
30820 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
30821 on ARM, since we know that shifts by negative amounts are no-ops.
30822 Additionally, the default expansion code is not available or suitable
30823 for post-reload insn splits (this can occur when the register allocator
30824 chooses not to do a shift in NEON).
30825
30826 This function is used in both initial expand and post-reload splits, and
30827 handles all kinds of 64-bit shifts.
30828
30829 Input requirements:
30830 - It is safe for the input and output to be the same register, but
30831 early-clobber rules apply for the shift amount and scratch registers.
30832 - Shift by register requires both scratch registers. In all other cases
30833 the scratch registers may be NULL.
30834 - Ashiftrt by a register also clobbers the CC register. */
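/* A minimal usage sketch (illustrative only; the operand numbering is an
   assumption, not taken from a particular machine-description pattern):

	arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1],
				       operands[2], operands[3], operands[4]);

   where operands[3] and operands[4] would be SImode scratch registers,
   which may be NULL when the shift amount is a constant.  */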
30835 void
30836 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
30837 rtx amount, rtx scratch1, rtx scratch2)
30838 {
30839 rtx out_high = gen_highpart (SImode, out);
30840 rtx out_low = gen_lowpart (SImode, out);
30841 rtx in_high = gen_highpart (SImode, in);
30842 rtx in_low = gen_lowpart (SImode, in);
30843
30844 /* Terminology:
30845 in = the register pair containing the input value.
30846 out = the destination register pair.
30847 up = the high- or low-part of each pair.
30848 down = the opposite part to "up".
30849 In a shift, we can consider bits to shift from "up"-stream to
30850 "down"-stream, so in a left-shift "up" is the low-part and "down"
30851 is the high-part of each register pair. */
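  /* For example, in a right shift the bits move from the high word towards
     the low word, so "up" is the high part and "down" is the low part of
     each pair.  */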
30852
30853 rtx out_up = code == ASHIFT ? out_low : out_high;
30854 rtx out_down = code == ASHIFT ? out_high : out_low;
30855 rtx in_up = code == ASHIFT ? in_low : in_high;
30856 rtx in_down = code == ASHIFT ? in_high : in_low;
30857
30858 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
30859 gcc_assert (out
30860 && (REG_P (out) || GET_CODE (out) == SUBREG)
30861 && GET_MODE (out) == DImode);
30862 gcc_assert (in
30863 && (REG_P (in) || GET_CODE (in) == SUBREG)
30864 && GET_MODE (in) == DImode);
30865 gcc_assert (amount
30866 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
30867 && GET_MODE (amount) == SImode)
30868 || CONST_INT_P (amount)));
30869 gcc_assert (scratch1 == NULL
30870 || (GET_CODE (scratch1) == SCRATCH)
30871 || (GET_MODE (scratch1) == SImode
30872 && REG_P (scratch1)));
30873 gcc_assert (scratch2 == NULL
30874 || (GET_CODE (scratch2) == SCRATCH)
30875 || (GET_MODE (scratch2) == SImode
30876 && REG_P (scratch2)));
30877 gcc_assert (!REG_P (out) || !REG_P (amount)
30878 || !HARD_REGISTER_P (out)
30879 || (REGNO (out) != REGNO (amount)
30880 && REGNO (out) + 1 != REGNO (amount)));
30881
30882 /* Macros to make following code more readable. */
30883 #define SUB_32(DEST,SRC) \
30884 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
30885 #define RSB_32(DEST,SRC) \
30886 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
30887 #define SUB_S_32(DEST,SRC) \
30888 gen_addsi3_compare0 ((DEST), (SRC), \
30889 GEN_INT (-32))
30890 #define SET(DEST,SRC) \
30891 gen_rtx_SET (SImode, (DEST), (SRC))
30892 #define SHIFT(CODE,SRC,AMOUNT) \
30893 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
30894 #define LSHIFT(CODE,SRC,AMOUNT) \
30895 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
30896 SImode, (SRC), (AMOUNT))
30897 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
30898 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
30899 SImode, (SRC), (AMOUNT))
30900 #define ORR(A,B) \
30901 gen_rtx_IOR (SImode, (A), (B))
30902 #define BRANCH(COND,LABEL) \
30903 gen_arm_cond_branch ((LABEL), \
30904 gen_rtx_ ## COND (CCmode, cc_reg, \
30905 const0_rtx), \
30906 cc_reg)
30907
30908 /* Shifts by register and shifts by constant are handled separately. */
30909 if (CONST_INT_P (amount))
30910 {
30911 /* We have a shift-by-constant. */
30912
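      /* Worked example (illustrative): for a 64-bit logical right shift by
	 the constant 10 the code below emits, in effect,
	    out_low  = in_low >> 10
	    out_low |= in_high << 22	 (i.e. 32 - 10)
	    out_high = in_high >> 10
	 while for a shift by the constant 40 it emits
	    out_low  = in_high >> 8	 (i.e. 40 - 32)
	    out_high = 0.  */
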
30913 /* First, handle out-of-range shift amounts.
30914 In both cases we try to match the result that an ARM instruction in
30915 a shift-by-register would give.  This helps reduce execution
30916 differences between optimization levels, but it won't stop other
30917 parts of the compiler doing different things.  This is "undefined"
30918 behaviour, in any case.  */
30919 if (INTVAL (amount) <= 0)
30920 emit_insn (gen_movdi (out, in));
30921 else if (INTVAL (amount) >= 64)
30922 {
30923 if (code == ASHIFTRT)
30924 {
30925 rtx const31_rtx = GEN_INT (31);
30926 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
30927 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
30928 }
30929 else
30930 emit_insn (gen_movdi (out, const0_rtx));
30931 }
30932
30933 /* Now handle valid shifts. */
30934 else if (INTVAL (amount) < 32)
30935 {
30936 /* Shifts by a constant less than 32. */
30937 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
30938
30939 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
30940 emit_insn (SET (out_down,
30941 ORR (REV_LSHIFT (code, in_up, reverse_amount),
30942 out_down)));
30943 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
30944 }
30945 else
30946 {
30947 /* Shifts by a constant greater than 31. */
30948 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
30949
30950 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
30951 if (code == ASHIFTRT)
30952 emit_insn (gen_ashrsi3 (out_up, in_up,
30953 GEN_INT (31)));
30954 else
30955 emit_insn (SET (out_up, const0_rtx));
30956 }
30957 }
30958 else
30959 {
30960 /* We have a shift-by-register. */
30961 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
30962
30963 /* This alternative requires the scratch registers. */
30964 gcc_assert (scratch1 && REG_P (scratch1));
30965 gcc_assert (scratch2 && REG_P (scratch2));
30966
30967 /* We will need the values "amount-32" and "32-amount" later.
30968 Swapping them around now allows the later code to be more general. */
30969 switch (code)
30970 {
30971 case ASHIFT:
30972 emit_insn (SUB_32 (scratch1, amount));
30973 emit_insn (RSB_32 (scratch2, amount));
30974 break;
30975 case ASHIFTRT:
30976 emit_insn (RSB_32 (scratch1, amount));
30977 /* Also set CC from amount - 32 (tested as amount >= 32 below).  */
30978 emit_insn (SUB_S_32 (scratch2, amount));
30979 break;
30980 case LSHIFTRT:
30981 emit_insn (RSB_32 (scratch1, amount));
30982 emit_insn (SUB_32 (scratch2, amount));
30983 break;
30984 default:
30985 gcc_unreachable ();
30986 }
30987
30988 /* Emit code like this:
30989
30990 arithmetic-left:
30991 out_down = in_down << amount;
30992 out_down = (in_up << (amount - 32)) | out_down;
30993 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
30994 out_up = in_up << amount;
30995
30996 arithmetic-right:
30997 out_down = in_down >> amount;
30998 out_down = (in_up << (32 - amount)) | out_down;
30999 if (amount >= 32)
31000 out_down = ((signed)in_up >> (amount - 32)) | out_down;
31001 out_up = (signed)in_up >> amount;
31002
31003 logical-right:
31004 out_down = in_down >> amount;
31005 out_down = (in_up << (32 - amount)) | out_down;
31006 if (amount >= 32)
31007 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
31008 out_up = (unsigned)in_up >> amount;
31009
31010 The ARM and Thumb2 variants are the same but implemented slightly
31011 differently. If this were only called during expand we could just
31012 use the Thumb2 case and let combine do the right thing, but this
31013 can also be called from post-reload splitters. */
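      /* Only the arithmetic-right case needs a real branch around the
	 conditional step: an LSR or LSL whose register-specified amount ends
	 up being 32 or more (which is what "amount - 32" or "32 - amount"
	 becomes, via the low byte of the register, when the shift is out of
	 range) simply yields zero, so the corresponding ORR is harmless and
	 can be emitted unconditionally; an ASR by such an amount would
	 replicate the sign bit instead, hence the branch below.  */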
31014
31015 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
31016
31017 if (!TARGET_THUMB2)
31018 {
31019 /* Emit code for ARM mode. */
31020 emit_insn (SET (out_down,
31021 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
31022 if (code == ASHIFTRT)
31023 {
31024 rtx done_label = gen_label_rtx ();
31025 emit_jump_insn (BRANCH (LT, done_label));
31026 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
31027 out_down)));
31028 emit_label (done_label);
31029 }
31030 else
31031 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
31032 out_down)));
31033 }
31034 else
31035 {
31036 /* Emit code for Thumb2 mode.
31037 Thumb2 can't do shift and or in one insn. */
31038 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
31039 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
31040
31041 if (code == ASHIFTRT)
31042 {
31043 rtx done_label = gen_label_rtx ();
31044 emit_jump_insn (BRANCH (LT, done_label));
31045 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
31046 emit_insn (SET (out_down, ORR (out_down, scratch2)));
31047 emit_label (done_label);
31048 }
31049 else
31050 {
31051 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
31052 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
31053 }
31054 }
31055
31056 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
31057 }
31058
31059 #undef SUB_32
31060 #undef RSB_32
31061 #undef SUB_S_32
31062 #undef SET
31063 #undef SHIFT
31064 #undef LSHIFT
31065 #undef REV_LSHIFT
31066 #undef ORR
31067 #undef BRANCH
31068 }
31069
31070
31071 /* Return true if *COMPARISON is a comparison we can handle, canonicalizing
31072 its code and forcing the operands into valid forms as a side effect.  */
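/* A minimal usage sketch (illustrative; the operand names are assumed, not
   taken from a particular caller):

	rtx cond = operands[0];
	if (!arm_validize_comparison (&cond, &operands[1], &operands[2]))
	  FAIL;

   On success the comparison rtx has had its code canonicalized in place and
   the two operands have been forced into forms the comparison patterns
   accept.  */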
31073 bool
31074 arm_validize_comparison (rtx *comparison, rtx *op1, rtx *op2)
31075 {
31076 enum rtx_code code = GET_CODE (*comparison);
31077 int code_int;
31078 enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
31079 ? GET_MODE (*op2) : GET_MODE (*op1);
31080
31081 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
31082
31083 if (code == UNEQ || code == LTGT)
31084 return false;
31085
31086 code_int = (int)code;
31087 arm_canonicalize_comparison (&code_int, op1, op2, 0);
31088 PUT_CODE (*comparison, (enum rtx_code)code_int);
31089
31090 switch (mode)
31091 {
31092 case SImode:
31093 if (!arm_add_operand (*op1, mode))
31094 *op1 = force_reg (mode, *op1);
31095 if (!arm_add_operand (*op2, mode))
31096 *op2 = force_reg (mode, *op2);
31097 return true;
31098
31099 case DImode:
31100 if (!cmpdi_operand (*op1, mode))
31101 *op1 = force_reg (mode, *op1);
31102 if (!cmpdi_operand (*op2, mode))
31103 *op2 = force_reg (mode, *op2);
31104 return true;
31105
31106 case SFmode:
31107 case DFmode:
31108 if (!arm_float_compare_operand (*op1, mode))
31109 *op1 = force_reg (mode, *op1);
31110 if (!arm_float_compare_operand (*op2, mode))
31111 *op2 = force_reg (mode, *op2);
31112 return true;
31113 default:
31114 break;
31115 }
31116
31117 return false;
31118
31119 }
31120
31121 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
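/* With GCC's 1:8 ASan shadow mapping this corresponds to
   shadow_address = (address >> 3) + 0x20000000.  */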
31122
31123 static unsigned HOST_WIDE_INT
31124 arm_asan_shadow_offset (void)
31125 {
31126 return (unsigned HOST_WIDE_INT) 1 << 29;
31127 }
31128
31129
31130 /* This is a temporary fix for PR60655.  Ideally we should
31131 handle most of these cases in the generic part, but
31132 currently we reject minus (..) (sym_ref).  We try to
31133 ameliorate the case of minus (sym_ref1) (sym_ref2)
31134 where both symbols are in the same section.  */
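/* For example, a debug expression of the form
	(minus (symbol_ref "a") (symbol_ref "b"))
   with both "a" and "b" being variables placed in the same section is
   accepted (we return false), while the same expression with the variables
   in different sections is rejected (we return true).  The symbol names
   here are hypothetical, for illustration only.  */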
31135
31136 static bool
31137 arm_const_not_ok_for_debug_p (rtx p)
31138 {
31139 tree decl_op0 = NULL;
31140 tree decl_op1 = NULL;
31141
31142 if (GET_CODE (p) == MINUS)
31143 {
31144 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
31145 {
31146 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
31147 if (decl_op1
31148 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
31149 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
31150 {
31151 if ((TREE_CODE (decl_op1) == VAR_DECL
31152 || TREE_CODE (decl_op1) == CONST_DECL)
31153 && (TREE_CODE (decl_op0) == VAR_DECL
31154 || TREE_CODE (decl_op0) == CONST_DECL))
31155 return (get_variable_section (decl_op1, false)
31156 != get_variable_section (decl_op0, false));
31157
31158 if (TREE_CODE (decl_op1) == LABEL_DECL
31159 && TREE_CODE (decl_op0) == LABEL_DECL)
31160 return (DECL_CONTEXT (decl_op1)
31161 != DECL_CONTEXT (decl_op0));
31162 }
31163
31164 return true;
31165 }
31166 }
31167
31168 return false;
31169 }
31170
31171 #include "gt-arm.h"