[ARM] PR63870 Add qualifiers for NEON builtins
[gcc.git] / gcc / config / arm / arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "cfghooks.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "optabs.h"
35 #include "regs.h"
36 #include "emit-rtl.h"
37 #include "recog.h"
38 #include "cgraph.h"
39 #include "diagnostic-core.h"
40 #include "alias.h"
41 #include "fold-const.h"
42 #include "stor-layout.h"
43 #include "calls.h"
44 #include "varasm.h"
45 #include "output.h"
46 #include "insn-attr.h"
47 #include "flags.h"
48 #include "reload.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "cfgrtl.h"
52 #include "sched-int.h"
53 #include "common/common-target.h"
54 #include "langhooks.h"
55 #include "intl.h"
56 #include "libfuncs.h"
57 #include "params.h"
58 #include "opts.h"
59 #include "dumpfile.h"
60 #include "target-globals.h"
61 #include "builtins.h"
62 #include "tm-constrs.h"
63 #include "rtl-iter.h"
64
65 /* This file should be included last. */
66 #include "target-def.h"
67
68 /* Forward definitions of types. */
69 typedef struct minipool_node Mnode;
70 typedef struct minipool_fixup Mfix;
71
72 void (*arm_lang_output_object_attributes_hook)(void);
73
74 struct four_ints
75 {
76 int i[4];
77 };
78
79 /* Forward function declarations. */
80 static bool arm_const_not_ok_for_debug_p (rtx);
81 static bool arm_needs_doubleword_align (machine_mode, const_tree);
82 static int arm_compute_static_chain_stack_bytes (void);
83 static arm_stack_offsets *arm_get_frame_offsets (void);
84 static void arm_add_gc_roots (void);
85 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
86 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
87 static unsigned bit_count (unsigned long);
88 static unsigned feature_count (const arm_feature_set*);
89 static int arm_address_register_rtx_p (rtx, int);
90 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
91 static bool is_called_in_ARM_mode (tree);
92 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
93 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
94 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
95 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
96 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
97 inline static int thumb1_index_register_rtx_p (rtx, int);
98 static int thumb_far_jump_used_p (void);
99 static bool thumb_force_lr_save (void);
100 static unsigned arm_size_return_regs (void);
101 static bool arm_assemble_integer (rtx, unsigned int, int);
102 static void arm_print_operand (FILE *, rtx, int);
103 static void arm_print_operand_address (FILE *, machine_mode, rtx);
104 static bool arm_print_operand_punct_valid_p (unsigned char code);
105 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
106 static arm_cc get_arm_condition_code (rtx);
107 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
108 static const char *output_multi_immediate (rtx *, const char *, const char *,
109 int, HOST_WIDE_INT);
110 static const char *shift_op (rtx, HOST_WIDE_INT *);
111 static struct machine_function *arm_init_machine_status (void);
112 static void thumb_exit (FILE *, int);
113 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
114 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
115 static Mnode *add_minipool_forward_ref (Mfix *);
116 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
117 static Mnode *add_minipool_backward_ref (Mfix *);
118 static void assign_minipool_offsets (Mfix *);
119 static void arm_print_value (FILE *, rtx);
120 static void dump_minipool (rtx_insn *);
121 static int arm_barrier_cost (rtx_insn *);
122 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
123 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
124 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
125 machine_mode, rtx);
126 static void arm_reorg (void);
127 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
128 static unsigned long arm_compute_save_reg0_reg12_mask (void);
129 static unsigned long arm_compute_save_reg_mask (void);
130 static unsigned long arm_isr_value (tree);
131 static unsigned long arm_compute_func_type (void);
132 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
133 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
134 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
135 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
136 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
137 #endif
138 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
139 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
140 static int arm_comp_type_attributes (const_tree, const_tree);
141 static void arm_set_default_type_attributes (tree);
142 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
143 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
144 static int optimal_immediate_sequence (enum rtx_code code,
145 unsigned HOST_WIDE_INT val,
146 struct four_ints *return_sequence);
147 static int optimal_immediate_sequence_1 (enum rtx_code code,
148 unsigned HOST_WIDE_INT val,
149 struct four_ints *return_sequence,
150 int i);
151 static int arm_get_strip_length (int);
152 static bool arm_function_ok_for_sibcall (tree, tree);
153 static machine_mode arm_promote_function_mode (const_tree,
154 machine_mode, int *,
155 const_tree, int);
156 static bool arm_return_in_memory (const_tree, const_tree);
157 static rtx arm_function_value (const_tree, const_tree, bool);
158 static rtx arm_libcall_value_1 (machine_mode);
159 static rtx arm_libcall_value (machine_mode, const_rtx);
160 static bool arm_function_value_regno_p (const unsigned int);
161 static void arm_internal_label (FILE *, const char *, unsigned long);
162 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
163 tree);
164 static bool arm_have_conditional_execution (void);
165 static bool arm_cannot_force_const_mem (machine_mode, rtx);
166 static bool arm_legitimate_constant_p (machine_mode, rtx);
167 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
168 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
169 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
170 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
171 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
172 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
173 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
174 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
175 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
176 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
177 static void emit_constant_insn (rtx cond, rtx pattern);
178 static rtx_insn *emit_set_insn (rtx, rtx);
179 static rtx emit_multi_reg_push (unsigned long, unsigned long);
180 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
181 tree, bool);
182 static rtx arm_function_arg (cumulative_args_t, machine_mode,
183 const_tree, bool);
184 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
185 const_tree, bool);
186 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
187 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
188 const_tree);
189 static rtx aapcs_libcall_value (machine_mode);
190 static int aapcs_select_return_coproc (const_tree, const_tree);
191
192 #ifdef OBJECT_FORMAT_ELF
193 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
194 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
195 #endif
196 #ifndef ARM_PE
197 static void arm_encode_section_info (tree, rtx, int);
198 #endif
199
200 static void arm_file_end (void);
201 static void arm_file_start (void);
202 static void arm_insert_attributes (tree, tree *);
203
204 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx_insn *);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 static void arm_asm_init_sections (void);
219 #endif
220 static rtx arm_dwarf_register_span (rtx);
221
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static void arm_override_options_after_change (void);
237 static void arm_option_print (FILE *, int, struct cl_target_option *);
238 static void arm_set_current_function (tree);
239 static bool arm_can_inline_p (tree, tree);
240 static void arm_relayout_function (tree);
241 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
242 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
243 static bool arm_macro_fusion_p (void);
244 static bool arm_cannot_copy_insn_p (rtx_insn *);
245 static int arm_issue_rate (void);
246 static int arm_first_cycle_multipass_dfa_lookahead (void);
247 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
248 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
249 static bool arm_output_addr_const_extra (FILE *, rtx);
250 static bool arm_allocate_stack_slots_for_args (void);
251 static bool arm_warn_func_return (tree);
252 static const char *arm_invalid_parameter_type (const_tree t);
253 static const char *arm_invalid_return_type (const_tree t);
254 static tree arm_promoted_type (const_tree t);
255 static tree arm_convert_to_type (tree type, tree expr);
256 static bool arm_scalar_mode_supported_p (machine_mode);
257 static bool arm_frame_pointer_required (void);
258 static bool arm_can_eliminate (const int, const int);
259 static void arm_asm_trampoline_template (FILE *);
260 static void arm_trampoline_init (rtx, tree, rtx);
261 static rtx arm_trampoline_adjust_address (rtx);
262 static rtx arm_pic_static_addr (rtx orig, rtx reg);
263 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
264 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
265 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
266 static bool arm_array_mode_supported_p (machine_mode,
267 unsigned HOST_WIDE_INT);
268 static machine_mode arm_preferred_simd_mode (machine_mode);
269 static bool arm_class_likely_spilled_p (reg_class_t);
270 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
271 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
272 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
273 const_tree type,
274 int misalignment,
275 bool is_packed);
276 static void arm_conditional_register_usage (void);
277 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
278 static unsigned int arm_autovectorize_vector_sizes (void);
279 static int arm_default_branch_cost (bool, bool);
280 static int arm_cortex_a5_branch_cost (bool, bool);
281 static int arm_cortex_m_branch_cost (bool, bool);
282 static int arm_cortex_m7_branch_cost (bool, bool);
283
284 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
285 const unsigned char *sel);
286
287 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
288
289 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
290 tree vectype,
291 int misalign ATTRIBUTE_UNUSED);
292 static unsigned arm_add_stmt_cost (void *data, int count,
293 enum vect_cost_for_stmt kind,
294 struct _stmt_vec_info *stmt_info,
295 int misalign,
296 enum vect_cost_model_location where);
297
298 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
299 bool op0_preserve_value);
300 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
301
302 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
303 \f
304 /* Table of machine attributes. */
305 static const struct attribute_spec arm_attribute_table[] =
306 {
307 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
308 affects_type_identity } */
309 /* Function calls made to this symbol must be done indirectly, because
310 it may lie outside of the 26 bit addressing range of a normal function
311 call. */
312 { "long_call", 0, 0, false, true, true, NULL, false },
313 /* Whereas these functions are always known to reside within the 26 bit
314 addressing range. */
315 { "short_call", 0, 0, false, true, true, NULL, false },
316 /* Specify the procedure call conventions for a function. */
317 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
318 false },
319 /* Interrupt Service Routines have special prologue and epilogue requirements. */
320 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
321 false },
322 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
323 false },
324 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
325 false },
326 #ifdef ARM_PE
327 /* ARM/PE has three new attributes:
328 interfacearm - ?
329 dllexport - for exporting a function/variable that will live in a dll
330 dllimport - for importing a function/variable from a dll
331
332 Microsoft allows multiple declspecs in one __declspec, separating
333 them with spaces. We do NOT support this. Instead, use __declspec
334 multiple times.
335 */
336 { "dllimport", 0, 0, true, false, false, NULL, false },
337 { "dllexport", 0, 0, true, false, false, NULL, false },
338 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
339 false },
340 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
341 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
342 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
343 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
344 false },
345 #endif
346 { NULL, 0, 0, false, false, false, NULL, false }
347 };
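/* As a rough illustration (not part of the original file) of how these
   attributes are applied in user code; values such as "IRQ" and
   "aapcs-vfp" are only examples:

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_fn (double x) __attribute__ ((pcs ("aapcs-vfp")));
     void no_prologue (void) __attribute__ ((naked));  */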
348 \f
349 /* Initialize the GCC target structure. */
350 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
351 #undef TARGET_MERGE_DECL_ATTRIBUTES
352 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
353 #endif
354
355 #undef TARGET_LEGITIMIZE_ADDRESS
356 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
357
358 #undef TARGET_LRA_P
359 #define TARGET_LRA_P hook_bool_void_true
360
361 #undef TARGET_ATTRIBUTE_TABLE
362 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
363
364 #undef TARGET_INSERT_ATTRIBUTES
365 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
366
367 #undef TARGET_ASM_FILE_START
368 #define TARGET_ASM_FILE_START arm_file_start
369 #undef TARGET_ASM_FILE_END
370 #define TARGET_ASM_FILE_END arm_file_end
371
372 #undef TARGET_ASM_ALIGNED_SI_OP
373 #define TARGET_ASM_ALIGNED_SI_OP NULL
374 #undef TARGET_ASM_INTEGER
375 #define TARGET_ASM_INTEGER arm_assemble_integer
376
377 #undef TARGET_PRINT_OPERAND
378 #define TARGET_PRINT_OPERAND arm_print_operand
379 #undef TARGET_PRINT_OPERAND_ADDRESS
380 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
381 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
382 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
383
384 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
385 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
386
387 #undef TARGET_ASM_FUNCTION_PROLOGUE
388 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
389
390 #undef TARGET_ASM_FUNCTION_EPILOGUE
391 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
392
393 #undef TARGET_CAN_INLINE_P
394 #define TARGET_CAN_INLINE_P arm_can_inline_p
395
396 #undef TARGET_RELAYOUT_FUNCTION
397 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
398
399 #undef TARGET_OPTION_OVERRIDE
400 #define TARGET_OPTION_OVERRIDE arm_option_override
401
402 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
403 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
404
405 #undef TARGET_OPTION_PRINT
406 #define TARGET_OPTION_PRINT arm_option_print
407
408 #undef TARGET_COMP_TYPE_ATTRIBUTES
409 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
410
411 #undef TARGET_SCHED_MACRO_FUSION_P
412 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
413
414 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
415 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
416
417 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
418 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
419
420 #undef TARGET_SCHED_ADJUST_COST
421 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
422
423 #undef TARGET_SET_CURRENT_FUNCTION
424 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
425
426 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
427 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
428
429 #undef TARGET_SCHED_REORDER
430 #define TARGET_SCHED_REORDER arm_sched_reorder
431
432 #undef TARGET_REGISTER_MOVE_COST
433 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
434
435 #undef TARGET_MEMORY_MOVE_COST
436 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
437
438 #undef TARGET_ENCODE_SECTION_INFO
439 #ifdef ARM_PE
440 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
441 #else
442 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
443 #endif
444
445 #undef TARGET_STRIP_NAME_ENCODING
446 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
447
448 #undef TARGET_ASM_INTERNAL_LABEL
449 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
450
451 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
452 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
453
454 #undef TARGET_FUNCTION_VALUE
455 #define TARGET_FUNCTION_VALUE arm_function_value
456
457 #undef TARGET_LIBCALL_VALUE
458 #define TARGET_LIBCALL_VALUE arm_libcall_value
459
460 #undef TARGET_FUNCTION_VALUE_REGNO_P
461 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
462
463 #undef TARGET_ASM_OUTPUT_MI_THUNK
464 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
465 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
466 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
467
468 #undef TARGET_RTX_COSTS
469 #define TARGET_RTX_COSTS arm_rtx_costs
470 #undef TARGET_ADDRESS_COST
471 #define TARGET_ADDRESS_COST arm_address_cost
472
473 #undef TARGET_SHIFT_TRUNCATION_MASK
474 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
475 #undef TARGET_VECTOR_MODE_SUPPORTED_P
476 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
477 #undef TARGET_ARRAY_MODE_SUPPORTED_P
478 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
479 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
480 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
481 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
482 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
483 arm_autovectorize_vector_sizes
484
485 #undef TARGET_MACHINE_DEPENDENT_REORG
486 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
487
488 #undef TARGET_INIT_BUILTINS
489 #define TARGET_INIT_BUILTINS arm_init_builtins
490 #undef TARGET_EXPAND_BUILTIN
491 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
492 #undef TARGET_BUILTIN_DECL
493 #define TARGET_BUILTIN_DECL arm_builtin_decl
494
495 #undef TARGET_INIT_LIBFUNCS
496 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
497
498 #undef TARGET_PROMOTE_FUNCTION_MODE
499 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
500 #undef TARGET_PROMOTE_PROTOTYPES
501 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
502 #undef TARGET_PASS_BY_REFERENCE
503 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
504 #undef TARGET_ARG_PARTIAL_BYTES
505 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
506 #undef TARGET_FUNCTION_ARG
507 #define TARGET_FUNCTION_ARG arm_function_arg
508 #undef TARGET_FUNCTION_ARG_ADVANCE
509 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
510 #undef TARGET_FUNCTION_ARG_BOUNDARY
511 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
512
513 #undef TARGET_SETUP_INCOMING_VARARGS
514 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
515
516 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
517 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
518
519 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
520 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
521 #undef TARGET_TRAMPOLINE_INIT
522 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
523 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
524 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
525
526 #undef TARGET_WARN_FUNC_RETURN
527 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
528
529 #undef TARGET_DEFAULT_SHORT_ENUMS
530 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
531
532 #undef TARGET_ALIGN_ANON_BITFIELD
533 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
534
535 #undef TARGET_NARROW_VOLATILE_BITFIELD
536 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
537
538 #undef TARGET_CXX_GUARD_TYPE
539 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
540
541 #undef TARGET_CXX_GUARD_MASK_BIT
542 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
543
544 #undef TARGET_CXX_GET_COOKIE_SIZE
545 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
546
547 #undef TARGET_CXX_COOKIE_HAS_SIZE
548 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
549
550 #undef TARGET_CXX_CDTOR_RETURNS_THIS
551 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
552
553 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
554 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
555
556 #undef TARGET_CXX_USE_AEABI_ATEXIT
557 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
558
559 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
560 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
561 arm_cxx_determine_class_data_visibility
562
563 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
564 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
565
566 #undef TARGET_RETURN_IN_MSB
567 #define TARGET_RETURN_IN_MSB arm_return_in_msb
568
569 #undef TARGET_RETURN_IN_MEMORY
570 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
571
572 #undef TARGET_MUST_PASS_IN_STACK
573 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
574
575 #if ARM_UNWIND_INFO
576 #undef TARGET_ASM_UNWIND_EMIT
577 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
578
579 /* EABI unwinding tables use a different format for the typeinfo tables. */
580 #undef TARGET_ASM_TTYPE
581 #define TARGET_ASM_TTYPE arm_output_ttype
582
583 #undef TARGET_ARM_EABI_UNWINDER
584 #define TARGET_ARM_EABI_UNWINDER true
585
586 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
587 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
588
589 #undef TARGET_ASM_INIT_SECTIONS
590 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
591 #endif /* ARM_UNWIND_INFO */
592
593 #undef TARGET_DWARF_REGISTER_SPAN
594 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
595
596 #undef TARGET_CANNOT_COPY_INSN_P
597 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
598
599 #ifdef HAVE_AS_TLS
600 #undef TARGET_HAVE_TLS
601 #define TARGET_HAVE_TLS true
602 #endif
603
604 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
605 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
606
607 #undef TARGET_LEGITIMATE_CONSTANT_P
608 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
609
610 #undef TARGET_CANNOT_FORCE_CONST_MEM
611 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
612
613 #undef TARGET_MAX_ANCHOR_OFFSET
614 #define TARGET_MAX_ANCHOR_OFFSET 4095
615
616 /* The minimum is set such that the total size of the block
617 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
618 divisible by eight, ensuring natural spacing of anchors. */
619 #undef TARGET_MIN_ANCHOR_OFFSET
620 #define TARGET_MIN_ANCHOR_OFFSET -4088
621
622 #undef TARGET_SCHED_ISSUE_RATE
623 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
624
625 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
626 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
627 arm_first_cycle_multipass_dfa_lookahead
628
629 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
630 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
631 arm_first_cycle_multipass_dfa_lookahead_guard
632
633 #undef TARGET_MANGLE_TYPE
634 #define TARGET_MANGLE_TYPE arm_mangle_type
635
636 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
637 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
638
639 #undef TARGET_BUILD_BUILTIN_VA_LIST
640 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
641 #undef TARGET_EXPAND_BUILTIN_VA_START
642 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
643 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
644 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
645
646 #ifdef HAVE_AS_TLS
647 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
648 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
649 #endif
650
651 #undef TARGET_LEGITIMATE_ADDRESS_P
652 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
653
654 #undef TARGET_PREFERRED_RELOAD_CLASS
655 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
656
657 #undef TARGET_INVALID_PARAMETER_TYPE
658 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
659
660 #undef TARGET_INVALID_RETURN_TYPE
661 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
662
663 #undef TARGET_PROMOTED_TYPE
664 #define TARGET_PROMOTED_TYPE arm_promoted_type
665
666 #undef TARGET_CONVERT_TO_TYPE
667 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
668
669 #undef TARGET_SCALAR_MODE_SUPPORTED_P
670 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
671
672 #undef TARGET_FRAME_POINTER_REQUIRED
673 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
674
675 #undef TARGET_CAN_ELIMINATE
676 #define TARGET_CAN_ELIMINATE arm_can_eliminate
677
678 #undef TARGET_CONDITIONAL_REGISTER_USAGE
679 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
680
681 #undef TARGET_CLASS_LIKELY_SPILLED_P
682 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
683
684 #undef TARGET_VECTORIZE_BUILTINS
685 #define TARGET_VECTORIZE_BUILTINS
686
687 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
688 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
689 arm_builtin_vectorized_function
690
691 #undef TARGET_VECTOR_ALIGNMENT
692 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
693
694 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
695 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
696 arm_vector_alignment_reachable
697
698 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
699 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
700 arm_builtin_support_vector_misalignment
701
702 #undef TARGET_PREFERRED_RENAME_CLASS
703 #define TARGET_PREFERRED_RENAME_CLASS \
704 arm_preferred_rename_class
705
706 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
707 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
708 arm_vectorize_vec_perm_const_ok
709
710 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
711 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
712 arm_builtin_vectorization_cost
713 #undef TARGET_VECTORIZE_ADD_STMT_COST
714 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
715
716 #undef TARGET_CANONICALIZE_COMPARISON
717 #define TARGET_CANONICALIZE_COMPARISON \
718 arm_canonicalize_comparison
719
720 #undef TARGET_ASAN_SHADOW_OFFSET
721 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
722
723 #undef MAX_INSN_PER_IT_BLOCK
724 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
725
726 #undef TARGET_CAN_USE_DOLOOP_P
727 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
728
729 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
730 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
731
732 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
733 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
734
735 #undef TARGET_SCHED_FUSION_PRIORITY
736 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
737
738 struct gcc_target targetm = TARGET_INITIALIZER;
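/* A minimal sketch of how the hook mechanism above works: TARGET_INITIALIZER
   expands to an aggregate initializer built from all of the TARGET_* macros,
   so an override such as

     #undef  TARGET_SCHED_ISSUE_RATE
     #define TARGET_SCHED_ISSUE_RATE arm_issue_rate

   ends up making targetm.sched.issue_rate point at arm_issue_rate.  */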
739 \f
740 /* Obstack for minipool constant handling. */
741 static struct obstack minipool_obstack;
742 static char * minipool_startobj;
743
744 /* The maximum number of insns skipped which
745 will be conditionalised if possible. */
746 static int max_insns_skipped = 5;
747
748 extern FILE * asm_out_file;
749
750 /* True if we are currently building a constant table. */
751 int making_const_table;
752
753 /* The processor for which instructions should be scheduled. */
754 enum processor_type arm_tune = arm_none;
755
756 /* The current tuning set. */
757 const struct tune_params *current_tune;
758
759 /* Which floating point hardware to schedule for. */
760 int arm_fpu_attr;
761
762 /* Which floating point hardware to use. */
763 const struct arm_fpu_desc *arm_fpu_desc;
764
765 /* Used for Thumb call_via trampolines. */
766 rtx thumb_call_via_label[14];
767 static int thumb_call_reg_needed;
768
769 /* The bits in this mask specify which
770 instructions we are allowed to generate. */
771 arm_feature_set insn_flags = ARM_FSET_EMPTY;
772
773 /* The bits in this mask specify which instruction scheduling options should
774 be used. */
775 arm_feature_set tune_flags = ARM_FSET_EMPTY;
776
777 /* The highest ARM architecture version supported by the
778 target. */
779 enum base_architecture arm_base_arch = BASE_ARCH_0;
780
781 /* The following are used in the arm.md file as equivalents to bits
782 in the above two flag variables. */
783
784 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
785 int arm_arch3m = 0;
786
787 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
788 int arm_arch4 = 0;
789
790 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
791 int arm_arch4t = 0;
792
793 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
794 int arm_arch5 = 0;
795
796 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
797 int arm_arch5e = 0;
798
799 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
800 int arm_arch6 = 0;
801
802 /* Nonzero if this chip supports the ARM 6K extensions. */
803 int arm_arch6k = 0;
804
805 /* Nonzero if this chip supports the ARM 6KZ extensions. */
806 int arm_arch6kz = 0;
807
808 /* Nonzero if instructions present in ARMv6-M can be used. */
809 int arm_arch6m = 0;
810
811 /* Nonzero if this chip supports the ARM 7 extensions. */
812 int arm_arch7 = 0;
813
814 /* Nonzero if instructions not present in the 'M' profile can be used. */
815 int arm_arch_notm = 0;
816
817 /* Nonzero if instructions present in ARMv7E-M can be used. */
818 int arm_arch7em = 0;
819
820 /* Nonzero if instructions present in ARMv8 can be used. */
821 int arm_arch8 = 0;
822
823 /* Nonzero if this chip can benefit from load scheduling. */
824 int arm_ld_sched = 0;
825
826 /* Nonzero if this chip is a StrongARM. */
827 int arm_tune_strongarm = 0;
828
829 /* Nonzero if this chip supports Intel Wireless MMX technology. */
830 int arm_arch_iwmmxt = 0;
831
832 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
833 int arm_arch_iwmmxt2 = 0;
834
835 /* Nonzero if this chip is an XScale. */
836 int arm_arch_xscale = 0;
837
838 /* Nonzero if tuning for XScale. */
839 int arm_tune_xscale = 0;
840
841 /* Nonzero if we want to tune for stores that access the write-buffer.
842 This typically means an ARM6 or ARM7 with MMU or MPU. */
843 int arm_tune_wbuf = 0;
844
845 /* Nonzero if tuning for Cortex-A9. */
846 int arm_tune_cortex_a9 = 0;
847
848 /* Nonzero if we should define __THUMB_INTERWORK__ in the
849 preprocessor.
850 XXX This is a bit of a hack; it's intended to help work around
851 problems in GLD, which doesn't understand that armv5t code is
852 interworking clean. */
853 int arm_cpp_interwork = 0;
854
855 /* Nonzero if chip supports Thumb 2. */
856 int arm_arch_thumb2;
857
858 /* Nonzero if chip supports integer division instruction. */
859 int arm_arch_arm_hwdiv;
860 int arm_arch_thumb_hwdiv;
861
862 /* Nonzero if chip disallows volatile memory access in IT block. */
863 int arm_arch_no_volatile_ce;
864
865 /* Nonzero if we should use Neon to handle 64-bit operations rather
866 than core registers. */
867 int prefer_neon_for_64bits = 0;
868
869 /* Nonzero if we shouldn't use literal pools. */
870 bool arm_disable_literal_pool = false;
871
872 /* The register number to be used for the PIC offset register. */
873 unsigned arm_pic_register = INVALID_REGNUM;
874
875 enum arm_pcs arm_pcs_default;
876
877 /* For an explanation of these variables, see final_prescan_insn below. */
878 int arm_ccfsm_state;
879 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
880 enum arm_cond_code arm_current_cc;
881
882 rtx arm_target_insn;
883 int arm_target_label;
884 /* The number of conditionally executed insns, including the current insn. */
885 int arm_condexec_count = 0;
886 /* A bitmask specifying the patterns for the IT block.
887 Zero means do not output an IT block before this insn. */
888 int arm_condexec_mask = 0;
889 /* The number of bits used in arm_condexec_mask. */
890 int arm_condexec_masklen = 0;
891
892 /* Nonzero if chip supports the ARMv8 CRC instructions. */
893 int arm_arch_crc = 0;
894
895 /* Nonzero if the core has a very small, high-latency multiply unit. */
896 int arm_m_profile_small_mul = 0;
897
898 /* The condition codes of the ARM, and the inverse function. */
899 static const char * const arm_condition_codes[] =
900 {
901 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
902 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
903 };
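/* Note that the table is laid out in complementary pairs (eq/ne, cs/cc,
   mi/pl, ...), so the inverse of a condition is found by XOR-ing its index
   with 1; for example, arm_condition_codes[ARM_EQ ^ 1] is "ne".  */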
904
905 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
906 int arm_regs_in_sequence[] =
907 {
908 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
909 };
910
911 #define ARM_LSL_NAME "lsl"
912 #define streq(string1, string2) (strcmp (string1, string2) == 0)
913
914 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
915 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
916 | (1 << PIC_OFFSET_TABLE_REGNUM)))
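/* A worked example, assuming the usual numbering (r7 is the Thumb hard
   frame pointer, SP is register 13, PC is register 15) and a high PIC
   register such as r9: the mask is
   0xff & ~((1 << 7) | (1 << 13) | (1 << 15) | (1 << 9)) == 0x7f,
   i.e. r0-r6 are available as Thumb-2 work registers.  */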
917 \f
918 /* Initialization code. */
919
920 struct processors
921 {
922 const char *const name;
923 enum processor_type core;
924 const char *arch;
925 enum base_architecture base_arch;
926 const arm_feature_set flags;
927 const struct tune_params *const tune;
928 };
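/* For illustration only, an entry of the generated core table has roughly
   this shape (the flag values shown here are indicative; the real entries
   are generated from arm-cores.def):

     { "cortex-a9", cortexa9, "7-A", BASE_ARCH_7A,
       ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH7A), &arm_cortex_a9_tune },
*/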
929
930
931 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
932 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
933 { \
934 num_slots, \
935 l1_size, \
936 l1_line_size \
937 }
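/* For example (with purely illustrative numbers),
   ARM_PREFETCH_BENEFICIAL (4, 32768, 64) expands to { 4, 32768, 64 }:
   the number of prefetch slots, the L1 cache size and the L1 line size
   used to fill the prefetch fields of a tune_params structure below.  */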
938
939 /* arm generic vectorizer costs. */
940 static const
941 struct cpu_vec_costs arm_default_vec_cost = {
942 1, /* scalar_stmt_cost. */
943 1, /* scalar_load_cost. */
944 1, /* scalar_store_cost. */
945 1, /* vec_stmt_cost. */
946 1, /* vec_to_scalar_cost. */
947 1, /* scalar_to_vec_cost. */
948 1, /* vec_align_load_cost. */
949 1, /* vec_unalign_load_cost. */
950 1, /* vec_unalign_store_cost. */
951 1, /* vec_store_cost. */
952 3, /* cond_taken_branch_cost. */
953 1, /* cond_not_taken_branch_cost. */
954 };
955
956 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
957 #include "aarch-cost-tables.h"
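/* Reading these tables: each entry is an *extra* cost, added on top of the
   cost of a simple single-cycle instruction, so a value of 0 means "no more
   expensive than a plain ALU operation".  COSTS_N_INSNS (n) scales n to RTL
   cost units (4 per instruction).  */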
958
959
960
961 const struct cpu_cost_table cortexa9_extra_costs =
962 {
963 /* ALU */
964 {
965 0, /* arith. */
966 0, /* logical. */
967 0, /* shift. */
968 COSTS_N_INSNS (1), /* shift_reg. */
969 COSTS_N_INSNS (1), /* arith_shift. */
970 COSTS_N_INSNS (2), /* arith_shift_reg. */
971 0, /* log_shift. */
972 COSTS_N_INSNS (1), /* log_shift_reg. */
973 COSTS_N_INSNS (1), /* extend. */
974 COSTS_N_INSNS (2), /* extend_arith. */
975 COSTS_N_INSNS (1), /* bfi. */
976 COSTS_N_INSNS (1), /* bfx. */
977 0, /* clz. */
978 0, /* rev. */
979 0, /* non_exec. */
980 true /* non_exec_costs_exec. */
981 },
982 {
983 /* MULT SImode */
984 {
985 COSTS_N_INSNS (3), /* simple. */
986 COSTS_N_INSNS (3), /* flag_setting. */
987 COSTS_N_INSNS (2), /* extend. */
988 COSTS_N_INSNS (3), /* add. */
989 COSTS_N_INSNS (2), /* extend_add. */
990 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
991 },
992 /* MULT DImode */
993 {
994 0, /* simple (N/A). */
995 0, /* flag_setting (N/A). */
996 COSTS_N_INSNS (4), /* extend. */
997 0, /* add (N/A). */
998 COSTS_N_INSNS (4), /* extend_add. */
999 0 /* idiv (N/A). */
1000 }
1001 },
1002 /* LD/ST */
1003 {
1004 COSTS_N_INSNS (2), /* load. */
1005 COSTS_N_INSNS (2), /* load_sign_extend. */
1006 COSTS_N_INSNS (2), /* ldrd. */
1007 COSTS_N_INSNS (2), /* ldm_1st. */
1008 1, /* ldm_regs_per_insn_1st. */
1009 2, /* ldm_regs_per_insn_subsequent. */
1010 COSTS_N_INSNS (5), /* loadf. */
1011 COSTS_N_INSNS (5), /* loadd. */
1012 COSTS_N_INSNS (1), /* load_unaligned. */
1013 COSTS_N_INSNS (2), /* store. */
1014 COSTS_N_INSNS (2), /* strd. */
1015 COSTS_N_INSNS (2), /* stm_1st. */
1016 1, /* stm_regs_per_insn_1st. */
1017 2, /* stm_regs_per_insn_subsequent. */
1018 COSTS_N_INSNS (1), /* storef. */
1019 COSTS_N_INSNS (1), /* stored. */
1020 COSTS_N_INSNS (1), /* store_unaligned. */
1021 COSTS_N_INSNS (1), /* loadv. */
1022 COSTS_N_INSNS (1) /* storev. */
1023 },
1024 {
1025 /* FP SFmode */
1026 {
1027 COSTS_N_INSNS (14), /* div. */
1028 COSTS_N_INSNS (4), /* mult. */
1029 COSTS_N_INSNS (7), /* mult_addsub. */
1030 COSTS_N_INSNS (30), /* fma. */
1031 COSTS_N_INSNS (3), /* addsub. */
1032 COSTS_N_INSNS (1), /* fpconst. */
1033 COSTS_N_INSNS (1), /* neg. */
1034 COSTS_N_INSNS (3), /* compare. */
1035 COSTS_N_INSNS (3), /* widen. */
1036 COSTS_N_INSNS (3), /* narrow. */
1037 COSTS_N_INSNS (3), /* toint. */
1038 COSTS_N_INSNS (3), /* fromint. */
1039 COSTS_N_INSNS (3) /* roundint. */
1040 },
1041 /* FP DFmode */
1042 {
1043 COSTS_N_INSNS (24), /* div. */
1044 COSTS_N_INSNS (5), /* mult. */
1045 COSTS_N_INSNS (8), /* mult_addsub. */
1046 COSTS_N_INSNS (30), /* fma. */
1047 COSTS_N_INSNS (3), /* addsub. */
1048 COSTS_N_INSNS (1), /* fpconst. */
1049 COSTS_N_INSNS (1), /* neg. */
1050 COSTS_N_INSNS (3), /* compare. */
1051 COSTS_N_INSNS (3), /* widen. */
1052 COSTS_N_INSNS (3), /* narrow. */
1053 COSTS_N_INSNS (3), /* toint. */
1054 COSTS_N_INSNS (3), /* fromint. */
1055 COSTS_N_INSNS (3) /* roundint. */
1056 }
1057 },
1058 /* Vector */
1059 {
1060 COSTS_N_INSNS (1) /* alu. */
1061 }
1062 };
1063
1064 const struct cpu_cost_table cortexa8_extra_costs =
1065 {
1066 /* ALU */
1067 {
1068 0, /* arith. */
1069 0, /* logical. */
1070 COSTS_N_INSNS (1), /* shift. */
1071 0, /* shift_reg. */
1072 COSTS_N_INSNS (1), /* arith_shift. */
1073 0, /* arith_shift_reg. */
1074 COSTS_N_INSNS (1), /* log_shift. */
1075 0, /* log_shift_reg. */
1076 0, /* extend. */
1077 0, /* extend_arith. */
1078 0, /* bfi. */
1079 0, /* bfx. */
1080 0, /* clz. */
1081 0, /* rev. */
1082 0, /* non_exec. */
1083 true /* non_exec_costs_exec. */
1084 },
1085 {
1086 /* MULT SImode */
1087 {
1088 COSTS_N_INSNS (1), /* simple. */
1089 COSTS_N_INSNS (1), /* flag_setting. */
1090 COSTS_N_INSNS (1), /* extend. */
1091 COSTS_N_INSNS (1), /* add. */
1092 COSTS_N_INSNS (1), /* extend_add. */
1093 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1094 },
1095 /* MULT DImode */
1096 {
1097 0, /* simple (N/A). */
1098 0, /* flag_setting (N/A). */
1099 COSTS_N_INSNS (2), /* extend. */
1100 0, /* add (N/A). */
1101 COSTS_N_INSNS (2), /* extend_add. */
1102 0 /* idiv (N/A). */
1103 }
1104 },
1105 /* LD/ST */
1106 {
1107 COSTS_N_INSNS (1), /* load. */
1108 COSTS_N_INSNS (1), /* load_sign_extend. */
1109 COSTS_N_INSNS (1), /* ldrd. */
1110 COSTS_N_INSNS (1), /* ldm_1st. */
1111 1, /* ldm_regs_per_insn_1st. */
1112 2, /* ldm_regs_per_insn_subsequent. */
1113 COSTS_N_INSNS (1), /* loadf. */
1114 COSTS_N_INSNS (1), /* loadd. */
1115 COSTS_N_INSNS (1), /* load_unaligned. */
1116 COSTS_N_INSNS (1), /* store. */
1117 COSTS_N_INSNS (1), /* strd. */
1118 COSTS_N_INSNS (1), /* stm_1st. */
1119 1, /* stm_regs_per_insn_1st. */
1120 2, /* stm_regs_per_insn_subsequent. */
1121 COSTS_N_INSNS (1), /* storef. */
1122 COSTS_N_INSNS (1), /* stored. */
1123 COSTS_N_INSNS (1), /* store_unaligned. */
1124 COSTS_N_INSNS (1), /* loadv. */
1125 COSTS_N_INSNS (1) /* storev. */
1126 },
1127 {
1128 /* FP SFmode */
1129 {
1130 COSTS_N_INSNS (36), /* div. */
1131 COSTS_N_INSNS (11), /* mult. */
1132 COSTS_N_INSNS (20), /* mult_addsub. */
1133 COSTS_N_INSNS (30), /* fma. */
1134 COSTS_N_INSNS (9), /* addsub. */
1135 COSTS_N_INSNS (3), /* fpconst. */
1136 COSTS_N_INSNS (3), /* neg. */
1137 COSTS_N_INSNS (6), /* compare. */
1138 COSTS_N_INSNS (4), /* widen. */
1139 COSTS_N_INSNS (4), /* narrow. */
1140 COSTS_N_INSNS (8), /* toint. */
1141 COSTS_N_INSNS (8), /* fromint. */
1142 COSTS_N_INSNS (8) /* roundint. */
1143 },
1144 /* FP DFmode */
1145 {
1146 COSTS_N_INSNS (64), /* div. */
1147 COSTS_N_INSNS (16), /* mult. */
1148 COSTS_N_INSNS (25), /* mult_addsub. */
1149 COSTS_N_INSNS (30), /* fma. */
1150 COSTS_N_INSNS (9), /* addsub. */
1151 COSTS_N_INSNS (3), /* fpconst. */
1152 COSTS_N_INSNS (3), /* neg. */
1153 COSTS_N_INSNS (6), /* compare. */
1154 COSTS_N_INSNS (6), /* widen. */
1155 COSTS_N_INSNS (6), /* narrow. */
1156 COSTS_N_INSNS (8), /* toint. */
1157 COSTS_N_INSNS (8), /* fromint. */
1158 COSTS_N_INSNS (8) /* roundint. */
1159 }
1160 },
1161 /* Vector */
1162 {
1163 COSTS_N_INSNS (1) /* alu. */
1164 }
1165 };
1166
1167 const struct cpu_cost_table cortexa5_extra_costs =
1168 {
1169 /* ALU */
1170 {
1171 0, /* arith. */
1172 0, /* logical. */
1173 COSTS_N_INSNS (1), /* shift. */
1174 COSTS_N_INSNS (1), /* shift_reg. */
1175 COSTS_N_INSNS (1), /* arith_shift. */
1176 COSTS_N_INSNS (1), /* arith_shift_reg. */
1177 COSTS_N_INSNS (1), /* log_shift. */
1178 COSTS_N_INSNS (1), /* log_shift_reg. */
1179 COSTS_N_INSNS (1), /* extend. */
1180 COSTS_N_INSNS (1), /* extend_arith. */
1181 COSTS_N_INSNS (1), /* bfi. */
1182 COSTS_N_INSNS (1), /* bfx. */
1183 COSTS_N_INSNS (1), /* clz. */
1184 COSTS_N_INSNS (1), /* rev. */
1185 0, /* non_exec. */
1186 true /* non_exec_costs_exec. */
1187 },
1188
1189 {
1190 /* MULT SImode */
1191 {
1192 0, /* simple. */
1193 COSTS_N_INSNS (1), /* flag_setting. */
1194 COSTS_N_INSNS (1), /* extend. */
1195 COSTS_N_INSNS (1), /* add. */
1196 COSTS_N_INSNS (1), /* extend_add. */
1197 COSTS_N_INSNS (7) /* idiv. */
1198 },
1199 /* MULT DImode */
1200 {
1201 0, /* simple (N/A). */
1202 0, /* flag_setting (N/A). */
1203 COSTS_N_INSNS (1), /* extend. */
1204 0, /* add. */
1205 COSTS_N_INSNS (2), /* extend_add. */
1206 0 /* idiv (N/A). */
1207 }
1208 },
1209 /* LD/ST */
1210 {
1211 COSTS_N_INSNS (1), /* load. */
1212 COSTS_N_INSNS (1), /* load_sign_extend. */
1213 COSTS_N_INSNS (6), /* ldrd. */
1214 COSTS_N_INSNS (1), /* ldm_1st. */
1215 1, /* ldm_regs_per_insn_1st. */
1216 2, /* ldm_regs_per_insn_subsequent. */
1217 COSTS_N_INSNS (2), /* loadf. */
1218 COSTS_N_INSNS (4), /* loadd. */
1219 COSTS_N_INSNS (1), /* load_unaligned. */
1220 COSTS_N_INSNS (1), /* store. */
1221 COSTS_N_INSNS (3), /* strd. */
1222 COSTS_N_INSNS (1), /* stm_1st. */
1223 1, /* stm_regs_per_insn_1st. */
1224 2, /* stm_regs_per_insn_subsequent. */
1225 COSTS_N_INSNS (2), /* storef. */
1226 COSTS_N_INSNS (2), /* stored. */
1227 COSTS_N_INSNS (1), /* store_unaligned. */
1228 COSTS_N_INSNS (1), /* loadv. */
1229 COSTS_N_INSNS (1) /* storev. */
1230 },
1231 {
1232 /* FP SFmode */
1233 {
1234 COSTS_N_INSNS (15), /* div. */
1235 COSTS_N_INSNS (3), /* mult. */
1236 COSTS_N_INSNS (7), /* mult_addsub. */
1237 COSTS_N_INSNS (7), /* fma. */
1238 COSTS_N_INSNS (3), /* addsub. */
1239 COSTS_N_INSNS (3), /* fpconst. */
1240 COSTS_N_INSNS (3), /* neg. */
1241 COSTS_N_INSNS (3), /* compare. */
1242 COSTS_N_INSNS (3), /* widen. */
1243 COSTS_N_INSNS (3), /* narrow. */
1244 COSTS_N_INSNS (3), /* toint. */
1245 COSTS_N_INSNS (3), /* fromint. */
1246 COSTS_N_INSNS (3) /* roundint. */
1247 },
1248 /* FP DFmode */
1249 {
1250 COSTS_N_INSNS (30), /* div. */
1251 COSTS_N_INSNS (6), /* mult. */
1252 COSTS_N_INSNS (10), /* mult_addsub. */
1253 COSTS_N_INSNS (7), /* fma. */
1254 COSTS_N_INSNS (3), /* addsub. */
1255 COSTS_N_INSNS (3), /* fpconst. */
1256 COSTS_N_INSNS (3), /* neg. */
1257 COSTS_N_INSNS (3), /* compare. */
1258 COSTS_N_INSNS (3), /* widen. */
1259 COSTS_N_INSNS (3), /* narrow. */
1260 COSTS_N_INSNS (3), /* toint. */
1261 COSTS_N_INSNS (3), /* fromint. */
1262 COSTS_N_INSNS (3) /* roundint. */
1263 }
1264 },
1265 /* Vector */
1266 {
1267 COSTS_N_INSNS (1) /* alu. */
1268 }
1269 };
1270
1271
1272 const struct cpu_cost_table cortexa7_extra_costs =
1273 {
1274 /* ALU */
1275 {
1276 0, /* arith. */
1277 0, /* logical. */
1278 COSTS_N_INSNS (1), /* shift. */
1279 COSTS_N_INSNS (1), /* shift_reg. */
1280 COSTS_N_INSNS (1), /* arith_shift. */
1281 COSTS_N_INSNS (1), /* arith_shift_reg. */
1282 COSTS_N_INSNS (1), /* log_shift. */
1283 COSTS_N_INSNS (1), /* log_shift_reg. */
1284 COSTS_N_INSNS (1), /* extend. */
1285 COSTS_N_INSNS (1), /* extend_arith. */
1286 COSTS_N_INSNS (1), /* bfi. */
1287 COSTS_N_INSNS (1), /* bfx. */
1288 COSTS_N_INSNS (1), /* clz. */
1289 COSTS_N_INSNS (1), /* rev. */
1290 0, /* non_exec. */
1291 true /* non_exec_costs_exec. */
1292 },
1293
1294 {
1295 /* MULT SImode */
1296 {
1297 0, /* simple. */
1298 COSTS_N_INSNS (1), /* flag_setting. */
1299 COSTS_N_INSNS (1), /* extend. */
1300 COSTS_N_INSNS (1), /* add. */
1301 COSTS_N_INSNS (1), /* extend_add. */
1302 COSTS_N_INSNS (7) /* idiv. */
1303 },
1304 /* MULT DImode */
1305 {
1306 0, /* simple (N/A). */
1307 0, /* flag_setting (N/A). */
1308 COSTS_N_INSNS (1), /* extend. */
1309 0, /* add. */
1310 COSTS_N_INSNS (2), /* extend_add. */
1311 0 /* idiv (N/A). */
1312 }
1313 },
1314 /* LD/ST */
1315 {
1316 COSTS_N_INSNS (1), /* load. */
1317 COSTS_N_INSNS (1), /* load_sign_extend. */
1318 COSTS_N_INSNS (3), /* ldrd. */
1319 COSTS_N_INSNS (1), /* ldm_1st. */
1320 1, /* ldm_regs_per_insn_1st. */
1321 2, /* ldm_regs_per_insn_subsequent. */
1322 COSTS_N_INSNS (2), /* loadf. */
1323 COSTS_N_INSNS (2), /* loadd. */
1324 COSTS_N_INSNS (1), /* load_unaligned. */
1325 COSTS_N_INSNS (1), /* store. */
1326 COSTS_N_INSNS (3), /* strd. */
1327 COSTS_N_INSNS (1), /* stm_1st. */
1328 1, /* stm_regs_per_insn_1st. */
1329 2, /* stm_regs_per_insn_subsequent. */
1330 COSTS_N_INSNS (2), /* storef. */
1331 COSTS_N_INSNS (2), /* stored. */
1332 COSTS_N_INSNS (1), /* store_unaligned. */
1333 COSTS_N_INSNS (1), /* loadv. */
1334 COSTS_N_INSNS (1) /* storev. */
1335 },
1336 {
1337 /* FP SFmode */
1338 {
1339 COSTS_N_INSNS (15), /* div. */
1340 COSTS_N_INSNS (3), /* mult. */
1341 COSTS_N_INSNS (7), /* mult_addsub. */
1342 COSTS_N_INSNS (7), /* fma. */
1343 COSTS_N_INSNS (3), /* addsub. */
1344 COSTS_N_INSNS (3), /* fpconst. */
1345 COSTS_N_INSNS (3), /* neg. */
1346 COSTS_N_INSNS (3), /* compare. */
1347 COSTS_N_INSNS (3), /* widen. */
1348 COSTS_N_INSNS (3), /* narrow. */
1349 COSTS_N_INSNS (3), /* toint. */
1350 COSTS_N_INSNS (3), /* fromint. */
1351 COSTS_N_INSNS (3) /* roundint. */
1352 },
1353 /* FP DFmode */
1354 {
1355 COSTS_N_INSNS (30), /* div. */
1356 COSTS_N_INSNS (6), /* mult. */
1357 COSTS_N_INSNS (10), /* mult_addsub. */
1358 COSTS_N_INSNS (7), /* fma. */
1359 COSTS_N_INSNS (3), /* addsub. */
1360 COSTS_N_INSNS (3), /* fpconst. */
1361 COSTS_N_INSNS (3), /* neg. */
1362 COSTS_N_INSNS (3), /* compare. */
1363 COSTS_N_INSNS (3), /* widen. */
1364 COSTS_N_INSNS (3), /* narrow. */
1365 COSTS_N_INSNS (3), /* toint. */
1366 COSTS_N_INSNS (3), /* fromint. */
1367 COSTS_N_INSNS (3) /* roundint. */
1368 }
1369 },
1370 /* Vector */
1371 {
1372 COSTS_N_INSNS (1) /* alu. */
1373 }
1374 };
1375
1376 const struct cpu_cost_table cortexa12_extra_costs =
1377 {
1378 /* ALU */
1379 {
1380 0, /* arith. */
1381 0, /* logical. */
1382 0, /* shift. */
1383 COSTS_N_INSNS (1), /* shift_reg. */
1384 COSTS_N_INSNS (1), /* arith_shift. */
1385 COSTS_N_INSNS (1), /* arith_shift_reg. */
1386 COSTS_N_INSNS (1), /* log_shift. */
1387 COSTS_N_INSNS (1), /* log_shift_reg. */
1388 0, /* extend. */
1389 COSTS_N_INSNS (1), /* extend_arith. */
1390 0, /* bfi. */
1391 COSTS_N_INSNS (1), /* bfx. */
1392 COSTS_N_INSNS (1), /* clz. */
1393 COSTS_N_INSNS (1), /* rev. */
1394 0, /* non_exec. */
1395 true /* non_exec_costs_exec. */
1396 },
1397 /* MULT SImode */
1398 {
1399 {
1400 COSTS_N_INSNS (2), /* simple. */
1401 COSTS_N_INSNS (3), /* flag_setting. */
1402 COSTS_N_INSNS (2), /* extend. */
1403 COSTS_N_INSNS (3), /* add. */
1404 COSTS_N_INSNS (2), /* extend_add. */
1405 COSTS_N_INSNS (18) /* idiv. */
1406 },
1407 /* MULT DImode */
1408 {
1409 0, /* simple (N/A). */
1410 0, /* flag_setting (N/A). */
1411 COSTS_N_INSNS (3), /* extend. */
1412 0, /* add (N/A). */
1413 COSTS_N_INSNS (3), /* extend_add. */
1414 0 /* idiv (N/A). */
1415 }
1416 },
1417 /* LD/ST */
1418 {
1419 COSTS_N_INSNS (3), /* load. */
1420 COSTS_N_INSNS (3), /* load_sign_extend. */
1421 COSTS_N_INSNS (3), /* ldrd. */
1422 COSTS_N_INSNS (3), /* ldm_1st. */
1423 1, /* ldm_regs_per_insn_1st. */
1424 2, /* ldm_regs_per_insn_subsequent. */
1425 COSTS_N_INSNS (3), /* loadf. */
1426 COSTS_N_INSNS (3), /* loadd. */
1427 0, /* load_unaligned. */
1428 0, /* store. */
1429 0, /* strd. */
1430 0, /* stm_1st. */
1431 1, /* stm_regs_per_insn_1st. */
1432 2, /* stm_regs_per_insn_subsequent. */
1433 COSTS_N_INSNS (2), /* storef. */
1434 COSTS_N_INSNS (2), /* stored. */
1435 0, /* store_unaligned. */
1436 COSTS_N_INSNS (1), /* loadv. */
1437 COSTS_N_INSNS (1) /* storev. */
1438 },
1439 {
1440 /* FP SFmode */
1441 {
1442 COSTS_N_INSNS (17), /* div. */
1443 COSTS_N_INSNS (4), /* mult. */
1444 COSTS_N_INSNS (8), /* mult_addsub. */
1445 COSTS_N_INSNS (8), /* fma. */
1446 COSTS_N_INSNS (4), /* addsub. */
1447 COSTS_N_INSNS (2), /* fpconst. */
1448 COSTS_N_INSNS (2), /* neg. */
1449 COSTS_N_INSNS (2), /* compare. */
1450 COSTS_N_INSNS (4), /* widen. */
1451 COSTS_N_INSNS (4), /* narrow. */
1452 COSTS_N_INSNS (4), /* toint. */
1453 COSTS_N_INSNS (4), /* fromint. */
1454 COSTS_N_INSNS (4) /* roundint. */
1455 },
1456 /* FP DFmode */
1457 {
1458 COSTS_N_INSNS (31), /* div. */
1459 COSTS_N_INSNS (4), /* mult. */
1460 COSTS_N_INSNS (8), /* mult_addsub. */
1461 COSTS_N_INSNS (8), /* fma. */
1462 COSTS_N_INSNS (4), /* addsub. */
1463 COSTS_N_INSNS (2), /* fpconst. */
1464 COSTS_N_INSNS (2), /* neg. */
1465 COSTS_N_INSNS (2), /* compare. */
1466 COSTS_N_INSNS (4), /* widen. */
1467 COSTS_N_INSNS (4), /* narrow. */
1468 COSTS_N_INSNS (4), /* toint. */
1469 COSTS_N_INSNS (4), /* fromint. */
1470 COSTS_N_INSNS (4) /* roundint. */
1471 }
1472 },
1473 /* Vector */
1474 {
1475 COSTS_N_INSNS (1) /* alu. */
1476 }
1477 };
1478
1479 const struct cpu_cost_table cortexa15_extra_costs =
1480 {
1481 /* ALU */
1482 {
1483 0, /* arith. */
1484 0, /* logical. */
1485 0, /* shift. */
1486 0, /* shift_reg. */
1487 COSTS_N_INSNS (1), /* arith_shift. */
1488 COSTS_N_INSNS (1), /* arith_shift_reg. */
1489 COSTS_N_INSNS (1), /* log_shift. */
1490 COSTS_N_INSNS (1), /* log_shift_reg. */
1491 0, /* extend. */
1492 COSTS_N_INSNS (1), /* extend_arith. */
1493 COSTS_N_INSNS (1), /* bfi. */
1494 0, /* bfx. */
1495 0, /* clz. */
1496 0, /* rev. */
1497 0, /* non_exec. */
1498 true /* non_exec_costs_exec. */
1499 },
1500 /* MULT SImode */
1501 {
1502 {
1503 COSTS_N_INSNS (2), /* simple. */
1504 COSTS_N_INSNS (3), /* flag_setting. */
1505 COSTS_N_INSNS (2), /* extend. */
1506 COSTS_N_INSNS (2), /* add. */
1507 COSTS_N_INSNS (2), /* extend_add. */
1508 COSTS_N_INSNS (18) /* idiv. */
1509 },
1510 /* MULT DImode */
1511 {
1512 0, /* simple (N/A). */
1513 0, /* flag_setting (N/A). */
1514 COSTS_N_INSNS (3), /* extend. */
1515 0, /* add (N/A). */
1516 COSTS_N_INSNS (3), /* extend_add. */
1517 0 /* idiv (N/A). */
1518 }
1519 },
1520 /* LD/ST */
1521 {
1522 COSTS_N_INSNS (3), /* load. */
1523 COSTS_N_INSNS (3), /* load_sign_extend. */
1524 COSTS_N_INSNS (3), /* ldrd. */
1525 COSTS_N_INSNS (4), /* ldm_1st. */
1526 1, /* ldm_regs_per_insn_1st. */
1527 2, /* ldm_regs_per_insn_subsequent. */
1528 COSTS_N_INSNS (4), /* loadf. */
1529 COSTS_N_INSNS (4), /* loadd. */
1530 0, /* load_unaligned. */
1531 0, /* store. */
1532 0, /* strd. */
1533 COSTS_N_INSNS (1), /* stm_1st. */
1534 1, /* stm_regs_per_insn_1st. */
1535 2, /* stm_regs_per_insn_subsequent. */
1536 0, /* storef. */
1537 0, /* stored. */
1538 0, /* store_unaligned. */
1539 COSTS_N_INSNS (1), /* loadv. */
1540 COSTS_N_INSNS (1) /* storev. */
1541 },
1542 {
1543 /* FP SFmode */
1544 {
1545 COSTS_N_INSNS (17), /* div. */
1546 COSTS_N_INSNS (4), /* mult. */
1547 COSTS_N_INSNS (8), /* mult_addsub. */
1548 COSTS_N_INSNS (8), /* fma. */
1549 COSTS_N_INSNS (4), /* addsub. */
1550 COSTS_N_INSNS (2), /* fpconst. */
1551 COSTS_N_INSNS (2), /* neg. */
1552 COSTS_N_INSNS (5), /* compare. */
1553 COSTS_N_INSNS (4), /* widen. */
1554 COSTS_N_INSNS (4), /* narrow. */
1555 COSTS_N_INSNS (4), /* toint. */
1556 COSTS_N_INSNS (4), /* fromint. */
1557 COSTS_N_INSNS (4) /* roundint. */
1558 },
1559 /* FP DFmode */
1560 {
1561 COSTS_N_INSNS (31), /* div. */
1562 COSTS_N_INSNS (4), /* mult. */
1563 COSTS_N_INSNS (8), /* mult_addsub. */
1564 COSTS_N_INSNS (8), /* fma. */
1565 COSTS_N_INSNS (4), /* addsub. */
1566 COSTS_N_INSNS (2), /* fpconst. */
1567 COSTS_N_INSNS (2), /* neg. */
1568 COSTS_N_INSNS (2), /* compare. */
1569 COSTS_N_INSNS (4), /* widen. */
1570 COSTS_N_INSNS (4), /* narrow. */
1571 COSTS_N_INSNS (4), /* toint. */
1572 COSTS_N_INSNS (4), /* fromint. */
1573 COSTS_N_INSNS (4) /* roundint. */
1574 }
1575 },
1576 /* Vector */
1577 {
1578 COSTS_N_INSNS (1) /* alu. */
1579 }
1580 };
1581
1582 const struct cpu_cost_table v7m_extra_costs =
1583 {
1584 /* ALU */
1585 {
1586 0, /* arith. */
1587 0, /* logical. */
1588 0, /* shift. */
1589 0, /* shift_reg. */
1590 0, /* arith_shift. */
1591 COSTS_N_INSNS (1), /* arith_shift_reg. */
1592 0, /* log_shift. */
1593 COSTS_N_INSNS (1), /* log_shift_reg. */
1594 0, /* extend. */
1595 COSTS_N_INSNS (1), /* extend_arith. */
1596 0, /* bfi. */
1597 0, /* bfx. */
1598 0, /* clz. */
1599 0, /* rev. */
1600 COSTS_N_INSNS (1), /* non_exec. */
1601 false /* non_exec_costs_exec. */
1602 },
1603 {
1604 /* MULT SImode */
1605 {
1606 COSTS_N_INSNS (1), /* simple. */
1607 COSTS_N_INSNS (1), /* flag_setting. */
1608 COSTS_N_INSNS (2), /* extend. */
1609 COSTS_N_INSNS (1), /* add. */
1610 COSTS_N_INSNS (3), /* extend_add. */
1611 COSTS_N_INSNS (8) /* idiv. */
1612 },
1613 /* MULT DImode */
1614 {
1615 0, /* simple (N/A). */
1616 0, /* flag_setting (N/A). */
1617 COSTS_N_INSNS (2), /* extend. */
1618 0, /* add (N/A). */
1619 COSTS_N_INSNS (3), /* extend_add. */
1620 0 /* idiv (N/A). */
1621 }
1622 },
1623 /* LD/ST */
1624 {
1625 COSTS_N_INSNS (2), /* load. */
1626 0, /* load_sign_extend. */
1627 COSTS_N_INSNS (3), /* ldrd. */
1628 COSTS_N_INSNS (2), /* ldm_1st. */
1629 1, /* ldm_regs_per_insn_1st. */
1630 1, /* ldm_regs_per_insn_subsequent. */
1631 COSTS_N_INSNS (2), /* loadf. */
1632 COSTS_N_INSNS (3), /* loadd. */
1633 COSTS_N_INSNS (1), /* load_unaligned. */
1634 COSTS_N_INSNS (2), /* store. */
1635 COSTS_N_INSNS (3), /* strd. */
1636 COSTS_N_INSNS (2), /* stm_1st. */
1637 1, /* stm_regs_per_insn_1st. */
1638 1, /* stm_regs_per_insn_subsequent. */
1639 COSTS_N_INSNS (2), /* storef. */
1640 COSTS_N_INSNS (3), /* stored. */
1641 COSTS_N_INSNS (1), /* store_unaligned. */
1642 COSTS_N_INSNS (1), /* loadv. */
1643 COSTS_N_INSNS (1) /* storev. */
1644 },
1645 {
1646 /* FP SFmode */
1647 {
1648 COSTS_N_INSNS (7), /* div. */
1649 COSTS_N_INSNS (2), /* mult. */
1650 COSTS_N_INSNS (5), /* mult_addsub. */
1651 COSTS_N_INSNS (3), /* fma. */
1652 COSTS_N_INSNS (1), /* addsub. */
1653 0, /* fpconst. */
1654 0, /* neg. */
1655 0, /* compare. */
1656 0, /* widen. */
1657 0, /* narrow. */
1658 0, /* toint. */
1659 0, /* fromint. */
1660 0 /* roundint. */
1661 },
1662 /* FP DFmode */
1663 {
1664 COSTS_N_INSNS (15), /* div. */
1665 COSTS_N_INSNS (5), /* mult. */
1666 COSTS_N_INSNS (7), /* mult_addsub. */
1667 COSTS_N_INSNS (7), /* fma. */
1668 COSTS_N_INSNS (3), /* addsub. */
1669 0, /* fpconst. */
1670 0, /* neg. */
1671 0, /* compare. */
1672 0, /* widen. */
1673 0, /* narrow. */
1674 0, /* toint. */
1675 0, /* fromint. */
1676 0 /* roundint. */
1677 }
1678 },
1679 /* Vector */
1680 {
1681 COSTS_N_INSNS (1) /* alu. */
1682 }
1683 };
1684
1685 const struct tune_params arm_slowmul_tune =
1686 {
1687 arm_slowmul_rtx_costs,
1688 NULL, /* Insn extra costs. */
1689 NULL, /* Sched adj cost. */
1690 arm_default_branch_cost,
1691 &arm_default_vec_cost,
1692 3, /* Constant limit. */
1693 5, /* Max cond insns. */
1694 8, /* Memset max inline. */
1695 1, /* Issue rate. */
1696 ARM_PREFETCH_NOT_BENEFICIAL,
1697 tune_params::PREF_CONST_POOL_TRUE,
1698 tune_params::PREF_LDRD_FALSE,
1699 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1700 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1701 tune_params::DISPARAGE_FLAGS_NEITHER,
1702 tune_params::PREF_NEON_64_FALSE,
1703 tune_params::PREF_NEON_STRINGOPS_FALSE,
1704 tune_params::FUSE_NOTHING,
1705 tune_params::SCHED_AUTOPREF_OFF
1706 };
1707
1708 const struct tune_params arm_fastmul_tune =
1709 {
1710 arm_fastmul_rtx_costs,
1711 NULL, /* Insn extra costs. */
1712 NULL, /* Sched adj cost. */
1713 arm_default_branch_cost,
1714 &arm_default_vec_cost,
1715 1, /* Constant limit. */
1716 5, /* Max cond insns. */
1717 8, /* Memset max inline. */
1718 1, /* Issue rate. */
1719 ARM_PREFETCH_NOT_BENEFICIAL,
1720 tune_params::PREF_CONST_POOL_TRUE,
1721 tune_params::PREF_LDRD_FALSE,
1722 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1723 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1724 tune_params::DISPARAGE_FLAGS_NEITHER,
1725 tune_params::PREF_NEON_64_FALSE,
1726 tune_params::PREF_NEON_STRINGOPS_FALSE,
1727 tune_params::FUSE_NOTHING,
1728 tune_params::SCHED_AUTOPREF_OFF
1729 };
1730
1731 /* StrongARM has early execution of branches, so a sequence that is worth
1732 skipping is shorter. Set max_insns_skipped to a lower value. */
1733
1734 const struct tune_params arm_strongarm_tune =
1735 {
1736 arm_fastmul_rtx_costs,
1737 NULL, /* Insn extra costs. */
1738 NULL, /* Sched adj cost. */
1739 arm_default_branch_cost,
1740 &arm_default_vec_cost,
1741 1, /* Constant limit. */
1742 3, /* Max cond insns. */
1743 8, /* Memset max inline. */
1744 1, /* Issue rate. */
1745 ARM_PREFETCH_NOT_BENEFICIAL,
1746 tune_params::PREF_CONST_POOL_TRUE,
1747 tune_params::PREF_LDRD_FALSE,
1748 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1749 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1750 tune_params::DISPARAGE_FLAGS_NEITHER,
1751 tune_params::PREF_NEON_64_FALSE,
1752 tune_params::PREF_NEON_STRINGOPS_FALSE,
1753 tune_params::FUSE_NOTHING,
1754 tune_params::SCHED_AUTOPREF_OFF
1755 };
1756
1757 const struct tune_params arm_xscale_tune =
1758 {
1759 arm_xscale_rtx_costs,
1760 NULL, /* Insn extra costs. */
1761 xscale_sched_adjust_cost,
1762 arm_default_branch_cost,
1763 &arm_default_vec_cost,
1764 2, /* Constant limit. */
1765 3, /* Max cond insns. */
1766 8, /* Memset max inline. */
1767 1, /* Issue rate. */
1768 ARM_PREFETCH_NOT_BENEFICIAL,
1769 tune_params::PREF_CONST_POOL_TRUE,
1770 tune_params::PREF_LDRD_FALSE,
1771 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1772 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1773 tune_params::DISPARAGE_FLAGS_NEITHER,
1774 tune_params::PREF_NEON_64_FALSE,
1775 tune_params::PREF_NEON_STRINGOPS_FALSE,
1776 tune_params::FUSE_NOTHING,
1777 tune_params::SCHED_AUTOPREF_OFF
1778 };
1779
1780 const struct tune_params arm_9e_tune =
1781 {
1782 arm_9e_rtx_costs,
1783 NULL, /* Insn extra costs. */
1784 NULL, /* Sched adj cost. */
1785 arm_default_branch_cost,
1786 &arm_default_vec_cost,
1787 1, /* Constant limit. */
1788 5, /* Max cond insns. */
1789 8, /* Memset max inline. */
1790 1, /* Issue rate. */
1791 ARM_PREFETCH_NOT_BENEFICIAL,
1792 tune_params::PREF_CONST_POOL_TRUE,
1793 tune_params::PREF_LDRD_FALSE,
1794 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1795 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1796 tune_params::DISPARAGE_FLAGS_NEITHER,
1797 tune_params::PREF_NEON_64_FALSE,
1798 tune_params::PREF_NEON_STRINGOPS_FALSE,
1799 tune_params::FUSE_NOTHING,
1800 tune_params::SCHED_AUTOPREF_OFF
1801 };
1802
1803 const struct tune_params arm_marvell_pj4_tune =
1804 {
1805 arm_9e_rtx_costs,
1806 NULL, /* Insn extra costs. */
1807 NULL, /* Sched adj cost. */
1808 arm_default_branch_cost,
1809 &arm_default_vec_cost,
1810 1, /* Constant limit. */
1811 5, /* Max cond insns. */
1812 8, /* Memset max inline. */
1813 2, /* Issue rate. */
1814 ARM_PREFETCH_NOT_BENEFICIAL,
1815 tune_params::PREF_CONST_POOL_TRUE,
1816 tune_params::PREF_LDRD_FALSE,
1817 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1818 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1819 tune_params::DISPARAGE_FLAGS_NEITHER,
1820 tune_params::PREF_NEON_64_FALSE,
1821 tune_params::PREF_NEON_STRINGOPS_FALSE,
1822 tune_params::FUSE_NOTHING,
1823 tune_params::SCHED_AUTOPREF_OFF
1824 };
1825
1826 const struct tune_params arm_v6t2_tune =
1827 {
1828 arm_9e_rtx_costs,
1829 NULL, /* Insn extra costs. */
1830 NULL, /* Sched adj cost. */
1831 arm_default_branch_cost,
1832 &arm_default_vec_cost,
1833 1, /* Constant limit. */
1834 5, /* Max cond insns. */
1835 8, /* Memset max inline. */
1836 1, /* Issue rate. */
1837 ARM_PREFETCH_NOT_BENEFICIAL,
1838 tune_params::PREF_CONST_POOL_FALSE,
1839 tune_params::PREF_LDRD_FALSE,
1840 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1841 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1842 tune_params::DISPARAGE_FLAGS_NEITHER,
1843 tune_params::PREF_NEON_64_FALSE,
1844 tune_params::PREF_NEON_STRINGOPS_FALSE,
1845 tune_params::FUSE_NOTHING,
1846 tune_params::SCHED_AUTOPREF_OFF
1847 };
1848
1849
1850 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1851 const struct tune_params arm_cortex_tune =
1852 {
1853 arm_9e_rtx_costs,
1854 &generic_extra_costs,
1855 NULL, /* Sched adj cost. */
1856 arm_default_branch_cost,
1857 &arm_default_vec_cost,
1858 1, /* Constant limit. */
1859 5, /* Max cond insns. */
1860 8, /* Memset max inline. */
1861 2, /* Issue rate. */
1862 ARM_PREFETCH_NOT_BENEFICIAL,
1863 tune_params::PREF_CONST_POOL_FALSE,
1864 tune_params::PREF_LDRD_FALSE,
1865 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1866 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1867 tune_params::DISPARAGE_FLAGS_NEITHER,
1868 tune_params::PREF_NEON_64_FALSE,
1869 tune_params::PREF_NEON_STRINGOPS_FALSE,
1870 tune_params::FUSE_NOTHING,
1871 tune_params::SCHED_AUTOPREF_OFF
1872 };
1873
1874 const struct tune_params arm_cortex_a8_tune =
1875 {
1876 arm_9e_rtx_costs,
1877 &cortexa8_extra_costs,
1878 NULL, /* Sched adj cost. */
1879 arm_default_branch_cost,
1880 &arm_default_vec_cost,
1881 1, /* Constant limit. */
1882 5, /* Max cond insns. */
1883 8, /* Memset max inline. */
1884 2, /* Issue rate. */
1885 ARM_PREFETCH_NOT_BENEFICIAL,
1886 tune_params::PREF_CONST_POOL_FALSE,
1887 tune_params::PREF_LDRD_FALSE,
1888 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1890 tune_params::DISPARAGE_FLAGS_NEITHER,
1891 tune_params::PREF_NEON_64_FALSE,
1892 tune_params::PREF_NEON_STRINGOPS_TRUE,
1893 tune_params::FUSE_NOTHING,
1894 tune_params::SCHED_AUTOPREF_OFF
1895 };
1896
1897 const struct tune_params arm_cortex_a7_tune =
1898 {
1899 arm_9e_rtx_costs,
1900 &cortexa7_extra_costs,
1901 NULL, /* Sched adj cost. */
1902 arm_default_branch_cost,
1903 &arm_default_vec_cost,
1904 1, /* Constant limit. */
1905 5, /* Max cond insns. */
1906 8, /* Memset max inline. */
1907 2, /* Issue rate. */
1908 ARM_PREFETCH_NOT_BENEFICIAL,
1909 tune_params::PREF_CONST_POOL_FALSE,
1910 tune_params::PREF_LDRD_FALSE,
1911 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1913 tune_params::DISPARAGE_FLAGS_NEITHER,
1914 tune_params::PREF_NEON_64_FALSE,
1915 tune_params::PREF_NEON_STRINGOPS_TRUE,
1916 tune_params::FUSE_NOTHING,
1917 tune_params::SCHED_AUTOPREF_OFF
1918 };
1919
1920 const struct tune_params arm_cortex_a15_tune =
1921 {
1922 arm_9e_rtx_costs,
1923 &cortexa15_extra_costs,
1924 NULL, /* Sched adj cost. */
1925 arm_default_branch_cost,
1926 &arm_default_vec_cost,
1927 1, /* Constant limit. */
1928 2, /* Max cond insns. */
1929 8, /* Memset max inline. */
1930 3, /* Issue rate. */
1931 ARM_PREFETCH_NOT_BENEFICIAL,
1932 tune_params::PREF_CONST_POOL_FALSE,
1933 tune_params::PREF_LDRD_TRUE,
1934 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1935 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1936 tune_params::DISPARAGE_FLAGS_ALL,
1937 tune_params::PREF_NEON_64_FALSE,
1938 tune_params::PREF_NEON_STRINGOPS_TRUE,
1939 tune_params::FUSE_NOTHING,
1940 tune_params::SCHED_AUTOPREF_FULL
1941 };
1942
1943 const struct tune_params arm_cortex_a53_tune =
1944 {
1945 arm_9e_rtx_costs,
1946 &cortexa53_extra_costs,
1947 NULL, /* Sched adj cost. */
1948 arm_default_branch_cost,
1949 &arm_default_vec_cost,
1950 1, /* Constant limit. */
1951 5, /* Max cond insns. */
1952 8, /* Memset max inline. */
1953 2, /* Issue rate. */
1954 ARM_PREFETCH_NOT_BENEFICIAL,
1955 tune_params::PREF_CONST_POOL_FALSE,
1956 tune_params::PREF_LDRD_FALSE,
1957 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1959 tune_params::DISPARAGE_FLAGS_NEITHER,
1960 tune_params::PREF_NEON_64_FALSE,
1961 tune_params::PREF_NEON_STRINGOPS_TRUE,
1962 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1963 tune_params::SCHED_AUTOPREF_OFF
1964 };
1965
1966 const struct tune_params arm_cortex_a57_tune =
1967 {
1968 arm_9e_rtx_costs,
1969 &cortexa57_extra_costs,
1970 NULL, /* Sched adj cost. */
1971 arm_default_branch_cost,
1972 &arm_default_vec_cost,
1973 1, /* Constant limit. */
1974 2, /* Max cond insns. */
1975 8, /* Memset max inline. */
1976 3, /* Issue rate. */
1977 ARM_PREFETCH_NOT_BENEFICIAL,
1978 tune_params::PREF_CONST_POOL_FALSE,
1979 tune_params::PREF_LDRD_TRUE,
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1981 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1982 tune_params::DISPARAGE_FLAGS_ALL,
1983 tune_params::PREF_NEON_64_FALSE,
1984 tune_params::PREF_NEON_STRINGOPS_TRUE,
1985 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1986 tune_params::SCHED_AUTOPREF_FULL
1987 };
1988
1989 const struct tune_params arm_xgene1_tune =
1990 {
1991 arm_9e_rtx_costs,
1992 &xgene1_extra_costs,
1993 NULL, /* Sched adj cost. */
1994 arm_default_branch_cost,
1995 &arm_default_vec_cost,
1996 1, /* Constant limit. */
1997 2, /* Max cond insns. */
1998 32, /* Memset max inline. */
1999 4, /* Issue rate. */
2000 ARM_PREFETCH_NOT_BENEFICIAL,
2001 tune_params::PREF_CONST_POOL_FALSE,
2002 tune_params::PREF_LDRD_TRUE,
2003 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2004 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2005 tune_params::DISPARAGE_FLAGS_ALL,
2006 tune_params::PREF_NEON_64_FALSE,
2007 tune_params::PREF_NEON_STRINGOPS_FALSE,
2008 tune_params::FUSE_NOTHING,
2009 tune_params::SCHED_AUTOPREF_OFF
2010 };
2011
2012 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2013 less appealing. Set max_insns_skipped to a low value. */
2014
2015 const struct tune_params arm_cortex_a5_tune =
2016 {
2017 arm_9e_rtx_costs,
2018 &cortexa5_extra_costs,
2019 NULL, /* Sched adj cost. */
2020 arm_cortex_a5_branch_cost,
2021 &arm_default_vec_cost,
2022 1, /* Constant limit. */
2023 1, /* Max cond insns. */
2024 8, /* Memset max inline. */
2025 2, /* Issue rate. */
2026 ARM_PREFETCH_NOT_BENEFICIAL,
2027 tune_params::PREF_CONST_POOL_FALSE,
2028 tune_params::PREF_LDRD_FALSE,
2029 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2030 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2031 tune_params::DISPARAGE_FLAGS_NEITHER,
2032 tune_params::PREF_NEON_64_FALSE,
2033 tune_params::PREF_NEON_STRINGOPS_TRUE,
2034 tune_params::FUSE_NOTHING,
2035 tune_params::SCHED_AUTOPREF_OFF
2036 };
2037
2038 const struct tune_params arm_cortex_a9_tune =
2039 {
2040 arm_9e_rtx_costs,
2041 &cortexa9_extra_costs,
2042 cortex_a9_sched_adjust_cost,
2043 arm_default_branch_cost,
2044 &arm_default_vec_cost,
2045 1, /* Constant limit. */
2046 5, /* Max cond insns. */
2047 8, /* Memset max inline. */
2048 2, /* Issue rate. */
2049 ARM_PREFETCH_BENEFICIAL(4,32,32),
2050 tune_params::PREF_CONST_POOL_FALSE,
2051 tune_params::PREF_LDRD_FALSE,
2052 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2053 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2054 tune_params::DISPARAGE_FLAGS_NEITHER,
2055 tune_params::PREF_NEON_64_FALSE,
2056 tune_params::PREF_NEON_STRINGOPS_FALSE,
2057 tune_params::FUSE_NOTHING,
2058 tune_params::SCHED_AUTOPREF_OFF
2059 };
2060
2061 const struct tune_params arm_cortex_a12_tune =
2062 {
2063 arm_9e_rtx_costs,
2064 &cortexa12_extra_costs,
2065 NULL, /* Sched adj cost. */
2066 arm_default_branch_cost,
2067 &arm_default_vec_cost, /* Vectorizer costs. */
2068 1, /* Constant limit. */
2069 2, /* Max cond insns. */
2070 8, /* Memset max inline. */
2071 2, /* Issue rate. */
2072 ARM_PREFETCH_NOT_BENEFICIAL,
2073 tune_params::PREF_CONST_POOL_FALSE,
2074 tune_params::PREF_LDRD_TRUE,
2075 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2076 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2077 tune_params::DISPARAGE_FLAGS_ALL,
2078 tune_params::PREF_NEON_64_FALSE,
2079 tune_params::PREF_NEON_STRINGOPS_TRUE,
2080 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2081 tune_params::SCHED_AUTOPREF_OFF
2082 };
2083
2084 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT take a single
2085 cycle each (two cycles for the pair). An LDR from the constant pool also takes two cycles
2086 to execute, but mildly increases pipelining opportunity (consecutive
2087 loads/stores can be pipelined together, saving one cycle), and may also
2088 improve icache utilisation. Hence we prefer the constant pool for such
2089 processors. */
2090
2091 const struct tune_params arm_v7m_tune =
2092 {
2093 arm_9e_rtx_costs,
2094 &v7m_extra_costs,
2095 NULL, /* Sched adj cost. */
2096 arm_cortex_m_branch_cost,
2097 &arm_default_vec_cost,
2098 1, /* Constant limit. */
2099 2, /* Max cond insns. */
2100 8, /* Memset max inline. */
2101 1, /* Issue rate. */
2102 ARM_PREFETCH_NOT_BENEFICIAL,
2103 tune_params::PREF_CONST_POOL_TRUE,
2104 tune_params::PREF_LDRD_FALSE,
2105 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2106 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2107 tune_params::DISPARAGE_FLAGS_NEITHER,
2108 tune_params::PREF_NEON_64_FALSE,
2109 tune_params::PREF_NEON_STRINGOPS_FALSE,
2110 tune_params::FUSE_NOTHING,
2111 tune_params::SCHED_AUTOPREF_OFF
2112 };
2113
2114 /* Cortex-M7 tuning. */
2115
2116 const struct tune_params arm_cortex_m7_tune =
2117 {
2118 arm_9e_rtx_costs,
2119 &v7m_extra_costs,
2120 NULL, /* Sched adj cost. */
2121 arm_cortex_m7_branch_cost,
2122 &arm_default_vec_cost,
2123 0, /* Constant limit. */
2124 1, /* Max cond insns. */
2125 8, /* Memset max inline. */
2126 2, /* Issue rate. */
2127 ARM_PREFETCH_NOT_BENEFICIAL,
2128 tune_params::PREF_CONST_POOL_TRUE,
2129 tune_params::PREF_LDRD_FALSE,
2130 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2131 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2132 tune_params::DISPARAGE_FLAGS_NEITHER,
2133 tune_params::PREF_NEON_64_FALSE,
2134 tune_params::PREF_NEON_STRINGOPS_FALSE,
2135 tune_params::FUSE_NOTHING,
2136 tune_params::SCHED_AUTOPREF_OFF
2137 };
2138
2139 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2140 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2141 const struct tune_params arm_v6m_tune =
2142 {
2143 arm_9e_rtx_costs,
2144 NULL, /* Insn extra costs. */
2145 NULL, /* Sched adj cost. */
2146 arm_default_branch_cost,
2147 &arm_default_vec_cost, /* Vectorizer costs. */
2148 1, /* Constant limit. */
2149 5, /* Max cond insns. */
2150 8, /* Memset max inline. */
2151 1, /* Issue rate. */
2152 ARM_PREFETCH_NOT_BENEFICIAL,
2153 tune_params::PREF_CONST_POOL_FALSE,
2154 tune_params::PREF_LDRD_FALSE,
2155 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2156 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2157 tune_params::DISPARAGE_FLAGS_NEITHER,
2158 tune_params::PREF_NEON_64_FALSE,
2159 tune_params::PREF_NEON_STRINGOPS_FALSE,
2160 tune_params::FUSE_NOTHING,
2161 tune_params::SCHED_AUTOPREF_OFF
2162 };
2163
2164 const struct tune_params arm_fa726te_tune =
2165 {
2166 arm_9e_rtx_costs,
2167 NULL, /* Insn extra costs. */
2168 fa726te_sched_adjust_cost,
2169 arm_default_branch_cost,
2170 &arm_default_vec_cost,
2171 1, /* Constant limit. */
2172 5, /* Max cond insns. */
2173 8, /* Memset max inline. */
2174 2, /* Issue rate. */
2175 ARM_PREFETCH_NOT_BENEFICIAL,
2176 tune_params::PREF_CONST_POOL_TRUE,
2177 tune_params::PREF_LDRD_FALSE,
2178 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2179 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2180 tune_params::DISPARAGE_FLAGS_NEITHER,
2181 tune_params::PREF_NEON_64_FALSE,
2182 tune_params::PREF_NEON_STRINGOPS_FALSE,
2183 tune_params::FUSE_NOTHING,
2184 tune_params::SCHED_AUTOPREF_OFF
2185 };
2186
2187
2188 /* Not all of these give usefully different compilation alternatives,
2189 but there is no simple way of generalizing them. */
2190 static const struct processors all_cores[] =
2191 {
2192 /* ARM Cores */
2193 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2194 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2195 FLAGS, &arm_##COSTS##_tune},
2196 #include "arm-cores.def"
2197 #undef ARM_CORE
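/* As an illustration (a hypothetical entry, not taken from arm-cores.def):
   ARM_CORE ("foo", foo, foo, 7A, FOO_FLAGS, cortex) would expand above to
   {"foo", foo, "7A", BASE_ARCH_7A, FOO_FLAGS, &arm_cortex_tune}. */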
2198 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2199 };
2200
2201 static const struct processors all_architectures[] =
2202 {
2203 /* ARM Architectures */
2204 /* We don't specify tuning costs here as they will be figured out
2205 from the core. */
2206
2207 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2208 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2209 #include "arm-arches.def"
2210 #undef ARM_ARCH
2211 {NULL, arm_none, NULL, BASE_ARCH_0, ARM_FSET_EMPTY, NULL}
2212 };
2213
2214
2215 /* These are populated as commandline arguments are processed, or NULL
2216 if not specified. */
2217 static const struct processors *arm_selected_arch;
2218 static const struct processors *arm_selected_cpu;
2219 static const struct processors *arm_selected_tune;
2220
2221 /* The name of the preprocessor macro to define for this architecture. */
2222
2223 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2224
2225 /* Available values for -mfpu=. */
2226
2227 static const struct arm_fpu_desc all_fpus[] =
2228 {
2229 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, FEATURES) \
2230 { NAME, MODEL, REV, VFP_REGS, FEATURES },
2231 #include "arm-fpus.def"
2232 #undef ARM_FPU
2233 };
2234
2235
2236 /* Supported TLS relocations. */
2237
2238 enum tls_reloc {
2239 TLS_GD32,
2240 TLS_LDM32,
2241 TLS_LDO32,
2242 TLS_IE32,
2243 TLS_LE32,
2244 TLS_DESCSEQ /* GNU scheme */
2245 };
2246
2247 /* The maximum number of insns to be used when loading a constant. */
2248 inline static int
2249 arm_constant_limit (bool size_p)
2250 {
2251 return size_p ? 1 : current_tune->constant_limit;
2252 }
2253
2254 /* Emit an insn that's a simple single-set. Both the operands must be known
2255 to be valid. */
2256 inline static rtx_insn *
2257 emit_set_insn (rtx x, rtx y)
2258 {
2259 return emit_insn (gen_rtx_SET (x, y));
2260 }
2261
2262 /* Return the number of bits set in VALUE. */
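/* For example, bit_count (0x29) returns 3: each iteration of the loop below
   clears the lowest set bit (0x29 -> 0x28 -> 0x20 -> 0), so the loop runs
   once per set bit rather than once per bit position. */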
2263 static unsigned
2264 bit_count (unsigned long value)
2265 {
2266 unsigned long count = 0;
2267
2268 while (value)
2269 {
2270 count++;
2271 value &= value - 1; /* Clear the least-significant set bit. */
2272 }
2273
2274 return count;
2275 }
2276
2277 /* Return the number of features in feature-set SET. */
2278 static unsigned
2279 feature_count (const arm_feature_set * set)
2280 {
2281 return (bit_count (ARM_FSET_CPU1 (*set))
2282 + bit_count (ARM_FSET_CPU2 (*set)));
2283 }
2284
2285 typedef struct
2286 {
2287 machine_mode mode;
2288 const char *name;
2289 } arm_fixed_mode_set;
2290
2291 /* A small helper for setting fixed-point library libfuncs. */
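/* For example, arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3)
   registers "__gnu_addqq3" as the QQmode addition libcall, matching the
   __gnu_-prefixed naming used for the fixed-point helpers set up below. */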
2292
2293 static void
2294 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2295 const char *funcname, const char *modename,
2296 int num_suffix)
2297 {
2298 char buffer[50];
2299
2300 if (num_suffix == 0)
2301 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2302 else
2303 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2304
2305 set_optab_libfunc (optable, mode, buffer);
2306 }
2307
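/* Likewise for the fixed-point conversion libfuncs. For example, converting
   QQmode to HQmode (both signed fracts) yields "__gnu_fractqqhq2", whereas
   converting QQmode to SFmode drops the "2" suffix and yields
   "__gnu_fractqqsf", following the suffix rules of fixed-bit.h. */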
2308 static void
2309 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2310 machine_mode from, const char *funcname,
2311 const char *toname, const char *fromname)
2312 {
2313 char buffer[50];
2314 const char *maybe_suffix_2 = "";
2315
2316 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2317 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2318 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2319 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2320 maybe_suffix_2 = "2";
2321
2322 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2323 maybe_suffix_2);
2324
2325 set_conv_libfunc (optable, to, from, buffer);
2326 }
2327
2328 /* Set up library functions unique to ARM. */
2329
2330 static void
2331 arm_init_libfuncs (void)
2332 {
2333 /* For Linux, we have access to kernel support for atomic operations. */
2334 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2335 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2336
2337 /* There are no special library functions unless we are using the
2338 ARM BPABI. */
2339 if (!TARGET_BPABI)
2340 return;
2341
2342 /* The functions below are described in Section 4 of the "Run-Time
2343 ABI for the ARM architecture", Version 1.0. */
2344
2345 /* Double-precision floating-point arithmetic. Table 2. */
2346 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2347 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2348 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2349 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2350 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2351
2352 /* Double-precision comparisons. Table 3. */
2353 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2354 set_optab_libfunc (ne_optab, DFmode, NULL);
2355 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2356 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2357 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2358 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2359 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2360
2361 /* Single-precision floating-point arithmetic. Table 4. */
2362 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2363 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2364 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2365 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2366 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2367
2368 /* Single-precision comparisons. Table 5. */
2369 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2370 set_optab_libfunc (ne_optab, SFmode, NULL);
2371 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2372 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2373 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2374 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2375 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2376
2377 /* Floating-point to integer conversions. Table 6. */
2378 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2379 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2380 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2381 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2382 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2383 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2384 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2385 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2386
2387 /* Conversions between floating types. Table 7. */
2388 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2389 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2390
2391 /* Integer to floating-point conversions. Table 8. */
2392 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2393 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2394 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2395 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2396 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2397 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2398 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2399 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2400
2401 /* Long long. Table 9. */
2402 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2403 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2404 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2405 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2406 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2407 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2408 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2409 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2410
2411 /* Integer (32/32->32) division. \S 4.3.1. */
2412 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2413 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2414
2415 /* The divmod functions are designed so that they can be used for
2416 plain division, even though they return both the quotient and the
2417 remainder. The quotient is returned in the usual location (i.e.,
2418 r0 for SImode, {r0, r1} for DImode), just as would be expected
2419 for an ordinary division routine. Because the AAPCS calling
2420 conventions specify that all of { r0, r1, r2, r3 } are
2421 call-clobbered registers, there is no need to tell the compiler
2422 explicitly that those registers are clobbered by these
2423 routines. */
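/* For example, a plain DImode division can thus be compiled as a call to
   __aeabi_ldivmod: the quotient comes back in {r0, r1} as usual, and the
   remainder (returned in {r2, r3}) is simply ignored. */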
2424 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2425 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2426
2427 /* For SImode division the ABI provides div-without-mod routines,
2428 which are faster. */
2429 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2430 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2431
2432 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2433 divmod libcalls instead. */
2434 set_optab_libfunc (smod_optab, DImode, NULL);
2435 set_optab_libfunc (umod_optab, DImode, NULL);
2436 set_optab_libfunc (smod_optab, SImode, NULL);
2437 set_optab_libfunc (umod_optab, SImode, NULL);
2438
2439 /* Half-precision float operations. The compiler handles all operations
2440 with NULL libfuncs by converting to SFmode. */
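/* In effect, an HFmode addition is expanded as a widen via the sext libfunc
   registered below, an SFmode addition, and a narrow via the trunc libfunc,
   rather than through a dedicated half-precision routine. */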
2441 switch (arm_fp16_format)
2442 {
2443 case ARM_FP16_FORMAT_IEEE:
2444 case ARM_FP16_FORMAT_ALTERNATIVE:
2445
2446 /* Conversions. */
2447 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2448 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2449 ? "__gnu_f2h_ieee"
2450 : "__gnu_f2h_alternative"));
2451 set_conv_libfunc (sext_optab, SFmode, HFmode,
2452 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2453 ? "__gnu_h2f_ieee"
2454 : "__gnu_h2f_alternative"));
2455
2456 /* Arithmetic. */
2457 set_optab_libfunc (add_optab, HFmode, NULL);
2458 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2459 set_optab_libfunc (smul_optab, HFmode, NULL);
2460 set_optab_libfunc (neg_optab, HFmode, NULL);
2461 set_optab_libfunc (sub_optab, HFmode, NULL);
2462
2463 /* Comparisons. */
2464 set_optab_libfunc (eq_optab, HFmode, NULL);
2465 set_optab_libfunc (ne_optab, HFmode, NULL);
2466 set_optab_libfunc (lt_optab, HFmode, NULL);
2467 set_optab_libfunc (le_optab, HFmode, NULL);
2468 set_optab_libfunc (ge_optab, HFmode, NULL);
2469 set_optab_libfunc (gt_optab, HFmode, NULL);
2470 set_optab_libfunc (unord_optab, HFmode, NULL);
2471 break;
2472
2473 default:
2474 break;
2475 }
2476
2477 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2478 {
2479 const arm_fixed_mode_set fixed_arith_modes[] =
2480 {
2481 { QQmode, "qq" },
2482 { UQQmode, "uqq" },
2483 { HQmode, "hq" },
2484 { UHQmode, "uhq" },
2485 { SQmode, "sq" },
2486 { USQmode, "usq" },
2487 { DQmode, "dq" },
2488 { UDQmode, "udq" },
2489 { TQmode, "tq" },
2490 { UTQmode, "utq" },
2491 { HAmode, "ha" },
2492 { UHAmode, "uha" },
2493 { SAmode, "sa" },
2494 { USAmode, "usa" },
2495 { DAmode, "da" },
2496 { UDAmode, "uda" },
2497 { TAmode, "ta" },
2498 { UTAmode, "uta" }
2499 };
2500 const arm_fixed_mode_set fixed_conv_modes[] =
2501 {
2502 { QQmode, "qq" },
2503 { UQQmode, "uqq" },
2504 { HQmode, "hq" },
2505 { UHQmode, "uhq" },
2506 { SQmode, "sq" },
2507 { USQmode, "usq" },
2508 { DQmode, "dq" },
2509 { UDQmode, "udq" },
2510 { TQmode, "tq" },
2511 { UTQmode, "utq" },
2512 { HAmode, "ha" },
2513 { UHAmode, "uha" },
2514 { SAmode, "sa" },
2515 { USAmode, "usa" },
2516 { DAmode, "da" },
2517 { UDAmode, "uda" },
2518 { TAmode, "ta" },
2519 { UTAmode, "uta" },
2520 { QImode, "qi" },
2521 { HImode, "hi" },
2522 { SImode, "si" },
2523 { DImode, "di" },
2524 { TImode, "ti" },
2525 { SFmode, "sf" },
2526 { DFmode, "df" }
2527 };
2528 unsigned int i, j;
2529
2530 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2531 {
2532 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2533 "add", fixed_arith_modes[i].name, 3);
2534 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2535 "ssadd", fixed_arith_modes[i].name, 3);
2536 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2537 "usadd", fixed_arith_modes[i].name, 3);
2538 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2539 "sub", fixed_arith_modes[i].name, 3);
2540 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2541 "sssub", fixed_arith_modes[i].name, 3);
2542 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2543 "ussub", fixed_arith_modes[i].name, 3);
2544 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2545 "mul", fixed_arith_modes[i].name, 3);
2546 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2547 "ssmul", fixed_arith_modes[i].name, 3);
2548 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2549 "usmul", fixed_arith_modes[i].name, 3);
2550 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2551 "div", fixed_arith_modes[i].name, 3);
2552 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2553 "udiv", fixed_arith_modes[i].name, 3);
2554 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2555 "ssdiv", fixed_arith_modes[i].name, 3);
2556 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2557 "usdiv", fixed_arith_modes[i].name, 3);
2558 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2559 "neg", fixed_arith_modes[i].name, 2);
2560 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2561 "ssneg", fixed_arith_modes[i].name, 2);
2562 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2563 "usneg", fixed_arith_modes[i].name, 2);
2564 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2565 "ashl", fixed_arith_modes[i].name, 3);
2566 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2567 "ashr", fixed_arith_modes[i].name, 3);
2568 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2569 "lshr", fixed_arith_modes[i].name, 3);
2570 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2571 "ssashl", fixed_arith_modes[i].name, 3);
2572 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2573 "usashl", fixed_arith_modes[i].name, 3);
2574 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2575 "cmp", fixed_arith_modes[i].name, 2);
2576 }
2577
2578 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2579 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2580 {
2581 if (i == j
2582 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2583 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2584 continue;
2585
2586 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2587 fixed_conv_modes[j].mode, "fract",
2588 fixed_conv_modes[i].name,
2589 fixed_conv_modes[j].name);
2590 arm_set_fixed_conv_libfunc (satfract_optab,
2591 fixed_conv_modes[i].mode,
2592 fixed_conv_modes[j].mode, "satfract",
2593 fixed_conv_modes[i].name,
2594 fixed_conv_modes[j].name);
2595 arm_set_fixed_conv_libfunc (fractuns_optab,
2596 fixed_conv_modes[i].mode,
2597 fixed_conv_modes[j].mode, "fractuns",
2598 fixed_conv_modes[i].name,
2599 fixed_conv_modes[j].name);
2600 arm_set_fixed_conv_libfunc (satfractuns_optab,
2601 fixed_conv_modes[i].mode,
2602 fixed_conv_modes[j].mode, "satfractuns",
2603 fixed_conv_modes[i].name,
2604 fixed_conv_modes[j].name);
2605 }
2606 }
2607
2608 if (TARGET_AAPCS_BASED)
2609 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2610 }
2611
2612 /* On AAPCS systems, this is the "struct __va_list". */
2613 static GTY(()) tree va_list_type;
2614
2615 /* Return the type to use as __builtin_va_list. */
2616 static tree
2617 arm_build_builtin_va_list (void)
2618 {
2619 tree va_list_name;
2620 tree ap_field;
2621
2622 if (!TARGET_AAPCS_BASED)
2623 return std_build_builtin_va_list ();
2624
2625 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2626 defined as:
2627
2628 struct __va_list
2629 {
2630 void *__ap;
2631 };
2632
2633 The C Library ABI further reinforces this definition in \S
2634 4.1.
2635
2636 We must follow this definition exactly. The structure tag
2637 name is visible in C++ mangled names, and thus forms a part
2638 of the ABI. The field name may be used by people who
2639 #include <stdarg.h>. */
2640 /* Create the type. */
2641 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2642 /* Give it the required name. */
2643 va_list_name = build_decl (BUILTINS_LOCATION,
2644 TYPE_DECL,
2645 get_identifier ("__va_list"),
2646 va_list_type);
2647 DECL_ARTIFICIAL (va_list_name) = 1;
2648 TYPE_NAME (va_list_type) = va_list_name;
2649 TYPE_STUB_DECL (va_list_type) = va_list_name;
2650 /* Create the __ap field. */
2651 ap_field = build_decl (BUILTINS_LOCATION,
2652 FIELD_DECL,
2653 get_identifier ("__ap"),
2654 ptr_type_node);
2655 DECL_ARTIFICIAL (ap_field) = 1;
2656 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2657 TYPE_FIELDS (va_list_type) = ap_field;
2658 /* Compute its layout. */
2659 layout_type (va_list_type);
2660
2661 return va_list_type;
2662 }
2663
2664 /* Return an expression of type "void *" pointing to the next
2665 available argument in a variable-argument list. VALIST is the
2666 user-level va_list object, of type __builtin_va_list. */
2667 static tree
2668 arm_extract_valist_ptr (tree valist)
2669 {
2670 if (TREE_TYPE (valist) == error_mark_node)
2671 return error_mark_node;
2672
2673 /* On an AAPCS target, the pointer is stored within "struct
2674 va_list". */
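/* In other words, rewrite VALIST into the equivalent of "valist.__ap" so
   that the generic va_arg machinery sees a plain pointer. */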
2675 if (TARGET_AAPCS_BASED)
2676 {
2677 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2678 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2679 valist, ap_field, NULL_TREE);
2680 }
2681
2682 return valist;
2683 }
2684
2685 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2686 static void
2687 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2688 {
2689 valist = arm_extract_valist_ptr (valist);
2690 std_expand_builtin_va_start (valist, nextarg);
2691 }
2692
2693 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2694 static tree
2695 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2696 gimple_seq *post_p)
2697 {
2698 valist = arm_extract_valist_ptr (valist);
2699 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2700 }
2701
2702 /* Check any incompatible options that the user has specified. */
2703 static void
2704 arm_option_check_internal (struct gcc_options *opts)
2705 {
2706 int flags = opts->x_target_flags;
2707
2708 /* Make sure that the processor choice does not conflict with any of the
2709 other command line choices. */
2710 if (TARGET_ARM_P (flags) && !ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM))
2711 error ("target CPU does not support ARM mode");
2712
2713 /* TARGET_BACKTRACE calls leaf_function_p, which would crash if called
2714 from here, where no function is currently being compiled. */
2715 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2716 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2717
2718 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2719 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2720
2721 /* If this target is normally configured to use APCS frames, warn if they
2722 are turned off and debugging is turned on. */
2723 if (TARGET_ARM_P (flags)
2724 && write_symbols != NO_DEBUG
2725 && !TARGET_APCS_FRAME
2726 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2727 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2728
2729 /* iWMMXt unsupported under Thumb mode. */
2730 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2731 error ("iWMMXt unsupported under Thumb mode");
2732
2733 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2734 error ("can not use -mtp=cp15 with 16-bit Thumb");
2735
2736 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2737 {
2738 error ("RTP PIC is incompatible with Thumb");
2739 flag_pic = 0;
2740 }
2741
2742 /* We only support -mslow-flash-data on armv7-m targets. */
2743 if (target_slow_flash_data
2744 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2745 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2746 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2747 }
2748
2749 /* Recompute the global settings depending on target attribute options. */
2750
2751 static void
2752 arm_option_params_internal (void)
2753 {
2754 /* If we are not using the default (ARM mode) section anchor offset
2755 ranges, then set the correct ranges now. */
2756 if (TARGET_THUMB1)
2757 {
2758 /* Thumb-1 LDR instructions cannot have negative offsets.
2759 Permissible positive offset ranges are 5-bit (for byte loads),
2760 6-bit (for halfword loads), or 7-bit (for word loads).
2761 Empirical results suggest a 7-bit anchor range gives the best
2762 overall code size. */
2763 targetm.min_anchor_offset = 0;
2764 targetm.max_anchor_offset = 127;
2765 }
2766 else if (TARGET_THUMB2)
2767 {
2768 /* The minimum is set such that the total size of the block
2769 for a particular anchor is 248 + 1 + 4095 bytes, which is
2770 divisible by eight, ensuring natural spacing of anchors. */
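/* That is, 248 + 1 + 4095 = 4344 = 8 * 543 bytes per anchor block. */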
2771 targetm.min_anchor_offset = -248;
2772 targetm.max_anchor_offset = 4095;
2773 }
2774 else
2775 {
2776 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2777 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2778 }
2779
2780 if (optimize_size)
2781 {
2782 /* If optimizing for size, bump the number of instructions that we
2783 are prepared to conditionally execute (even on a StrongARM). */
2784 max_insns_skipped = 6;
2785
2786 /* For THUMB2, we limit the conditional sequence to one IT block. */
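/* An IT block predicates at most four instructions, and -mrestrict-it
   only permits single-instruction IT blocks, hence the 1 : 4 choice
   below. */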
2787 if (TARGET_THUMB2)
2788 max_insns_skipped = arm_restrict_it ? 1 : 4;
2789 }
2790 else
2791 /* When -mrestrict-it is in use, tone down the if-conversion. */
2792 max_insns_skipped = (TARGET_THUMB2 && arm_restrict_it)
2793 ? 1 : current_tune->max_insns_skipped;
2794 }
2795
2796 /* True if -mflip-thumb should next add an attribute for the default
2797 mode, false if it should next add an attribute for the opposite mode. */
2798 static GTY(()) bool thumb_flipper;
2799
2800 /* Options after initial target override. */
2801 static GTY(()) tree init_optimize;
2802
2803 static void
2804 arm_override_options_after_change_1 (struct gcc_options *opts)
2805 {
2806 if (opts->x_align_functions <= 0)
2807 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2808 && opts->x_optimize_size ? 2 : 4;
2809 }
2810
2811 /* Implement targetm.override_options_after_change. */
2812
2813 static void
2814 arm_override_options_after_change (void)
2815 {
2816 arm_override_options_after_change_1 (&global_options);
2817 }
2818
2819 /* Reset options between modes that the user has specified. */
2820 static void
2821 arm_option_override_internal (struct gcc_options *opts,
2822 struct gcc_options *opts_set)
2823 {
2824 arm_override_options_after_change_1 (opts);
2825
2826 if (TARGET_THUMB_P (opts->x_target_flags)
2827 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB)))
2828 {
2829 warning (0, "target CPU does not support THUMB instructions");
2830 opts->x_target_flags &= ~MASK_THUMB;
2831 }
2832
2833 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2834 {
2835 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2836 opts->x_target_flags &= ~MASK_APCS_FRAME;
2837 }
2838
2839 /* Callee super interworking implies thumb interworking. Adding
2840 this to the flags here simplifies the logic elsewhere. */
2841 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2842 opts->x_target_flags |= MASK_INTERWORK;
2843
2844 /* Need to remember initial values so combinations of options like
2845 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2846 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2847
2848 if (! opts_set->x_arm_restrict_it)
2849 opts->x_arm_restrict_it = arm_arch8;
2850
2851 if (!TARGET_THUMB2_P (opts->x_target_flags))
2852 opts->x_arm_restrict_it = 0;
2853
2854 /* Don't warn since it's on by default in -O2. */
2855 if (TARGET_THUMB1_P (opts->x_target_flags))
2856 opts->x_flag_schedule_insns = 0;
2857 else
2858 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2859
2860 /* Disable shrink-wrap when optimizing function for size, since it tends to
2861 generate additional returns. */
2862 if (optimize_function_for_size_p (cfun)
2863 && TARGET_THUMB2_P (opts->x_target_flags))
2864 opts->x_flag_shrink_wrap = false;
2865 else
2866 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
2867
2868 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2869 - epilogue_insns - does not accurately model the corresponding insns
2870 emitted in the asm file. In particular, see the comment in thumb_exit
2871 'Find out how many of the (return) argument registers we can corrupt'.
2872 As a consequence, the epilogue may clobber registers without fipa-ra
2873 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2874 TODO: Accurately model clobbers for epilogue_insns and reenable
2875 fipa-ra. */
2876 if (TARGET_THUMB1_P (opts->x_target_flags))
2877 opts->x_flag_ipa_ra = 0;
2878 else
2879 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
2880
2881 /* Thumb2 inline assembly code should always use unified syntax.
2882 This will apply to ARM and Thumb1 eventually. */
2883 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
2884 }
2885
2886 /* Fix up any incompatible options that the user has specified. */
2887 static void
2888 arm_option_override (void)
2889 {
2890 arm_selected_arch = NULL;
2891 arm_selected_cpu = NULL;
2892 arm_selected_tune = NULL;
2893
2894 if (global_options_set.x_arm_arch_option)
2895 arm_selected_arch = &all_architectures[arm_arch_option];
2896
2897 if (global_options_set.x_arm_cpu_option)
2898 {
2899 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2900 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2901 }
2902
2903 if (global_options_set.x_arm_tune_option)
2904 arm_selected_tune = &all_cores[(int) arm_tune_option];
2905
2906 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2907 SUBTARGET_OVERRIDE_OPTIONS;
2908 #endif
2909
2910 if (arm_selected_arch)
2911 {
2912 if (arm_selected_cpu)
2913 {
2914 const arm_feature_set tuning_flags = ARM_FSET_MAKE_CPU1 (FL_TUNE);
2915 arm_feature_set selected_flags;
2916 ARM_FSET_XOR (selected_flags, arm_selected_cpu->flags,
2917 arm_selected_arch->flags);
2918 ARM_FSET_EXCLUDE (selected_flags, selected_flags, tuning_flags);
2919 /* Check for conflict between mcpu and march. */
2920 if (!ARM_FSET_IS_EMPTY (selected_flags))
2921 {
2922 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2923 arm_selected_cpu->name, arm_selected_arch->name);
2924 /* -march wins for code generation.
2925 -mcpu wins for default tuning. */
2926 if (!arm_selected_tune)
2927 arm_selected_tune = arm_selected_cpu;
2928
2929 arm_selected_cpu = arm_selected_arch;
2930 }
2931 else
2932 /* -mcpu wins. */
2933 arm_selected_arch = NULL;
2934 }
2935 else
2936 /* Pick a CPU based on the architecture. */
2937 arm_selected_cpu = arm_selected_arch;
2938 }
2939
2940 /* If the user did not specify a processor, choose one for them. */
2941 if (!arm_selected_cpu)
2942 {
2943 const struct processors * sel;
2944 arm_feature_set sought = ARM_FSET_EMPTY;
2945
2946 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2947 if (!arm_selected_cpu->name)
2948 {
2949 #ifdef SUBTARGET_CPU_DEFAULT
2950 /* Use the subtarget default CPU if none was specified by
2951 configure. */
2952 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2953 #endif
2954 /* Default to ARM6. */
2955 if (!arm_selected_cpu->name)
2956 arm_selected_cpu = &all_cores[arm6];
2957 }
2958
2959 sel = arm_selected_cpu;
2960 insn_flags = sel->flags;
2961
2962 /* Now check to see if the user has specified some command line
2963 switch that requires certain abilities from the cpu. */
2964
2965 if (TARGET_INTERWORK || TARGET_THUMB)
2966 {
2967 ARM_FSET_ADD_CPU1 (sought, FL_THUMB);
2968 ARM_FSET_ADD_CPU1 (sought, FL_MODE32);
2969
2970 /* There are no ARM processors that support both APCS-26 and
2971 interworking. Therefore we force FL_MODE26 to be removed
2972 from insn_flags here (if it was set), so that the search
2973 below will always be able to find a compatible processor. */
2974 ARM_FSET_DEL_CPU1 (insn_flags, FL_MODE26);
2975 }
2976
2977 if (!ARM_FSET_IS_EMPTY (sought)
2978 && !(ARM_FSET_CPU_SUBSET (sought, insn_flags)))
2979 {
2980 /* Try to locate a CPU type that supports all of the abilities
2981 of the default CPU, plus the extra abilities requested by
2982 the user. */
2983 for (sel = all_cores; sel->name != NULL; sel++)
2984 if (ARM_FSET_CPU_SUBSET (sought, sel->flags))
2985 break;
2986
2987 if (sel->name == NULL)
2988 {
2989 unsigned current_bit_count = 0;
2990 const struct processors * best_fit = NULL;
2991
2992 /* Ideally we would like to issue an error message here
2993 saying that it was not possible to find a CPU compatible
2994 with the default CPU, but which also supports the command
2995 line options specified by the programmer, and so they
2996 ought to use the -mcpu=<name> command line option to
2997 override the default CPU type.
2998
2999 If we cannot find a cpu that has both the
3000 characteristics of the default cpu and the given
3001 command line options we scan the array again looking
3002 for a best match. */
3003 for (sel = all_cores; sel->name != NULL; sel++)
3004 {
3005 arm_feature_set required = ARM_FSET_EMPTY;
3006 ARM_FSET_UNION (required, sought, insn_flags);
3007 if (ARM_FSET_CPU_SUBSET (required, sel->flags))
3008 {
3009 unsigned count;
3010 arm_feature_set flags;
3011 ARM_FSET_INTER (flags, sel->flags, insn_flags);
3012 count = feature_count (&flags);
3013
3014 if (count >= current_bit_count)
3015 {
3016 best_fit = sel;
3017 current_bit_count = count;
3018 }
3019 }
3020 }
3021 gcc_assert (best_fit);
3022 sel = best_fit;
3023 }
3024
3025 arm_selected_cpu = sel;
3026 }
3027 }
3028
3029 gcc_assert (arm_selected_cpu);
3030 /* The selected cpu may be an architecture, so look up tuning by core ID. */
3031 if (!arm_selected_tune)
3032 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3033
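/* For an ARMv7-A core, for instance, the arch field is "7A" and the sprintf
   below defines __ARM_ARCH_7A__. */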
3034 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
3035 insn_flags = arm_selected_cpu->flags;
3036 arm_base_arch = arm_selected_cpu->base_arch;
3037
3038 arm_tune = arm_selected_tune->core;
3039 tune_flags = arm_selected_tune->flags;
3040 current_tune = arm_selected_tune->tune;
3041
3042 /* TBD: Dwarf info for apcs frame is not handled yet. */
3043 if (TARGET_APCS_FRAME)
3044 flag_shrink_wrap = false;
3045
3046 /* BPABI targets use linker tricks to allow interworking on cores
3047 without thumb support. */
3048 if (TARGET_INTERWORK
3049 && !(ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB) || TARGET_BPABI))
3050 {
3051 warning (0, "target CPU does not support interworking" );
3052 target_flags &= ~MASK_INTERWORK;
3053 }
3054
3055 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3056 {
3057 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3058 target_flags |= MASK_APCS_FRAME;
3059 }
3060
3061 if (TARGET_POKE_FUNCTION_NAME)
3062 target_flags |= MASK_APCS_FRAME;
3063
3064 if (TARGET_APCS_REENT && flag_pic)
3065 error ("-fpic and -mapcs-reent are incompatible");
3066
3067 if (TARGET_APCS_REENT)
3068 warning (0, "APCS reentrant code not supported. Ignored");
3069
3070 if (TARGET_APCS_FLOAT)
3071 warning (0, "passing floating point arguments in fp regs not yet supported");
3072
3073 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3074 arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M);
3075 arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4);
3076 arm_arch4t = arm_arch4 && (ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB));
3077 arm_arch5 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5);
3078 arm_arch5e = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH5E);
3079 arm_arch6 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6);
3080 arm_arch6k = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6K);
3081 arm_arch6kz = arm_arch6k && ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH6KZ);
3082 arm_arch_notm = ARM_FSET_HAS_CPU1 (insn_flags, FL_NOTM);
3083 arm_arch6m = arm_arch6 && !arm_arch_notm;
3084 arm_arch7 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7);
3085 arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM);
3086 arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8);
3087 arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2);
3088 arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE);
3089
3090 arm_ld_sched = ARM_FSET_HAS_CPU1 (tune_flags, FL_LDSCHED);
3091 arm_tune_strongarm = ARM_FSET_HAS_CPU1 (tune_flags, FL_STRONG);
3092 arm_tune_wbuf = ARM_FSET_HAS_CPU1 (tune_flags, FL_WBUF);
3093 arm_tune_xscale = ARM_FSET_HAS_CPU1 (tune_flags, FL_XSCALE);
3094 arm_arch_iwmmxt = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT);
3095 arm_arch_iwmmxt2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_IWMMXT2);
3096 arm_arch_thumb_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB_DIV);
3097 arm_arch_arm_hwdiv = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARM_DIV);
3098 arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
3099 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
3100 arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
3101 arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
3102
3103 /* V5 code we generate is completely interworking capable, so we turn off
3104 TARGET_INTERWORK here to avoid many tests later on. */
3105
3106 /* XXX However, we must pass the right pre-processor defines to CPP
3107 or GLD can get confused. This is a hack. */
3108 if (TARGET_INTERWORK)
3109 arm_cpp_interwork = 1;
3110
3111 if (arm_arch5)
3112 target_flags &= ~MASK_INTERWORK;
3113
3114 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3115 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3116
3117 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3118 error ("iwmmxt abi requires an iwmmxt capable cpu");
3119
3120 if (!global_options_set.x_arm_fpu_index)
3121 {
3122 const char *target_fpu_name;
3123 bool ok;
3124
3125 #ifdef FPUTYPE_DEFAULT
3126 target_fpu_name = FPUTYPE_DEFAULT;
3127 #else
3128 target_fpu_name = "vfp";
3129 #endif
3130
3131 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
3132 CL_TARGET);
3133 gcc_assert (ok);
3134 }
3135
3136 arm_fpu_desc = &all_fpus[arm_fpu_index];
3137
3138 switch (arm_fpu_desc->model)
3139 {
3140 case ARM_FP_MODEL_VFP:
3141 arm_fpu_attr = FPU_VFP;
3142 break;
3143
3144 default:
3145 gcc_unreachable();
3146 }
3147
3148 if (TARGET_AAPCS_BASED)
3149 {
3150 if (TARGET_CALLER_INTERWORKING)
3151 error ("AAPCS does not support -mcaller-super-interworking");
3152 else
3153 if (TARGET_CALLEE_INTERWORKING)
3154 error ("AAPCS does not support -mcallee-super-interworking");
3155 }
3156
3157 /* iWMMXt and NEON are incompatible. */
3158 if (TARGET_IWMMXT && TARGET_NEON)
3159 error ("iWMMXt and NEON are incompatible");
3160
3161 /* __fp16 support currently assumes the core has ldrh. */
3162 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3163 sorry ("__fp16 and no ldrh");
3164
3165 /* If soft-float is specified then don't use FPU. */
3166 if (TARGET_SOFT_FLOAT)
3167 arm_fpu_attr = FPU_NONE;
3168
3169 if (TARGET_AAPCS_BASED)
3170 {
3171 if (arm_abi == ARM_ABI_IWMMXT)
3172 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3173 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3174 && TARGET_HARD_FLOAT
3175 && TARGET_VFP)
3176 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3177 else
3178 arm_pcs_default = ARM_PCS_AAPCS;
3179 }
3180 else
3181 {
3182 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
3183 sorry ("-mfloat-abi=hard and VFP");
3184
3185 if (arm_abi == ARM_ABI_APCS)
3186 arm_pcs_default = ARM_PCS_APCS;
3187 else
3188 arm_pcs_default = ARM_PCS_ATPCS;
3189 }
3190
3191 /* For arm2/3 there is no need to do any scheduling if we are doing
3192 software floating-point. */
3193 if (TARGET_SOFT_FLOAT && !ARM_FSET_HAS_CPU1 (tune_flags, FL_MODE32))
3194 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3195
3196 /* Use the cp15 method if it is available. */
3197 if (target_thread_pointer == TP_AUTO)
3198 {
3199 if (arm_arch6k && !TARGET_THUMB1)
3200 target_thread_pointer = TP_CP15;
3201 else
3202 target_thread_pointer = TP_SOFT;
3203 }
3204
3205 /* Override the default structure alignment for AAPCS ABI. */
3206 if (!global_options_set.x_arm_structure_size_boundary)
3207 {
3208 if (TARGET_AAPCS_BASED)
3209 arm_structure_size_boundary = 8;
3210 }
3211 else
3212 {
3213 if (arm_structure_size_boundary != 8
3214 && arm_structure_size_boundary != 32
3215 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3216 {
3217 if (ARM_DOUBLEWORD_ALIGN)
3218 warning (0,
3219 "structure size boundary can only be set to 8, 32 or 64");
3220 else
3221 warning (0, "structure size boundary can only be set to 8 or 32");
3222 arm_structure_size_boundary
3223 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3224 }
3225 }
3226
3227 /* If stack checking is disabled, we can use r10 as the PIC register,
3228 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3229 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3230 {
3231 if (TARGET_VXWORKS_RTP)
3232 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3233 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3234 }
3235
3236 if (flag_pic && TARGET_VXWORKS_RTP)
3237 arm_pic_register = 9;
3238
3239 if (arm_pic_register_string != NULL)
3240 {
3241 int pic_register = decode_reg_name (arm_pic_register_string);
3242
3243 if (!flag_pic)
3244 warning (0, "-mpic-register= is useless without -fpic");
3245
3246 /* Prevent the user from choosing an obviously stupid PIC register. */
3247 else if (pic_register < 0 || call_used_regs[pic_register]
3248 || pic_register == HARD_FRAME_POINTER_REGNUM
3249 || pic_register == STACK_POINTER_REGNUM
3250 || pic_register >= PC_REGNUM
3251 || (TARGET_VXWORKS_RTP
3252 && (unsigned int) pic_register != arm_pic_register))
3253 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3254 else
3255 arm_pic_register = pic_register;
3256 }
3257
3258 if (TARGET_VXWORKS_RTP
3259 && !global_options_set.x_arm_pic_data_is_text_relative)
3260 arm_pic_data_is_text_relative = 0;
3261
3262 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3263 if (fix_cm3_ldrd == 2)
3264 {
3265 if (arm_selected_cpu->core == cortexm3)
3266 fix_cm3_ldrd = 1;
3267 else
3268 fix_cm3_ldrd = 0;
3269 }
3270
3271 /* Enable -munaligned-access by default for
3272 - all ARMv6 architecture-based processors
3273 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3274 - ARMv8 architecture-based processors.
3275
3276 Disable -munaligned-access by default for
3277 - all pre-ARMv6 architecture-based processors
3278 - ARMv6-M architecture-based processors. */
3279
3280 if (unaligned_access == 2)
3281 {
3282 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3283 unaligned_access = 1;
3284 else
3285 unaligned_access = 0;
3286 }
3287 else if (unaligned_access == 1
3288 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3289 {
3290 warning (0, "target CPU does not support unaligned accesses");
3291 unaligned_access = 0;
3292 }
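 /* For instance (an illustrative note): explicitly passing -munaligned-access
 while targeting a pre-ARMv6 core such as -mcpu=arm926ej-s takes the branch
 above, emits the warning, and resets unaligned_access to 0. */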
3293
3294 /* Hot/Cold partitioning is not currently supported, since we can't
3295 handle literal pool placement in that case. */
3296 if (flag_reorder_blocks_and_partition)
3297 {
3298 inform (input_location,
3299 "-freorder-blocks-and-partition not supported on this architecture");
3300 flag_reorder_blocks_and_partition = 0;
3301 flag_reorder_blocks = 1;
3302 }
3303
3304 if (flag_pic)
3305 /* Hoisting PIC address calculations more aggressively provides a small,
3306 but measurable, size reduction for PIC code. Therefore, we decrease
3307 the bar for unrestricted expression hoisting to the cost of PIC address
3308 calculation, which is 2 instructions. */
3309 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3310 global_options.x_param_values,
3311 global_options_set.x_param_values);
3312
3313 /* ARM EABI defaults to strict volatile bitfields. */
3314 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3315 && abi_version_at_least(2))
3316 flag_strict_volatile_bitfields = 1;
3317
3318 /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3319 for which we have deemed it beneficial (signified by setting
3320 prefetch.num_slots to 1 or more). */
3321 if (flag_prefetch_loop_arrays < 0
3322 && HAVE_prefetch
3323 && optimize >= 3
3324 && current_tune->prefetch.num_slots > 0)
3325 flag_prefetch_loop_arrays = 1;
3326
3327 /* Set up parameters to be used in prefetching algorithm. Do not
3328 override the defaults unless we are tuning for a core we have
3329 researched values for. */
3330 if (current_tune->prefetch.num_slots > 0)
3331 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3332 current_tune->prefetch.num_slots,
3333 global_options.x_param_values,
3334 global_options_set.x_param_values);
3335 if (current_tune->prefetch.l1_cache_line_size >= 0)
3336 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3337 current_tune->prefetch.l1_cache_line_size,
3338 global_options.x_param_values,
3339 global_options_set.x_param_values);
3340 if (current_tune->prefetch.l1_cache_size >= 0)
3341 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3342 current_tune->prefetch.l1_cache_size,
3343 global_options.x_param_values,
3344 global_options_set.x_param_values);
3345
3346 /* Use Neon to perform 64-bit operations rather than core
3347 registers. */
3348 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3349 if (use_neon_for_64bits == 1)
3350 prefer_neon_for_64bits = true;
3351
3352 /* Use the alternative scheduling-pressure algorithm by default. */
3353 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3354 global_options.x_param_values,
3355 global_options_set.x_param_values);
3356
3357 /* Look through ready list and all of queue for instructions
3358 relevant for L2 auto-prefetcher. */
3359 int param_sched_autopref_queue_depth;
3360
3361 switch (current_tune->sched_autopref)
3362 {
3363 case tune_params::SCHED_AUTOPREF_OFF:
3364 param_sched_autopref_queue_depth = -1;
3365 break;
3366
3367 case tune_params::SCHED_AUTOPREF_RANK:
3368 param_sched_autopref_queue_depth = 0;
3369 break;
3370
3371 case tune_params::SCHED_AUTOPREF_FULL:
3372 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3373 break;
3374
3375 default:
3376 gcc_unreachable ();
3377 }
3378
3379 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3380 param_sched_autopref_queue_depth,
3381 global_options.x_param_values,
3382 global_options_set.x_param_values);
3383
3384 /* Currently, for slow flash data, we just disable literal pools. */
3385 if (target_slow_flash_data)
3386 arm_disable_literal_pool = true;
3387
3388 /* Disable scheduling fusion by default if the target is not an ARMv7
3389 processor or does not prefer ldrd/strd. */
3390 if (flag_schedule_fusion == 2
3391 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3392 flag_schedule_fusion = 0;
3393
3394 /* Need to remember initial options before they are overridden. */
3395 init_optimize = build_optimization_node (&global_options);
3396
3397 arm_option_override_internal (&global_options, &global_options_set);
3398 arm_option_check_internal (&global_options);
3399 arm_option_params_internal ();
3400
3401 /* Register global variables with the garbage collector. */
3402 arm_add_gc_roots ();
3403
3404 /* Save the initial options in case the user does function specific
3405 options. */
3406 target_option_default_node = target_option_current_node
3407 = build_target_option_node (&global_options);
3408
3409 /* Init initial mode for testing. */
3410 thumb_flipper = TARGET_THUMB;
3411 }
3412
3413 static void
3414 arm_add_gc_roots (void)
3415 {
3416 gcc_obstack_init(&minipool_obstack);
3417 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3418 }
3419 \f
3420 /* A table of known ARM exception types.
3421 For use with the interrupt function attribute. */
3422
3423 typedef struct
3424 {
3425 const char *const arg;
3426 const unsigned long return_value;
3427 }
3428 isr_attribute_arg;
3429
3430 static const isr_attribute_arg isr_attribute_args [] =
3431 {
3432 { "IRQ", ARM_FT_ISR },
3433 { "irq", ARM_FT_ISR },
3434 { "FIQ", ARM_FT_FIQ },
3435 { "fiq", ARM_FT_FIQ },
3436 { "ABORT", ARM_FT_ISR },
3437 { "abort", ARM_FT_ISR },
3438 { "ABORT", ARM_FT_ISR },
3439 { "abort", ARM_FT_ISR },
3440 { "UNDEF", ARM_FT_EXCEPTION },
3441 { "undef", ARM_FT_EXCEPTION },
3442 { "SWI", ARM_FT_EXCEPTION },
3443 { "swi", ARM_FT_EXCEPTION },
3444 { NULL, ARM_FT_NORMAL }
3445 };
3446
3447 /* Returns the (interrupt) function type of the current
3448 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3449
3450 static unsigned long
3451 arm_isr_value (tree argument)
3452 {
3453 const isr_attribute_arg * ptr;
3454 const char * arg;
3455
3456 if (!arm_arch_notm)
3457 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3458
3459 /* No argument - default to IRQ. */
3460 if (argument == NULL_TREE)
3461 return ARM_FT_ISR;
3462
3463 /* Get the value of the argument. */
3464 if (TREE_VALUE (argument) == NULL_TREE
3465 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3466 return ARM_FT_UNKNOWN;
3467
3468 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3469
3470 /* Check it against the list of known arguments. */
3471 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3472 if (streq (arg, ptr->arg))
3473 return ptr->return_value;
3474
3475 /* An unrecognized interrupt type. */
3476 return ARM_FT_UNKNOWN;
3477 }
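 /* As an illustrative example (the handler name below is hypothetical),
 a declaration such as
 
 void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);
 
 arrives here with ARGUMENT holding the string "FIQ", which the table
 above maps to ARM_FT_FIQ; with no argument the default is ARM_FT_ISR. */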
3478
3479 /* Computes the type of the current function. */
3480
3481 static unsigned long
3482 arm_compute_func_type (void)
3483 {
3484 unsigned long type = ARM_FT_UNKNOWN;
3485 tree a;
3486 tree attr;
3487
3488 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3489
3490 /* Decide if the current function is volatile. Such functions
3491 never return, and many memory cycles can be saved by not storing
3492 register values that will never be needed again. This optimization
3493 was added to speed up context switching in a kernel application. */
3494 if (optimize > 0
3495 && (TREE_NOTHROW (current_function_decl)
3496 || !(flag_unwind_tables
3497 || (flag_exceptions
3498 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3499 && TREE_THIS_VOLATILE (current_function_decl))
3500 type |= ARM_FT_VOLATILE;
3501
3502 if (cfun->static_chain_decl != NULL)
3503 type |= ARM_FT_NESTED;
3504
3505 attr = DECL_ATTRIBUTES (current_function_decl);
3506
3507 a = lookup_attribute ("naked", attr);
3508 if (a != NULL_TREE)
3509 type |= ARM_FT_NAKED;
3510
3511 a = lookup_attribute ("isr", attr);
3512 if (a == NULL_TREE)
3513 a = lookup_attribute ("interrupt", attr);
3514
3515 if (a == NULL_TREE)
3516 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3517 else
3518 type |= arm_isr_value (TREE_VALUE (a));
3519
3520 return type;
3521 }
3522
3523 /* Returns the type of the current function. */
3524
3525 unsigned long
3526 arm_current_func_type (void)
3527 {
3528 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3529 cfun->machine->func_type = arm_compute_func_type ();
3530
3531 return cfun->machine->func_type;
3532 }
3533
3534 bool
3535 arm_allocate_stack_slots_for_args (void)
3536 {
3537 /* Naked functions should not allocate stack slots for arguments. */
3538 return !IS_NAKED (arm_current_func_type ());
3539 }
3540
3541 static bool
3542 arm_warn_func_return (tree decl)
3543 {
3544 /* Naked functions are implemented entirely in assembly, including the
3545 return sequence, so suppress warnings about this. */
3546 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3547 }
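 /* Illustrative example (a hypothetical function, not from this file): a
 routine written as
 
 int __attribute__ ((naked)) always_zero (void)
 {
 __asm__ ("mov r0, #0\n\tbx lr");
 }
 
 has its entire return sequence in the assembly, so the usual warning
 about reaching the end of a non-void function is unwanted here. */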
3548
3549 \f
3550 /* Output assembler code for a block containing the constant parts
3551 of a trampoline, leaving space for the variable parts.
3552
3553 On the ARM, (if r8 is the static chain regnum, and remembering that
3554 referencing pc adds an offset of 8) the trampoline looks like:
3555 ldr r8, [pc, #0]
3556 ldr pc, [pc]
3557 .word static chain value
3558 .word function's address
3559 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3560
3561 static void
3562 arm_asm_trampoline_template (FILE *f)
3563 {
3564 fprintf (f, "\t.syntax unified\n");
3565
3566 if (TARGET_ARM)
3567 {
3568 fprintf (f, "\t.arm\n");
3569 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3570 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3571 }
3572 else if (TARGET_THUMB2)
3573 {
3574 fprintf (f, "\t.thumb\n");
3575 /* The Thumb-2 trampoline is similar to the arm implementation.
3576 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3577 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3578 STATIC_CHAIN_REGNUM, PC_REGNUM);
3579 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3580 }
3581 else
3582 {
3583 ASM_OUTPUT_ALIGN (f, 2);
3584 fprintf (f, "\t.code\t16\n");
3585 fprintf (f, ".Ltrampoline_start:\n");
3586 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3587 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3588 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3589 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3590 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3591 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3592 }
3593 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3594 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3595 }
3596
3597 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3598
3599 static void
3600 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3601 {
3602 rtx fnaddr, mem, a_tramp;
3603
3604 emit_block_move (m_tramp, assemble_trampoline_template (),
3605 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3606
3607 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3608 emit_move_insn (mem, chain_value);
3609
3610 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3611 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3612 emit_move_insn (mem, fnaddr);
3613
3614 a_tramp = XEXP (m_tramp, 0);
3615 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3616 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3617 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3618 }
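 /* A worked sketch of the resulting 32-bit ARM-mode trampoline (byte
 offsets; the concrete values are illustrative only):
 
 0: ldr <static chain reg>, [pc, #0] ; from the template above
 4: ldr pc, [pc, #0]
 8: .word <chain_value> ; stored at offset 8 above
 12: .word <address of FNDECL> ; stored at offset 12 above
 
 followed by a __clear_cache call covering [a_tramp, a_tramp +
 TRAMPOLINE_SIZE) so the freshly written instructions are visible to
 instruction fetch. */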
3619
3620 /* Thumb trampolines should be entered in thumb mode, so set
3621 the bottom bit of the address. */
3622
3623 static rtx
3624 arm_trampoline_adjust_address (rtx addr)
3625 {
3626 if (TARGET_THUMB)
3627 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3628 NULL, 0, OPTAB_LIB_WIDEN);
3629 return addr;
3630 }
3631 \f
3632 /* Return 1 if it is possible to return using a single instruction.
3633 If SIBLING is non-null, this is a test for a return before a sibling
3634 call. SIBLING is the call insn, so we can examine its register usage. */
3635
3636 int
3637 use_return_insn (int iscond, rtx sibling)
3638 {
3639 int regno;
3640 unsigned int func_type;
3641 unsigned long saved_int_regs;
3642 unsigned HOST_WIDE_INT stack_adjust;
3643 arm_stack_offsets *offsets;
3644
3645 /* Never use a return instruction before reload has run. */
3646 if (!reload_completed)
3647 return 0;
3648
3649 func_type = arm_current_func_type ();
3650
3651 /* Naked, volatile and stack alignment functions need special
3652 consideration. */
3653 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3654 return 0;
3655
3656 /* So do interrupt functions that use the frame pointer and Thumb
3657 interrupt functions. */
3658 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3659 return 0;
3660
3661 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3662 && !optimize_function_for_size_p (cfun))
3663 return 0;
3664
3665 offsets = arm_get_frame_offsets ();
3666 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3667
3668 /* As do variadic functions. */
3669 if (crtl->args.pretend_args_size
3670 || cfun->machine->uses_anonymous_args
3671 /* Or if the function calls __builtin_eh_return () */
3672 || crtl->calls_eh_return
3673 /* Or if the function calls alloca */
3674 || cfun->calls_alloca
3675 /* Or if there is a stack adjustment. However, if the stack pointer
3676 is saved on the stack, we can use a pre-incrementing stack load. */
3677 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3678 && stack_adjust == 4))
3679 /* Or if the static chain register was saved above the frame, under the
3680 assumption that the stack pointer isn't saved on the stack. */
3681 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3682 && arm_compute_static_chain_stack_bytes() != 0))
3683 return 0;
3684
3685 saved_int_regs = offsets->saved_regs_mask;
3686
3687 /* Unfortunately, the insn
3688
3689 ldmib sp, {..., sp, ...}
3690
3691 triggers a bug on most SA-110 based devices, such that the stack
3692 pointer won't be correctly restored if the instruction takes a
3693 page fault. We work around this problem by popping r3 along with
3694 the other registers, since that is never slower than executing
3695 another instruction.
3696
3697 We test for !arm_arch5 here, because code for any architecture
3698 less than this could potentially be run on one of the buggy
3699 chips. */
3700 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3701 {
3702 /* Validate that r3 is a call-clobbered register (always true in
3703 the default abi) ... */
3704 if (!call_used_regs[3])
3705 return 0;
3706
3707 /* ... that it isn't being used for a return value ... */
3708 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3709 return 0;
3710
3711 /* ... or for a tail-call argument ... */
3712 if (sibling)
3713 {
3714 gcc_assert (CALL_P (sibling));
3715
3716 if (find_regno_fusage (sibling, USE, 3))
3717 return 0;
3718 }
3719
3720 /* ... and that there are no call-saved registers in r0-r2
3721 (always true in the default ABI). */
3722 if (saved_int_regs & 0x7)
3723 return 0;
3724 }
3725
3726 /* Can't be done if interworking with Thumb, and any registers have been
3727 stacked. */
3728 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3729 return 0;
3730
3731 /* On StrongARM, conditional returns are expensive if they aren't
3732 taken and multiple registers have been stacked. */
3733 if (iscond && arm_tune_strongarm)
3734 {
3735 /* Conditional return when just the LR is stored is a simple
3736 conditional-load instruction, that's not expensive. */
3737 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3738 return 0;
3739
3740 if (flag_pic
3741 && arm_pic_register != INVALID_REGNUM
3742 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3743 return 0;
3744 }
3745
3746 /* If there are saved registers but the LR isn't saved, then we need
3747 two instructions for the return. */
3748 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3749 return 0;
3750
3751 /* Can't be done if any of the VFP regs are pushed,
3752 since this also requires an insn. */
3753 if (TARGET_HARD_FLOAT && TARGET_VFP)
3754 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3755 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3756 return 0;
3757
3758 if (TARGET_REALLY_IWMMXT)
3759 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3760 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3761 return 0;
3762
3763 return 1;
3764 }
3765
3766 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3767 shrink-wrapping if possible. This is the case if we need to emit a
3768 prologue, which we can test by looking at the offsets. */
3769 bool
3770 use_simple_return_p (void)
3771 {
3772 arm_stack_offsets *offsets;
3773
3774 offsets = arm_get_frame_offsets ();
3775 return offsets->outgoing_args != 0;
3776 }
3777
3778 /* Return TRUE if int I is a valid immediate ARM constant. */
3779
3780 int
3781 const_ok_for_arm (HOST_WIDE_INT i)
3782 {
3783 int lowbit;
3784
3785 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3786 be all zero, or all one. */
3787 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3788 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3789 != ((~(unsigned HOST_WIDE_INT) 0)
3790 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3791 return FALSE;
3792
3793 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3794
3795 /* Fast return for 0 and small values. We must do this for zero, since
3796 the code below can't handle that one case. */
3797 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3798 return TRUE;
3799
3800 /* Get the number of trailing zeros. */
3801 lowbit = ffs((int) i) - 1;
3802
3803 /* Only even shifts are allowed in ARM mode so round down to the
3804 nearest even number. */
3805 if (TARGET_ARM)
3806 lowbit &= ~1;
3807
3808 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3809 return TRUE;
3810
3811 if (TARGET_ARM)
3812 {
3813 /* Allow rotated constants in ARM mode. */
3814 if (lowbit <= 4
3815 && ((i & ~0xc000003f) == 0
3816 || (i & ~0xf000000f) == 0
3817 || (i & ~0xfc000003) == 0))
3818 return TRUE;
3819 }
3820 else
3821 {
3822 HOST_WIDE_INT v;
3823
3824 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3825 v = i & 0xff;
3826 v |= v << 16;
3827 if (i == v || i == (v | (v << 8)))
3828 return TRUE;
3829
3830 /* Allow repeated pattern 0xXY00XY00. */
3831 v = i & 0xff00;
3832 v |= v << 16;
3833 if (i == v)
3834 return TRUE;
3835 }
3836
3837 return FALSE;
3838 }
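 /* A minimal standalone sketch of the ARM-mode rule above, assuming a
 32-bit unsigned int; the helper name is hypothetical and nothing in the
 compiler uses it. A data-processing immediate is an 8-bit value rotated
 right by an even amount, so the test is whether some even left-rotation
 of X fits in 8 bits. */
 #if 0
 static int
 sketch_const_ok_for_arm_mode (unsigned int x)
 {
 int rot;
 
 for (rot = 0; rot < 32; rot += 2)
 {
 /* Rotate X left by ROT; this undoes an encoding of "imm8 ror ROT". */
 unsigned int v = rot ? ((x << rot) | (x >> (32 - rot))) : x;
 
 if ((v & ~0xffU) == 0)
 return 1; /* e.g. 0x0000ff00 or 0xfc000003. */
 }
 
 return 0; /* e.g. 0x00000101 cannot be encoded. */
 }
 #endif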
3839
3840 /* Return true if I is a valid constant for the operation CODE. */
3841 int
3842 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3843 {
3844 if (const_ok_for_arm (i))
3845 return 1;
3846
3847 switch (code)
3848 {
3849 case SET:
3850 /* See if we can use movw. */
3851 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3852 return 1;
3853 else
3854 /* Otherwise, try mvn. */
3855 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3856
3857 case PLUS:
3858 /* See if we can use addw or subw. */
3859 if (TARGET_THUMB2
3860 && ((i & 0xfffff000) == 0
3861 || ((-i) & 0xfffff000) == 0))
3862 return 1;
3863 /* else fall through. */
3864
3865 case COMPARE:
3866 case EQ:
3867 case NE:
3868 case GT:
3869 case LE:
3870 case LT:
3871 case GE:
3872 case GEU:
3873 case LTU:
3874 case GTU:
3875 case LEU:
3876 case UNORDERED:
3877 case ORDERED:
3878 case UNEQ:
3879 case UNGE:
3880 case UNLT:
3881 case UNGT:
3882 case UNLE:
3883 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3884
3885 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3886 case XOR:
3887 return 0;
3888
3889 case IOR:
3890 if (TARGET_THUMB2)
3891 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3892 return 0;
3893
3894 case AND:
3895 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3896
3897 default:
3898 gcc_unreachable ();
3899 }
3900 }
3901
3902 /* Return true if I is a valid di mode constant for the operation CODE. */
3903 int
3904 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3905 {
3906 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3907 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3908 rtx hi = GEN_INT (hi_val);
3909 rtx lo = GEN_INT (lo_val);
3910
3911 if (TARGET_THUMB1)
3912 return 0;
3913
3914 switch (code)
3915 {
3916 case AND:
3917 case IOR:
3918 case XOR:
3919 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3920 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3921 case PLUS:
3922 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3923
3924 default:
3925 return 0;
3926 }
3927 }
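 /* For example (illustrative values): a DImode AND with the constant
 0xffffffff000000ff is acceptable here, because the high half is
 0xffffffff and the low half 0x000000ff is itself a valid AND
 immediate, so each word can be handled separately. */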
3928
3929 /* Emit a sequence of insns to handle a large constant.
3930 CODE is the code of the operation required; it can be any of SET, PLUS,
3931 IOR, AND, XOR, MINUS;
3932 MODE is the mode in which the operation is being performed;
3933 VAL is the integer to operate on;
3934 SOURCE is the other operand (a register, or a null-pointer for SET);
3935 SUBTARGETS means it is safe to create scratch registers if that will
3936 either produce a simpler sequence, or we will want to cse the values.
3937 Return value is the number of insns emitted. */
3938
3939 /* ??? Tweak this for thumb2. */
3940 int
3941 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3942 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3943 {
3944 rtx cond;
3945
3946 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3947 cond = COND_EXEC_TEST (PATTERN (insn));
3948 else
3949 cond = NULL_RTX;
3950
3951 if (subtargets || code == SET
3952 || (REG_P (target) && REG_P (source)
3953 && REGNO (target) != REGNO (source)))
3954 {
3955 /* After arm_reorg has been called, we can't fix up expensive
3956 constants by pushing them into memory so we must synthesize
3957 them in-line, regardless of the cost. This is only likely to
3958 be more costly on chips that have load delay slots and we are
3959 compiling without running the scheduler (so no splitting
3960 occurred before the final instruction emission).
3961
3962 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3963 */
3964 if (!cfun->machine->after_arm_reorg
3965 && !cond
3966 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3967 1, 0)
3968 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3969 + (code != SET))))
3970 {
3971 if (code == SET)
3972 {
3973 /* Currently SET is the only monadic value for CODE; all
3974 the rest are dyadic. */
3975 if (TARGET_USE_MOVT)
3976 arm_emit_movpair (target, GEN_INT (val));
3977 else
3978 emit_set_insn (target, GEN_INT (val));
3979
3980 return 1;
3981 }
3982 else
3983 {
3984 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3985
3986 if (TARGET_USE_MOVT)
3987 arm_emit_movpair (temp, GEN_INT (val));
3988 else
3989 emit_set_insn (temp, GEN_INT (val));
3990
3991 /* For MINUS, the value is subtracted from, since we never
3992 have subtraction of a constant. */
3993 if (code == MINUS)
3994 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3995 else
3996 emit_set_insn (target,
3997 gen_rtx_fmt_ee (code, mode, source, temp));
3998 return 2;
3999 }
4000 }
4001 }
4002
4003 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4004 1);
4005 }
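 /* As an illustrative example: a SET of the constant 0x00ff00ff cannot be
 done with a single ARM-mode immediate, but it splits into two
 instructions, e.g. materialise 0x00ff0000 and then add/or in 0x000000ff;
 the exact sequence is chosen by optimal_immediate_sequence and
 arm_gen_constant below, and on targets with MOVW/MOVT a movw/movt pair
 may be emitted instead. */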
4006
4007 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4008 ARM/THUMB2 immediates, and add up to VAL.
4009 The function return value gives the number of insns required. */
4010 static int
4011 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4012 struct four_ints *return_sequence)
4013 {
4014 int best_consecutive_zeros = 0;
4015 int i;
4016 int best_start = 0;
4017 int insns1, insns2;
4018 struct four_ints tmp_sequence;
4019
4020 /* If we aren't targeting ARM, the best place to start is always at
4021 the bottom; otherwise look more closely. */
4022 if (TARGET_ARM)
4023 {
4024 for (i = 0; i < 32; i += 2)
4025 {
4026 int consecutive_zeros = 0;
4027
4028 if (!(val & (3 << i)))
4029 {
4030 while ((i < 32) && !(val & (3 << i)))
4031 {
4032 consecutive_zeros += 2;
4033 i += 2;
4034 }
4035 if (consecutive_zeros > best_consecutive_zeros)
4036 {
4037 best_consecutive_zeros = consecutive_zeros;
4038 best_start = i - consecutive_zeros;
4039 }
4040 i -= 2;
4041 }
4042 }
4043 }
4044
4045 /* So long as it won't require any more insns to do so, it's
4046 desirable to emit a small constant (in bits 0...9) in the last
4047 insn. This way there is more chance that it can be combined with
4048 a later addressing insn to form a pre-indexed load or store
4049 operation. Consider:
4050
4051 *((volatile int *)0xe0000100) = 1;
4052 *((volatile int *)0xe0000110) = 2;
4053
4054 We want this to wind up as:
4055
4056 mov rA, #0xe0000000
4057 mov rB, #1
4058 str rB, [rA, #0x100]
4059 mov rB, #2
4060 str rB, [rA, #0x110]
4061
4062 rather than having to synthesize both large constants from scratch.
4063
4064 Therefore, we calculate how many insns would be required to emit
4065 the constant starting from `best_start', and also starting from
4066 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4067 yield a shorter sequence, we may as well use zero. */
4068 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4069 if (best_start != 0
4070 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
4071 {
4072 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4073 if (insns2 <= insns1)
4074 {
4075 *return_sequence = tmp_sequence;
4076 insns1 = insns2;
4077 }
4078 }
4079
4080 return insns1;
4081 }
4082
4083 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4084 static int
4085 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4086 struct four_ints *return_sequence, int i)
4087 {
4088 int remainder = val & 0xffffffff;
4089 int insns = 0;
4090
4091 /* Try and find a way of doing the job in either two or three
4092 instructions.
4093
4094 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4095 location. We start at position I. This may be the MSB, or
4096 optimal_immediate_sequence may have positioned it at the largest block
4097 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4098 wrapping around to the top of the word when we drop off the bottom.
4099 In the worst case this code should produce no more than four insns.
4100
4101 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4102 constants, shifted to any arbitrary location. We should always start
4103 at the MSB. */
4104 do
4105 {
4106 int end;
4107 unsigned int b1, b2, b3, b4;
4108 unsigned HOST_WIDE_INT result;
4109 int loc;
4110
4111 gcc_assert (insns < 4);
4112
4113 if (i <= 0)
4114 i += 32;
4115
4116 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4117 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4118 {
4119 loc = i;
4120 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4121 /* We can use addw/subw for the last 12 bits. */
4122 result = remainder;
4123 else
4124 {
4125 /* Use an 8-bit shifted/rotated immediate. */
4126 end = i - 8;
4127 if (end < 0)
4128 end += 32;
4129 result = remainder & ((0x0ff << end)
4130 | ((i < end) ? (0xff >> (32 - end))
4131 : 0));
4132 i -= 8;
4133 }
4134 }
4135 else
4136 {
4137 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4138 arbitrary shifts. */
4139 i -= TARGET_ARM ? 2 : 1;
4140 continue;
4141 }
4142
4143 /* Next, see if we can do a better job with a thumb2 replicated
4144 constant.
4145
4146 We do it this way around to catch the cases like 0x01F001E0 where
4147 two 8-bit immediates would work, but a replicated constant would
4148 make it worse.
4149
4150 TODO: 16-bit constants that don't clear all the bits, but still win.
4151 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4152 if (TARGET_THUMB2)
4153 {
4154 b1 = (remainder & 0xff000000) >> 24;
4155 b2 = (remainder & 0x00ff0000) >> 16;
4156 b3 = (remainder & 0x0000ff00) >> 8;
4157 b4 = remainder & 0xff;
4158
4159 if (loc > 24)
4160 {
4161 /* The 8-bit immediate already found clears b1 (and maybe b2),
4162 but must leave b3 and b4 alone. */
4163
4164 /* First try to find a 32-bit replicated constant that clears
4165 almost everything. We can assume that we can't do it in one,
4166 or else we wouldn't be here. */
4167 unsigned int tmp = b1 & b2 & b3 & b4;
4168 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4169 + (tmp << 24);
4170 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4171 + (tmp == b3) + (tmp == b4);
4172 if (tmp
4173 && (matching_bytes >= 3
4174 || (matching_bytes == 2
4175 && const_ok_for_op (remainder & ~tmp2, code))))
4176 {
4177 /* At least 3 of the bytes match, and the fourth has at
4178 least as many bits set, or two of the bytes match
4179 and it will only require one more insn to finish. */
4180 result = tmp2;
4181 i = tmp != b1 ? 32
4182 : tmp != b2 ? 24
4183 : tmp != b3 ? 16
4184 : 8;
4185 }
4186
4187 /* Second, try to find a 16-bit replicated constant that can
4188 leave three of the bytes clear. If b2 or b4 is already
4189 zero, then we can. If the 8-bit from above would not
4190 clear b2 anyway, then we still win. */
4191 else if (b1 == b3 && (!b2 || !b4
4192 || (remainder & 0x00ff0000 & ~result)))
4193 {
4194 result = remainder & 0xff00ff00;
4195 i = 24;
4196 }
4197 }
4198 else if (loc > 16)
4199 {
4200 /* The 8-bit immediate already found clears b2 (and maybe b3)
4201 and we don't get here unless b1 is already clear, but it will
4202 leave b4 unchanged. */
4203
4204 /* If we can clear b2 and b4 at once, then we win, since the
4205 8-bits couldn't possibly reach that far. */
4206 if (b2 == b4)
4207 {
4208 result = remainder & 0x00ff00ff;
4209 i = 16;
4210 }
4211 }
4212 }
4213
4214 return_sequence->i[insns++] = result;
4215 remainder &= ~result;
4216
4217 if (code == SET || code == MINUS)
4218 code = PLUS;
4219 }
4220 while (remainder);
4221
4222 return insns;
4223 }
4224
4225 /* Emit an instruction with the indicated PATTERN. If COND is
4226 non-NULL, conditionalize the execution of the instruction on COND
4227 being true. */
4228
4229 static void
4230 emit_constant_insn (rtx cond, rtx pattern)
4231 {
4232 if (cond)
4233 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4234 emit_insn (pattern);
4235 }
4236
4237 /* As above, but extra parameter GENERATE which, if clear, suppresses
4238 RTL generation. */
4239
4240 static int
4241 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4242 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4243 int subtargets, int generate)
4244 {
4245 int can_invert = 0;
4246 int can_negate = 0;
4247 int final_invert = 0;
4248 int i;
4249 int set_sign_bit_copies = 0;
4250 int clear_sign_bit_copies = 0;
4251 int clear_zero_bit_copies = 0;
4252 int set_zero_bit_copies = 0;
4253 int insns = 0, neg_insns, inv_insns;
4254 unsigned HOST_WIDE_INT temp1, temp2;
4255 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4256 struct four_ints *immediates;
4257 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4258
4259 /* Find out which operations are safe for a given CODE. Also do a quick
4260 check for degenerate cases; these can occur when DImode operations
4261 are split. */
4262 switch (code)
4263 {
4264 case SET:
4265 can_invert = 1;
4266 break;
4267
4268 case PLUS:
4269 can_negate = 1;
4270 break;
4271
4272 case IOR:
4273 if (remainder == 0xffffffff)
4274 {
4275 if (generate)
4276 emit_constant_insn (cond,
4277 gen_rtx_SET (target,
4278 GEN_INT (ARM_SIGN_EXTEND (val))));
4279 return 1;
4280 }
4281
4282 if (remainder == 0)
4283 {
4284 if (reload_completed && rtx_equal_p (target, source))
4285 return 0;
4286
4287 if (generate)
4288 emit_constant_insn (cond, gen_rtx_SET (target, source));
4289 return 1;
4290 }
4291 break;
4292
4293 case AND:
4294 if (remainder == 0)
4295 {
4296 if (generate)
4297 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4298 return 1;
4299 }
4300 if (remainder == 0xffffffff)
4301 {
4302 if (reload_completed && rtx_equal_p (target, source))
4303 return 0;
4304 if (generate)
4305 emit_constant_insn (cond, gen_rtx_SET (target, source));
4306 return 1;
4307 }
4308 can_invert = 1;
4309 break;
4310
4311 case XOR:
4312 if (remainder == 0)
4313 {
4314 if (reload_completed && rtx_equal_p (target, source))
4315 return 0;
4316 if (generate)
4317 emit_constant_insn (cond, gen_rtx_SET (target, source));
4318 return 1;
4319 }
4320
4321 if (remainder == 0xffffffff)
4322 {
4323 if (generate)
4324 emit_constant_insn (cond,
4325 gen_rtx_SET (target,
4326 gen_rtx_NOT (mode, source)));
4327 return 1;
4328 }
4329 final_invert = 1;
4330 break;
4331
4332 case MINUS:
4333 /* We treat MINUS as (val - source), since (source - val) is always
4334 passed as (source + (-val)). */
4335 if (remainder == 0)
4336 {
4337 if (generate)
4338 emit_constant_insn (cond,
4339 gen_rtx_SET (target,
4340 gen_rtx_NEG (mode, source)));
4341 return 1;
4342 }
4343 if (const_ok_for_arm (val))
4344 {
4345 if (generate)
4346 emit_constant_insn (cond,
4347 gen_rtx_SET (target,
4348 gen_rtx_MINUS (mode, GEN_INT (val),
4349 source)));
4350 return 1;
4351 }
4352
4353 break;
4354
4355 default:
4356 gcc_unreachable ();
4357 }
4358
4359 /* If we can do it in one insn get out quickly. */
4360 if (const_ok_for_op (val, code))
4361 {
4362 if (generate)
4363 emit_constant_insn (cond,
4364 gen_rtx_SET (target,
4365 (source
4366 ? gen_rtx_fmt_ee (code, mode, source,
4367 GEN_INT (val))
4368 : GEN_INT (val))));
4369 return 1;
4370 }
4371
4372 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4373 insn. */
4374 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4375 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4376 {
4377 if (generate)
4378 {
4379 if (mode == SImode && i == 16)
4380 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4381 smaller insn. */
4382 emit_constant_insn (cond,
4383 gen_zero_extendhisi2
4384 (target, gen_lowpart (HImode, source)));
4385 else
4386 /* Extz only supports SImode, but we can coerce the operands
4387 into that mode. */
4388 emit_constant_insn (cond,
4389 gen_extzv_t2 (gen_lowpart (SImode, target),
4390 gen_lowpart (SImode, source),
4391 GEN_INT (i), const0_rtx));
4392 }
4393
4394 return 1;
4395 }
4396
4397 /* Calculate a few attributes that may be useful for specific
4398 optimizations. */
4399 /* Count number of leading zeros. */
4400 for (i = 31; i >= 0; i--)
4401 {
4402 if ((remainder & (1 << i)) == 0)
4403 clear_sign_bit_copies++;
4404 else
4405 break;
4406 }
4407
4408 /* Count number of leading 1's. */
4409 for (i = 31; i >= 0; i--)
4410 {
4411 if ((remainder & (1 << i)) != 0)
4412 set_sign_bit_copies++;
4413 else
4414 break;
4415 }
4416
4417 /* Count number of trailing zero's. */
4418 for (i = 0; i <= 31; i++)
4419 {
4420 if ((remainder & (1 << i)) == 0)
4421 clear_zero_bit_copies++;
4422 else
4423 break;
4424 }
4425
4426 /* Count number of trailing 1's. */
4427 for (i = 0; i <= 31; i++)
4428 {
4429 if ((remainder & (1 << i)) != 0)
4430 set_zero_bit_copies++;
4431 else
4432 break;
4433 }
4434
4435 switch (code)
4436 {
4437 case SET:
4438 /* See if we can do this by sign_extending a constant that is known
4439 to be negative. This is a good way of doing it, since the shift
4440 may well merge into a subsequent insn. */
4441 if (set_sign_bit_copies > 1)
4442 {
4443 if (const_ok_for_arm
4444 (temp1 = ARM_SIGN_EXTEND (remainder
4445 << (set_sign_bit_copies - 1))))
4446 {
4447 if (generate)
4448 {
4449 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4450 emit_constant_insn (cond,
4451 gen_rtx_SET (new_src, GEN_INT (temp1)));
4452 emit_constant_insn (cond,
4453 gen_ashrsi3 (target, new_src,
4454 GEN_INT (set_sign_bit_copies - 1)));
4455 }
4456 return 2;
4457 }
4458 /* For an inverted constant, we will need to set the low bits,
4459 these will be shifted out of harm's way. */
4460 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4461 if (const_ok_for_arm (~temp1))
4462 {
4463 if (generate)
4464 {
4465 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4466 emit_constant_insn (cond,
4467 gen_rtx_SET (new_src, GEN_INT (temp1)));
4468 emit_constant_insn (cond,
4469 gen_ashrsi3 (target, new_src,
4470 GEN_INT (set_sign_bit_copies - 1)));
4471 }
4472 return 2;
4473 }
4474 }
4475
4476 /* See if we can calculate the value as the difference between two
4477 valid immediates. */
4478 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4479 {
4480 int topshift = clear_sign_bit_copies & ~1;
4481
4482 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4483 & (0xff000000 >> topshift));
4484
4485 /* If temp1 is zero, then that means the 9 most significant
4486 bits of remainder were 1 and we've caused it to overflow.
4487 When topshift is 0 we don't need to do anything since we
4488 can borrow from 'bit 32'. */
4489 if (temp1 == 0 && topshift != 0)
4490 temp1 = 0x80000000 >> (topshift - 1);
4491
4492 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4493
4494 if (const_ok_for_arm (temp2))
4495 {
4496 if (generate)
4497 {
4498 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4499 emit_constant_insn (cond,
4500 gen_rtx_SET (new_src, GEN_INT (temp1)));
4501 emit_constant_insn (cond,
4502 gen_addsi3 (target, new_src,
4503 GEN_INT (-temp2)));
4504 }
4505
4506 return 2;
4507 }
4508 }
4509
4510 /* See if we can generate this by setting the bottom (or the top)
4511 16 bits, and then shifting these into the other half of the
4512 word. We only look for the simplest cases, to do more would cost
4513 too much. Be careful, however, not to generate this when the
4514 alternative would take fewer insns. */
4515 if (val & 0xffff0000)
4516 {
4517 temp1 = remainder & 0xffff0000;
4518 temp2 = remainder & 0x0000ffff;
4519
4520 /* Overlaps outside this range are best done using other methods. */
4521 for (i = 9; i < 24; i++)
4522 {
4523 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4524 && !const_ok_for_arm (temp2))
4525 {
4526 rtx new_src = (subtargets
4527 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4528 : target);
4529 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4530 source, subtargets, generate);
4531 source = new_src;
4532 if (generate)
4533 emit_constant_insn
4534 (cond,
4535 gen_rtx_SET
4536 (target,
4537 gen_rtx_IOR (mode,
4538 gen_rtx_ASHIFT (mode, source,
4539 GEN_INT (i)),
4540 source)));
4541 return insns + 1;
4542 }
4543 }
4544
4545 /* Don't duplicate cases already considered. */
4546 for (i = 17; i < 24; i++)
4547 {
4548 if (((temp1 | (temp1 >> i)) == remainder)
4549 && !const_ok_for_arm (temp1))
4550 {
4551 rtx new_src = (subtargets
4552 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4553 : target);
4554 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4555 source, subtargets, generate);
4556 source = new_src;
4557 if (generate)
4558 emit_constant_insn
4559 (cond,
4560 gen_rtx_SET (target,
4561 gen_rtx_IOR
4562 (mode,
4563 gen_rtx_LSHIFTRT (mode, source,
4564 GEN_INT (i)),
4565 source)));
4566 return insns + 1;
4567 }
4568 }
4569 }
4570 break;
4571
4572 case IOR:
4573 case XOR:
4574 /* If we have IOR or XOR, and the constant can be loaded in a
4575 single instruction, and we can find a temporary to put it in,
4576 then this can be done in two instructions instead of 3-4. */
4577 if (subtargets
4578 /* TARGET can't be NULL if SUBTARGETS is 0 */
4579 || (reload_completed && !reg_mentioned_p (target, source)))
4580 {
4581 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4582 {
4583 if (generate)
4584 {
4585 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4586
4587 emit_constant_insn (cond,
4588 gen_rtx_SET (sub, GEN_INT (val)));
4589 emit_constant_insn (cond,
4590 gen_rtx_SET (target,
4591 gen_rtx_fmt_ee (code, mode,
4592 source, sub)));
4593 }
4594 return 2;
4595 }
4596 }
4597
4598 if (code == XOR)
4599 break;
4600
4601 /* Convert.
4602 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4603 followed by 0s, e.g. 0xfff00000)
4604 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4605
4606 This can be done in 2 instructions by using shifts with mov or mvn.
4607 e.g. for
4608 x = x | 0xfff00000;
4609 we generate.
4610 mvn r0, r0, asl #12
4611 mvn r0, r0, lsr #12 */
4612 if (set_sign_bit_copies > 8
4613 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4614 {
4615 if (generate)
4616 {
4617 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4618 rtx shift = GEN_INT (set_sign_bit_copies);
4619
4620 emit_constant_insn
4621 (cond,
4622 gen_rtx_SET (sub,
4623 gen_rtx_NOT (mode,
4624 gen_rtx_ASHIFT (mode,
4625 source,
4626 shift))));
4627 emit_constant_insn
4628 (cond,
4629 gen_rtx_SET (target,
4630 gen_rtx_NOT (mode,
4631 gen_rtx_LSHIFTRT (mode, sub,
4632 shift))));
4633 }
4634 return 2;
4635 }
4636
4637 /* Convert
4638 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4639 to
4640 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4641
4642 E.g. for r0 = r0 | 0xfff
4643 mvn r0, r0, lsr #12
4644 mvn r0, r0, asl #12
4645
4646 */
4647 if (set_zero_bit_copies > 8
4648 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4649 {
4650 if (generate)
4651 {
4652 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4653 rtx shift = GEN_INT (set_zero_bit_copies);
4654
4655 emit_constant_insn
4656 (cond,
4657 gen_rtx_SET (sub,
4658 gen_rtx_NOT (mode,
4659 gen_rtx_LSHIFTRT (mode,
4660 source,
4661 shift))));
4662 emit_constant_insn
4663 (cond,
4664 gen_rtx_SET (target,
4665 gen_rtx_NOT (mode,
4666 gen_rtx_ASHIFT (mode, sub,
4667 shift))));
4668 }
4669 return 2;
4670 }
4671
4672 /* This will never be reached for Thumb2 because orn is a valid
4673 instruction. This is for Thumb1 and the ARM 32 bit cases.
4674
4675 x = y | constant (such that ~constant is a valid constant)
4676 Transform this to
4677 x = ~(~y & ~constant).
4678 */
4679 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4680 {
4681 if (generate)
4682 {
4683 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4684 emit_constant_insn (cond,
4685 gen_rtx_SET (sub,
4686 gen_rtx_NOT (mode, source)));
4687 source = sub;
4688 if (subtargets)
4689 sub = gen_reg_rtx (mode);
4690 emit_constant_insn (cond,
4691 gen_rtx_SET (sub,
4692 gen_rtx_AND (mode, source,
4693 GEN_INT (temp1))));
4694 emit_constant_insn (cond,
4695 gen_rtx_SET (target,
4696 gen_rtx_NOT (mode, sub)));
4697 }
4698 return 3;
4699 }
4700 break;
4701
4702 case AND:
4703 /* See if two shifts will do 2 or more insn's worth of work. */
4704 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4705 {
4706 HOST_WIDE_INT shift_mask = ((0xffffffff
4707 << (32 - clear_sign_bit_copies))
4708 & 0xffffffff);
4709
4710 if ((remainder | shift_mask) != 0xffffffff)
4711 {
4712 HOST_WIDE_INT new_val
4713 = ARM_SIGN_EXTEND (remainder | shift_mask);
4714
4715 if (generate)
4716 {
4717 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4718 insns = arm_gen_constant (AND, SImode, cond, new_val,
4719 new_src, source, subtargets, 1);
4720 source = new_src;
4721 }
4722 else
4723 {
4724 rtx targ = subtargets ? NULL_RTX : target;
4725 insns = arm_gen_constant (AND, mode, cond, new_val,
4726 targ, source, subtargets, 0);
4727 }
4728 }
4729
4730 if (generate)
4731 {
4732 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4733 rtx shift = GEN_INT (clear_sign_bit_copies);
4734
4735 emit_insn (gen_ashlsi3 (new_src, source, shift));
4736 emit_insn (gen_lshrsi3 (target, new_src, shift));
4737 }
4738
4739 return insns + 2;
4740 }
4741
4742 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4743 {
4744 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4745
4746 if ((remainder | shift_mask) != 0xffffffff)
4747 {
4748 HOST_WIDE_INT new_val
4749 = ARM_SIGN_EXTEND (remainder | shift_mask);
4750 if (generate)
4751 {
4752 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4753
4754 insns = arm_gen_constant (AND, mode, cond, new_val,
4755 new_src, source, subtargets, 1);
4756 source = new_src;
4757 }
4758 else
4759 {
4760 rtx targ = subtargets ? NULL_RTX : target;
4761
4762 insns = arm_gen_constant (AND, mode, cond, new_val,
4763 targ, source, subtargets, 0);
4764 }
4765 }
4766
4767 if (generate)
4768 {
4769 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4770 rtx shift = GEN_INT (clear_zero_bit_copies);
4771
4772 emit_insn (gen_lshrsi3 (new_src, source, shift));
4773 emit_insn (gen_ashlsi3 (target, new_src, shift));
4774 }
4775
4776 return insns + 2;
4777 }
4778
4779 break;
4780
4781 default:
4782 break;
4783 }
4784
4785 /* Calculate what the instruction sequences would be if we generated it
4786 normally, negated, or inverted. */
4787 if (code == AND)
4788 /* AND cannot be split into multiple insns, so invert and use BIC. */
4789 insns = 99;
4790 else
4791 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4792
4793 if (can_negate)
4794 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4795 &neg_immediates);
4796 else
4797 neg_insns = 99;
4798
4799 if (can_invert || final_invert)
4800 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4801 &inv_immediates);
4802 else
4803 inv_insns = 99;
4804
4805 immediates = &pos_immediates;
4806
4807 /* Is the negated immediate sequence more efficient? */
4808 if (neg_insns < insns && neg_insns <= inv_insns)
4809 {
4810 insns = neg_insns;
4811 immediates = &neg_immediates;
4812 }
4813 else
4814 can_negate = 0;
4815
4816 /* Is the inverted immediate sequence more efficient?
4817 We must allow for an extra NOT instruction for XOR operations, although
4818 there is some chance that the final 'mvn' will get optimized later. */
4819 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4820 {
4821 insns = inv_insns;
4822 immediates = &inv_immediates;
4823 }
4824 else
4825 {
4826 can_invert = 0;
4827 final_invert = 0;
4828 }
4829
4830 /* Now output the chosen sequence as instructions. */
4831 if (generate)
4832 {
4833 for (i = 0; i < insns; i++)
4834 {
4835 rtx new_src, temp1_rtx;
4836
4837 temp1 = immediates->i[i];
4838
4839 if (code == SET || code == MINUS)
4840 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4841 else if ((final_invert || i < (insns - 1)) && subtargets)
4842 new_src = gen_reg_rtx (mode);
4843 else
4844 new_src = target;
4845
4846 if (can_invert)
4847 temp1 = ~temp1;
4848 else if (can_negate)
4849 temp1 = -temp1;
4850
4851 temp1 = trunc_int_for_mode (temp1, mode);
4852 temp1_rtx = GEN_INT (temp1);
4853
4854 if (code == SET)
4855 ;
4856 else if (code == MINUS)
4857 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4858 else
4859 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4860
4861 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4862 source = new_src;
4863
4864 if (code == SET)
4865 {
4866 can_negate = can_invert;
4867 can_invert = 0;
4868 code = PLUS;
4869 }
4870 else if (code == MINUS)
4871 code = PLUS;
4872 }
4873 }
4874
4875 if (final_invert)
4876 {
4877 if (generate)
4878 emit_constant_insn (cond, gen_rtx_SET (target,
4879 gen_rtx_NOT (mode, source)));
4880 insns++;
4881 }
4882
4883 return insns;
4884 }
4885
4886 /* Canonicalize a comparison so that we are more likely to recognize it.
4887 This can be done for a few constant compares, where we can make the
4888 immediate value easier to load. */
4889
4890 static void
4891 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4892 bool op0_preserve_value)
4893 {
4894 machine_mode mode;
4895 unsigned HOST_WIDE_INT i, maxval;
4896
4897 mode = GET_MODE (*op0);
4898 if (mode == VOIDmode)
4899 mode = GET_MODE (*op1);
4900
4901 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4902
4903 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4904 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4905 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4906 for GTU/LEU in Thumb mode. */
4907 if (mode == DImode)
4908 {
4909
4910 if (*code == GT || *code == LE
4911 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4912 {
4913 /* Missing comparison. First try to use an available
4914 comparison. */
4915 if (CONST_INT_P (*op1))
4916 {
4917 i = INTVAL (*op1);
4918 switch (*code)
4919 {
4920 case GT:
4921 case LE:
4922 if (i != maxval
4923 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4924 {
4925 *op1 = GEN_INT (i + 1);
4926 *code = *code == GT ? GE : LT;
4927 return;
4928 }
4929 break;
4930 case GTU:
4931 case LEU:
4932 if (i != ~((unsigned HOST_WIDE_INT) 0)
4933 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4934 {
4935 *op1 = GEN_INT (i + 1);
4936 *code = *code == GTU ? GEU : LTU;
4937 return;
4938 }
4939 break;
4940 default:
4941 gcc_unreachable ();
4942 }
4943 }
4944
4945 /* If that did not work, reverse the condition. */
4946 if (!op0_preserve_value)
4947 {
4948 std::swap (*op0, *op1);
4949 *code = (int)swap_condition ((enum rtx_code)*code);
4950 }
4951 }
4952 return;
4953 }
4954
4955 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4956 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4957 to facilitate possible combining with a cmp into 'ands'. */
4958 if (mode == SImode
4959 && GET_CODE (*op0) == ZERO_EXTEND
4960 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4961 && GET_MODE (XEXP (*op0, 0)) == QImode
4962 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4963 && subreg_lowpart_p (XEXP (*op0, 0))
4964 && *op1 == const0_rtx)
4965 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4966 GEN_INT (255));
4967
4968 /* Comparisons smaller than DImode. Only adjust comparisons against
4969 an out-of-range constant. */
4970 if (!CONST_INT_P (*op1)
4971 || const_ok_for_arm (INTVAL (*op1))
4972 || const_ok_for_arm (- INTVAL (*op1)))
4973 return;
4974
4975 i = INTVAL (*op1);
4976
4977 switch (*code)
4978 {
4979 case EQ:
4980 case NE:
4981 return;
4982
4983 case GT:
4984 case LE:
4985 if (i != maxval
4986 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4987 {
4988 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4989 *code = *code == GT ? GE : LT;
4990 return;
4991 }
4992 break;
4993
4994 case GE:
4995 case LT:
4996 if (i != ~maxval
4997 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4998 {
4999 *op1 = GEN_INT (i - 1);
5000 *code = *code == GE ? GT : LE;
5001 return;
5002 }
5003 break;
5004
5005 case GTU:
5006 case LEU:
5007 if (i != ~((unsigned HOST_WIDE_INT) 0)
5008 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5009 {
5010 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5011 *code = *code == GTU ? GEU : LTU;
5012 return;
5013 }
5014 break;
5015
5016 case GEU:
5017 case LTU:
5018 if (i != 0
5019 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5020 {
5021 *op1 = GEN_INT (i - 1);
5022 *code = *code == GEU ? GTU : LEU;
5023 return;
5024 }
5025 break;
5026
5027 default:
5028 gcc_unreachable ();
5029 }
5030 }
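 /* A worked example (illustrative constants): for a comparison
 (GT r0 0x03ffffff) the constant is not a valid ARM immediate, but
 0x04000000 is (0x04 rotated right by 8), so the code above rewrites the
 test as (GE r0 0x04000000), which needs only a single CMP against an
 encodable immediate. */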
5031
5032
5033 /* Define how to find the value returned by a function. */
5034
5035 static rtx
5036 arm_function_value(const_tree type, const_tree func,
5037 bool outgoing ATTRIBUTE_UNUSED)
5038 {
5039 machine_mode mode;
5040 int unsignedp ATTRIBUTE_UNUSED;
5041 rtx r ATTRIBUTE_UNUSED;
5042
5043 mode = TYPE_MODE (type);
5044
5045 if (TARGET_AAPCS_BASED)
5046 return aapcs_allocate_return_reg (mode, type, func);
5047
5048 /* Promote integer types. */
5049 if (INTEGRAL_TYPE_P (type))
5050 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5051
5052 /* Promote small structs returned in a register to full-word
5053 for big-endian AAPCS. */
5054 if (arm_return_in_msb (type))
5055 {
5056 HOST_WIDE_INT size = int_size_in_bytes (type);
5057 if (size % UNITS_PER_WORD != 0)
5058 {
5059 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5060 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5061 }
5062 }
5063
5064 return arm_libcall_value_1 (mode);
5065 }
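 /* For example (illustrative): for a non-AAPCS function returning a
 'short', the integral promotion above widens the return mode to SImode,
 so the value comes back as a full word in r0 via arm_libcall_value_1. */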
5066
5067 /* libcall hashtable helpers. */
5068
5069 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5070 {
5071 static inline hashval_t hash (const rtx_def *);
5072 static inline bool equal (const rtx_def *, const rtx_def *);
5073 static inline void remove (rtx_def *);
5074 };
5075
5076 inline bool
5077 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5078 {
5079 return rtx_equal_p (p1, p2);
5080 }
5081
5082 inline hashval_t
5083 libcall_hasher::hash (const rtx_def *p1)
5084 {
5085 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5086 }
5087
5088 typedef hash_table<libcall_hasher> libcall_table_type;
5089
5090 static void
5091 add_libcall (libcall_table_type *htab, rtx libcall)
5092 {
5093 *htab->find_slot (libcall, INSERT) = libcall;
5094 }
5095
5096 static bool
5097 arm_libcall_uses_aapcs_base (const_rtx libcall)
5098 {
5099 static bool init_done = false;
5100 static libcall_table_type *libcall_htab = NULL;
5101
5102 if (!init_done)
5103 {
5104 init_done = true;
5105
5106 libcall_htab = new libcall_table_type (31);
5107 add_libcall (libcall_htab,
5108 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5109 add_libcall (libcall_htab,
5110 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5111 add_libcall (libcall_htab,
5112 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5113 add_libcall (libcall_htab,
5114 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5115
5116 add_libcall (libcall_htab,
5117 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5118 add_libcall (libcall_htab,
5119 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5120 add_libcall (libcall_htab,
5121 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5122 add_libcall (libcall_htab,
5123 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5124
5125 add_libcall (libcall_htab,
5126 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5127 add_libcall (libcall_htab,
5128 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5129 add_libcall (libcall_htab,
5130 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5131 add_libcall (libcall_htab,
5132 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5133 add_libcall (libcall_htab,
5134 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5135 add_libcall (libcall_htab,
5136 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5137 add_libcall (libcall_htab,
5138 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5139 add_libcall (libcall_htab,
5140 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5141
5142 /* Values from double-precision helper functions are returned in core
5143 registers if the selected core only supports single-precision
5144 arithmetic, even if we are using the hard-float ABI. The same is
5145 true for single-precision helpers, but we will never be using the
5146 hard-float ABI on a CPU which doesn't support single-precision
5147 operations in hardware. */
5148 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5149 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5150 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5151 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5152 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5153 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5154 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5155 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5156 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5157 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5158 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5159 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5160 SFmode));
5161 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5162 DFmode));
5163 }
5164
5165 return libcall && libcall_htab->find (libcall) != NULL;
5166 }
5167
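/* Return the location (a REG, or a PARALLEL of registers) in which a
library call returns a value of mode MODE under the default rules for
the current ABI. */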
5168 static rtx
5169 arm_libcall_value_1 (machine_mode mode)
5170 {
5171 if (TARGET_AAPCS_BASED)
5172 return aapcs_libcall_value (mode);
5173 else if (TARGET_IWMMXT_ABI
5174 && arm_vector_mode_supported_p (mode))
5175 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5176 else
5177 return gen_rtx_REG (mode, ARG_REGISTER (1));
5178 }
5179
5180 /* Define how to find the value returned by a library function
5181 assuming the value has mode MODE. */
5182
5183 static rtx
5184 arm_libcall_value (machine_mode mode, const_rtx libcall)
5185 {
5186 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5187 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5188 {
5189 /* The following libcalls return their result in integer registers,
5190 even though they return a floating point value. */
5191 if (arm_libcall_uses_aapcs_base (libcall))
5192 return gen_rtx_REG (mode, ARG_REGISTER (1));
5194 }
5195
5196 return arm_libcall_value_1 (mode);
5197 }
5198
5199 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5200
5201 static bool
5202 arm_function_value_regno_p (const unsigned int regno)
5203 {
5204 if (regno == ARG_REGISTER (1)
5205 || (TARGET_32BIT
5206 && TARGET_AAPCS_BASED
5207 && TARGET_VFP
5208 && TARGET_HARD_FLOAT
5209 && regno == FIRST_VFP_REGNUM)
5210 || (TARGET_IWMMXT_ABI
5211 && regno == FIRST_IWMMXT_REGNUM))
5212 return true;
5213
5214 return false;
5215 }
5216
5217 /* Determine the amount of memory needed to store the possible return
5218 registers of an untyped call. */
5219 int
5220 arm_apply_result_size (void)
5221 {
5222 int size = 16;
5223
5224 if (TARGET_32BIT)
5225 {
5226 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5227 size += 32;
5228 if (TARGET_IWMMXT_ABI)
5229 size += 8;
5230 }
5231
5232 return size;
5233 }
5234
5235 /* Decide whether TYPE should be returned in memory (true)
5236 or in a register (false). FNTYPE is the type of the function making
5237 the call. */
5238 static bool
5239 arm_return_in_memory (const_tree type, const_tree fntype)
5240 {
5241 HOST_WIDE_INT size;
5242
5243 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5244
5245 if (TARGET_AAPCS_BASED)
5246 {
5247 /* Simple, non-aggregate types (i.e. not including vectors and
5248 complex) are always returned in a register (or registers).
5249 We don't care about which register here, so we can short-cut
5250 some of the detail. */
5251 if (!AGGREGATE_TYPE_P (type)
5252 && TREE_CODE (type) != VECTOR_TYPE
5253 && TREE_CODE (type) != COMPLEX_TYPE)
5254 return false;
5255
5256 /* Any return value that is no larger than one word can be
5257 returned in r0. */
5258 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5259 return false;
5260
5261 /* Check any available co-processors to see if they accept the
5262 type as a register candidate (VFP, for example, can return
5263 some aggregates in consecutive registers). These aren't
5264 available if the call is variadic. */
5265 if (aapcs_select_return_coproc (type, fntype) >= 0)
5266 return false;
5267
5268 /* Vector values should be returned using ARM registers, not
5269 memory (unless they're over 16 bytes, which will break since
5270 we only have four call-clobbered registers to play with). */
5271 if (TREE_CODE (type) == VECTOR_TYPE)
5272 return (size < 0 || size > (4 * UNITS_PER_WORD));
5273
5274 /* The rest go in memory. */
5275 return true;
5276 }
5277
5278 if (TREE_CODE (type) == VECTOR_TYPE)
5279 return (size < 0 || size > (4 * UNITS_PER_WORD));
5280
5281 if (!AGGREGATE_TYPE_P (type)
5282 && (TREE_CODE (type) != VECTOR_TYPE))
5283 /* All simple types are returned in registers. */
5284 return false;
5285
5286 if (arm_abi != ARM_ABI_APCS)
5287 {
5288 /* ATPCS and later return aggregate types in memory only if they are
5289 larger than a word (or are variable size). */
5290 return (size < 0 || size > UNITS_PER_WORD);
5291 }
5292
5293 /* For the arm-wince targets we choose to be compatible with Microsoft's
5294 ARM and Thumb compilers, which always return aggregates in memory. */
5295 #ifndef ARM_WINCE
5296 /* All structures/unions bigger than one word are returned in memory.
5297 Also catch the case where int_size_in_bytes returns -1. In this case
5298 the aggregate is either huge or of variable size, and in either case
5299 we will want to return it via memory and not in a register. */
5300 if (size < 0 || size > UNITS_PER_WORD)
5301 return true;
5302
5303 if (TREE_CODE (type) == RECORD_TYPE)
5304 {
5305 tree field;
5306
5307 /* For a struct the APCS says that we only return in a register
5308 if the type is 'integer like' and every addressable element
5309 has an offset of zero. For practical purposes this means
5310 that the structure can have at most one non bit-field element
5311 and that this element must be the first one in the structure. */
5312
5313 /* Find the first field, ignoring non FIELD_DECL things which will
5314 have been created by C++. */
5315 for (field = TYPE_FIELDS (type);
5316 field && TREE_CODE (field) != FIELD_DECL;
5317 field = DECL_CHAIN (field))
5318 continue;
5319
5320 if (field == NULL)
5321 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5322
5323 /* Check that the first field is valid for returning in a register. */
5324
5325 /* ... Floats are not allowed */
5326 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5327 return true;
5328
5329 /* ... Aggregates that are not themselves valid for returning in
5330 a register are not allowed. */
5331 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5332 return true;
5333
5334 /* Now check the remaining fields, if any. Only bitfields are allowed,
5335 since they are not addressable. */
5336 for (field = DECL_CHAIN (field);
5337 field;
5338 field = DECL_CHAIN (field))
5339 {
5340 if (TREE_CODE (field) != FIELD_DECL)
5341 continue;
5342
5343 if (!DECL_BIT_FIELD_TYPE (field))
5344 return true;
5345 }
5346
5347 return false;
5348 }
5349
5350 if (TREE_CODE (type) == UNION_TYPE)
5351 {
5352 tree field;
5353
5354 /* Unions can be returned in registers if every element is
5355 integral, or can be returned in an integer register. */
5356 for (field = TYPE_FIELDS (type);
5357 field;
5358 field = DECL_CHAIN (field))
5359 {
5360 if (TREE_CODE (field) != FIELD_DECL)
5361 continue;
5362
5363 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5364 return true;
5365
5366 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5367 return true;
5368 }
5369
5370 return false;
5371 }
5372 #endif /* not ARM_WINCE */
5373
5374 /* Return all other types in memory. */
5375 return true;
5376 }
5377
5378 const struct pcs_attribute_arg
5379 {
5380 const char *arg;
5381 enum arm_pcs value;
5382 } pcs_attribute_args[] =
5383 {
5384 {"aapcs", ARM_PCS_AAPCS},
5385 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5386 #if 0
5387 /* We could recognize these, but changes would be needed elsewhere
5388 * to implement them. */
5389 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5390 {"atpcs", ARM_PCS_ATPCS},
5391 {"apcs", ARM_PCS_APCS},
5392 #endif
5393 {NULL, ARM_PCS_UNKNOWN}
5394 };
5395
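/* Decode the argument of a "pcs" attribute (e.g.
__attribute__ ((pcs ("aapcs-vfp")))) and return the corresponding
arm_pcs value, or ARM_PCS_UNKNOWN if the argument is not recognized. */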
5396 static enum arm_pcs
5397 arm_pcs_from_attribute (tree attr)
5398 {
5399 const struct pcs_attribute_arg *ptr;
5400 const char *arg;
5401
5402 /* Get the value of the argument. */
5403 if (TREE_VALUE (attr) == NULL_TREE
5404 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5405 return ARM_PCS_UNKNOWN;
5406
5407 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5408
5409 /* Check it against the list of known arguments. */
5410 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5411 if (streq (arg, ptr->arg))
5412 return ptr->value;
5413
5414 /* An unrecognized PCS variant. */
5415 return ARM_PCS_UNKNOWN;
5416 }
5417
5418 /* Get the PCS variant to use for this call. TYPE is the function's type
5419 specification, and DECL is the specific declaration. DECL may be null if
5420 the call could be indirect or if this is a library call. */
5421 static enum arm_pcs
5422 arm_get_pcs_model (const_tree type, const_tree decl)
5423 {
5424 bool user_convention = false;
5425 enum arm_pcs user_pcs = arm_pcs_default;
5426 tree attr;
5427
5428 gcc_assert (type);
5429
5430 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5431 if (attr)
5432 {
5433 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5434 user_convention = true;
5435 }
5436
5437 if (TARGET_AAPCS_BASED)
5438 {
5439 /* Detect varargs functions. These always use the base rules
5440 (no argument is ever a candidate for a co-processor
5441 register). */
5442 bool base_rules = stdarg_p (type);
5443
5444 if (user_convention)
5445 {
5446 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5447 sorry ("non-AAPCS derived PCS variant");
5448 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5449 error ("variadic functions must use the base AAPCS variant");
5450 }
5451
5452 if (base_rules)
5453 return ARM_PCS_AAPCS;
5454 else if (user_convention)
5455 return user_pcs;
5456 else if (decl && flag_unit_at_a_time)
5457 {
5458 /* Local functions never leak outside this compilation unit,
5459 so we are free to use whatever conventions are
5460 appropriate. */
5461 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5462 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5463 if (i && i->local)
5464 return ARM_PCS_AAPCS_LOCAL;
5465 }
5466 }
5467 else if (user_convention && user_pcs != arm_pcs_default)
5468 sorry ("PCS variant");
5469
5470 /* For everything else we use the target's default. */
5471 return arm_pcs_default;
5472 }
5473
5474
5475 static void
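/* Initialize the VFP co-processor state in PCUM: mark all VFP argument
registers as unallocated. */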
5476 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5477 const_tree fntype ATTRIBUTE_UNUSED,
5478 rtx libcall ATTRIBUTE_UNUSED,
5479 const_tree fndecl ATTRIBUTE_UNUSED)
5480 {
5481 /* Record the unallocated VFP registers. */
5482 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5483 pcum->aapcs_vfp_reg_alloc = 0;
5484 }
5485
5486 /* Walk down the type tree of TYPE counting consecutive base elements.
5487 If *MODEP is VOIDmode, then set it to the first valid floating point
5488 type. If a non-floating point type is found, or if a floating point
5489 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5490 otherwise return the count in the sub-tree. */
5491 static int
5492 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5493 {
5494 machine_mode mode;
5495 HOST_WIDE_INT size;
5496
5497 switch (TREE_CODE (type))
5498 {
5499 case REAL_TYPE:
5500 mode = TYPE_MODE (type);
5501 if (mode != DFmode && mode != SFmode)
5502 return -1;
5503
5504 if (*modep == VOIDmode)
5505 *modep = mode;
5506
5507 if (*modep == mode)
5508 return 1;
5509
5510 break;
5511
5512 case COMPLEX_TYPE:
5513 mode = TYPE_MODE (TREE_TYPE (type));
5514 if (mode != DFmode && mode != SFmode)
5515 return -1;
5516
5517 if (*modep == VOIDmode)
5518 *modep = mode;
5519
5520 if (*modep == mode)
5521 return 2;
5522
5523 break;
5524
5525 case VECTOR_TYPE:
5526 /* Use V2SImode and V4SImode as representatives of all 64-bit
5527 and 128-bit vector types, whether or not those modes are
5528 supported with the present options. */
5529 size = int_size_in_bytes (type);
5530 switch (size)
5531 {
5532 case 8:
5533 mode = V2SImode;
5534 break;
5535 case 16:
5536 mode = V4SImode;
5537 break;
5538 default:
5539 return -1;
5540 }
5541
5542 if (*modep == VOIDmode)
5543 *modep = mode;
5544
5545 /* Vector modes are considered to be opaque: two vectors are
5546 equivalent for the purposes of being homogeneous aggregates
5547 if they are the same size. */
5548 if (*modep == mode)
5549 return 1;
5550
5551 break;
5552
5553 case ARRAY_TYPE:
5554 {
5555 int count;
5556 tree index = TYPE_DOMAIN (type);
5557
5558 /* Can't handle incomplete types nor sizes that are not
5559 fixed. */
5560 if (!COMPLETE_TYPE_P (type)
5561 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5562 return -1;
5563
5564 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5565 if (count == -1
5566 || !index
5567 || !TYPE_MAX_VALUE (index)
5568 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5569 || !TYPE_MIN_VALUE (index)
5570 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5571 || count < 0)
5572 return -1;
5573
5574 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5575 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5576
5577 /* There must be no padding. */
5578 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5579 return -1;
5580
5581 return count;
5582 }
5583
5584 case RECORD_TYPE:
5585 {
5586 int count = 0;
5587 int sub_count;
5588 tree field;
5589
5590 /* Can't handle incomplete types nor sizes that are not
5591 fixed. */
5592 if (!COMPLETE_TYPE_P (type)
5593 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5594 return -1;
5595
5596 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5597 {
5598 if (TREE_CODE (field) != FIELD_DECL)
5599 continue;
5600
5601 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5602 if (sub_count < 0)
5603 return -1;
5604 count += sub_count;
5605 }
5606
5607 /* There must be no padding. */
5608 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5609 return -1;
5610
5611 return count;
5612 }
5613
5614 case UNION_TYPE:
5615 case QUAL_UNION_TYPE:
5616 {
5617 /* These aren't very interesting except in a degenerate case. */
5618 int count = 0;
5619 int sub_count;
5620 tree field;
5621
5622 /* Can't handle incomplete types nor sizes that are not
5623 fixed. */
5624 if (!COMPLETE_TYPE_P (type)
5625 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5626 return -1;
5627
5628 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5629 {
5630 if (TREE_CODE (field) != FIELD_DECL)
5631 continue;
5632
5633 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5634 if (sub_count < 0)
5635 return -1;
5636 count = count > sub_count ? count : sub_count;
5637 }
5638
5639 /* There must be no padding. */
5640 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5641 return -1;
5642
5643 return count;
5644 }
5645
5646 default:
5647 break;
5648 }
5649
5650 return -1;
5651 }
5652
5653 /* Return true if PCS_VARIANT should use VFP registers. */
5654 static bool
5655 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5656 {
5657 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5658 {
5659 static bool seen_thumb1_vfp = false;
5660
5661 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5662 {
5663 sorry ("Thumb-1 hard-float VFP ABI");
5664 /* sorry() is not immediately fatal, so only display this once. */
5665 seen_thumb1_vfp = true;
5666 }
5667
5668 return true;
5669 }
5670
5671 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5672 return false;
5673
5674 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
5675 && (TARGET_VFP_DOUBLE || !is_double));
5676 }
5677
5678 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5679 suitable for passing or returning in VFP registers for the PCS
5680 variant selected. If it is, then *BASE_MODE is updated to contain
5681 a machine mode describing each element of the argument's type and
5682 *COUNT to hold the number of such elements. */
5683 static bool
5684 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5685 machine_mode mode, const_tree type,
5686 machine_mode *base_mode, int *count)
5687 {
5688 machine_mode new_mode = VOIDmode;
5689
5690 /* If we have the type information, prefer that to working things
5691 out from the mode. */
5692 if (type)
5693 {
5694 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5695
5696 if (ag_count > 0 && ag_count <= 4)
5697 *count = ag_count;
5698 else
5699 return false;
5700 }
5701 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5702 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5703 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5704 {
5705 *count = 1;
5706 new_mode = mode;
5707 }
5708 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5709 {
5710 *count = 2;
5711 new_mode = (mode == DCmode ? DFmode : SFmode);
5712 }
5713 else
5714 return false;
5715
5716
5717 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5718 return false;
5719
5720 *base_mode = new_mode;
5721 return true;
5722 }
5723
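/* Return true if a value of mode MODE (and type TYPE, if available) can
be returned in VFP registers under the PCS_VARIANT calling convention. */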
5724 static bool
5725 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5726 machine_mode mode, const_tree type)
5727 {
5728 int count ATTRIBUTE_UNUSED;
5729 machine_mode ag_mode ATTRIBUTE_UNUSED;
5730
5731 if (!use_vfp_abi (pcs_variant, false))
5732 return false;
5733 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5734 &ag_mode, &count);
5735 }
5736
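/* Return true if an argument of mode MODE (and type TYPE, if available)
is a candidate for passing in VFP registers, recording the element mode
and count in PCUM for use by the allocation phase. */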
5737 static bool
5738 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5739 const_tree type)
5740 {
5741 if (!use_vfp_abi (pcum->pcs_variant, false))
5742 return false;
5743
5744 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5745 &pcum->aapcs_vfp_rmode,
5746 &pcum->aapcs_vfp_rcount);
5747 }
5748
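/* Try to allocate VFP registers for an argument whose element mode and
count were recorded by aapcs_vfp_is_call_candidate. On success, set
pcum->aapcs_reg to the register (or PARALLEL of registers) used and
return true; return false if insufficient contiguous VFP registers
remain free. */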
5749 static bool
5750 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5751 const_tree type ATTRIBUTE_UNUSED)
5752 {
5753 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5754 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5755 int regno;
5756
5757 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5758 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5759 {
5760 pcum->aapcs_vfp_reg_alloc = mask << regno;
5761 if (mode == BLKmode
5762 || (mode == TImode && ! TARGET_NEON)
5763 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5764 {
5765 int i;
5766 int rcount = pcum->aapcs_vfp_rcount;
5767 int rshift = shift;
5768 machine_mode rmode = pcum->aapcs_vfp_rmode;
5769 rtx par;
5770 if (!TARGET_NEON)
5771 {
5772 /* Avoid using unsupported vector modes. */
5773 if (rmode == V2SImode)
5774 rmode = DImode;
5775 else if (rmode == V4SImode)
5776 {
5777 rmode = DImode;
5778 rcount *= 2;
5779 rshift /= 2;
5780 }
5781 }
5782 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5783 for (i = 0; i < rcount; i++)
5784 {
5785 rtx tmp = gen_rtx_REG (rmode,
5786 FIRST_VFP_REGNUM + regno + i * rshift);
5787 tmp = gen_rtx_EXPR_LIST
5788 (VOIDmode, tmp,
5789 GEN_INT (i * GET_MODE_SIZE (rmode)));
5790 XVECEXP (par, 0, i) = tmp;
5791 }
5792
5793 pcum->aapcs_reg = par;
5794 }
5795 else
5796 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5797 return true;
5798 }
5799 return false;
5800 }
5801
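/* Return the RTX (a REG, or a PARALLEL of VFP registers for aggregates
and unsupported vector modes) used to return a value of mode MODE and
type TYPE in VFP registers, or NULL if the VFP ABI is not in use. */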
5802 static rtx
5803 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
5804 machine_mode mode,
5805 const_tree type)
5806 {
5807 if (!use_vfp_abi (pcs_variant, false))
5808 return NULL;
5809
5810 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5811 {
5812 int count;
5813 machine_mode ag_mode;
5814 int i;
5815 rtx par;
5816 int shift;
5817
5818 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5819 &ag_mode, &count);
5820
5821 if (!TARGET_NEON)
5822 {
5823 if (ag_mode == V2SImode)
5824 ag_mode = DImode;
5825 else if (ag_mode == V4SImode)
5826 {
5827 ag_mode = DImode;
5828 count *= 2;
5829 }
5830 }
5831 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
5832 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5833 for (i = 0; i < count; i++)
5834 {
5835 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5836 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5837 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5838 XVECEXP (par, 0, i) = tmp;
5839 }
5840
5841 return par;
5842 }
5843
5844 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5845 }
5846
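/* Mark the VFP registers allocated to the current argument as used and
clear the allocation, ready for the next argument. */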
5847 static void
5848 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
5849 machine_mode mode ATTRIBUTE_UNUSED,
5850 const_tree type ATTRIBUTE_UNUSED)
5851 {
5852 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5853 pcum->aapcs_vfp_reg_alloc = 0;
5854 return;
5855 }
5856
5857 #define AAPCS_CP(X) \
5858 { \
5859 aapcs_ ## X ## _cum_init, \
5860 aapcs_ ## X ## _is_call_candidate, \
5861 aapcs_ ## X ## _allocate, \
5862 aapcs_ ## X ## _is_return_candidate, \
5863 aapcs_ ## X ## _allocate_return_reg, \
5864 aapcs_ ## X ## _advance \
5865 }
5866
5867 /* Table of co-processors that can be used to pass arguments in
5868 registers. Ideally no argument should be a candidate for more than
5869 one co-processor table entry, but the table is processed in order
5870 and stops after the first match. If that entry then fails to put
5871 the argument into a co-processor register, the argument will go on
5872 the stack. */
5873 static struct
5874 {
5875 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5876 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5877
5878 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5879 BLKmode) is a candidate for this co-processor's registers; this
5880 function should ignore any position-dependent state in
5881 CUMULATIVE_ARGS and only use call-type dependent information. */
5882 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5883
5884 /* Return true if the argument does get a co-processor register; it
5885 should set aapcs_reg to an RTX of the register allocated as is
5886 required for a return from FUNCTION_ARG. */
5887 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5888
5889 /* Return true if a result of mode MODE (or type TYPE if MODE is
5890 BLKmode) can be returned in this co-processor's registers. */
5891 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5892
5893 /* Allocate and return an RTX element to hold the return type of a
5894 call, this routine must not fail and will only be called if
5895 is_return_candidate returned true with the same parameters. */
5896 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5897
5898 /* Finish processing this argument and prepare to start processing
5899 the next one. */
5900 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5901 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5902 {
5903 AAPCS_CP(vfp)
5904 };
5905
5906 #undef AAPCS_CP
5907
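/* Return the index of the co-processor slot (if any) that could be used
to pass an argument of mode MODE and type TYPE, or -1 if none applies. */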
5908 static int
5909 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5910 const_tree type)
5911 {
5912 int i;
5913
5914 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5915 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5916 return i;
5917
5918 return -1;
5919 }
5920
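/* Return the index of the co-processor slot (if any) that could be used
to return a value of type TYPE from a function of type FNTYPE, or -1 if
the value should be returned using the base rules. */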
5921 static int
5922 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5923 {
5924 /* We aren't passed a decl, so we can't check that a call is local.
5925 However, it isn't clear that that would be a win anyway, since it
5926 might limit some tail-calling opportunities. */
5927 enum arm_pcs pcs_variant;
5928
5929 if (fntype)
5930 {
5931 const_tree fndecl = NULL_TREE;
5932
5933 if (TREE_CODE (fntype) == FUNCTION_DECL)
5934 {
5935 fndecl = fntype;
5936 fntype = TREE_TYPE (fntype);
5937 }
5938
5939 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5940 }
5941 else
5942 pcs_variant = arm_pcs_default;
5943
5944 if (pcs_variant != ARM_PCS_AAPCS)
5945 {
5946 int i;
5947
5948 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5949 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5950 TYPE_MODE (type),
5951 type))
5952 return i;
5953 }
5954 return -1;
5955 }
5956
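/* Return the RTX describing where a function of type FNTYPE returns a
value of mode MODE and type TYPE under the AAPCS. Co-processor (VFP)
return registers are used where the PCS variant permits; otherwise the
value is returned in core registers starting at r0. */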
5957 static rtx
5958 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5959 const_tree fntype)
5960 {
5961 /* We aren't passed a decl, so we can't check that a call is local.
5962 However, it isn't clear that that would be a win anyway, since it
5963 might limit some tail-calling opportunities. */
5964 enum arm_pcs pcs_variant;
5965 int unsignedp ATTRIBUTE_UNUSED;
5966
5967 if (fntype)
5968 {
5969 const_tree fndecl = NULL_TREE;
5970
5971 if (TREE_CODE (fntype) == FUNCTION_DECL)
5972 {
5973 fndecl = fntype;
5974 fntype = TREE_TYPE (fntype);
5975 }
5976
5977 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5978 }
5979 else
5980 pcs_variant = arm_pcs_default;
5981
5982 /* Promote integer types. */
5983 if (type && INTEGRAL_TYPE_P (type))
5984 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5985
5986 if (pcs_variant != ARM_PCS_AAPCS)
5987 {
5988 int i;
5989
5990 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5991 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5992 type))
5993 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5994 mode, type);
5995 }
5996
5997 /* Promote small structs returned in a register to full-word size
5998 for big-endian AAPCS. */
5999 if (type && arm_return_in_msb (type))
6000 {
6001 HOST_WIDE_INT size = int_size_in_bytes (type);
6002 if (size % UNITS_PER_WORD != 0)
6003 {
6004 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6005 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
6006 }
6007 }
6008
6009 return gen_rtx_REG (mode, R0_REGNUM);
6010 }
6011
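/* Return the RTX describing where a library call returns a value of
mode MODE under the AAPCS. */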
6012 static rtx
6013 aapcs_libcall_value (machine_mode mode)
6014 {
6015 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6016 && GET_MODE_SIZE (mode) <= 4)
6017 mode = SImode;
6018
6019 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6020 }
6021
6022 /* Lay out a function argument using the AAPCS rules. The rule
6023 numbers referred to here are those in the AAPCS. */
6024 static void
6025 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6026 const_tree type, bool named)
6027 {
6028 int nregs, nregs2;
6029 int ncrn;
6030
6031 /* We only need to do this once per argument. */
6032 if (pcum->aapcs_arg_processed)
6033 return;
6034
6035 pcum->aapcs_arg_processed = true;
6036
6037 /* Special case: if named is false then we are handling an incoming
6038 anonymous argument which is on the stack. */
6039 if (!named)
6040 return;
6041
6042 /* Is this a potential co-processor register candidate? */
6043 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6044 {
6045 int slot = aapcs_select_call_coproc (pcum, mode, type);
6046 pcum->aapcs_cprc_slot = slot;
6047
6048 /* We don't have to apply any of the rules from part B of the
6049 preparation phase, these are handled elsewhere in the
6050 compiler. */
6051
6052 if (slot >= 0)
6053 {
6054 /* A Co-processor register candidate goes either in its own
6055 class of registers or on the stack. */
6056 if (!pcum->aapcs_cprc_failed[slot])
6057 {
6058 /* C1.cp - Try to allocate the argument to co-processor
6059 registers. */
6060 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6061 return;
6062
6063 /* C2.cp - Put the argument on the stack and note that we
6064 can't assign any more candidates in this slot. We also
6065 need to note that we have allocated stack space, so that
6066 we won't later try to split a non-cprc candidate between
6067 core registers and the stack. */
6068 pcum->aapcs_cprc_failed[slot] = true;
6069 pcum->can_split = false;
6070 }
6071
6072 /* We didn't get a register, so this argument goes on the
6073 stack. */
6074 gcc_assert (pcum->can_split == false);
6075 return;
6076 }
6077 }
6078
6079 /* C3 - For double-word aligned arguments, round the NCRN up to the
6080 next even number. */
6081 ncrn = pcum->aapcs_ncrn;
6082 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6083 ncrn++;
6084
6085 nregs = ARM_NUM_REGS2 (mode, type);
6086
6087 /* Sigh, this test should really assert that nregs > 0, but a GCC
6088 extension allows empty structs and then gives them empty size; it
6089 then allows such a structure to be passed by value. For some of
6090 the code below we have to pretend that such an argument has
6091 non-zero size so that we 'locate' it correctly either in
6092 registers or on the stack. */
6093 gcc_assert (nregs >= 0);
6094
6095 nregs2 = nregs ? nregs : 1;
6096
6097 /* C4 - Argument fits entirely in core registers. */
6098 if (ncrn + nregs2 <= NUM_ARG_REGS)
6099 {
6100 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6101 pcum->aapcs_next_ncrn = ncrn + nregs;
6102 return;
6103 }
6104
6105 /* C5 - Some core registers left and there are no arguments already
6106 on the stack: split this argument between the remaining core
6107 registers and the stack. */
6108 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6109 {
6110 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6111 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6112 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6113 return;
6114 }
6115
6116 /* C6 - NCRN is set to 4. */
6117 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6118
6119 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6120 return;
6121 }
6122
6123 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6124 for a call to a function whose data type is FNTYPE.
6125 For a library call, FNTYPE is NULL. */
6126 void
6127 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6128 rtx libname,
6129 tree fndecl ATTRIBUTE_UNUSED)
6130 {
6131 /* Determine the PCS variant to use for this call. */
6132 if (fntype)
6133 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6134 else
6135 pcum->pcs_variant = arm_pcs_default;
6136
6137 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6138 {
6139 if (arm_libcall_uses_aapcs_base (libname))
6140 pcum->pcs_variant = ARM_PCS_AAPCS;
6141
6142 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6143 pcum->aapcs_reg = NULL_RTX;
6144 pcum->aapcs_partial = 0;
6145 pcum->aapcs_arg_processed = false;
6146 pcum->aapcs_cprc_slot = -1;
6147 pcum->can_split = true;
6148
6149 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6150 {
6151 int i;
6152
6153 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6154 {
6155 pcum->aapcs_cprc_failed[i] = false;
6156 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6157 }
6158 }
6159 return;
6160 }
6161
6162 /* Legacy ABIs */
6163
6164 /* On the ARM, the offset starts at 0. */
6165 pcum->nregs = 0;
6166 pcum->iwmmxt_nregs = 0;
6167 pcum->can_split = true;
6168
6169 /* Varargs vectors are treated the same as long long.
6170 named_count avoids having to change the way arm handles 'named' */
6171 pcum->named_count = 0;
6172 pcum->nargs = 0;
6173
6174 if (TARGET_REALLY_IWMMXT && fntype)
6175 {
6176 tree fn_arg;
6177
6178 for (fn_arg = TYPE_ARG_TYPES (fntype);
6179 fn_arg;
6180 fn_arg = TREE_CHAIN (fn_arg))
6181 pcum->named_count += 1;
6182
6183 if (! pcum->named_count)
6184 pcum->named_count = INT_MAX;
6185 }
6186 }
6187
6188 /* Return true if mode/type need doubleword alignment. */
6189 static bool
6190 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6191 {
6192 if (!type)
6193 return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode);
6194
6195 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6196 if (!AGGREGATE_TYPE_P (type))
6197 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6198
6199 /* Array types: Use member alignment of element type. */
6200 if (TREE_CODE (type) == ARRAY_TYPE)
6201 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6202
6203 /* Record/aggregate types: Use greatest member alignment of any member. */
6204 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6205 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6206 return true;
6207
6208 return false;
6209 }
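
/* For example, under the AAPCS an argument of type 'double' or 'long long'
(8-byte alignment) needs doubleword alignment and is placed in an
even-numbered register pair, whereas 'int' or pointer arguments (4-byte
alignment) are not. */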
6210
6211
6212 /* Determine where to put an argument to a function.
6213 Value is zero to push the argument on the stack,
6214 or a hard register in which to store the argument.
6215
6216 MODE is the argument's machine mode.
6217 TYPE is the data type of the argument (as a tree).
6218 This is null for libcalls where that information may
6219 not be available.
6220 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6221 the preceding args and about the function being called.
6222 NAMED is nonzero if this argument is a named parameter
6223 (otherwise it is an extra parameter matching an ellipsis).
6224
6225 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6226 other arguments are passed on the stack. If (NAMED == 0) (which happens
6227 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6228 defined), say it is passed on the stack (function_prologue will
6229 indeed make it pass on the stack if necessary). */
6230
6231 static rtx
6232 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6233 const_tree type, bool named)
6234 {
6235 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6236 int nregs;
6237
6238 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6239 a call insn (op3 of a call_value insn). */
6240 if (mode == VOIDmode)
6241 return const0_rtx;
6242
6243 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6244 {
6245 aapcs_layout_arg (pcum, mode, type, named);
6246 return pcum->aapcs_reg;
6247 }
6248
6249 /* Varargs vectors are treated the same as long long.
6250 named_count avoids having to change the way arm handles 'named' */
6251 if (TARGET_IWMMXT_ABI
6252 && arm_vector_mode_supported_p (mode)
6253 && pcum->named_count > pcum->nargs + 1)
6254 {
6255 if (pcum->iwmmxt_nregs <= 9)
6256 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6257 else
6258 {
6259 pcum->can_split = false;
6260 return NULL_RTX;
6261 }
6262 }
6263
6264 /* Put doubleword aligned quantities in even register pairs. */
6265 if (pcum->nregs & 1
6266 && ARM_DOUBLEWORD_ALIGN
6267 && arm_needs_doubleword_align (mode, type))
6268 pcum->nregs++;
6269
6270 /* Only allow splitting an arg between regs and memory if all preceding
6271 args were allocated to regs. For args passed by reference we only count
6272 the reference pointer. */
6273 if (pcum->can_split)
6274 nregs = 1;
6275 else
6276 nregs = ARM_NUM_REGS2 (mode, type);
6277
6278 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6279 return NULL_RTX;
6280
6281 return gen_rtx_REG (mode, pcum->nregs);
6282 }
6283
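/* Return the alignment boundary, in bits, for passing an argument of
mode MODE and type TYPE: DOUBLEWORD_ALIGNMENT when the argument needs
doubleword alignment, PARM_BOUNDARY otherwise. */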
6284 static unsigned int
6285 arm_function_arg_boundary (machine_mode mode, const_tree type)
6286 {
6287 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6288 ? DOUBLEWORD_ALIGNMENT
6289 : PARM_BOUNDARY);
6290 }
6291
6292 static int
6293 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6294 tree type, bool named)
6295 {
6296 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6297 int nregs = pcum->nregs;
6298
6299 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6300 {
6301 aapcs_layout_arg (pcum, mode, type, named);
6302 return pcum->aapcs_partial;
6303 }
6304
6305 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6306 return 0;
6307
6308 if (NUM_ARG_REGS > nregs
6309 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6310 && pcum->can_split)
6311 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6312
6313 return 0;
6314 }
6315
6316 /* Update the data in PCUM to advance over an argument
6317 of mode MODE and data type TYPE.
6318 (TYPE is null for libcalls where that information may not be available.) */
6319
6320 static void
6321 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6322 const_tree type, bool named)
6323 {
6324 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6325
6326 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6327 {
6328 aapcs_layout_arg (pcum, mode, type, named);
6329
6330 if (pcum->aapcs_cprc_slot >= 0)
6331 {
6332 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6333 type);
6334 pcum->aapcs_cprc_slot = -1;
6335 }
6336
6337 /* Generic stuff. */
6338 pcum->aapcs_arg_processed = false;
6339 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6340 pcum->aapcs_reg = NULL_RTX;
6341 pcum->aapcs_partial = 0;
6342 }
6343 else
6344 {
6345 pcum->nargs += 1;
6346 if (arm_vector_mode_supported_p (mode)
6347 && pcum->named_count > pcum->nargs
6348 && TARGET_IWMMXT_ABI)
6349 pcum->iwmmxt_nregs += 1;
6350 else
6351 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6352 }
6353 }
6354
6355 /* Variable sized types are passed by reference. This is a GCC
6356 extension to the ARM ABI. */
6357
6358 static bool
6359 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6360 machine_mode mode ATTRIBUTE_UNUSED,
6361 const_tree type, bool named ATTRIBUTE_UNUSED)
6362 {
6363 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6364 }
6365 \f
6366 /* Encode the current state of the #pragma [no_]long_calls. */
6367 typedef enum
6368 {
6369 OFF, /* No #pragma [no_]long_calls is in effect. */
6370 LONG, /* #pragma long_calls is in effect. */
6371 SHORT /* #pragma no_long_calls is in effect. */
6372 } arm_pragma_enum;
6373
6374 static arm_pragma_enum arm_pragma_long_calls = OFF;
6375
6376 void
6377 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6378 {
6379 arm_pragma_long_calls = LONG;
6380 }
6381
6382 void
6383 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6384 {
6385 arm_pragma_long_calls = SHORT;
6386 }
6387
6388 void
6389 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6390 {
6391 arm_pragma_long_calls = OFF;
6392 }
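
/* For example, function declarations that appear between

#pragma long_calls
and
#pragma long_calls_off

receive the long_call attribute, while #pragma no_long_calls gives them
the short_call attribute instead (see arm_set_default_type_attributes). */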
6393 \f
6394 /* Handle an attribute requiring a FUNCTION_DECL;
6395 arguments as in struct attribute_spec.handler. */
6396 static tree
6397 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6398 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6399 {
6400 if (TREE_CODE (*node) != FUNCTION_DECL)
6401 {
6402 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6403 name);
6404 *no_add_attrs = true;
6405 }
6406
6407 return NULL_TREE;
6408 }
6409
6410 /* Handle an "interrupt" or "isr" attribute;
6411 arguments as in struct attribute_spec.handler. */
6412 static tree
6413 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6414 bool *no_add_attrs)
6415 {
6416 if (DECL_P (*node))
6417 {
6418 if (TREE_CODE (*node) != FUNCTION_DECL)
6419 {
6420 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6421 name);
6422 *no_add_attrs = true;
6423 }
6424 /* FIXME: the argument if any is checked for type attributes;
6425 should it be checked for decl ones? */
6426 }
6427 else
6428 {
6429 if (TREE_CODE (*node) == FUNCTION_TYPE
6430 || TREE_CODE (*node) == METHOD_TYPE)
6431 {
6432 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6433 {
6434 warning (OPT_Wattributes, "%qE attribute ignored",
6435 name);
6436 *no_add_attrs = true;
6437 }
6438 }
6439 else if (TREE_CODE (*node) == POINTER_TYPE
6440 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6441 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6442 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6443 {
6444 *node = build_variant_type_copy (*node);
6445 TREE_TYPE (*node) = build_type_attribute_variant
6446 (TREE_TYPE (*node),
6447 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6448 *no_add_attrs = true;
6449 }
6450 else
6451 {
6452 /* Possibly pass this attribute on from the type to a decl. */
6453 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6454 | (int) ATTR_FLAG_FUNCTION_NEXT
6455 | (int) ATTR_FLAG_ARRAY_NEXT))
6456 {
6457 *no_add_attrs = true;
6458 return tree_cons (name, args, NULL_TREE);
6459 }
6460 else
6461 {
6462 warning (OPT_Wattributes, "%qE attribute ignored",
6463 name);
6464 }
6465 }
6466 }
6467
6468 return NULL_TREE;
6469 }
6470
6471 /* Handle a "pcs" attribute; arguments as in struct
6472 attribute_spec.handler. */
6473 static tree
6474 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6475 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6476 {
6477 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6478 {
6479 warning (OPT_Wattributes, "%qE attribute ignored", name);
6480 *no_add_attrs = true;
6481 }
6482 return NULL_TREE;
6483 }
6484
6485 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6486 /* Handle the "notshared" attribute. This attribute is another way of
6487 requesting hidden visibility. ARM's compiler supports
6488 "__declspec(notshared)"; we support the same thing via an
6489 attribute. */
6490
6491 static tree
6492 arm_handle_notshared_attribute (tree *node,
6493 tree name ATTRIBUTE_UNUSED,
6494 tree args ATTRIBUTE_UNUSED,
6495 int flags ATTRIBUTE_UNUSED,
6496 bool *no_add_attrs)
6497 {
6498 tree decl = TYPE_NAME (*node);
6499
6500 if (decl)
6501 {
6502 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6503 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6504 *no_add_attrs = false;
6505 }
6506 return NULL_TREE;
6507 }
6508 #endif
6509
6510 /* Return 0 if the attributes for two types are incompatible, 1 if they
6511 are compatible, and 2 if they are nearly compatible (which causes a
6512 warning to be generated). */
6513 static int
6514 arm_comp_type_attributes (const_tree type1, const_tree type2)
6515 {
6516 int l1, l2, s1, s2;
6517
6518 /* Check for mismatch of non-default calling convention. */
6519 if (TREE_CODE (type1) != FUNCTION_TYPE)
6520 return 1;
6521
6522 /* Check for mismatched call attributes. */
6523 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6524 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6525 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6526 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6527
6528 /* Only bother to check if an attribute is defined. */
6529 if (l1 | l2 | s1 | s2)
6530 {
6531 /* If one type has an attribute, the other must have the same attribute. */
6532 if ((l1 != l2) || (s1 != s2))
6533 return 0;
6534
6535 /* Disallow mixed attributes. */
6536 if ((l1 & s2) || (l2 & s1))
6537 return 0;
6538 }
6539
6540 /* Check for mismatched ISR attribute. */
6541 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6542 if (! l1)
6543 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6544 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6545 if (! l2)
6546 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6547 if (l1 != l2)
6548 return 0;
6549
6550 return 1;
6551 }
6552
6553 /* Assign default attributes to a newly defined type. This is used to
6554 set short_call/long_call attributes for function types of
6555 functions defined inside corresponding #pragma scopes. */
6556 static void
6557 arm_set_default_type_attributes (tree type)
6558 {
6559 /* Add __attribute__ ((long_call)) to all functions when inside
6560 #pragma long_calls, or __attribute__ ((short_call)) when inside
6561 #pragma no_long_calls. */
6562 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6563 {
6564 tree type_attr_list, attr_name;
6565 type_attr_list = TYPE_ATTRIBUTES (type);
6566
6567 if (arm_pragma_long_calls == LONG)
6568 attr_name = get_identifier ("long_call");
6569 else if (arm_pragma_long_calls == SHORT)
6570 attr_name = get_identifier ("short_call");
6571 else
6572 return;
6573
6574 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6575 TYPE_ATTRIBUTES (type) = type_attr_list;
6576 }
6577 }
6578 \f
6579 /* Return true if DECL is known to be linked into section SECTION. */
6580
6581 static bool
6582 arm_function_in_section_p (tree decl, section *section)
6583 {
6584 /* We can only be certain about the prevailing symbol definition. */
6585 if (!decl_binds_to_current_def_p (decl))
6586 return false;
6587
6588 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6589 if (!DECL_SECTION_NAME (decl))
6590 {
6591 /* Make sure that we will not create a unique section for DECL. */
6592 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6593 return false;
6594 }
6595
6596 return function_section (decl) == section;
6597 }
6598
6599 /* Return nonzero if a 32-bit "long_call" should be generated for
6600 a call from the current function to DECL. We generate a long_call
6601 if the function:
6602
6603 a. has an __attribute__ ((long_call))
6604 or b. is within the scope of a #pragma long_calls
6605 or c. the -mlong-calls command line switch has been specified
6606
6607 However we do not generate a long call if the function:
6608
6609 d. has an __attribute__ ((short_call))
6610 or e. is inside the scope of a #pragma no_long_calls
6611 or f. is defined in the same section as the current function. */
6612
6613 bool
6614 arm_is_long_call_p (tree decl)
6615 {
6616 tree attrs;
6617
6618 if (!decl)
6619 return TARGET_LONG_CALLS;
6620
6621 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6622 if (lookup_attribute ("short_call", attrs))
6623 return false;
6624
6625 /* For "f", be conservative, and only cater for cases in which the
6626 whole of the current function is placed in the same section. */
6627 if (!flag_reorder_blocks_and_partition
6628 && TREE_CODE (decl) == FUNCTION_DECL
6629 && arm_function_in_section_p (decl, current_function_section ()))
6630 return false;
6631
6632 if (lookup_attribute ("long_call", attrs))
6633 return true;
6634
6635 return TARGET_LONG_CALLS;
6636 }
6637
6638 /* Return nonzero if it is ok to make a tail-call to DECL. */
6639 static bool
6640 arm_function_ok_for_sibcall (tree decl, tree exp)
6641 {
6642 unsigned long func_type;
6643
6644 if (cfun->machine->sibcall_blocked)
6645 return false;
6646
6647 /* Never tailcall something if we are generating code for Thumb-1. */
6648 if (TARGET_THUMB1)
6649 return false;
6650
6651 /* The PIC register is live on entry to VxWorks PLT entries, so we
6652 must make the call before restoring the PIC register. */
6653 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6654 return false;
6655
6656 /* If we are interworking and the function is not declared static
6657 then we can't tail-call it unless we know that it exists in this
6658 compilation unit (since it might be a Thumb routine). */
6659 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6660 && !TREE_ASM_WRITTEN (decl))
6661 return false;
6662
6663 func_type = arm_current_func_type ();
6664 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6665 if (IS_INTERRUPT (func_type))
6666 return false;
6667
6668 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6669 {
6670 /* Check that the return value locations are the same. For
6671 example that we aren't returning a value from the sibling in
6672 a VFP register but then need to transfer it to a core
6673 register. */
6674 rtx a, b;
6675
6676 a = arm_function_value (TREE_TYPE (exp), decl, false);
6677 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6678 cfun->decl, false);
6679 if (!rtx_equal_p (a, b))
6680 return false;
6681 }
6682
6683 /* Never tailcall if function may be called with a misaligned SP. */
6684 if (IS_STACKALIGN (func_type))
6685 return false;
6686
6687 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6688 references should become a NOP. Don't convert such calls into
6689 sibling calls. */
6690 if (TARGET_AAPCS_BASED
6691 && arm_abi == ARM_ABI_AAPCS
6692 && decl
6693 && DECL_WEAK (decl))
6694 return false;
6695
6696 /* Everything else is ok. */
6697 return true;
6698 }
6699
6700 \f
6701 /* Addressing mode support functions. */
6702
6703 /* Return nonzero if X is a legitimate immediate operand when compiling
6704 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6705 int
6706 legitimate_pic_operand_p (rtx x)
6707 {
6708 if (GET_CODE (x) == SYMBOL_REF
6709 || (GET_CODE (x) == CONST
6710 && GET_CODE (XEXP (x, 0)) == PLUS
6711 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6712 return 0;
6713
6714 return 1;
6715 }
6716
6717 /* Record that the current function needs a PIC register. Initialize
6718 cfun->machine->pic_reg if we have not already done so. */
6719
6720 static void
6721 require_pic_register (void)
6722 {
6723 /* A lot of the logic here is made obscure by the fact that this
6724 routine gets called as part of the rtx cost estimation process.
6725 We don't want those calls to affect any assumptions about the real
6726 function; and further, we can't call entry_of_function() until we
6727 start the real expansion process. */
6728 if (!crtl->uses_pic_offset_table)
6729 {
6730 gcc_assert (can_create_pseudo_p ());
6731 if (arm_pic_register != INVALID_REGNUM
6732 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6733 {
6734 if (!cfun->machine->pic_reg)
6735 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6736
6737 /* Play games to avoid marking the function as needing pic
6738 if we are being called as part of the cost-estimation
6739 process. */
6740 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6741 crtl->uses_pic_offset_table = 1;
6742 }
6743 else
6744 {
6745 rtx_insn *seq, *insn;
6746
6747 if (!cfun->machine->pic_reg)
6748 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6749
6750 /* Play games to avoid marking the function as needing pic
6751 if we are being called as part of the cost-estimation
6752 process. */
6753 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6754 {
6755 crtl->uses_pic_offset_table = 1;
6756 start_sequence ();
6757
6758 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6759 && arm_pic_register > LAST_LO_REGNUM)
6760 emit_move_insn (cfun->machine->pic_reg,
6761 gen_rtx_REG (Pmode, arm_pic_register));
6762 else
6763 arm_load_pic_register (0UL);
6764
6765 seq = get_insns ();
6766 end_sequence ();
6767
6768 for (insn = seq; insn; insn = NEXT_INSN (insn))
6769 if (INSN_P (insn))
6770 INSN_LOCATION (insn) = prologue_location;
6771
6772 /* We can be called during expansion of PHI nodes, where
6773 we can't yet emit instructions directly in the final
6774 insn stream. Queue the insns on the entry edge, they will
6775 be committed after everything else is expanded. */
6776 insert_insn_on_edge (seq,
6777 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6778 }
6779 }
6780 }
6781 }
6782
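/* Legitimize ORIG, an address referring to a symbol, label, or a constant
offset from one, for use in position-independent code. REG, if nonzero,
is a register that may be used to hold the result; otherwise a new
pseudo is allocated. Return an RTX for the legitimized address. */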
6783 rtx
6784 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6785 {
6786 if (GET_CODE (orig) == SYMBOL_REF
6787 || GET_CODE (orig) == LABEL_REF)
6788 {
6789 rtx insn;
6790
6791 if (reg == 0)
6792 {
6793 gcc_assert (can_create_pseudo_p ());
6794 reg = gen_reg_rtx (Pmode);
6795 }
6796
6797 /* VxWorks does not impose a fixed gap between segments; the run-time
6798 gap can be different from the object-file gap. We therefore can't
6799 use GOTOFF unless we are absolutely sure that the symbol is in the
6800 same segment as the GOT. Unfortunately, the flexibility of linker
6801 scripts means that we can't be sure of that in general, so assume
6802 that GOTOFF is never valid on VxWorks. */
6803 if ((GET_CODE (orig) == LABEL_REF
6804 || (GET_CODE (orig) == SYMBOL_REF
6805 && SYMBOL_REF_LOCAL_P (orig)))
6806 && NEED_GOT_RELOC
6807 && arm_pic_data_is_text_relative)
6808 insn = arm_pic_static_addr (orig, reg);
6809 else
6810 {
6811 rtx pat;
6812 rtx mem;
6813
6814 /* If this function doesn't have a pic register, create one now. */
6815 require_pic_register ();
6816
6817 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6818
6819 /* Make the MEM as close to a constant as possible. */
6820 mem = SET_SRC (pat);
6821 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6822 MEM_READONLY_P (mem) = 1;
6823 MEM_NOTRAP_P (mem) = 1;
6824
6825 insn = emit_insn (pat);
6826 }
6827
6828 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6829 by loop. */
6830 set_unique_reg_note (insn, REG_EQUAL, orig);
6831
6832 return reg;
6833 }
6834 else if (GET_CODE (orig) == CONST)
6835 {
6836 rtx base, offset;
6837
6838 if (GET_CODE (XEXP (orig, 0)) == PLUS
6839 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6840 return orig;
6841
6842 /* Handle the case where we have: const (UNSPEC_TLS). */
6843 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6844 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6845 return orig;
6846
6847 /* Handle the case where we have:
6848 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6849 CONST_INT. */
6850 if (GET_CODE (XEXP (orig, 0)) == PLUS
6851 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6852 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6853 {
6854 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6855 return orig;
6856 }
6857
6858 if (reg == 0)
6859 {
6860 gcc_assert (can_create_pseudo_p ());
6861 reg = gen_reg_rtx (Pmode);
6862 }
6863
6864 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6865
6866 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6867 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6868 base == reg ? 0 : reg);
6869
6870 if (CONST_INT_P (offset))
6871 {
6872 /* The base register doesn't really matter, we only want to
6873 test the index for the appropriate mode. */
6874 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6875 {
6876 gcc_assert (can_create_pseudo_p ());
6877 offset = force_reg (Pmode, offset);
6878 }
6879
6880 if (CONST_INT_P (offset))
6881 return plus_constant (Pmode, base, INTVAL (offset));
6882 }
6883
6884 if (GET_MODE_SIZE (mode) > 4
6885 && (GET_MODE_CLASS (mode) == MODE_INT
6886 || TARGET_SOFT_FLOAT))
6887 {
6888 emit_insn (gen_addsi3 (reg, base, offset));
6889 return reg;
6890 }
6891
6892 return gen_rtx_PLUS (Pmode, base, offset);
6893 }
6894
6895 return orig;
6896 }
6897
6898
6899 /* Find a spare register to use during the prologue of a function. */
6900
6901 static int
6902 thumb_find_work_register (unsigned long pushed_regs_mask)
6903 {
6904 int reg;
6905
6906 /* Check the argument registers first as these are call-used. The
6907 register allocation order means that sometimes r3 might be used
6908 but earlier argument registers might not, so check them all. */
6909 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6910 if (!df_regs_ever_live_p (reg))
6911 return reg;
6912
6913 /* Before going on to check the call-saved registers we can try a couple
6914 more ways of deducing that r3 is available. The first is when we are
6915 pushing anonymous arguments onto the stack and we have fewer than 4
6916 registers' worth of fixed arguments (*). In this case r3 will be part of
6917 the variable argument list and so we can be sure that it will be
6918 pushed right at the start of the function. Hence it will be available
6919 for the rest of the prologue.
6920 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6921 if (cfun->machine->uses_anonymous_args
6922 && crtl->args.pretend_args_size > 0)
6923 return LAST_ARG_REGNUM;
6924
6925 /* The other case is when we have fixed arguments but fewer than 4 registers'
6926 worth. In this case r3 might be used in the body of the function, but
6927 it is not being used to convey an argument into the function. In theory
6928 we could just check crtl->args.size to see how many bytes are
6929 being passed in argument registers, but it seems that it is unreliable.
6930 Sometimes it will have the value 0 when in fact arguments are being
6931 passed. (See testcase execute/20021111-1.c for an example). So we also
6932 check the args_info.nregs field as well. The problem with this field is
6933 that it makes no allowances for arguments that are passed to the
6934 function but which are not used. Hence we could miss an opportunity
6935 when a function has an unused argument in r3. But it is better to be
6936 safe than to be sorry. */
6937 if (! cfun->machine->uses_anonymous_args
6938 && crtl->args.size >= 0
6939 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6940 && (TARGET_AAPCS_BASED
6941 ? crtl->args.info.aapcs_ncrn < 4
6942 : crtl->args.info.nregs < 4))
6943 return LAST_ARG_REGNUM;
6944
6945 /* Otherwise look for a call-saved register that is going to be pushed. */
6946 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6947 if (pushed_regs_mask & (1 << reg))
6948 return reg;
6949
6950 if (TARGET_THUMB2)
6951 {
6952 /* Thumb-2 can use high regs. */
6953 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6954 if (pushed_regs_mask & (1 << reg))
6955 return reg;
6956 }
6957 /* Something went wrong - thumb_compute_save_reg_mask()
6958 should have arranged for a suitable register to be pushed. */
6959 gcc_unreachable ();
6960 }
6961
6962 static GTY(()) int pic_labelno;
6963
6964 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6965 low register. */
6966
6967 void
6968 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6969 {
6970 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6971
6972 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6973 return;
6974
6975 gcc_assert (flag_pic);
6976
6977 pic_reg = cfun->machine->pic_reg;
6978 if (TARGET_VXWORKS_RTP)
6979 {
6980 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6981 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6982 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6983
6984 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6985
6986 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6987 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6988 }
6989 else
6990 {
6991 /* We use an UNSPEC rather than a LABEL_REF because this label
6992 never appears in the code stream. */
6993
6994 labelno = GEN_INT (pic_labelno++);
6995 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6996 l1 = gen_rtx_CONST (VOIDmode, l1);
6997
6998 /* On the ARM the PC register contains 'dot + 8' at the time of the
6999 addition, on the Thumb it is 'dot + 4'. */
7000 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7001 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7002 UNSPEC_GOTSYM_OFF);
7003 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7004
7005 if (TARGET_32BIT)
7006 {
7007 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7008 }
7009 else /* TARGET_THUMB1 */
7010 {
7011 if (arm_pic_register != INVALID_REGNUM
7012 && REGNO (pic_reg) > LAST_LO_REGNUM)
7013 {
7014 /* We will have pushed the pic register, so we should always be
7015 able to find a work register. */
7016 pic_tmp = gen_rtx_REG (SImode,
7017 thumb_find_work_register (saved_regs));
7018 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7019 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7020 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7021 }
7022 else if (arm_pic_register != INVALID_REGNUM
7023 && arm_pic_register > LAST_LO_REGNUM
7024 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7025 {
7026 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7027 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7028 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7029 }
7030 else
7031 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7032 }
7033 }
7034
7035 /* Need to emit this whether or not we obey regdecls,
7036 since setjmp/longjmp can cause life info to screw up. */
7037 emit_use (pic_reg);
7038 }
7039
7040 /* Generate code to load the address of a static var when flag_pic is set. */
7041 static rtx
7042 arm_pic_static_addr (rtx orig, rtx reg)
7043 {
7044 rtx l1, labelno, offset_rtx, insn;
7045
7046 gcc_assert (flag_pic);
7047
7048 /* We use an UNSPEC rather than a LABEL_REF because this label
7049 never appears in the code stream. */
7050 labelno = GEN_INT (pic_labelno++);
7051 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7052 l1 = gen_rtx_CONST (VOIDmode, l1);
7053
7054 /* On the ARM the PC register contains 'dot + 8' at the time of the
7055 addition, on the Thumb it is 'dot + 4'. */
7056 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7057 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7058 UNSPEC_SYMBOL_OFFSET);
7059 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7060
7061 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7062 return insn;
7063 }
7064
7065 /* Return nonzero if X is valid as an ARM state addressing register. */
7066 static int
7067 arm_address_register_rtx_p (rtx x, int strict_p)
7068 {
7069 int regno;
7070
7071 if (!REG_P (x))
7072 return 0;
7073
7074 regno = REGNO (x);
7075
7076 if (strict_p)
7077 return ARM_REGNO_OK_FOR_BASE_P (regno);
7078
7079 return (regno <= LAST_ARM_REGNUM
7080 || regno >= FIRST_PSEUDO_REGISTER
7081 || regno == FRAME_POINTER_REGNUM
7082 || regno == ARG_POINTER_REGNUM);
7083 }
7084
7085 /* Return TRUE if this rtx is the difference of a symbol and a label,
7086 and will reduce to a PC-relative relocation in the object file.
7087 Expressions like this can be left alone when generating PIC, rather
7088 than forced through the GOT. */
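/* For example, (minus (symbol_ref "foo") (label_ref L1)) can be emitted as
   "foo - .L1" and resolved by the assembler or linker without a GOT entry.  */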
7089 static int
7090 pcrel_constant_p (rtx x)
7091 {
7092 if (GET_CODE (x) == MINUS)
7093 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7094
7095 return FALSE;
7096 }
7097
7098 /* Return true if X will surely end up in an index register after next
7099 splitting pass. */
7100 static bool
7101 will_be_in_index_register (const_rtx x)
7102 {
7103 /* arm.md: calculate_pic_address will split this into a register. */
7104 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7105 }
7106
7107 /* Return nonzero if X is a valid ARM state address operand. */
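/* Broadly, the accepted forms are a bare base register; base plus an
   immediate or a (possibly shifted) index register; the pre/post
   increment, decrement and modify variants of those; and, after reload,
   minipool (label-relative) references.  The details depend on MODE and
   OUTER.  */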
7108 int
7109 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7110 int strict_p)
7111 {
7112 bool use_ldrd;
7113 enum rtx_code code = GET_CODE (x);
7114
7115 if (arm_address_register_rtx_p (x, strict_p))
7116 return 1;
7117
7118 use_ldrd = (TARGET_LDRD
7119 && (mode == DImode
7120 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7121
7122 if (code == POST_INC || code == PRE_DEC
7123 || ((code == PRE_INC || code == POST_DEC)
7124 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7125 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7126
7127 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7128 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7129 && GET_CODE (XEXP (x, 1)) == PLUS
7130 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7131 {
7132 rtx addend = XEXP (XEXP (x, 1), 1);
7133
7134 /* Don't allow ldrd post-increment by register because it's hard
7135 to fix up invalid register choices. */
7136 if (use_ldrd
7137 && GET_CODE (x) == POST_MODIFY
7138 && REG_P (addend))
7139 return 0;
7140
7141 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7142 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7143 }
7144
7145 /* After reload, constants split into minipools will have addresses
7146 derived from a LABEL_REF. */
7147 else if (reload_completed
7148 && (code == LABEL_REF
7149 || (code == CONST
7150 && GET_CODE (XEXP (x, 0)) == PLUS
7151 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7152 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7153 return 1;
7154
7155 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7156 return 0;
7157
7158 else if (code == PLUS)
7159 {
7160 rtx xop0 = XEXP (x, 0);
7161 rtx xop1 = XEXP (x, 1);
7162
7163 return ((arm_address_register_rtx_p (xop0, strict_p)
7164 && ((CONST_INT_P (xop1)
7165 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7166 || (!strict_p && will_be_in_index_register (xop1))))
7167 || (arm_address_register_rtx_p (xop1, strict_p)
7168 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7169 }
7170
7171 #if 0
7172 /* Reload currently can't handle MINUS, so disable this for now */
7173 else if (GET_CODE (x) == MINUS)
7174 {
7175 rtx xop0 = XEXP (x, 0);
7176 rtx xop1 = XEXP (x, 1);
7177
7178 return (arm_address_register_rtx_p (xop0, strict_p)
7179 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7180 }
7181 #endif
7182
7183 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7184 && code == SYMBOL_REF
7185 && CONSTANT_POOL_ADDRESS_P (x)
7186 && ! (flag_pic
7187 && symbol_mentioned_p (get_pool_constant (x))
7188 && ! pcrel_constant_p (get_pool_constant (x))))
7189 return 1;
7190
7191 return 0;
7192 }
7193
7194 /* Return nonzero if X is a valid Thumb-2 address operand. */
7195 static int
7196 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7197 {
7198 bool use_ldrd;
7199 enum rtx_code code = GET_CODE (x);
7200
7201 if (arm_address_register_rtx_p (x, strict_p))
7202 return 1;
7203
7204 use_ldrd = (TARGET_LDRD
7205 && (mode == DImode
7206 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7207
7208 if (code == POST_INC || code == PRE_DEC
7209 || ((code == PRE_INC || code == POST_DEC)
7210 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7211 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7212
7213 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7214 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7215 && GET_CODE (XEXP (x, 1)) == PLUS
7216 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7217 {
7218 /* Thumb-2 only has autoincrement by constant. */
7219 rtx addend = XEXP (XEXP (x, 1), 1);
7220 HOST_WIDE_INT offset;
7221
7222 if (!CONST_INT_P (addend))
7223 return 0;
7224
7225 offset = INTVAL(addend);
7226 if (GET_MODE_SIZE (mode) <= 4)
7227 return (offset > -256 && offset < 256);
7228
7229 return (use_ldrd && offset > -1024 && offset < 1024
7230 && (offset & 3) == 0);
7231 }
7232
7233 /* After reload, constants split into minipools will have addresses
7234 derived from a LABEL_REF. */
7235 else if (reload_completed
7236 && (code == LABEL_REF
7237 || (code == CONST
7238 && GET_CODE (XEXP (x, 0)) == PLUS
7239 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7240 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7241 return 1;
7242
7243 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7244 return 0;
7245
7246 else if (code == PLUS)
7247 {
7248 rtx xop0 = XEXP (x, 0);
7249 rtx xop1 = XEXP (x, 1);
7250
7251 return ((arm_address_register_rtx_p (xop0, strict_p)
7252 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7253 || (!strict_p && will_be_in_index_register (xop1))))
7254 || (arm_address_register_rtx_p (xop1, strict_p)
7255 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7256 }
7257
7258 /* Normally we can assign constant values to target registers without
7259 the help of the constant pool. But there are cases where we have to use
7260 the constant pool, for example:
7261 1) assigning a label to a register;
7262 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7263 
7264 A constant pool access of the form:
7265 (set (reg r0) (mem (symbol_ref (".LC0"))))
7266 will cause the use of the literal pool (later, in arm_reorg).
7267 So here we mark such a form as invalid, and the compiler will instead
7268 adjust it into:
7269 (set (reg r0) (symbol_ref (".LC0")))
7270 (set (reg r0) (mem (reg r0))).
7271 No extra register is required, and (mem (reg r0)) won't cause the use
7272 of literal pools. */
7273 else if (arm_disable_literal_pool && code == SYMBOL_REF
7274 && CONSTANT_POOL_ADDRESS_P (x))
7275 return 0;
7276
7277 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7278 && code == SYMBOL_REF
7279 && CONSTANT_POOL_ADDRESS_P (x)
7280 && ! (flag_pic
7281 && symbol_mentioned_p (get_pool_constant (x))
7282 && ! pcrel_constant_p (get_pool_constant (x))))
7283 return 1;
7284
7285 return 0;
7286 }
7287
7288 /* Return nonzero if INDEX is valid for an address index operand in
7289 ARM state. */
7290 static int
7291 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7292 int strict_p)
7293 {
7294 HOST_WIDE_INT range;
7295 enum rtx_code code = GET_CODE (index);
7296
7297 /* Standard coprocessor addressing modes. */
7298 if (TARGET_HARD_FLOAT
7299 && TARGET_VFP
7300 && (mode == SFmode || mode == DFmode))
7301 return (code == CONST_INT && INTVAL (index) < 1024
7302 && INTVAL (index) > -1024
7303 && (INTVAL (index) & 3) == 0);
7304
7305 /* For quad modes, we restrict the constant offset to be slightly less
7306 than what the instruction format permits. We do this because for
7307 quad mode moves, we will actually decompose them into two separate
7308 double-mode reads or writes. INDEX must therefore be a valid
7309 (double-mode) offset and so should INDEX+8. */
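/* For example, an offset of 1016 would place the second double-mode access
   at 1024, outside the permitted range, so the largest offset accepted here
   is 1012.  */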
7310 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7311 return (code == CONST_INT
7312 && INTVAL (index) < 1016
7313 && INTVAL (index) > -1024
7314 && (INTVAL (index) & 3) == 0);
7315
7316 /* We have no such constraint on double mode offsets, so we permit the
7317 full range of the instruction format. */
7318 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7319 return (code == CONST_INT
7320 && INTVAL (index) < 1024
7321 && INTVAL (index) > -1024
7322 && (INTVAL (index) & 3) == 0);
7323
7324 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7325 return (code == CONST_INT
7326 && INTVAL (index) < 1024
7327 && INTVAL (index) > -1024
7328 && (INTVAL (index) & 3) == 0);
7329
7330 if (arm_address_register_rtx_p (index, strict_p)
7331 && (GET_MODE_SIZE (mode) <= 4))
7332 return 1;
7333
7334 if (mode == DImode || mode == DFmode)
7335 {
7336 if (code == CONST_INT)
7337 {
7338 HOST_WIDE_INT val = INTVAL (index);
7339
7340 if (TARGET_LDRD)
7341 return val > -256 && val < 256;
7342 else
7343 return val > -4096 && val < 4092;
7344 }
7345
7346 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7347 }
7348
7349 if (GET_MODE_SIZE (mode) <= 4
7350 && ! (arm_arch4
7351 && (mode == HImode
7352 || mode == HFmode
7353 || (mode == QImode && outer == SIGN_EXTEND))))
7354 {
7355 if (code == MULT)
7356 {
7357 rtx xiop0 = XEXP (index, 0);
7358 rtx xiop1 = XEXP (index, 1);
7359
7360 return ((arm_address_register_rtx_p (xiop0, strict_p)
7361 && power_of_two_operand (xiop1, SImode))
7362 || (arm_address_register_rtx_p (xiop1, strict_p)
7363 && power_of_two_operand (xiop0, SImode)));
7364 }
7365 else if (code == LSHIFTRT || code == ASHIFTRT
7366 || code == ASHIFT || code == ROTATERT)
7367 {
7368 rtx op = XEXP (index, 1);
7369
7370 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7371 && CONST_INT_P (op)
7372 && INTVAL (op) > 0
7373 && INTVAL (op) <= 31);
7374 }
7375 }
7376
7377 /* For ARM v4 we may be doing a sign-extend operation during the
7378 load. */
7379 if (arm_arch4)
7380 {
7381 if (mode == HImode
7382 || mode == HFmode
7383 || (outer == SIGN_EXTEND && mode == QImode))
7384 range = 256;
7385 else
7386 range = 4096;
7387 }
7388 else
7389 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7390
7391 return (code == CONST_INT
7392 && INTVAL (index) < range
7393 && INTVAL (index) > -range);
7394 }
7395
7396 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
7397 index operand, i.e. 1, 2, 4 or 8. */
7398 static bool
7399 thumb2_index_mul_operand (rtx op)
7400 {
7401 HOST_WIDE_INT val;
7402
7403 if (!CONST_INT_P (op))
7404 return false;
7405
7406 val = INTVAL(op);
7407 return (val == 1 || val == 2 || val == 4 || val == 8);
7408 }
7409
7410 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7411 static int
7412 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7413 {
7414 enum rtx_code code = GET_CODE (index);
7415
7416 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7417 /* Standard coprocessor addressing modes. */
7418 if (TARGET_HARD_FLOAT
7419 && TARGET_VFP
7420 && (mode == SFmode || mode == DFmode))
7421 return (code == CONST_INT && INTVAL (index) < 1024
7422 /* Thumb-2 core register loads/stores accept only an index range
7423 greater than -256. Since we allow SF/DF in core registers, we have
7424 to use the intersection of -256..4096 (core) and -1024..1024
7425 (coprocessor). */
7426 && INTVAL (index) > -256
7427 && (INTVAL (index) & 3) == 0);
7428
7429 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7430 {
7431 /* For DImode assume values will usually live in core regs
7432 and only allow LDRD addressing modes. */
7433 if (!TARGET_LDRD || mode != DImode)
7434 return (code == CONST_INT
7435 && INTVAL (index) < 1024
7436 && INTVAL (index) > -1024
7437 && (INTVAL (index) & 3) == 0);
7438 }
7439
7440 /* For quad modes, we restrict the constant offset to be slightly less
7441 than what the instruction format permits. We do this because for
7442 quad mode moves, we will actually decompose them into two separate
7443 double-mode reads or writes. INDEX must therefore be a valid
7444 (double-mode) offset and so should INDEX+8. */
7445 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7446 return (code == CONST_INT
7447 && INTVAL (index) < 1016
7448 && INTVAL (index) > -1024
7449 && (INTVAL (index) & 3) == 0);
7450
7451 /* We have no such constraint on double mode offsets, so we permit the
7452 full range of the instruction format. */
7453 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7454 return (code == CONST_INT
7455 && INTVAL (index) < 1024
7456 && INTVAL (index) > -1024
7457 && (INTVAL (index) & 3) == 0);
7458
7459 if (arm_address_register_rtx_p (index, strict_p)
7460 && (GET_MODE_SIZE (mode) <= 4))
7461 return 1;
7462
7463 if (mode == DImode || mode == DFmode)
7464 {
7465 if (code == CONST_INT)
7466 {
7467 HOST_WIDE_INT val = INTVAL (index);
7468 /* ??? Can we assume ldrd for thumb2? */
7469 /* Thumb-2 ldrd only has reg+const addressing modes. */
7470 /* ldrd supports offsets of +-1020.
7471 However the ldr fallback does not. */
7472 return val > -256 && val < 256 && (val & 3) == 0;
7473 }
7474 else
7475 return 0;
7476 }
7477
7478 if (code == MULT)
7479 {
7480 rtx xiop0 = XEXP (index, 0);
7481 rtx xiop1 = XEXP (index, 1);
7482
7483 return ((arm_address_register_rtx_p (xiop0, strict_p)
7484 && thumb2_index_mul_operand (xiop1))
7485 || (arm_address_register_rtx_p (xiop1, strict_p)
7486 && thumb2_index_mul_operand (xiop0)));
7487 }
7488 else if (code == ASHIFT)
7489 {
7490 rtx op = XEXP (index, 1);
7491
7492 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7493 && CONST_INT_P (op)
7494 && INTVAL (op) > 0
7495 && INTVAL (op) <= 3);
7496 }
7497
7498 return (code == CONST_INT
7499 && INTVAL (index) < 4096
7500 && INTVAL (index) > -256);
7501 }
7502
7503 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7504 static int
7505 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7506 {
7507 int regno;
7508
7509 if (!REG_P (x))
7510 return 0;
7511
7512 regno = REGNO (x);
7513
7514 if (strict_p)
7515 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7516
7517 return (regno <= LAST_LO_REGNUM
7518 || regno > LAST_VIRTUAL_REGISTER
7519 || regno == FRAME_POINTER_REGNUM
7520 || (GET_MODE_SIZE (mode) >= 4
7521 && (regno == STACK_POINTER_REGNUM
7522 || regno >= FIRST_PSEUDO_REGISTER
7523 || x == hard_frame_pointer_rtx
7524 || x == arg_pointer_rtx)));
7525 }
7526
7527 /* Return nonzero if x is a legitimate index register. This is the case
7528 for any base register that can access a QImode object. */
7529 inline static int
7530 thumb1_index_register_rtx_p (rtx x, int strict_p)
7531 {
7532 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7533 }
7534
7535 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7536
7537 The AP may be eliminated to either the SP or the FP, so we use the
7538 least common denominator, e.g. SImode, and offsets from 0 to 64.
7539
7540 ??? Verify whether the above is the right approach.
7541
7542 ??? Also, the FP may be eliminated to the SP, so perhaps that
7543 needs special handling also.
7544
7545 ??? Look at how the mips16 port solves this problem. It probably uses
7546 better ways to solve some of these problems.
7547
7548 Although it would not be incorrect to accept them, we don't accept QImode
7549 and HImode addresses based on the frame pointer or arg pointer until the
7550 reload pass starts. This is so that eliminating such addresses
7551 into stack-based ones won't produce impossible code. */
7552 int
7553 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7554 {
7555 /* ??? Not clear if this is right. Experiment. */
7556 if (GET_MODE_SIZE (mode) < 4
7557 && !(reload_in_progress || reload_completed)
7558 && (reg_mentioned_p (frame_pointer_rtx, x)
7559 || reg_mentioned_p (arg_pointer_rtx, x)
7560 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7561 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7562 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7563 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7564 return 0;
7565
7566 /* Accept any base register. SP only in SImode or larger. */
7567 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7568 return 1;
7569
7570 /* This is PC relative data before arm_reorg runs. */
7571 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7572 && GET_CODE (x) == SYMBOL_REF
7573 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7574 return 1;
7575
7576 /* This is PC relative data after arm_reorg runs. */
7577 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7578 && reload_completed
7579 && (GET_CODE (x) == LABEL_REF
7580 || (GET_CODE (x) == CONST
7581 && GET_CODE (XEXP (x, 0)) == PLUS
7582 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7583 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7584 return 1;
7585
7586 /* Post-inc indexing only supported for SImode and larger. */
7587 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7588 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7589 return 1;
7590
7591 else if (GET_CODE (x) == PLUS)
7592 {
7593 /* REG+REG address can be any two index registers. */
7594 /* We disallow FRAME+REG addressing since we know that FRAME
7595 will be replaced with STACK, and SP relative addressing only
7596 permits SP+OFFSET. */
7597 if (GET_MODE_SIZE (mode) <= 4
7598 && XEXP (x, 0) != frame_pointer_rtx
7599 && XEXP (x, 1) != frame_pointer_rtx
7600 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7601 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7602 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7603 return 1;
7604
7605 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
7606 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7607 || XEXP (x, 0) == arg_pointer_rtx)
7608 && CONST_INT_P (XEXP (x, 1))
7609 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7610 return 1;
7611
7612 /* REG+const has a 10-bit offset for SP, but only SImode and
7613 larger are supported. */
7614 /* ??? Should probably check for DI/DFmode overflow here
7615 just like GO_IF_LEGITIMATE_OFFSET does. */
7616 else if (REG_P (XEXP (x, 0))
7617 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7618 && GET_MODE_SIZE (mode) >= 4
7619 && CONST_INT_P (XEXP (x, 1))
7620 && INTVAL (XEXP (x, 1)) >= 0
7621 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7622 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7623 return 1;
7624
7625 else if (REG_P (XEXP (x, 0))
7626 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7627 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7628 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7629 && REGNO (XEXP (x, 0))
7630 <= LAST_VIRTUAL_POINTER_REGISTER))
7631 && GET_MODE_SIZE (mode) >= 4
7632 && CONST_INT_P (XEXP (x, 1))
7633 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7634 return 1;
7635 }
7636
7637 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7638 && GET_MODE_SIZE (mode) == 4
7639 && GET_CODE (x) == SYMBOL_REF
7640 && CONSTANT_POOL_ADDRESS_P (x)
7641 && ! (flag_pic
7642 && symbol_mentioned_p (get_pool_constant (x))
7643 && ! pcrel_constant_p (get_pool_constant (x))))
7644 return 1;
7645
7646 return 0;
7647 }
7648
7649 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7650 instruction of mode MODE. */
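/* That is: byte accesses allow offsets 0-31, halfword accesses 0-62 (even)
   and word accesses 0-124 (word-aligned), matching the scaled 5-bit
   immediate of the Thumb-1 load/store encodings; wider modes must
   additionally fit entirely within the first 128 bytes.  */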
7651 int
7652 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7653 {
7654 switch (GET_MODE_SIZE (mode))
7655 {
7656 case 1:
7657 return val >= 0 && val < 32;
7658
7659 case 2:
7660 return val >= 0 && val < 64 && (val & 1) == 0;
7661
7662 default:
7663 return (val >= 0
7664 && (val + GET_MODE_SIZE (mode)) <= 128
7665 && (val & 3) == 0);
7666 }
7667 }
7668
7669 bool
7670 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7671 {
7672 if (TARGET_ARM)
7673 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7674 else if (TARGET_THUMB2)
7675 return thumb2_legitimate_address_p (mode, x, strict_p);
7676 else /* if (TARGET_THUMB1) */
7677 return thumb1_legitimate_address_p (mode, x, strict_p);
7678 }
7679
7680 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7681
7682 Given an rtx X being reloaded into a reg required to be
7683 in class CLASS, return the class of reg to actually use.
7684 In general this is just CLASS, but for the Thumb core registers and
7685 immediate constants we prefer a LO_REGS class or a subset. */
7686
7687 static reg_class_t
7688 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7689 {
7690 if (TARGET_32BIT)
7691 return rclass;
7692 else
7693 {
7694 if (rclass == GENERAL_REGS)
7695 return LO_REGS;
7696 else
7697 return rclass;
7698 }
7699 }
7700
7701 /* Build the SYMBOL_REF for __tls_get_addr. */
7702
7703 static GTY(()) rtx tls_get_addr_libfunc;
7704
7705 static rtx
7706 get_tls_get_addr (void)
7707 {
7708 if (!tls_get_addr_libfunc)
7709 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7710 return tls_get_addr_libfunc;
7711 }
7712
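/* Load the thread pointer into TARGET, or into a fresh pseudo if TARGET is
   NULL.  With a hardware thread register this is a single coprocessor read;
   otherwise we call the __aeabi_read_tp helper, which returns the value in
   r0.  */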
7713 rtx
7714 arm_load_tp (rtx target)
7715 {
7716 if (!target)
7717 target = gen_reg_rtx (SImode);
7718
7719 if (TARGET_HARD_TP)
7720 {
7721 /* Can return in any reg. */
7722 emit_insn (gen_load_tp_hard (target));
7723 }
7724 else
7725 {
7726 /* Always returned in r0. Immediately copy the result into a pseudo,
7727 otherwise other uses of r0 (e.g. setting up function arguments) may
7728 clobber the value. */
7729
7730 rtx tmp;
7731
7732 emit_insn (gen_load_tp_soft ());
7733
7734 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7735 emit_move_insn (target, tmp);
7736 }
7737 return target;
7738 }
7739
7740 static rtx
7741 load_tls_operand (rtx x, rtx reg)
7742 {
7743 rtx tmp;
7744
7745 if (reg == NULL_RTX)
7746 reg = gen_reg_rtx (SImode);
7747
7748 tmp = gen_rtx_CONST (SImode, x);
7749
7750 emit_move_insn (reg, tmp);
7751
7752 return reg;
7753 }
7754
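/* Emit a call to __tls_get_addr for symbol X using relocation RELOC
   (TLS_GD32 or TLS_LDM32), loading the argument through REG.  Store the
   call's result rtx in *VALUEP and return the emitted insn sequence.  */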
7755 static rtx
7756 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7757 {
7758 rtx insns, label, labelno, sum;
7759
7760 gcc_assert (reloc != TLS_DESCSEQ);
7761 start_sequence ();
7762
7763 labelno = GEN_INT (pic_labelno++);
7764 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7765 label = gen_rtx_CONST (VOIDmode, label);
7766
7767 sum = gen_rtx_UNSPEC (Pmode,
7768 gen_rtvec (4, x, GEN_INT (reloc), label,
7769 GEN_INT (TARGET_ARM ? 8 : 4)),
7770 UNSPEC_TLS);
7771 reg = load_tls_operand (sum, reg);
7772
7773 if (TARGET_ARM)
7774 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7775 else
7776 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7777
7778 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7779 LCT_PURE, /* LCT_CONST? */
7780 Pmode, 1, reg, Pmode);
7781
7782 insns = get_insns ();
7783 end_sequence ();
7784
7785 return insns;
7786 }
7787
7788 static rtx
7789 arm_tls_descseq_addr (rtx x, rtx reg)
7790 {
7791 rtx labelno = GEN_INT (pic_labelno++);
7792 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7793 rtx sum = gen_rtx_UNSPEC (Pmode,
7794 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7795 gen_rtx_CONST (VOIDmode, label),
7796 GEN_INT (!TARGET_ARM)),
7797 UNSPEC_TLS);
7798 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7799
7800 emit_insn (gen_tlscall (x, labelno));
7801 if (!reg)
7802 reg = gen_reg_rtx (SImode);
7803 else
7804 gcc_assert (REGNO (reg) != R0_REGNUM);
7805
7806 emit_move_insn (reg, reg0);
7807
7808 return reg;
7809 }
7810
7811 rtx
7812 legitimize_tls_address (rtx x, rtx reg)
7813 {
7814 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7815 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7816
7817 switch (model)
7818 {
7819 case TLS_MODEL_GLOBAL_DYNAMIC:
7820 if (TARGET_GNU2_TLS)
7821 {
7822 reg = arm_tls_descseq_addr (x, reg);
7823
7824 tp = arm_load_tp (NULL_RTX);
7825
7826 dest = gen_rtx_PLUS (Pmode, tp, reg);
7827 }
7828 else
7829 {
7830 /* Original scheme */
7831 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7832 dest = gen_reg_rtx (Pmode);
7833 emit_libcall_block (insns, dest, ret, x);
7834 }
7835 return dest;
7836
7837 case TLS_MODEL_LOCAL_DYNAMIC:
7838 if (TARGET_GNU2_TLS)
7839 {
7840 reg = arm_tls_descseq_addr (x, reg);
7841
7842 tp = arm_load_tp (NULL_RTX);
7843
7844 dest = gen_rtx_PLUS (Pmode, tp, reg);
7845 }
7846 else
7847 {
7848 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7849
7850 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7851 share the LDM result with other LD model accesses. */
7852 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7853 UNSPEC_TLS);
7854 dest = gen_reg_rtx (Pmode);
7855 emit_libcall_block (insns, dest, ret, eqv);
7856
7857 /* Load the addend. */
7858 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7859 GEN_INT (TLS_LDO32)),
7860 UNSPEC_TLS);
7861 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7862 dest = gen_rtx_PLUS (Pmode, dest, addend);
7863 }
7864 return dest;
7865
7866 case TLS_MODEL_INITIAL_EXEC:
7867 labelno = GEN_INT (pic_labelno++);
7868 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7869 label = gen_rtx_CONST (VOIDmode, label);
7870 sum = gen_rtx_UNSPEC (Pmode,
7871 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7872 GEN_INT (TARGET_ARM ? 8 : 4)),
7873 UNSPEC_TLS);
7874 reg = load_tls_operand (sum, reg);
7875
7876 if (TARGET_ARM)
7877 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7878 else if (TARGET_THUMB2)
7879 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7880 else
7881 {
7882 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7883 emit_move_insn (reg, gen_const_mem (SImode, reg));
7884 }
7885
7886 tp = arm_load_tp (NULL_RTX);
7887
7888 return gen_rtx_PLUS (Pmode, tp, reg);
7889
7890 case TLS_MODEL_LOCAL_EXEC:
7891 tp = arm_load_tp (NULL_RTX);
7892
7893 reg = gen_rtx_UNSPEC (Pmode,
7894 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7895 UNSPEC_TLS);
7896 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7897
7898 return gen_rtx_PLUS (Pmode, tp, reg);
7899
7900 default:
7901 abort ();
7902 }
7903 }
7904
7905 /* Try machine-dependent ways of modifying an illegitimate address
7906 to be legitimate. If we find one, return the new, valid address. */
7907 rtx
7908 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7909 {
7910 if (arm_tls_referenced_p (x))
7911 {
7912 rtx addend = NULL;
7913
7914 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7915 {
7916 addend = XEXP (XEXP (x, 0), 1);
7917 x = XEXP (XEXP (x, 0), 0);
7918 }
7919
7920 if (GET_CODE (x) != SYMBOL_REF)
7921 return x;
7922
7923 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7924
7925 x = legitimize_tls_address (x, NULL_RTX);
7926
7927 if (addend)
7928 {
7929 x = gen_rtx_PLUS (SImode, x, addend);
7930 orig_x = x;
7931 }
7932 else
7933 return x;
7934 }
7935
7936 if (!TARGET_ARM)
7937 {
7938 /* TODO: legitimize_address for Thumb2. */
7939 if (TARGET_THUMB2)
7940 return x;
7941 return thumb_legitimize_address (x, orig_x, mode);
7942 }
7943
7944 if (GET_CODE (x) == PLUS)
7945 {
7946 rtx xop0 = XEXP (x, 0);
7947 rtx xop1 = XEXP (x, 1);
7948
7949 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7950 xop0 = force_reg (SImode, xop0);
7951
7952 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7953 && !symbol_mentioned_p (xop1))
7954 xop1 = force_reg (SImode, xop1);
7955
7956 if (ARM_BASE_REGISTER_RTX_P (xop0)
7957 && CONST_INT_P (xop1))
7958 {
7959 HOST_WIDE_INT n, low_n;
7960 rtx base_reg, val;
7961 n = INTVAL (xop1);
7962
7963 /* VFP addressing modes actually allow greater offsets, but for
7964 now we just stick with the lowest common denominator. */
7965 if (mode == DImode
7966 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7967 {
7968 low_n = n & 0x0f;
7969 n &= ~0x0f;
7970 if (low_n > 4)
7971 {
7972 n += 16;
7973 low_n -= 16;
7974 }
7975 }
7976 else
7977 {
7978 low_n = ((mode) == TImode ? 0
7979 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7980 n -= low_n;
7981 }
7982
7983 base_reg = gen_reg_rtx (SImode);
7984 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7985 emit_move_insn (base_reg, val);
7986 x = plus_constant (Pmode, base_reg, low_n);
7987 }
7988 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7989 x = gen_rtx_PLUS (SImode, xop0, xop1);
7990 }
7991
7992 /* XXX We don't allow MINUS any more -- see comment in
7993 arm_legitimate_address_outer_p (). */
7994 else if (GET_CODE (x) == MINUS)
7995 {
7996 rtx xop0 = XEXP (x, 0);
7997 rtx xop1 = XEXP (x, 1);
7998
7999 if (CONSTANT_P (xop0))
8000 xop0 = force_reg (SImode, xop0);
8001
8002 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8003 xop1 = force_reg (SImode, xop1);
8004
8005 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8006 x = gen_rtx_MINUS (SImode, xop0, xop1);
8007 }
8008
8009 /* Make sure to take full advantage of the pre-indexed addressing mode
8010 with absolute addresses, which often allows the base register to be
8011 shared between multiple adjacent memory references, and might even
8012 allow the minipool to be avoided entirely. */
8013 else if (CONST_INT_P (x) && optimize > 0)
8014 {
8015 unsigned int bits;
8016 HOST_WIDE_INT mask, base, index;
8017 rtx base_reg;
8018
8019 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8020 use an 8-bit index. So let's use a 12-bit index for SImode only and
8021 hope that arm_gen_constant will enable ldrb to use more bits. */
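/* For example, with SImode the absolute address 0x12345678 is split into a
   base of 0x12345000, materialised once in a register, and an index of
   0x678, so neighbouring absolute accesses can share the base register.  */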
8022 bits = (mode == SImode) ? 12 : 8;
8023 mask = (1 << bits) - 1;
8024 base = INTVAL (x) & ~mask;
8025 index = INTVAL (x) & mask;
8026 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8027 {
8028 /* It'll most probably be more efficient to generate the base
8029 with more bits set and use a negative index instead. */
8030 base |= mask;
8031 index -= mask;
8032 }
8033 base_reg = force_reg (SImode, GEN_INT (base));
8034 x = plus_constant (Pmode, base_reg, index);
8035 }
8036
8037 if (flag_pic)
8038 {
8039 /* We need to find and carefully transform any SYMBOL and LABEL
8040 references, so go back to the original address expression. */
8041 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8042
8043 if (new_x != orig_x)
8044 x = new_x;
8045 }
8046
8047 return x;
8048 }
8049
8050
8051 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8052 to be legitimate. If we find one, return the new, valid address. */
8053 rtx
8054 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8055 {
8056 if (GET_CODE (x) == PLUS
8057 && CONST_INT_P (XEXP (x, 1))
8058 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8059 || INTVAL (XEXP (x, 1)) < 0))
8060 {
8061 rtx xop0 = XEXP (x, 0);
8062 rtx xop1 = XEXP (x, 1);
8063 HOST_WIDE_INT offset = INTVAL (xop1);
8064
8065 /* Try and fold the offset into a biasing of the base register and
8066 then offsetting that. Don't do this when optimizing for space
8067 since it can cause too many CSEs. */
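/* A rough example for SImode: an out-of-range offset of 300 is handled by
   biasing the base by 252 and then using the in-range offset #48 in the
   memory access itself.  */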
8068 if (optimize_size && offset >= 0
8069 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8070 {
8071 HOST_WIDE_INT delta;
8072
8073 if (offset >= 256)
8074 delta = offset - (256 - GET_MODE_SIZE (mode));
8075 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8076 delta = 31 * GET_MODE_SIZE (mode);
8077 else
8078 delta = offset & (~31 * GET_MODE_SIZE (mode));
8079
8080 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8081 NULL_RTX);
8082 x = plus_constant (Pmode, xop0, delta);
8083 }
8084 else if (offset < 0 && offset > -256)
8085 /* Small negative offsets are best done with a subtract before the
8086 dereference, since forcing them into a register normally takes two
8087 instructions. */
8088 x = force_operand (x, NULL_RTX);
8089 else
8090 {
8091 /* For the remaining cases, force the constant into a register. */
8092 xop1 = force_reg (SImode, xop1);
8093 x = gen_rtx_PLUS (SImode, xop0, xop1);
8094 }
8095 }
8096 else if (GET_CODE (x) == PLUS
8097 && s_register_operand (XEXP (x, 1), SImode)
8098 && !s_register_operand (XEXP (x, 0), SImode))
8099 {
8100 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8101
8102 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8103 }
8104
8105 if (flag_pic)
8106 {
8107 /* We need to find and carefully transform any SYMBOL and LABEL
8108 references, so go back to the original address expression. */
8109 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8110
8111 if (new_x != orig_x)
8112 x = new_x;
8113 }
8114
8115 return x;
8116 }
8117
8118 /* Return TRUE if X contains any TLS symbol references. */
8119
8120 bool
8121 arm_tls_referenced_p (rtx x)
8122 {
8123 if (! TARGET_HAVE_TLS)
8124 return false;
8125
8126 subrtx_iterator::array_type array;
8127 FOR_EACH_SUBRTX (iter, array, x, ALL)
8128 {
8129 const_rtx x = *iter;
8130 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8131 return true;
8132
8133 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8134 TLS offsets, not real symbol references. */
8135 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8136 iter.skip_subrtxes ();
8137 }
8138 return false;
8139 }
8140
8141 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8142
8143 On the ARM, allow any integer (invalid ones are removed later by insn
8144 patterns), nice doubles and symbol_refs which refer to the function's
8145 constant pool XXX.
8146
8147 When generating PIC, allow anything. */
8148
8149 static bool
8150 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8151 {
8152 return flag_pic || !label_mentioned_p (x);
8153 }
8154
8155 static bool
8156 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8157 {
8158 return (CONST_INT_P (x)
8159 || CONST_DOUBLE_P (x)
8160 || CONSTANT_ADDRESS_P (x)
8161 || flag_pic);
8162 }
8163
8164 static bool
8165 arm_legitimate_constant_p (machine_mode mode, rtx x)
8166 {
8167 return (!arm_cannot_force_const_mem (mode, x)
8168 && (TARGET_32BIT
8169 ? arm_legitimate_constant_p_1 (mode, x)
8170 : thumb_legitimate_constant_p (mode, x)));
8171 }
8172
8173 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8174
8175 static bool
8176 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8177 {
8178 rtx base, offset;
8179
8180 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8181 {
8182 split_const (x, &base, &offset);
8183 if (GET_CODE (base) == SYMBOL_REF
8184 && !offset_within_block_p (base, INTVAL (offset)))
8185 return true;
8186 }
8187 return arm_tls_referenced_p (x);
8188 }
8189 \f
8190 #define REG_OR_SUBREG_REG(X) \
8191 (REG_P (X) \
8192 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8193
8194 #define REG_OR_SUBREG_RTX(X) \
8195 (REG_P (X) ? (X) : SUBREG_REG (X))
8196
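/* Estimate the cost of rtx X for Thumb-1, where CODE is GET_CODE (X) and
   OUTER is the code of the enclosing expression.  */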
8197 static inline int
8198 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8199 {
8200 machine_mode mode = GET_MODE (x);
8201 int total, words;
8202
8203 switch (code)
8204 {
8205 case ASHIFT:
8206 case ASHIFTRT:
8207 case LSHIFTRT:
8208 case ROTATERT:
8209 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8210
8211 case PLUS:
8212 case MINUS:
8213 case COMPARE:
8214 case NEG:
8215 case NOT:
8216 return COSTS_N_INSNS (1);
8217
8218 case MULT:
8219 if (CONST_INT_P (XEXP (x, 1)))
8220 {
8221 int cycles = 0;
8222 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8223
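/* Approximate the multiplier as retiring two bits of the constant per
   cycle.  */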
8224 while (i)
8225 {
8226 i >>= 2;
8227 cycles++;
8228 }
8229 return COSTS_N_INSNS (2) + cycles;
8230 }
8231 return COSTS_N_INSNS (1) + 16;
8232
8233 case SET:
8234 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8235 the mode. */
8236 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8237 return (COSTS_N_INSNS (words)
8238 + 4 * ((MEM_P (SET_SRC (x)))
8239 + MEM_P (SET_DEST (x))));
8240
8241 case CONST_INT:
8242 if (outer == SET)
8243 {
8244 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8245 return 0;
8246 if (thumb_shiftable_const (INTVAL (x)))
8247 return COSTS_N_INSNS (2);
8248 return COSTS_N_INSNS (3);
8249 }
8250 else if ((outer == PLUS || outer == COMPARE)
8251 && INTVAL (x) < 256 && INTVAL (x) > -256)
8252 return 0;
8253 else if ((outer == IOR || outer == XOR || outer == AND)
8254 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8255 return COSTS_N_INSNS (1);
8256 else if (outer == AND)
8257 {
8258 int i;
8259 /* This duplicates the tests in the andsi3 expander. */
8260 for (i = 9; i <= 31; i++)
8261 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8262 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8263 return COSTS_N_INSNS (2);
8264 }
8265 else if (outer == ASHIFT || outer == ASHIFTRT
8266 || outer == LSHIFTRT)
8267 return 0;
8268 return COSTS_N_INSNS (2);
8269
8270 case CONST:
8271 case CONST_DOUBLE:
8272 case LABEL_REF:
8273 case SYMBOL_REF:
8274 return COSTS_N_INSNS (3);
8275
8276 case UDIV:
8277 case UMOD:
8278 case DIV:
8279 case MOD:
8280 return 100;
8281
8282 case TRUNCATE:
8283 return 99;
8284
8285 case AND:
8286 case XOR:
8287 case IOR:
8288 /* XXX guess. */
8289 return 8;
8290
8291 case MEM:
8292 /* XXX another guess. */
8293 /* Memory costs quite a lot for the first word, but subsequent words
8294 load at the equivalent of a single insn each. */
8295 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8296 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8297 ? 4 : 0));
8298
8299 case IF_THEN_ELSE:
8300 /* XXX a guess. */
8301 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8302 return 14;
8303 return 2;
8304
8305 case SIGN_EXTEND:
8306 case ZERO_EXTEND:
8307 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8308 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8309
8310 if (mode == SImode)
8311 return total;
8312
8313 if (arm_arch6)
8314 return total + COSTS_N_INSNS (1);
8315
8316 /* Assume a two-shift sequence. Increase the cost slightly so
8317 we prefer actual shifts over an extend operation. */
8318 return total + 1 + COSTS_N_INSNS (2);
8319
8320 default:
8321 return 99;
8322 }
8323 }
8324
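/* Helper for the ARM rtx cost hooks.  Compute the cost of X, whose outer
   code is OUTER, into *TOTAL; SPEED selects speed rather than size costs.
   Return true if the cost is complete, or false if the operands should
   still be costed by the generic code.  */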
8325 static inline bool
8326 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8327 {
8328 machine_mode mode = GET_MODE (x);
8329 enum rtx_code subcode;
8330 rtx operand;
8331 enum rtx_code code = GET_CODE (x);
8332 *total = 0;
8333
8334 switch (code)
8335 {
8336 case MEM:
8337 /* Memory costs quite a lot for the first word, but subsequent words
8338 load at the equivalent of a single insn each. */
8339 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8340 return true;
8341
8342 case DIV:
8343 case MOD:
8344 case UDIV:
8345 case UMOD:
8346 if (TARGET_HARD_FLOAT && mode == SFmode)
8347 *total = COSTS_N_INSNS (2);
8348 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8349 *total = COSTS_N_INSNS (4);
8350 else
8351 *total = COSTS_N_INSNS (20);
8352 return false;
8353
8354 case ROTATE:
8355 if (REG_P (XEXP (x, 1)))
8356 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8357 else if (!CONST_INT_P (XEXP (x, 1)))
8358 *total = rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8359
8360 /* Fall through */
8361 case ROTATERT:
8362 if (mode != SImode)
8363 {
8364 *total += COSTS_N_INSNS (4);
8365 return true;
8366 }
8367
8368 /* Fall through */
8369 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8370 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8371 if (mode == DImode)
8372 {
8373 *total += COSTS_N_INSNS (3);
8374 return true;
8375 }
8376
8377 *total += COSTS_N_INSNS (1);
8378 /* Increase the cost of complex shifts because they aren't any faster
8379 and they reduce dual-issue opportunities. */
8380 if (arm_tune_cortex_a9
8381 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8382 ++*total;
8383
8384 return true;
8385
8386 case MINUS:
8387 if (mode == DImode)
8388 {
8389 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8390 if (CONST_INT_P (XEXP (x, 0))
8391 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8392 {
8393 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8394 return true;
8395 }
8396
8397 if (CONST_INT_P (XEXP (x, 1))
8398 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8399 {
8400 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8401 return true;
8402 }
8403
8404 return false;
8405 }
8406
8407 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8408 {
8409 if (TARGET_HARD_FLOAT
8410 && (mode == SFmode
8411 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8412 {
8413 *total = COSTS_N_INSNS (1);
8414 if (CONST_DOUBLE_P (XEXP (x, 0))
8415 && arm_const_double_rtx (XEXP (x, 0)))
8416 {
8417 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8418 return true;
8419 }
8420
8421 if (CONST_DOUBLE_P (XEXP (x, 1))
8422 && arm_const_double_rtx (XEXP (x, 1)))
8423 {
8424 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8425 return true;
8426 }
8427
8428 return false;
8429 }
8430 *total = COSTS_N_INSNS (20);
8431 return false;
8432 }
8433
8434 *total = COSTS_N_INSNS (1);
8435 if (CONST_INT_P (XEXP (x, 0))
8436 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8437 {
8438 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8439 return true;
8440 }
8441
8442 subcode = GET_CODE (XEXP (x, 1));
8443 if (subcode == ASHIFT || subcode == ASHIFTRT
8444 || subcode == LSHIFTRT
8445 || subcode == ROTATE || subcode == ROTATERT)
8446 {
8447 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8448 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8449 return true;
8450 }
8451
8452 /* A shift as a part of RSB costs no more than RSB itself. */
8453 if (GET_CODE (XEXP (x, 0)) == MULT
8454 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8455 {
8456 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, speed);
8457 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8458 return true;
8459 }
8460
8461 if (subcode == MULT
8462 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8463 {
8464 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8465 *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed);
8466 return true;
8467 }
8468
8469 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8470 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8471 {
8472 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8473 0, speed);
8474 if (REG_P (XEXP (XEXP (x, 1), 0))
8475 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8476 *total += COSTS_N_INSNS (1);
8477
8478 return true;
8479 }
8480
8481 /* Fall through */
8482
8483 case PLUS:
8484 if (code == PLUS && arm_arch6 && mode == SImode
8485 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8486 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8487 {
8488 *total = COSTS_N_INSNS (1);
8489 *total += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
8490 GET_CODE (XEXP (x, 0)), 0, speed);
8491 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8492 return true;
8493 }
8494
8495 /* MLA: All arguments must be registers. We filter out
8496 multiplication by a power of two, so that we fall down into
8497 the code below. */
8498 if (GET_CODE (XEXP (x, 0)) == MULT
8499 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8500 {
8501 /* The cost comes from the cost of the multiply. */
8502 return false;
8503 }
8504
8505 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8506 {
8507 if (TARGET_HARD_FLOAT
8508 && (mode == SFmode
8509 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8510 {
8511 *total = COSTS_N_INSNS (1);
8512 if (CONST_DOUBLE_P (XEXP (x, 1))
8513 && arm_const_double_rtx (XEXP (x, 1)))
8514 {
8515 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8516 return true;
8517 }
8518
8519 return false;
8520 }
8521
8522 *total = COSTS_N_INSNS (20);
8523 return false;
8524 }
8525
8526 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8527 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8528 {
8529 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), mode, code,
8530 1, speed);
8531 if (REG_P (XEXP (XEXP (x, 0), 0))
8532 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8533 *total += COSTS_N_INSNS (1);
8534 return true;
8535 }
8536
8537 /* Fall through */
8538
8539 case AND: case XOR: case IOR:
8540
8541 /* Normally the frame registers will be spilt into reg+const during
8542 reload, so it is a bad idea to combine them with other instructions,
8543 since then they might not be moved outside of loops. As a compromise
8544 we allow integration with ops that have a constant as their second
8545 operand. */
8546 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8547 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8548 && !CONST_INT_P (XEXP (x, 1)))
8549 *total = COSTS_N_INSNS (1);
8550
8551 if (mode == DImode)
8552 {
8553 *total += COSTS_N_INSNS (2);
8554 if (CONST_INT_P (XEXP (x, 1))
8555 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8556 {
8557 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8558 return true;
8559 }
8560
8561 return false;
8562 }
8563
8564 *total += COSTS_N_INSNS (1);
8565 if (CONST_INT_P (XEXP (x, 1))
8566 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8567 {
8568 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8569 return true;
8570 }
8571 subcode = GET_CODE (XEXP (x, 0));
8572 if (subcode == ASHIFT || subcode == ASHIFTRT
8573 || subcode == LSHIFTRT
8574 || subcode == ROTATE || subcode == ROTATERT)
8575 {
8576 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8577 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8578 return true;
8579 }
8580
8581 if (subcode == MULT
8582 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8583 {
8584 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8585 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8586 return true;
8587 }
8588
8589 if (subcode == UMIN || subcode == UMAX
8590 || subcode == SMIN || subcode == SMAX)
8591 {
8592 *total = COSTS_N_INSNS (3);
8593 return true;
8594 }
8595
8596 return false;
8597
8598 case MULT:
8599 /* This should have been handled by the CPU specific routines. */
8600 gcc_unreachable ();
8601
8602 case TRUNCATE:
8603 if (arm_arch3m && mode == SImode
8604 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8605 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8606 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8607 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8608 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8609 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8610 {
8611 *total = rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, LSHIFTRT,
8612 0, speed);
8613 return true;
8614 }
8615 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8616 return false;
8617
8618 case NEG:
8619 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8620 {
8621 if (TARGET_HARD_FLOAT
8622 && (mode == SFmode
8623 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8624 {
8625 *total = COSTS_N_INSNS (1);
8626 return false;
8627 }
8628 *total = COSTS_N_INSNS (2);
8629 return false;
8630 }
8631
8632 /* Fall through */
8633 case NOT:
8634 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8635 if (mode == SImode && code == NOT)
8636 {
8637 subcode = GET_CODE (XEXP (x, 0));
8638 if (subcode == ASHIFT || subcode == ASHIFTRT
8639 || subcode == LSHIFTRT
8640 || subcode == ROTATE || subcode == ROTATERT
8641 || (subcode == MULT
8642 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8643 {
8644 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode,
8645 0, speed);
8646 /* Register shifts cost an extra cycle. */
8647 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8648 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8649 mode, subcode,
8650 1, speed);
8651 return true;
8652 }
8653 }
8654
8655 return false;
8656
8657 case IF_THEN_ELSE:
8658 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8659 {
8660 *total = COSTS_N_INSNS (4);
8661 return true;
8662 }
8663
8664 operand = XEXP (x, 0);
8665
8666 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8667 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8668 && REG_P (XEXP (operand, 0))
8669 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8670 *total += COSTS_N_INSNS (1);
8671 *total += rtx_cost (XEXP (x, 1), VOIDmode, code, 1, speed);
8672 *total += rtx_cost (XEXP (x, 2), VOIDmode, code, 2, speed);
8673 return true;
8674
8675 case NE:
8676 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8677 {
8678 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8679 0, speed);
8680 return true;
8681 }
8682 goto scc_insn;
8683
8684 case GE:
8685 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8686 && mode == SImode && XEXP (x, 1) == const0_rtx)
8687 {
8688 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
8689 0, speed);
8690 return true;
8691 }
8692 goto scc_insn;
8693
8694 case LT:
8695 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8696 && mode == SImode && XEXP (x, 1) == const0_rtx)
8697 {
8698 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
8699 0, speed);
8700 return true;
8701 }
8702 goto scc_insn;
8703
8704 case EQ:
8705 case GT:
8706 case LE:
8707 case GEU:
8708 case LTU:
8709 case GTU:
8710 case LEU:
8711 case UNORDERED:
8712 case ORDERED:
8713 case UNEQ:
8714 case UNGE:
8715 case UNLT:
8716 case UNGT:
8717 case UNLE:
8718 scc_insn:
8719 /* SCC insns. If the comparison has already been performed, they
8720 cost 2 instructions. Otherwise they need an additional comparison
8721 before them. */
8722 *total = COSTS_N_INSNS (2);
8723 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8724 {
8725 return true;
8726 }
8727
8728 /* Fall through */
8729 case COMPARE:
8730 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8731 {
8732 *total = 0;
8733 return true;
8734 }
8735
8736 *total += COSTS_N_INSNS (1);
8737 if (CONST_INT_P (XEXP (x, 1))
8738 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8739 {
8740 *total += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed);
8741 return true;
8742 }
8743
8744 subcode = GET_CODE (XEXP (x, 0));
8745 if (subcode == ASHIFT || subcode == ASHIFTRT
8746 || subcode == LSHIFTRT
8747 || subcode == ROTATE || subcode == ROTATERT)
8748 {
8749 mode = GET_MODE (XEXP (x, 0));
8750 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8751 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8752 return true;
8753 }
8754
8755 if (subcode == MULT
8756 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8757 {
8758 mode = GET_MODE (XEXP (x, 0));
8759 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8760 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed);
8761 return true;
8762 }
8763
8764 return false;
8765
8766 case UMIN:
8767 case UMAX:
8768 case SMIN:
8769 case SMAX:
8770 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8771 if (!CONST_INT_P (XEXP (x, 1))
8772 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8773 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
8774 return true;
8775
8776 case ABS:
8777 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8778 {
8779 if (TARGET_HARD_FLOAT
8780 && (mode == SFmode
8781 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8782 {
8783 *total = COSTS_N_INSNS (1);
8784 return false;
8785 }
8786 *total = COSTS_N_INSNS (20);
8787 return false;
8788 }
8789 *total = COSTS_N_INSNS (1);
8790 if (mode == DImode)
8791 *total += COSTS_N_INSNS (3);
8792 return false;
8793
8794 case SIGN_EXTEND:
8795 case ZERO_EXTEND:
8796 *total = 0;
8797 if (GET_MODE_CLASS (mode) == MODE_INT)
8798 {
8799 rtx op = XEXP (x, 0);
8800 machine_mode opmode = GET_MODE (op);
8801
8802 if (mode == DImode)
8803 *total += COSTS_N_INSNS (1);
8804
8805 if (opmode != SImode)
8806 {
8807 if (MEM_P (op))
8808 {
8809 /* If !arm_arch4, we use one of the extendhisi2_mem
8810 or movhi_bytes patterns for HImode. For a QImode
8811 sign extension, we first zero-extend from memory
8812 and then perform a shift sequence. */
8813 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8814 *total += COSTS_N_INSNS (2);
8815 }
8816 else if (arm_arch6)
8817 *total += COSTS_N_INSNS (1);
8818
8819 /* We don't have the necessary insn, so we need to perform some
8820 other operation. */
8821 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8822 /* An and with constant 255. */
8823 *total += COSTS_N_INSNS (1);
8824 else
8825 /* A shift sequence. Increase costs slightly to avoid
8826 combining two shifts into an extend operation. */
8827 *total += COSTS_N_INSNS (2) + 1;
8828 }
8829
8830 return false;
8831 }
8832
8833 switch (GET_MODE (XEXP (x, 0)))
8834 {
8835 case V8QImode:
8836 case V4HImode:
8837 case V2SImode:
8838 case V4QImode:
8839 case V2HImode:
8840 *total = COSTS_N_INSNS (1);
8841 return false;
8842
8843 default:
8844 gcc_unreachable ();
8845 }
8846 gcc_unreachable ();
8847
8848 case ZERO_EXTRACT:
8849 case SIGN_EXTRACT:
8850 mode = GET_MODE (XEXP (x, 0));
8851 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8852 return true;
8853
8854 case CONST_INT:
8855 if (const_ok_for_arm (INTVAL (x))
8856 || const_ok_for_arm (~INTVAL (x)))
8857 *total = COSTS_N_INSNS (1);
8858 else
8859 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8860 INTVAL (x), NULL_RTX,
8861 NULL_RTX, 0, 0));
8862 return true;
8863
8864 case CONST:
8865 case LABEL_REF:
8866 case SYMBOL_REF:
8867 *total = COSTS_N_INSNS (3);
8868 return true;
8869
8870 case HIGH:
8871 *total = COSTS_N_INSNS (1);
8872 return true;
8873
8874 case LO_SUM:
8875 *total = COSTS_N_INSNS (1);
8876 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
8877 return true;
8878
8879 case CONST_DOUBLE:
8880 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8881 && (mode == SFmode || !TARGET_VFP_SINGLE))
8882 *total = COSTS_N_INSNS (1);
8883 else
8884 *total = COSTS_N_INSNS (4);
8885 return true;
8886
8887 case SET:
8888 /* The vec_extract patterns accept memory operands that require an
8889 address reload. Account for the cost of that reload to give the
8890 auto-inc-dec pass an incentive to try to replace them. */
8891 if (TARGET_NEON && MEM_P (SET_DEST (x))
8892 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8893 {
8894 mode = GET_MODE (SET_DEST (x));
8895 *total = rtx_cost (SET_DEST (x), mode, code, 0, speed);
8896 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8897 *total += COSTS_N_INSNS (1);
8898 return true;
8899 }
8900 /* Likewise for the vec_set patterns. */
8901 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8902 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8903 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8904 {
8905 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8906 mode = GET_MODE (SET_DEST (x));
8907 *total = rtx_cost (mem, mode, code, 0, speed);
8908 if (!neon_vector_mem_operand (mem, 2, true))
8909 *total += COSTS_N_INSNS (1);
8910 return true;
8911 }
8912 return false;
8913
8914 case UNSPEC:
8915 /* We cost this as high as our memory costs to allow this to
8916 be hoisted from loops. */
8917 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8918 {
8919 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8920 }
8921 return true;
8922
8923 case CONST_VECTOR:
8924 if (TARGET_NEON
8925 && TARGET_HARD_FLOAT
8926 && outer == SET
8927 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8928 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8929 *total = COSTS_N_INSNS (1);
8930 else
8931 *total = COSTS_N_INSNS (4);
8932 return true;
8933
8934 default:
8935 *total = COSTS_N_INSNS (4);
8936 return false;
8937 }
8938 }
8939
8940 /* Estimates the size cost of thumb1 instructions.
8941 For now most of the code is copied from thumb1_rtx_costs. We need more
8942 fine-grained tuning when we have more related test cases. */
8943 static inline int
8944 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8945 {
8946 machine_mode mode = GET_MODE (x);
8947 int words;
8948
8949 switch (code)
8950 {
8951 case ASHIFT:
8952 case ASHIFTRT:
8953 case LSHIFTRT:
8954 case ROTATERT:
8955 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8956
8957 case PLUS:
8958 case MINUS:
8959 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8960 patterns produced by RTL expansion, especially for the expansion of
8961 multiplication. */
8962 if ((GET_CODE (XEXP (x, 0)) == MULT
8963 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8964 || (GET_CODE (XEXP (x, 1)) == MULT
8965 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8966 return COSTS_N_INSNS (2);
8967 /* Deliberately fall through for normal RTX. */
8968 case COMPARE:
8969 case NEG:
8970 case NOT:
8971 return COSTS_N_INSNS (1);
8972
8973 case MULT:
8974 if (CONST_INT_P (XEXP (x, 1)))
8975 {
8976 /* The Thumb1 mul instruction can't operate on a constant. We must load it
8977 into a register first. */
8978 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8979 /* For targets that have a very small and high-latency multiply
8980 unit, we prefer to synthesize the multiplication with up to 5 instructions,
8981 giving a good balance between size and performance. */
8982 if (arm_arch6m && arm_m_profile_small_mul)
8983 return COSTS_N_INSNS (5);
8984 else
8985 return COSTS_N_INSNS (1) + const_size;
8986 }
8987 return COSTS_N_INSNS (1);
8988
8989 case SET:
8990 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8991 the mode. */
8992 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8993 return COSTS_N_INSNS (words)
8994 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8995 || satisfies_constraint_K (SET_SRC (x))
8996 /* thumb1_movdi_insn. */
8997 || ((words > 1) && MEM_P (SET_SRC (x))));
8998
8999 case CONST_INT:
9000 if (outer == SET)
9001 {
9002 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
9003 return COSTS_N_INSNS (1);
9004 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9005 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9006 return COSTS_N_INSNS (2);
9007 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9008 if (thumb_shiftable_const (INTVAL (x)))
9009 return COSTS_N_INSNS (2);
9010 return COSTS_N_INSNS (3);
9011 }
9012 else if ((outer == PLUS || outer == COMPARE)
9013 && INTVAL (x) < 256 && INTVAL (x) > -256)
9014 return 0;
9015 else if ((outer == IOR || outer == XOR || outer == AND)
9016 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9017 return COSTS_N_INSNS (1);
9018 else if (outer == AND)
9019 {
9020 int i;
9021 /* This duplicates the tests in the andsi3 expander. */
9022 for (i = 9; i <= 31; i++)
9023 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
9024 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
9025 return COSTS_N_INSNS (2);
9026 }
9027 else if (outer == ASHIFT || outer == ASHIFTRT
9028 || outer == LSHIFTRT)
9029 return 0;
9030 return COSTS_N_INSNS (2);
9031
9032 case CONST:
9033 case CONST_DOUBLE:
9034 case LABEL_REF:
9035 case SYMBOL_REF:
9036 return COSTS_N_INSNS (3);
9037
9038 case UDIV:
9039 case UMOD:
9040 case DIV:
9041 case MOD:
9042 return 100;
9043
9044 case TRUNCATE:
9045 return 99;
9046
9047 case AND:
9048 case XOR:
9049 case IOR:
9050 return COSTS_N_INSNS (1);
9051
9052 case MEM:
9053 return (COSTS_N_INSNS (1)
9054 + COSTS_N_INSNS (1)
9055 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9056 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9057 ? COSTS_N_INSNS (1) : 0));
9058
9059 case IF_THEN_ELSE:
9060 /* XXX a guess. */
9061 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9062 return 14;
9063 return 2;
9064
9065 case ZERO_EXTEND:
9066 /* XXX still guessing. */
9067 switch (GET_MODE (XEXP (x, 0)))
9068 {
9069 case QImode:
9070 return (1 + (mode == DImode ? 4 : 0)
9071 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9072
9073 case HImode:
9074 return (4 + (mode == DImode ? 4 : 0)
9075 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9076
9077 case SImode:
9078 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9079
9080 default:
9081 return 99;
9082 }
9083
9084 default:
9085 return 99;
9086 }
9087 }
9088
9089 /* RTX costs when optimizing for size. */
9090 static bool
9091 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9092 int *total)
9093 {
9094 machine_mode mode = GET_MODE (x);
9095 if (TARGET_THUMB1)
9096 {
9097 *total = thumb1_size_rtx_costs (x, code, outer_code);
9098 return true;
9099 }
9100
9101 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9102 switch (code)
9103 {
9104 case MEM:
9105 /* A memory access costs 1 insn if the mode is small or the address is
9106 a single register; otherwise it costs one insn per word. */
9107 if (REG_P (XEXP (x, 0)))
9108 *total = COSTS_N_INSNS (1);
9109 else if (flag_pic
9110 && GET_CODE (XEXP (x, 0)) == PLUS
9111 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9112 /* This will be split into two instructions.
9113 See arm.md:calculate_pic_address. */
9114 *total = COSTS_N_INSNS (2);
9115 else
9116 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9117 return true;
9118
9119 case DIV:
9120 case MOD:
9121 case UDIV:
9122 case UMOD:
9123 /* Needs a libcall, so it costs about this. */
9124 *total = COSTS_N_INSNS (2);
9125 return false;
9126
9127 case ROTATE:
9128 if (mode == SImode && REG_P (XEXP (x, 1)))
9129 {
9130 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code,
9131 0, false);
9132 return true;
9133 }
9134 /* Fall through */
9135 case ROTATERT:
9136 case ASHIFT:
9137 case LSHIFTRT:
9138 case ASHIFTRT:
9139 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9140 {
9141 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), mode, code,
9142 0, false);
9143 return true;
9144 }
9145 else if (mode == SImode)
9146 {
9147 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code,
9148 0, false);
9149 /* Slightly disparage register shifts, but not by much. */
9150 if (!CONST_INT_P (XEXP (x, 1)))
9151 *total += 1 + rtx_cost (XEXP (x, 1), mode, code, 1, false);
9152 return true;
9153 }
9154
9155 /* Needs a libcall. */
9156 *total = COSTS_N_INSNS (2);
9157 return false;
9158
9159 case MINUS:
9160 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9161 && (mode == SFmode || !TARGET_VFP_SINGLE))
9162 {
9163 *total = COSTS_N_INSNS (1);
9164 return false;
9165 }
9166
9167 if (mode == SImode)
9168 {
9169 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9170 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9171
9172 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9173 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9174 || subcode1 == ROTATE || subcode1 == ROTATERT
9175 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9176 || subcode1 == ASHIFTRT)
9177 {
9178 /* It's just the cost of the two operands. */
9179 *total = 0;
9180 return false;
9181 }
9182
9183 *total = COSTS_N_INSNS (1);
9184 return false;
9185 }
9186
9187 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9188 return false;
9189
9190 case PLUS:
9191 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9192 && (mode == SFmode || !TARGET_VFP_SINGLE))
9193 {
9194 *total = COSTS_N_INSNS (1);
9195 return false;
9196 }
9197
9198 /* A shift as part of an ADD costs nothing. */
9199 if (GET_CODE (XEXP (x, 0)) == MULT
9200 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9201 {
9202 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9203 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, false);
9204 *total += rtx_cost (XEXP (x, 1), mode, code, 1, false);
9205 return true;
9206 }
9207
9208 /* Fall through */
9209 case AND: case XOR: case IOR:
9210 if (mode == SImode)
9211 {
9212 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9213
9214 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9215 || subcode == LSHIFTRT || subcode == ASHIFTRT
9216 || (code == AND && subcode == NOT))
9217 {
9218 /* It's just the cost of the two operands. */
9219 *total = 0;
9220 return false;
9221 }
9222 }
9223
9224 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9225 return false;
9226
9227 case MULT:
9228 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9229 return false;
9230
9231 case NEG:
9232 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9233 && (mode == SFmode || !TARGET_VFP_SINGLE))
9234 {
9235 *total = COSTS_N_INSNS (1);
9236 return false;
9237 }
9238
9239 /* Fall through */
9240 case NOT:
9241 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9242
9243 return false;
9244
9245 case IF_THEN_ELSE:
9246 *total = 0;
9247 return false;
9248
9249 case COMPARE:
9250 if (cc_register (XEXP (x, 0), VOIDmode))
9251 *total = 0;
9252 else
9253 *total = COSTS_N_INSNS (1);
9254 return false;
9255
9256 case ABS:
9257 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9258 && (mode == SFmode || !TARGET_VFP_SINGLE))
9259 *total = COSTS_N_INSNS (1);
9260 else
9261 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9262 return false;
9263
9264 case SIGN_EXTEND:
9265 case ZERO_EXTEND:
9266 return arm_rtx_costs_1 (x, outer_code, total, 0);
9267
9268 case CONST_INT:
9269 if (const_ok_for_arm (INTVAL (x)))
9270 /* A multiplication by a constant requires another instruction
9271 to load the constant to a register. */
9272 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9273 ? 1 : 0);
9274 else if (const_ok_for_arm (~INTVAL (x)))
9275 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9276 else if (const_ok_for_arm (-INTVAL (x)))
9277 {
9278 if (outer_code == COMPARE || outer_code == PLUS
9279 || outer_code == MINUS)
9280 *total = 0;
9281 else
9282 *total = COSTS_N_INSNS (1);
9283 }
9284 else
9285 *total = COSTS_N_INSNS (2);
9286 return true;
9287
9288 case CONST:
9289 case LABEL_REF:
9290 case SYMBOL_REF:
9291 *total = COSTS_N_INSNS (2);
9292 return true;
9293
9294 case CONST_DOUBLE:
9295 *total = COSTS_N_INSNS (4);
9296 return true;
9297
9298 case CONST_VECTOR:
9299 if (TARGET_NEON
9300 && TARGET_HARD_FLOAT
9301 && outer_code == SET
9302 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9303 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9304 *total = COSTS_N_INSNS (1);
9305 else
9306 *total = COSTS_N_INSNS (4);
9307 return true;
9308
9309 case HIGH:
9310 case LO_SUM:
9311 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9312 cost of these slightly. */
9313 *total = COSTS_N_INSNS (1) + 1;
9314 return true;
9315
9316 case SET:
9317 return false;
9318
9319 default:
9320 if (mode != VOIDmode)
9321 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9322 else
9323 *total = COSTS_N_INSNS (4); /* Who knows? */
9324 return false;
9325 }
9326 }
9327
9328 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9329 operand, then return the operand that is being shifted. If the shift
9330 is not by a constant, then set SHIFT_REG to point to the shift-amount operand.
9331 Return NULL if OP is not a shifter operand. */
9332 static rtx
9333 shifter_op_p (rtx op, rtx *shift_reg)
9334 {
9335 enum rtx_code code = GET_CODE (op);
9336
9337 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9338 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9339 return XEXP (op, 0);
9340 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9341 return XEXP (op, 0);
9342 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9343 || code == ASHIFTRT)
9344 {
9345 if (!CONST_INT_P (XEXP (op, 1)))
9346 *shift_reg = XEXP (op, 1);
9347 return XEXP (op, 0);
9348 }
9349
9350 return NULL;
9351 }
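/* A few illustrative inputs and what the helper above returns for them
   (X and Y stand for arbitrary registers; this is only a sketch of the
   cases handled, not an exhaustive list):

     (mult (reg X) (const_int 4))    -> X (a multiply by a power of two
                                          stands for a left shift, here by 2)
     (ashift (reg X) (const_int 3))  -> X, *SHIFT_REG left untouched
     (lshiftrt (reg X) (reg Y))      -> X, *SHIFT_REG set to Y
     (plus (reg X) (reg Y))          -> NULL (not a shifter operand).  */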
9352
9353 static bool
9354 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9355 {
9356 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9357 rtx_code code = GET_CODE (x);
9358 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9359
9360 switch (XINT (x, 1))
9361 {
9362 case UNSPEC_UNALIGNED_LOAD:
9363 /* We can only do unaligned loads into the integer unit, and we can't
9364 use LDM or LDRD. */
9365 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9366 if (speed_p)
9367 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9368 + extra_cost->ldst.load_unaligned);
9369
9370 #ifdef NOT_YET
9371 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9372 ADDR_SPACE_GENERIC, speed_p);
9373 #endif
9374 return true;
9375
9376 case UNSPEC_UNALIGNED_STORE:
9377 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9378 if (speed_p)
9379 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9380 + extra_cost->ldst.store_unaligned);
9381
9382 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9383 #ifdef NOT_YET
9384 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9385 ADDR_SPACE_GENERIC, speed_p);
9386 #endif
9387 return true;
9388
9389 case UNSPEC_VRINTZ:
9390 case UNSPEC_VRINTP:
9391 case UNSPEC_VRINTM:
9392 case UNSPEC_VRINTR:
9393 case UNSPEC_VRINTX:
9394 case UNSPEC_VRINTA:
9395 if (speed_p)
9396 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9397
9398 return true;
9399 default:
9400 *cost = COSTS_N_INSNS (2);
9401 break;
9402 }
9403 return true;
9404 }
9405
9406 /* Cost of a libcall. We assume one insn per argument, an amount for the
9407 call (one insn for -Os) and then one for processing the result. */
9408 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
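/* For instance, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   SPEED_P is true, and to COSTS_N_INSNS (4) when optimizing for size.  */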
9409
9410 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9411 do \
9412 { \
9413 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9414 if (shift_op != NULL \
9415 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9416 { \
9417 if (shift_reg) \
9418 { \
9419 if (speed_p) \
9420 *cost += extra_cost->alu.arith_shift_reg; \
9421 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9422 ASHIFT, 1, speed_p); \
9423 } \
9424 else if (speed_p) \
9425 *cost += extra_cost->alu.arith_shift; \
9426 \
9427 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9428 ASHIFT, 0, speed_p) \
9429 + rtx_cost (XEXP (x, 1 - IDX), \
9430 GET_MODE (shift_op), \
9431 OP, 1, speed_p)); \
9432 return true; \
9433 } \
9434 } \
9435 while (0);
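/* For example, HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) checks whether operand 0
   of X is a left shift (or the equivalent multiply by a power of two); if it
   is, the macro accumulates the cost of the shifted operand, of any register
   shift amount, and of the other operand of the PLUS, then returns true from
   the enclosing function.  */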
9436
9437 /* RTX costs. Make an estimate of the cost of executing the operation
9438 X, which is contained within an operation with code OUTER_CODE.
9439 SPEED_P indicates whether the cost desired is the performance cost,
9440 or the size cost. The estimate is stored in COST and the return
9441 value is TRUE if the cost calculation is final, or FALSE if the
9442 caller should recurse through the operands of X to add additional
9443 costs.
9444
9445 We currently make no attempt to model the size savings of Thumb-2
9446 16-bit instructions. At the normal points in compilation where
9447 this code is called we have no measure of whether the condition
9448 flags are live or not, and thus no realistic way to determine what
9449 the size will eventually be. */
9450 static bool
9451 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9452 const struct cpu_cost_table *extra_cost,
9453 int *cost, bool speed_p)
9454 {
9455 machine_mode mode = GET_MODE (x);
9456
9457 *cost = COSTS_N_INSNS (1);
9458
9459 if (TARGET_THUMB1)
9460 {
9461 if (speed_p)
9462 *cost = thumb1_rtx_costs (x, code, outer_code);
9463 else
9464 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9465 return true;
9466 }
9467
9468 switch (code)
9469 {
9470 case SET:
9471 *cost = 0;
9472 /* SET RTXs don't have a mode so we get it from the destination. */
9473 mode = GET_MODE (SET_DEST (x));
9474
9475 if (REG_P (SET_SRC (x))
9476 && REG_P (SET_DEST (x)))
9477 {
9478 /* Assume that most copies can be done with a single insn,
9479 unless we don't have HW FP, in which case everything
9480 larger than word mode will require two insns. */
9481 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9482 && GET_MODE_SIZE (mode) > 4)
9483 || mode == DImode)
9484 ? 2 : 1);
9485 /* Conditional register moves can be encoded
9486 in 16 bits in Thumb mode. */
9487 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9488 *cost >>= 1;
9489
9490 return true;
9491 }
9492
9493 if (CONST_INT_P (SET_SRC (x)))
9494 {
9495 /* Handle CONST_INT here, since the value doesn't have a mode
9496 and we would otherwise be unable to work out the true cost. */
9497 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9498 0, speed_p);
9499 outer_code = SET;
9500 /* Slightly lower the cost of setting a core reg to a constant.
9501 This helps break up chains and allows for better scheduling. */
9502 if (REG_P (SET_DEST (x))
9503 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9504 *cost -= 1;
9505 x = SET_SRC (x);
9506 /* Immediate moves with an immediate in the range [0, 255] can be
9507 encoded in 16 bits in Thumb mode. */
9508 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9509 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9510 *cost >>= 1;
9511 goto const_int_cost;
9512 }
9513
9514 return false;
9515
9516 case MEM:
9517 /* A memory access costs 1 insn if the mode is small or the address is
9518 a single register; otherwise it costs one insn per word. */
9519 if (REG_P (XEXP (x, 0)))
9520 *cost = COSTS_N_INSNS (1);
9521 else if (flag_pic
9522 && GET_CODE (XEXP (x, 0)) == PLUS
9523 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9524 /* This will be split into two instructions.
9525 See arm.md:calculate_pic_address. */
9526 *cost = COSTS_N_INSNS (2);
9527 else
9528 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9529
9530 /* For speed optimizations, add the costs of the address and
9531 accessing memory. */
9532 if (speed_p)
9533 #ifdef NOT_YET
9534 *cost += (extra_cost->ldst.load
9535 + arm_address_cost (XEXP (x, 0), mode,
9536 ADDR_SPACE_GENERIC, speed_p));
9537 #else
9538 *cost += extra_cost->ldst.load;
9539 #endif
9540 return true;
9541
9542 case PARALLEL:
9543 {
9544 /* Calculations of LDM costs are complex. We assume an initial cost
9545 (ldm_1st) which will load the number of registers mentioned in
9546 ldm_regs_per_insn_1st registers; then each additional
9547 ldm_regs_per_insn_subsequent registers cost one more insn. The
9548 formula for N regs is thus:
9549
9550 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9551 + ldm_regs_per_insn_subsequent - 1)
9552 / ldm_regs_per_insn_subsequent).
9553
9554 Additional costs may also be added for addressing. A similar
9555 formula is used for STM. */
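/* As a worked example of the formula above: assuming
   ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
   a 5-register LDM costs
   ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2).  */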
9556
9557 bool is_ldm = load_multiple_operation (x, SImode);
9558 bool is_stm = store_multiple_operation (x, SImode);
9559
9560 if (is_ldm || is_stm)
9561 {
9562 if (speed_p)
9563 {
9564 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9565 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9566 ? extra_cost->ldst.ldm_regs_per_insn_1st
9567 : extra_cost->ldst.stm_regs_per_insn_1st;
9568 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9569 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9570 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9571
9572 *cost += regs_per_insn_1st
9573 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9574 + regs_per_insn_sub - 1)
9575 / regs_per_insn_sub);
9576 return true;
9577 }
9578
9579 }
9580 return false;
9581 }
9582 case DIV:
9583 case UDIV:
9584 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9585 && (mode == SFmode || !TARGET_VFP_SINGLE))
9586 *cost += COSTS_N_INSNS (speed_p
9587 ? extra_cost->fp[mode != SFmode].div : 0);
9588 else if (mode == SImode && TARGET_IDIV)
9589 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9590 else
9591 *cost = LIBCALL_COST (2);
9592 return false; /* All arguments must be in registers. */
9593
9594 case MOD:
9595 /* MOD by a power of 2 can be expanded as:
9596 rsbs r1, r0, #0
9597 and r0, r0, #(n - 1)
9598 and r1, r1, #(n - 1)
9599 rsbpl r0, r1, #0. */
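/* The sequence above is four instructions; *cost already holds
   COSTS_N_INSNS (1) on entry to this function, so only three more are
   added below.  */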
9600 if (CONST_INT_P (XEXP (x, 1))
9601 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9602 && mode == SImode)
9603 {
9604 *cost += COSTS_N_INSNS (3);
9605
9606 if (speed_p)
9607 *cost += 2 * extra_cost->alu.logical
9608 + extra_cost->alu.arith;
9609 return true;
9610 }
9611
9612 /* Fall-through. */
9613 case UMOD:
9614 *cost = LIBCALL_COST (2);
9615 return false; /* All arguments must be in registers. */
9616
9617 case ROTATE:
9618 if (mode == SImode && REG_P (XEXP (x, 1)))
9619 {
9620 *cost += (COSTS_N_INSNS (1)
9621 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9622 if (speed_p)
9623 *cost += extra_cost->alu.shift_reg;
9624 return true;
9625 }
9626 /* Fall through */
9627 case ROTATERT:
9628 case ASHIFT:
9629 case LSHIFTRT:
9630 case ASHIFTRT:
9631 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9632 {
9633 *cost += (COSTS_N_INSNS (2)
9634 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9635 if (speed_p)
9636 *cost += 2 * extra_cost->alu.shift;
9637 return true;
9638 }
9639 else if (mode == SImode)
9640 {
9641 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9642 /* Slightly disparage register shifts at -Os, but not by much. */
9643 if (!CONST_INT_P (XEXP (x, 1)))
9644 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9645 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9646 return true;
9647 }
9648 else if (GET_MODE_CLASS (mode) == MODE_INT
9649 && GET_MODE_SIZE (mode) < 4)
9650 {
9651 if (code == ASHIFT)
9652 {
9653 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9654 /* Slightly disparage register shifts at -Os, but not by
9655 much. */
9656 if (!CONST_INT_P (XEXP (x, 1)))
9657 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9658 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9659 }
9660 else if (code == LSHIFTRT || code == ASHIFTRT)
9661 {
9662 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9663 {
9664 /* Can use SBFX/UBFX. */
9665 if (speed_p)
9666 *cost += extra_cost->alu.bfx;
9667 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9668 }
9669 else
9670 {
9671 *cost += COSTS_N_INSNS (1);
9672 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9673 if (speed_p)
9674 {
9675 if (CONST_INT_P (XEXP (x, 1)))
9676 *cost += 2 * extra_cost->alu.shift;
9677 else
9678 *cost += (extra_cost->alu.shift
9679 + extra_cost->alu.shift_reg);
9680 }
9681 else
9682 /* Slightly disparage register shifts. */
9683 *cost += !CONST_INT_P (XEXP (x, 1));
9684 }
9685 }
9686 else /* Rotates. */
9687 {
9688 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9689 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9690 if (speed_p)
9691 {
9692 if (CONST_INT_P (XEXP (x, 1)))
9693 *cost += (2 * extra_cost->alu.shift
9694 + extra_cost->alu.log_shift);
9695 else
9696 *cost += (extra_cost->alu.shift
9697 + extra_cost->alu.shift_reg
9698 + extra_cost->alu.log_shift_reg);
9699 }
9700 }
9701 return true;
9702 }
9703
9704 *cost = LIBCALL_COST (2);
9705 return false;
9706
9707 case BSWAP:
9708 if (arm_arch6)
9709 {
9710 if (mode == SImode)
9711 {
9712 if (speed_p)
9713 *cost += extra_cost->alu.rev;
9714
9715 return false;
9716 }
9717 }
9718 else
9719 {
9720 /* No rev instruction available. Look at arm_legacy_rev
9721 and thumb_legacy_rev for the form of RTL used then. */
9722 if (TARGET_THUMB)
9723 {
9724 *cost += COSTS_N_INSNS (9);
9725
9726 if (speed_p)
9727 {
9728 *cost += 6 * extra_cost->alu.shift;
9729 *cost += 3 * extra_cost->alu.logical;
9730 }
9731 }
9732 else
9733 {
9734 *cost += COSTS_N_INSNS (4);
9735
9736 if (speed_p)
9737 {
9738 *cost += 2 * extra_cost->alu.shift;
9739 *cost += extra_cost->alu.arith_shift;
9740 *cost += 2 * extra_cost->alu.logical;
9741 }
9742 }
9743 return true;
9744 }
9745 return false;
9746
9747 case MINUS:
9748 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9749 && (mode == SFmode || !TARGET_VFP_SINGLE))
9750 {
9751 if (GET_CODE (XEXP (x, 0)) == MULT
9752 || GET_CODE (XEXP (x, 1)) == MULT)
9753 {
9754 rtx mul_op0, mul_op1, sub_op;
9755
9756 if (speed_p)
9757 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9758
9759 if (GET_CODE (XEXP (x, 0)) == MULT)
9760 {
9761 mul_op0 = XEXP (XEXP (x, 0), 0);
9762 mul_op1 = XEXP (XEXP (x, 0), 1);
9763 sub_op = XEXP (x, 1);
9764 }
9765 else
9766 {
9767 mul_op0 = XEXP (XEXP (x, 1), 0);
9768 mul_op1 = XEXP (XEXP (x, 1), 1);
9769 sub_op = XEXP (x, 0);
9770 }
9771
9772 /* The first operand of the multiply may be optionally
9773 negated. */
9774 if (GET_CODE (mul_op0) == NEG)
9775 mul_op0 = XEXP (mul_op0, 0);
9776
9777 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9778 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9779 + rtx_cost (sub_op, mode, code, 0, speed_p));
9780
9781 return true;
9782 }
9783
9784 if (speed_p)
9785 *cost += extra_cost->fp[mode != SFmode].addsub;
9786 return false;
9787 }
9788
9789 if (mode == SImode)
9790 {
9791 rtx shift_by_reg = NULL;
9792 rtx shift_op;
9793 rtx non_shift_op;
9794
9795 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9796 if (shift_op == NULL)
9797 {
9798 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9799 non_shift_op = XEXP (x, 0);
9800 }
9801 else
9802 non_shift_op = XEXP (x, 1);
9803
9804 if (shift_op != NULL)
9805 {
9806 if (shift_by_reg != NULL)
9807 {
9808 if (speed_p)
9809 *cost += extra_cost->alu.arith_shift_reg;
9810 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9811 }
9812 else if (speed_p)
9813 *cost += extra_cost->alu.arith_shift;
9814
9815 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9816 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9817 return true;
9818 }
9819
9820 if (arm_arch_thumb2
9821 && GET_CODE (XEXP (x, 1)) == MULT)
9822 {
9823 /* MLS. */
9824 if (speed_p)
9825 *cost += extra_cost->mult[0].add;
9826 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9827 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9828 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9829 return true;
9830 }
9831
9832 if (CONST_INT_P (XEXP (x, 0)))
9833 {
9834 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9835 INTVAL (XEXP (x, 0)), NULL_RTX,
9836 NULL_RTX, 1, 0);
9837 *cost = COSTS_N_INSNS (insns);
9838 if (speed_p)
9839 *cost += insns * extra_cost->alu.arith;
9840 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9841 return true;
9842 }
9843 else if (speed_p)
9844 *cost += extra_cost->alu.arith;
9845
9846 return false;
9847 }
9848
9849 if (GET_MODE_CLASS (mode) == MODE_INT
9850 && GET_MODE_SIZE (mode) < 4)
9851 {
9852 rtx shift_op, shift_reg;
9853 shift_reg = NULL;
9854
9855 /* We check both sides of the MINUS for shifter operands since,
9856 unlike PLUS, it's not commutative. */
9857
9858 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9859 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9860
9861 /* Slightly disparage, as we might need to widen the result. */
9862 *cost += 1;
9863 if (speed_p)
9864 *cost += extra_cost->alu.arith;
9865
9866 if (CONST_INT_P (XEXP (x, 0)))
9867 {
9868 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9869 return true;
9870 }
9871
9872 return false;
9873 }
9874
9875 if (mode == DImode)
9876 {
9877 *cost += COSTS_N_INSNS (1);
9878
9879 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9880 {
9881 rtx op1 = XEXP (x, 1);
9882
9883 if (speed_p)
9884 *cost += 2 * extra_cost->alu.arith;
9885
9886 if (GET_CODE (op1) == ZERO_EXTEND)
9887 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9888 0, speed_p);
9889 else
9890 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9891 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9892 0, speed_p);
9893 return true;
9894 }
9895 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9896 {
9897 if (speed_p)
9898 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9899 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9900 0, speed_p)
9901 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9902 return true;
9903 }
9904 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9905 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9906 {
9907 if (speed_p)
9908 *cost += (extra_cost->alu.arith
9909 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9910 ? extra_cost->alu.arith
9911 : extra_cost->alu.arith_shift));
9912 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9913 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9914 GET_CODE (XEXP (x, 1)), 0, speed_p));
9915 return true;
9916 }
9917
9918 if (speed_p)
9919 *cost += 2 * extra_cost->alu.arith;
9920 return false;
9921 }
9922
9923 /* Vector mode? */
9924
9925 *cost = LIBCALL_COST (2);
9926 return false;
9927
9928 case PLUS:
9929 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9930 && (mode == SFmode || !TARGET_VFP_SINGLE))
9931 {
9932 if (GET_CODE (XEXP (x, 0)) == MULT)
9933 {
9934 rtx mul_op0, mul_op1, add_op;
9935
9936 if (speed_p)
9937 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9938
9939 mul_op0 = XEXP (XEXP (x, 0), 0);
9940 mul_op1 = XEXP (XEXP (x, 0), 1);
9941 add_op = XEXP (x, 1);
9942
9943 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9944 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9945 + rtx_cost (add_op, mode, code, 0, speed_p));
9946
9947 return true;
9948 }
9949
9950 if (speed_p)
9951 *cost += extra_cost->fp[mode != SFmode].addsub;
9952 return false;
9953 }
9954 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9955 {
9956 *cost = LIBCALL_COST (2);
9957 return false;
9958 }
9959
9960 /* Narrow modes can be synthesized in SImode, but the range
9961 of useful sub-operations is limited. Check for shift operations
9962 on one of the operands. Only left shifts can be used in the
9963 narrow modes. */
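/* For example, an HImode (plus (ashift (reg) (const_int 2)) (reg)) is
   costed here as a single shift-and-add by HANDLE_NARROW_SHIFT_ARITH below,
   whereas a right-shift form is not and falls through to the generic
   handling.  */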
9964 if (GET_MODE_CLASS (mode) == MODE_INT
9965 && GET_MODE_SIZE (mode) < 4)
9966 {
9967 rtx shift_op, shift_reg;
9968 shift_reg = NULL;
9969
9970 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9971
9972 if (CONST_INT_P (XEXP (x, 1)))
9973 {
9974 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9975 INTVAL (XEXP (x, 1)), NULL_RTX,
9976 NULL_RTX, 1, 0);
9977 *cost = COSTS_N_INSNS (insns);
9978 if (speed_p)
9979 *cost += insns * extra_cost->alu.arith;
9980 /* Slightly penalize a narrow operation as the result may
9981 need widening. */
9982 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9983 return true;
9984 }
9985
9986 /* Slightly penalize a narrow operation as the result may
9987 need widening. */
9988 *cost += 1;
9989 if (speed_p)
9990 *cost += extra_cost->alu.arith;
9991
9992 return false;
9993 }
9994
9995 if (mode == SImode)
9996 {
9997 rtx shift_op, shift_reg;
9998
9999 if (TARGET_INT_SIMD
10000 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10001 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10002 {
10003 /* UXTA[BH] or SXTA[BH]. */
10004 if (speed_p)
10005 *cost += extra_cost->alu.extend_arith;
10006 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10007 0, speed_p)
10008 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10009 return true;
10010 }
10011
10012 shift_reg = NULL;
10013 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10014 if (shift_op != NULL)
10015 {
10016 if (shift_reg)
10017 {
10018 if (speed_p)
10019 *cost += extra_cost->alu.arith_shift_reg;
10020 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10021 }
10022 else if (speed_p)
10023 *cost += extra_cost->alu.arith_shift;
10024
10025 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10026 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10027 return true;
10028 }
10029 if (GET_CODE (XEXP (x, 0)) == MULT)
10030 {
10031 rtx mul_op = XEXP (x, 0);
10032
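/* The condition below matches, for example (operand modes are not
   re-checked here, but the DSP multiplies are 16x16->32):
     (plus (mult (sign_extend:SI (reg:HI))
                 (sign_extend:SI (reg:HI)))
           (reg:SI))
   for SMLABB, while the ASHIFTRT-by-16 alternatives pick the top
   halfwords and give the other SMLA[BT][BT] forms.  */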
10033 if (TARGET_DSP_MULTIPLY
10034 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10035 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10036 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10037 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10038 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10039 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10040 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10041 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10042 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10043 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10044 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10045 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10046 == 16))))))
10047 {
10048 /* SMLA[BT][BT]. */
10049 if (speed_p)
10050 *cost += extra_cost->mult[0].extend_add;
10051 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10052 SIGN_EXTEND, 0, speed_p)
10053 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10054 SIGN_EXTEND, 0, speed_p)
10055 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10056 return true;
10057 }
10058
10059 if (speed_p)
10060 *cost += extra_cost->mult[0].add;
10061 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10062 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10063 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10064 return true;
10065 }
10066 if (CONST_INT_P (XEXP (x, 1)))
10067 {
10068 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10069 INTVAL (XEXP (x, 1)), NULL_RTX,
10070 NULL_RTX, 1, 0);
10071 *cost = COSTS_N_INSNS (insns);
10072 if (speed_p)
10073 *cost += insns * extra_cost->alu.arith;
10074 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10075 return true;
10076 }
10077 else if (speed_p)
10078 *cost += extra_cost->alu.arith;
10079
10080 return false;
10081 }
10082
10083 if (mode == DImode)
10084 {
10085 if (arm_arch3m
10086 && GET_CODE (XEXP (x, 0)) == MULT
10087 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10088 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10089 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10090 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10091 {
10092 if (speed_p)
10093 *cost += extra_cost->mult[1].extend_add;
10094 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10095 ZERO_EXTEND, 0, speed_p)
10096 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10097 ZERO_EXTEND, 0, speed_p)
10098 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10099 return true;
10100 }
10101
10102 *cost += COSTS_N_INSNS (1);
10103
10104 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10105 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10106 {
10107 if (speed_p)
10108 *cost += (extra_cost->alu.arith
10109 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10110 ? extra_cost->alu.arith
10111 : extra_cost->alu.arith_shift));
10112
10113 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10114 0, speed_p)
10115 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10116 return true;
10117 }
10118
10119 if (speed_p)
10120 *cost += 2 * extra_cost->alu.arith;
10121 return false;
10122 }
10123
10124 /* Vector mode? */
10125 *cost = LIBCALL_COST (2);
10126 return false;
10127 case IOR:
10128 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10129 {
10130 if (speed_p)
10131 *cost += extra_cost->alu.rev;
10132
10133 return true;
10134 }
10135 /* Fall through. */
10136 case AND: case XOR:
10137 if (mode == SImode)
10138 {
10139 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10140 rtx op0 = XEXP (x, 0);
10141 rtx shift_op, shift_reg;
10142
10143 if (subcode == NOT
10144 && (code == AND
10145 || (code == IOR && TARGET_THUMB2)))
10146 op0 = XEXP (op0, 0);
10147
10148 shift_reg = NULL;
10149 shift_op = shifter_op_p (op0, &shift_reg);
10150 if (shift_op != NULL)
10151 {
10152 if (shift_reg)
10153 {
10154 if (speed_p)
10155 *cost += extra_cost->alu.log_shift_reg;
10156 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10157 }
10158 else if (speed_p)
10159 *cost += extra_cost->alu.log_shift;
10160
10161 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10162 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10163 return true;
10164 }
10165
10166 if (CONST_INT_P (XEXP (x, 1)))
10167 {
10168 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10169 INTVAL (XEXP (x, 1)), NULL_RTX,
10170 NULL_RTX, 1, 0);
10171
10172 *cost = COSTS_N_INSNS (insns);
10173 if (speed_p)
10174 *cost += insns * extra_cost->alu.logical;
10175 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10176 return true;
10177 }
10178
10179 if (speed_p)
10180 *cost += extra_cost->alu.logical;
10181 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10182 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10183 return true;
10184 }
10185
10186 if (mode == DImode)
10187 {
10188 rtx op0 = XEXP (x, 0);
10189 enum rtx_code subcode = GET_CODE (op0);
10190
10191 *cost += COSTS_N_INSNS (1);
10192
10193 if (subcode == NOT
10194 && (code == AND
10195 || (code == IOR && TARGET_THUMB2)))
10196 op0 = XEXP (op0, 0);
10197
10198 if (GET_CODE (op0) == ZERO_EXTEND)
10199 {
10200 if (speed_p)
10201 *cost += 2 * extra_cost->alu.logical;
10202
10203 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10204 0, speed_p)
10205 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10206 return true;
10207 }
10208 else if (GET_CODE (op0) == SIGN_EXTEND)
10209 {
10210 if (speed_p)
10211 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10212
10213 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10214 0, speed_p)
10215 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10216 return true;
10217 }
10218
10219 if (speed_p)
10220 *cost += 2 * extra_cost->alu.logical;
10221
10222 return true;
10223 }
10224 /* Vector mode? */
10225
10226 *cost = LIBCALL_COST (2);
10227 return false;
10228
10229 case MULT:
10230 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10231 && (mode == SFmode || !TARGET_VFP_SINGLE))
10232 {
10233 rtx op0 = XEXP (x, 0);
10234
10235 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10236 op0 = XEXP (op0, 0);
10237
10238 if (speed_p)
10239 *cost += extra_cost->fp[mode != SFmode].mult;
10240
10241 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10242 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10243 return true;
10244 }
10245 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10246 {
10247 *cost = LIBCALL_COST (2);
10248 return false;
10249 }
10250
10251 if (mode == SImode)
10252 {
10253 if (TARGET_DSP_MULTIPLY
10254 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10255 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10256 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10257 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10258 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10259 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10260 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10261 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10262 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10263 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10264 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10265 && (INTVAL (XEXP (XEXP (x, 1), 1))
10266 == 16))))))
10267 {
10268 /* SMUL[TB][TB]. */
10269 if (speed_p)
10270 *cost += extra_cost->mult[0].extend;
10271 *cost += rtx_cost (XEXP (x, 0), mode, SIGN_EXTEND, 0, speed_p);
10272 *cost += rtx_cost (XEXP (x, 1), mode, SIGN_EXTEND, 1, speed_p);
10273 return true;
10274 }
10275 if (speed_p)
10276 *cost += extra_cost->mult[0].simple;
10277 return false;
10278 }
10279
10280 if (mode == DImode)
10281 {
10282 if (arm_arch3m
10283 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10284 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10285 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10286 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10287 {
10288 if (speed_p)
10289 *cost += extra_cost->mult[1].extend;
10290 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10291 ZERO_EXTEND, 0, speed_p)
10292 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10293 ZERO_EXTEND, 0, speed_p));
10294 return true;
10295 }
10296
10297 *cost = LIBCALL_COST (2);
10298 return false;
10299 }
10300
10301 /* Vector mode? */
10302 *cost = LIBCALL_COST (2);
10303 return false;
10304
10305 case NEG:
10306 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10307 && (mode == SFmode || !TARGET_VFP_SINGLE))
10308 {
10309 if (GET_CODE (XEXP (x, 0)) == MULT)
10310 {
10311 /* VNMUL. */
10312 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10313 return true;
10314 }
10315
10316 if (speed_p)
10317 *cost += extra_cost->fp[mode != SFmode].neg;
10318
10319 return false;
10320 }
10321 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10322 {
10323 *cost = LIBCALL_COST (1);
10324 return false;
10325 }
10326
10327 if (mode == SImode)
10328 {
10329 if (GET_CODE (XEXP (x, 0)) == ABS)
10330 {
10331 *cost += COSTS_N_INSNS (1);
10332 /* Assume the non-flag-changing variant. */
10333 if (speed_p)
10334 *cost += (extra_cost->alu.log_shift
10335 + extra_cost->alu.arith_shift);
10336 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10337 return true;
10338 }
10339
10340 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10341 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10342 {
10343 *cost += COSTS_N_INSNS (1);
10344 /* No extra cost for MOV imm and MVN imm. */
10345 /* If the comparison op is using the flags, there's no further
10346 cost; otherwise we need to add the cost of the comparison. */
10347 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10348 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10349 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10350 {
10351 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10352 *cost += (COSTS_N_INSNS (1)
10353 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10354 0, speed_p)
10355 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10356 1, speed_p));
10357 if (speed_p)
10358 *cost += extra_cost->alu.arith;
10359 }
10360 return true;
10361 }
10362
10363 if (speed_p)
10364 *cost += extra_cost->alu.arith;
10365 return false;
10366 }
10367
10368 if (GET_MODE_CLASS (mode) == MODE_INT
10369 && GET_MODE_SIZE (mode) < 4)
10370 {
10371 /* Slightly disparage, as we might need an extend operation. */
10372 *cost += 1;
10373 if (speed_p)
10374 *cost += extra_cost->alu.arith;
10375 return false;
10376 }
10377
10378 if (mode == DImode)
10379 {
10380 *cost += COSTS_N_INSNS (1);
10381 if (speed_p)
10382 *cost += 2 * extra_cost->alu.arith;
10383 return false;
10384 }
10385
10386 /* Vector mode? */
10387 *cost = LIBCALL_COST (1);
10388 return false;
10389
10390 case NOT:
10391 if (mode == SImode)
10392 {
10393 rtx shift_op;
10394 rtx shift_reg = NULL;
10395
10396 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10397
10398 if (shift_op)
10399 {
10400 if (shift_reg != NULL)
10401 {
10402 if (speed_p)
10403 *cost += extra_cost->alu.log_shift_reg;
10404 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10405 }
10406 else if (speed_p)
10407 *cost += extra_cost->alu.log_shift;
10408 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10409 return true;
10410 }
10411
10412 if (speed_p)
10413 *cost += extra_cost->alu.logical;
10414 return false;
10415 }
10416 if (mode == DImode)
10417 {
10418 *cost += COSTS_N_INSNS (1);
10419 return false;
10420 }
10421
10422 /* Vector mode? */
10423
10424 *cost += LIBCALL_COST (1);
10425 return false;
10426
10427 case IF_THEN_ELSE:
10428 {
10429 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10430 {
10431 *cost += COSTS_N_INSNS (3);
10432 return true;
10433 }
10434 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10435 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10436
10437 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10438 /* Assume that if one arm of the if_then_else is a register,
10439 it will be tied with the result, eliminating the
10440 conditional insn. */
10441 if (REG_P (XEXP (x, 1)))
10442 *cost += op2cost;
10443 else if (REG_P (XEXP (x, 2)))
10444 *cost += op1cost;
10445 else
10446 {
10447 if (speed_p)
10448 {
10449 if (extra_cost->alu.non_exec_costs_exec)
10450 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10451 else
10452 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10453 }
10454 else
10455 *cost += op1cost + op2cost;
10456 }
10457 }
10458 return true;
10459
10460 case COMPARE:
10461 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10462 *cost = 0;
10463 else
10464 {
10465 machine_mode op0mode;
10466 /* We'll mostly assume that the cost of a compare is the cost of the
10467 LHS. However, there are some notable exceptions. */
10468
10469 /* Floating point compares are never done as side-effects. */
10470 op0mode = GET_MODE (XEXP (x, 0));
10471 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10472 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10473 {
10474 if (speed_p)
10475 *cost += extra_cost->fp[op0mode != SFmode].compare;
10476
10477 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10478 {
10479 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10480 return true;
10481 }
10482
10483 return false;
10484 }
10485 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10486 {
10487 *cost = LIBCALL_COST (2);
10488 return false;
10489 }
10490
10491 /* DImode compares normally take two insns. */
10492 if (op0mode == DImode)
10493 {
10494 *cost += COSTS_N_INSNS (1);
10495 if (speed_p)
10496 *cost += 2 * extra_cost->alu.arith;
10497 return false;
10498 }
10499
10500 if (op0mode == SImode)
10501 {
10502 rtx shift_op;
10503 rtx shift_reg;
10504
10505 if (XEXP (x, 1) == const0_rtx
10506 && !(REG_P (XEXP (x, 0))
10507 || (GET_CODE (XEXP (x, 0)) == SUBREG
10508 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10509 {
10510 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10511
10512 /* Multiply operations that set the flags are often
10513 significantly more expensive. */
10514 if (speed_p
10515 && GET_CODE (XEXP (x, 0)) == MULT
10516 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10517 *cost += extra_cost->mult[0].flag_setting;
10518
10519 if (speed_p
10520 && GET_CODE (XEXP (x, 0)) == PLUS
10521 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10522 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10523 0), 1), mode))
10524 *cost += extra_cost->mult[0].flag_setting;
10525 return true;
10526 }
10527
10528 shift_reg = NULL;
10529 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10530 if (shift_op != NULL)
10531 {
10532 if (shift_reg != NULL)
10533 {
10534 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10535 1, speed_p);
10536 if (speed_p)
10537 *cost += extra_cost->alu.arith_shift_reg;
10538 }
10539 else if (speed_p)
10540 *cost += extra_cost->alu.arith_shift;
10541 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10542 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10543 return true;
10544 }
10545
10546 if (speed_p)
10547 *cost += extra_cost->alu.arith;
10548 if (CONST_INT_P (XEXP (x, 1))
10549 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10550 {
10551 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10552 return true;
10553 }
10554 return false;
10555 }
10556
10557 /* Vector mode? */
10558
10559 *cost = LIBCALL_COST (2);
10560 return false;
10561 }
10562 return true;
10563
10564 case EQ:
10565 case NE:
10566 case LT:
10567 case LE:
10568 case GT:
10569 case GE:
10570 case LTU:
10571 case LEU:
10572 case GEU:
10573 case GTU:
10574 case ORDERED:
10575 case UNORDERED:
10576 case UNEQ:
10577 case UNLE:
10578 case UNLT:
10579 case UNGE:
10580 case UNGT:
10581 case LTGT:
10582 if (outer_code == SET)
10583 {
10584 /* Is it a store-flag operation? */
10585 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10586 && XEXP (x, 1) == const0_rtx)
10587 {
10588 /* Thumb also needs an IT insn. */
10589 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10590 return true;
10591 }
10592 if (XEXP (x, 1) == const0_rtx)
10593 {
10594 switch (code)
10595 {
10596 case LT:
10597 /* LSR Rd, Rn, #31. */
10598 if (speed_p)
10599 *cost += extra_cost->alu.shift;
10600 break;
10601
10602 case EQ:
10603 /* RSBS T1, Rn, #0
10604 ADC Rd, Rn, T1. */
10605
10606 case NE:
10607 /* SUBS T1, Rn, #1
10608 SBC Rd, Rn, T1. */
10609 *cost += COSTS_N_INSNS (1);
10610 break;
10611
10612 case LE:
10613 /* RSBS T1, Rn, Rn, LSR #31
10614 ADC Rd, Rn, T1. */
10615 *cost += COSTS_N_INSNS (1);
10616 if (speed_p)
10617 *cost += extra_cost->alu.arith_shift;
10618 break;
10619
10620 case GT:
10621 /* RSB Rd, Rn, Rn, ASR #1
10622 LSR Rd, Rd, #31. */
10623 *cost += COSTS_N_INSNS (1);
10624 if (speed_p)
10625 *cost += (extra_cost->alu.arith_shift
10626 + extra_cost->alu.shift);
10627 break;
10628
10629 case GE:
10630 /* ASR Rd, Rn, #31
10631 ADD Rd, Rn, #1. */
10632 *cost += COSTS_N_INSNS (1);
10633 if (speed_p)
10634 *cost += extra_cost->alu.shift;
10635 break;
10636
10637 default:
10638 /* Remaining cases are either meaningless or would take
10639 three insns anyway. */
10640 *cost = COSTS_N_INSNS (3);
10641 break;
10642 }
10643 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10644 return true;
10645 }
10646 else
10647 {
10648 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10649 if (CONST_INT_P (XEXP (x, 1))
10650 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10651 {
10652 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10653 return true;
10654 }
10655
10656 return false;
10657 }
10658 }
10659 /* Not directly inside a set. If it involves the condition code
10660 register it must be the condition for a branch, cond_exec or
10661 I_T_E operation. Since the comparison is performed elsewhere,
10662 this is just the control part, which has no additional
10663 cost. */
10664 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10665 && XEXP (x, 1) == const0_rtx)
10666 {
10667 *cost = 0;
10668 return true;
10669 }
10670 return false;
10671
10672 case ABS:
10673 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10674 && (mode == SFmode || !TARGET_VFP_SINGLE))
10675 {
10676 if (speed_p)
10677 *cost += extra_cost->fp[mode != SFmode].neg;
10678
10679 return false;
10680 }
10681 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10682 {
10683 *cost = LIBCALL_COST (1);
10684 return false;
10685 }
10686
10687 if (mode == SImode)
10688 {
10689 if (speed_p)
10690 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10691 return false;
10692 }
10693 /* Vector mode? */
10694 *cost = LIBCALL_COST (1);
10695 return false;
10696
10697 case SIGN_EXTEND:
10698 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10699 && MEM_P (XEXP (x, 0)))
10700 {
10701 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10702
10703 if (mode == DImode)
10704 *cost += COSTS_N_INSNS (1);
10705
10706 if (!speed_p)
10707 return true;
10708
10709 if (GET_MODE (XEXP (x, 0)) == SImode)
10710 *cost += extra_cost->ldst.load;
10711 else
10712 *cost += extra_cost->ldst.load_sign_extend;
10713
10714 if (mode == DImode)
10715 *cost += extra_cost->alu.shift;
10716
10717 return true;
10718 }
10719
10720 /* Widening from less than 32 bits requires an extend operation. */
10721 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10722 {
10723 /* We have SXTB/SXTH. */
10724 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10725 if (speed_p)
10726 *cost += extra_cost->alu.extend;
10727 }
10728 else if (GET_MODE (XEXP (x, 0)) != SImode)
10729 {
10730 /* Needs two shifts. */
10731 *cost += COSTS_N_INSNS (1);
10732 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10733 if (speed_p)
10734 *cost += 2 * extra_cost->alu.shift;
10735 }
10736
10737 /* Widening beyond 32 bits requires one more insn. */
10738 if (mode == DImode)
10739 {
10740 *cost += COSTS_N_INSNS (1);
10741 if (speed_p)
10742 *cost += extra_cost->alu.shift;
10743 }
10744
10745 return true;
10746
10747 case ZERO_EXTEND:
10748 if ((arm_arch4
10749 || GET_MODE (XEXP (x, 0)) == SImode
10750 || GET_MODE (XEXP (x, 0)) == QImode)
10751 && MEM_P (XEXP (x, 0)))
10752 {
10753 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10754
10755 if (mode == DImode)
10756 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10757
10758 return true;
10759 }
10760
10761 /* Widening from less than 32 bits requires an extend operation. */
10762 if (GET_MODE (XEXP (x, 0)) == QImode)
10763 {
10764 /* UXTB can be a shorter instruction in Thumb2, but it might
10765 be slower than the AND Rd, Rn, #255 alternative. When
10766 optimizing for speed it should never be slower to use
10767 AND, and we don't really model 16-bit vs 32-bit insns
10768 here. */
10769 if (speed_p)
10770 *cost += extra_cost->alu.logical;
10771 }
10772 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10773 {
10774 /* We have UXTB/UXTH. */
10775 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10776 if (speed_p)
10777 *cost += extra_cost->alu.extend;
10778 }
10779 else if (GET_MODE (XEXP (x, 0)) != SImode)
10780 {
10781 /* Needs two shifts. It's marginally preferable to use
10782 shifts rather than two BIC instructions as the second
10783 shift may merge with a subsequent insn as a shifter
10784 op. */
10785 *cost = COSTS_N_INSNS (2);
10786 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10787 if (speed_p)
10788 *cost += 2 * extra_cost->alu.shift;
10789 }
10790
10791 /* Widening beyond 32 bits requires one more insn. */
10792 if (mode == DImode)
10793 {
10794 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10795 }
10796
10797 return true;
10798
10799 case CONST_INT:
10800 *cost = 0;
10801 /* CONST_INT has no mode, so we cannot tell for sure how many
10802 insns are really going to be needed. The best we can do is
10803 look at the value passed. If it fits in SImode, then assume
10804 that's the mode it will be used for. Otherwise assume it
10805 will be used in DImode. */
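/* For example, a value such as 0x1234 survives truncation to SImode and is
   costed as an SImode constant, whereas something like 0x1234567890 does
   not, so it is costed below as the two SImode halves of a DImode
   constant.  */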
10806 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10807 mode = SImode;
10808 else
10809 mode = DImode;
10810
10811 /* Avoid blowing up in arm_gen_constant (). */
10812 if (!(outer_code == PLUS
10813 || outer_code == AND
10814 || outer_code == IOR
10815 || outer_code == XOR
10816 || outer_code == MINUS))
10817 outer_code = SET;
10818
10819 const_int_cost:
10820 if (mode == SImode)
10821 {
10822 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10823 INTVAL (x), NULL, NULL,
10824 0, 0));
10825 /* Extra costs? */
10826 }
10827 else
10828 {
10829 *cost += COSTS_N_INSNS (arm_gen_constant
10830 (outer_code, SImode, NULL,
10831 trunc_int_for_mode (INTVAL (x), SImode),
10832 NULL, NULL, 0, 0)
10833 + arm_gen_constant (outer_code, SImode, NULL,
10834 INTVAL (x) >> 32, NULL,
10835 NULL, 0, 0));
10836 /* Extra costs? */
10837 }
10838
10839 return true;
10840
10841 case CONST:
10842 case LABEL_REF:
10843 case SYMBOL_REF:
10844 if (speed_p)
10845 {
10846 if (arm_arch_thumb2 && !flag_pic)
10847 *cost += COSTS_N_INSNS (1);
10848 else
10849 *cost += extra_cost->ldst.load;
10850 }
10851 else
10852 *cost += COSTS_N_INSNS (1);
10853
10854 if (flag_pic)
10855 {
10856 *cost += COSTS_N_INSNS (1);
10857 if (speed_p)
10858 *cost += extra_cost->alu.arith;
10859 }
10860
10861 return true;
10862
10863 case CONST_FIXED:
10864 *cost = COSTS_N_INSNS (4);
10865 /* Fixme. */
10866 return true;
10867
10868 case CONST_DOUBLE:
10869 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10870 && (mode == SFmode || !TARGET_VFP_SINGLE))
10871 {
10872 if (vfp3_const_double_rtx (x))
10873 {
10874 if (speed_p)
10875 *cost += extra_cost->fp[mode == DFmode].fpconst;
10876 return true;
10877 }
10878
10879 if (speed_p)
10880 {
10881 if (mode == DFmode)
10882 *cost += extra_cost->ldst.loadd;
10883 else
10884 *cost += extra_cost->ldst.loadf;
10885 }
10886 else
10887 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10888
10889 return true;
10890 }
10891 *cost = COSTS_N_INSNS (4);
10892 return true;
10893
10894 case CONST_VECTOR:
10895 /* Fixme. */
10896 if (TARGET_NEON
10897 && TARGET_HARD_FLOAT
10898 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10899 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10900 *cost = COSTS_N_INSNS (1);
10901 else
10902 *cost = COSTS_N_INSNS (4);
10903 return true;
10904
10905 case HIGH:
10906 case LO_SUM:
10907 /* When optimizing for size, we prefer constant pool entries to
10908 MOVW/MOVT pairs, so bump the cost of these slightly. */
10909 if (!speed_p)
10910 *cost += 1;
10911 return true;
10912
10913 case CLZ:
10914 if (speed_p)
10915 *cost += extra_cost->alu.clz;
10916 return false;
10917
10918 case SMIN:
10919 if (XEXP (x, 1) == const0_rtx)
10920 {
10921 if (speed_p)
10922 *cost += extra_cost->alu.log_shift;
10923 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10924 return true;
10925 }
10926 /* Fall through. */
10927 case SMAX:
10928 case UMIN:
10929 case UMAX:
10930 *cost += COSTS_N_INSNS (1);
10931 return false;
10932
10933 case TRUNCATE:
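/* The test below looks for the "high word of a widening multiply" idiom,
   e.g. (truncate:SI
          (ashiftrt:DI (mult:DI (sign_extend:DI (reg:SI))
                                (sign_extend:DI (reg:SI)))
                       (const_int 32))),
   which can be costed as a single widening multiply rather than as a
   64-bit multiply plus shift.  */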
10934 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10935 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10936 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10937 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10938 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10939 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10940 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10941 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10942 == ZERO_EXTEND))))
10943 {
10944 if (speed_p)
10945 *cost += extra_cost->mult[1].extend;
10946 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10947 ZERO_EXTEND, 0, speed_p)
10948 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10949 ZERO_EXTEND, 0, speed_p));
10950 return true;
10951 }
10952 *cost = LIBCALL_COST (1);
10953 return false;
10954
10955 case UNSPEC_VOLATILE:
10956 case UNSPEC:
10957 return arm_unspec_cost (x, outer_code, speed_p, cost);
10958
10959 case PC:
10960 /* Reading the PC is like reading any other register. Writing it
10961 is more expensive, but we take that into account elsewhere. */
10962 *cost = 0;
10963 return true;
10964
10965 case ZERO_EXTRACT:
10966 /* TODO: Simple zero_extract of bottom bits using AND. */
10967 /* Fall through. */
10968 case SIGN_EXTRACT:
10969 if (arm_arch6
10970 && mode == SImode
10971 && CONST_INT_P (XEXP (x, 1))
10972 && CONST_INT_P (XEXP (x, 2)))
10973 {
10974 if (speed_p)
10975 *cost += extra_cost->alu.bfx;
10976 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10977 return true;
10978 }
10979 /* Without UBFX/SBFX, need to resort to shift operations. */
10980 *cost += COSTS_N_INSNS (1);
10981 if (speed_p)
10982 *cost += 2 * extra_cost->alu.shift;
10983 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10984 return true;
10985
10986 case FLOAT_EXTEND:
10987 if (TARGET_HARD_FLOAT)
10988 {
10989 if (speed_p)
10990 *cost += extra_cost->fp[mode == DFmode].widen;
10991 if (!TARGET_FPU_ARMV8
10992 && GET_MODE (XEXP (x, 0)) == HFmode)
10993 {
10994 /* Pre v8, widening HF->DF is a two-step process, first
10995 widening to SFmode. */
10996 *cost += COSTS_N_INSNS (1);
10997 if (speed_p)
10998 *cost += extra_cost->fp[0].widen;
10999 }
11000 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11001 return true;
11002 }
11003
11004 *cost = LIBCALL_COST (1);
11005 return false;
11006
11007 case FLOAT_TRUNCATE:
11008 if (TARGET_HARD_FLOAT)
11009 {
11010 if (speed_p)
11011 *cost += extra_cost->fp[mode == DFmode].narrow;
11012 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11013 return true;
11014 /* Vector modes? */
11015 }
11016 *cost = LIBCALL_COST (1);
11017 return false;
11018
11019 case FMA:
11020 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11021 {
11022 rtx op0 = XEXP (x, 0);
11023 rtx op1 = XEXP (x, 1);
11024 rtx op2 = XEXP (x, 2);
11025
11026
11027 /* vfms or vfnma. */
11028 if (GET_CODE (op0) == NEG)
11029 op0 = XEXP (op0, 0);
11030
11031 /* vfnms or vfnma. */
11032 if (GET_CODE (op2) == NEG)
11033 op2 = XEXP (op2, 0);
11034
11035 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11036 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11037 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11038
11039 if (speed_p)
11040 *cost += extra_cost->fp[mode == DFmode].fma;
11041
11042 return true;
11043 }
11044
11045 *cost = LIBCALL_COST (3);
11046 return false;
11047
11048 case FIX:
11049 case UNSIGNED_FIX:
11050 if (TARGET_HARD_FLOAT)
11051 {
11052 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11053 a vcvt fixed-point conversion. */
11054 if (code == FIX && mode == SImode
11055 && GET_CODE (XEXP (x, 0)) == FIX
11056 && GET_MODE (XEXP (x, 0)) == SFmode
11057 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11058 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11059 > 0)
11060 {
11061 if (speed_p)
11062 *cost += extra_cost->fp[0].toint;
11063
11064 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11065 code, 0, speed_p);
11066 return true;
11067 }
11068
11069 if (GET_MODE_CLASS (mode) == MODE_INT)
11070 {
11071 mode = GET_MODE (XEXP (x, 0));
11072 if (speed_p)
11073 *cost += extra_cost->fp[mode == DFmode].toint;
11074 /* Strip off the 'cost' of rounding towards zero. */
11075 if (GET_CODE (XEXP (x, 0)) == FIX)
11076 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11077 0, speed_p);
11078 else
11079 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11080 /* ??? Increase the cost to deal with transferring from
11081 FP -> CORE registers? */
11082 return true;
11083 }
11084 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11085 && TARGET_FPU_ARMV8)
11086 {
11087 if (speed_p)
11088 *cost += extra_cost->fp[mode == DFmode].roundint;
11089 return false;
11090 }
11091 /* Vector costs? */
11092 }
11093 *cost = LIBCALL_COST (1);
11094 return false;
11095
11096 case FLOAT:
11097 case UNSIGNED_FLOAT:
11098 if (TARGET_HARD_FLOAT)
11099 {
11100 /* ??? Increase the cost to deal with transferring from CORE
11101 -> FP registers? */
11102 if (speed_p)
11103 *cost += extra_cost->fp[mode == DFmode].fromint;
11104 return false;
11105 }
11106 *cost = LIBCALL_COST (1);
11107 return false;
11108
11109 case CALL:
11110 return true;
11111
11112 case ASM_OPERANDS:
11113 {
11114 /* Just a guess: the number of instructions in the asm string
11115 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11116 though (see PR60663). */
11117 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11118 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11119
11120 *cost = COSTS_N_INSNS (asm_length + num_operands);
11121 return true;
11122 }
11123 default:
11124 if (mode != VOIDmode)
11125 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11126 else
11127 *cost = COSTS_N_INSNS (4); /* Who knows? */
11128 return false;
11129 }
11130 }
11131
11132 #undef HANDLE_NARROW_SHIFT_ARITH
11133
11134 /* Implement TARGET_RTX_COSTS. Dispatch to the core-specific or table-driven cost computation as appropriate. */
11135 static bool
11136 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11137 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11138 {
11139 bool result;
11140 int code = GET_CODE (x);
11141
11142 if (TARGET_OLD_RTX_COSTS
11143 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11144 {
11145 /* Old way. (Deprecated.) */
11146 if (!speed)
11147 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11148 (enum rtx_code) outer_code, total);
11149 else
11150 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11151 (enum rtx_code) outer_code, total,
11152 speed);
11153 }
11154 else
11155 {
11156 /* New way. */
11157 if (current_tune->insn_extra_cost)
11158 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11159 (enum rtx_code) outer_code,
11160 current_tune->insn_extra_cost,
11161 total, speed);
11162 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11163 && current_tune->insn_extra_cost == NULL */
11164 else
11165 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11166 (enum rtx_code) outer_code,
11167 &generic_extra_costs, total, speed);
11168 }
11169
11170 if (dump_file && (dump_flags & TDF_DETAILS))
11171 {
11172 print_rtl_single (dump_file, x);
11173 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11174 *total, result ? "final" : "partial");
11175 }
11176 return result;
11177 }
11178
11179 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11180 supported on any "slowmul" cores, so it can be ignored. */
11181
11182 static bool
11183 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11184 int *total, bool speed)
11185 {
11186 machine_mode mode = GET_MODE (x);
11187
11188 if (TARGET_THUMB)
11189 {
11190 *total = thumb1_rtx_costs (x, code, outer_code);
11191 return true;
11192 }
11193
11194 switch (code)
11195 {
11196 case MULT:
11197 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11198 || mode == DImode)
11199 {
11200 *total = COSTS_N_INSNS (20);
11201 return false;
11202 }
11203
11204 if (CONST_INT_P (XEXP (x, 1)))
11205 {
11206 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11207 & (unsigned HOST_WIDE_INT) 0xffffffff);
11208 int cost, const_ok = const_ok_for_arm (i);
11209 int j, booth_unit_size;
11210
11211 /* Tune as appropriate. */
11212 cost = const_ok ? 4 : 8;
11213 booth_unit_size = 2;
11214 for (j = 0; i && j < 32; j += booth_unit_size)
11215 {
11216 i >>= booth_unit_size;
11217 cost++;
11218 }
11219
11220 *total = COSTS_N_INSNS (cost);
11221 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
11222 return true;
11223 }
11224
11225 *total = COSTS_N_INSNS (20);
11226 return false;
11227
11228 default:
11229 return arm_rtx_costs_1 (x, outer_code, total, speed);
11230 }
11231 }
11232
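/* Worked example of the constant-multiply costing above (illustrative
   only): multiplying by 0xff is const_ok_for_arm, so the base cost is 4,
   and the Booth loop (booth_unit_size == 2) then retires two bits per
   step, adding one for each of the four steps needed to consume 0xff,
   giving COSTS_N_INSNS (8) plus the cost of the other operand.  A
   constant that is not a valid immediate starts from a base of 8.  */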
11233
11234 /* RTX cost for cores with a fast multiply unit (M variants). */
11235
11236 static bool
11237 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11238 int *total, bool speed)
11239 {
11240 machine_mode mode = GET_MODE (x);
11241
11242 if (TARGET_THUMB1)
11243 {
11244 *total = thumb1_rtx_costs (x, code, outer_code);
11245 return true;
11246 }
11247
11248 /* ??? should thumb2 use different costs? */
11249 switch (code)
11250 {
11251 case MULT:
11252 /* There is no point basing this on the tuning, since it is always the
11253 fast variant if it exists at all. */
11254 if (mode == DImode
11255 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11256 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11257 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11258 {
11259 *total = COSTS_N_INSNS (2);
11260 return false;
11261 }
11262
11263
11264 if (mode == DImode)
11265 {
11266 *total = COSTS_N_INSNS (5);
11267 return false;
11268 }
11269
11270 if (CONST_INT_P (XEXP (x, 1)))
11271 {
11272 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11273 & (unsigned HOST_WIDE_INT) 0xffffffff);
11274 int cost, const_ok = const_ok_for_arm (i);
11275 int j, booth_unit_size;
11276
11277 /* Tune as appropriate. */
11278 cost = const_ok ? 4 : 8;
11279 booth_unit_size = 8;
11280 for (j = 0; i && j < 32; j += booth_unit_size)
11281 {
11282 i >>= booth_unit_size;
11283 cost++;
11284 }
11285
11286 *total = COSTS_N_INSNS (cost);
11287 return false;
11288 }
11289
11290 if (mode == SImode)
11291 {
11292 *total = COSTS_N_INSNS (4);
11293 return false;
11294 }
11295
11296 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11297 {
11298 if (TARGET_HARD_FLOAT
11299 && (mode == SFmode
11300 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11301 {
11302 *total = COSTS_N_INSNS (1);
11303 return false;
11304 }
11305 }
11306
11307 /* Requires a lib call. */
11308 *total = COSTS_N_INSNS (20);
11309 return false;
11310
11311 default:
11312 return arm_rtx_costs_1 (x, outer_code, total, speed);
11313 }
11314 }
11315
11316
11317 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11318 so it can be ignored. */
11319
11320 static bool
11321 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11322 int *total, bool speed)
11323 {
11324 machine_mode mode = GET_MODE (x);
11325
11326 if (TARGET_THUMB)
11327 {
11328 *total = thumb1_rtx_costs (x, code, outer_code);
11329 return true;
11330 }
11331
11332 switch (code)
11333 {
11334 case COMPARE:
11335 if (GET_CODE (XEXP (x, 0)) != MULT)
11336 return arm_rtx_costs_1 (x, outer_code, total, speed);
11337
11338 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11339 will stall until the multiplication is complete. */
11340 *total = COSTS_N_INSNS (3);
11341 return false;
11342
11343 case MULT:
11344 /* There is no point basing this on the tuning, since it is always the
11345 fast variant if it exists at all. */
11346 if (mode == DImode
11347 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11348 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11349 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11350 {
11351 *total = COSTS_N_INSNS (2);
11352 return false;
11353 }
11354
11355
11356 if (mode == DImode)
11357 {
11358 *total = COSTS_N_INSNS (5);
11359 return false;
11360 }
11361
11362 if (CONST_INT_P (XEXP (x, 1)))
11363 {
11364 /* If operand 1 is a constant we can more accurately
11365 calculate the cost of the multiply. The multiplier can
11366 retire 15 bits on the first cycle and a further 12 on the
11367 second. We do, of course, have to load the constant into
11368 a register first. */
11369 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11370 /* There's a general overhead of one cycle. */
11371 int cost = 1;
11372 unsigned HOST_WIDE_INT masked_const;
11373
11374 if (i & 0x80000000)
11375 i = ~i;
11376
11377 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11378
11379 masked_const = i & 0xffff8000;
11380 if (masked_const != 0)
11381 {
11382 cost++;
11383 masked_const = i & 0xf8000000;
11384 if (masked_const != 0)
11385 cost++;
11386 }
11387 *total = COSTS_N_INSNS (cost);
11388 return false;
11389 }
11390
11391 if (mode == SImode)
11392 {
11393 *total = COSTS_N_INSNS (3);
11394 return false;
11395 }
11396
11397 /* Requires a lib call. */
11398 *total = COSTS_N_INSNS (20);
11399 return false;
11400
11401 default:
11402 return arm_rtx_costs_1 (x, outer_code, total, speed);
11403 }
11404 }
11405
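/* Worked example of the XScale multiply costing above (illustrative
   only): for a positive constant such as 0x12345 the first mask
   (0xffff8000) leaves a nonzero value, so one cycle is added to the
   single cycle of general overhead, while the second mask (0xf8000000)
   leaves zero, giving COSTS_N_INSNS (2).  Constants below 0x8000 (or
   whose complement is, for negative values) cost just the overhead
   cycle.  */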
11406
11407 /* RTX costs for 9e (and later) cores. */
11408
11409 static bool
11410 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11411 int *total, bool speed)
11412 {
11413 machine_mode mode = GET_MODE (x);
11414
11415 if (TARGET_THUMB1)
11416 {
11417 switch (code)
11418 {
11419 case MULT:
11420 /* Small multiply: 32 cycles for an integer multiply inst. */
11421 if (arm_arch6m && arm_m_profile_small_mul)
11422 *total = COSTS_N_INSNS (32);
11423 else
11424 *total = COSTS_N_INSNS (3);
11425 return true;
11426
11427 default:
11428 *total = thumb1_rtx_costs (x, code, outer_code);
11429 return true;
11430 }
11431 }
11432
11433 switch (code)
11434 {
11435 case MULT:
11436 /* There is no point basing this on the tuning, since it is always the
11437 fast variant if it exists at all. */
11438 if (mode == DImode
11439 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11440 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11441 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11442 {
11443 *total = COSTS_N_INSNS (2);
11444 return false;
11445 }
11446
11447
11448 if (mode == DImode)
11449 {
11450 *total = COSTS_N_INSNS (5);
11451 return false;
11452 }
11453
11454 if (mode == SImode)
11455 {
11456 *total = COSTS_N_INSNS (2);
11457 return false;
11458 }
11459
11460 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11461 {
11462 if (TARGET_HARD_FLOAT
11463 && (mode == SFmode
11464 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11465 {
11466 *total = COSTS_N_INSNS (1);
11467 return false;
11468 }
11469 }
11470
11471 *total = COSTS_N_INSNS (20);
11472 return false;
11473
11474 default:
11475 return arm_rtx_costs_1 (x, outer_code, total, speed);
11476 }
11477 }
11478 /* All address computations that can be done are free, but rtx cost returns
11479 the same for practically all of them. So we weight the different types
11480 of address here in the order (most pref first):
11481 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11482 static inline int
11483 arm_arm_address_cost (rtx x)
11484 {
11485 enum rtx_code c = GET_CODE (x);
11486
11487 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11488 return 0;
11489 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11490 return 10;
11491
11492 if (c == PLUS)
11493 {
11494 if (CONST_INT_P (XEXP (x, 1)))
11495 return 2;
11496
11497 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11498 return 3;
11499
11500 return 4;
11501 }
11502
11503 return 6;
11504 }
11505
11506 static inline int
11507 arm_thumb_address_cost (rtx x)
11508 {
11509 enum rtx_code c = GET_CODE (x);
11510
11511 if (c == REG)
11512 return 1;
11513 if (c == PLUS
11514 && REG_P (XEXP (x, 0))
11515 && CONST_INT_P (XEXP (x, 1)))
11516 return 1;
11517
11518 return 2;
11519 }
11520
11521 static int
11522 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11523 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11524 {
11525 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11526 }
11527
11528 /* Adjust cost hook for XScale. */
11529 static bool
11530 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11531 {
11532 /* Some true dependencies can have a higher cost depending
11533 on precisely how certain input operands are used. */
11534 if (REG_NOTE_KIND(link) == 0
11535 && recog_memoized (insn) >= 0
11536 && recog_memoized (dep) >= 0)
11537 {
11538 int shift_opnum = get_attr_shift (insn);
11539 enum attr_type attr_type = get_attr_type (dep);
11540
11541 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11542 operand for INSN. If we have a shifted input operand and the
11543 instruction we depend on is another ALU instruction, then we may
11544 have to account for an additional stall. */
11545 if (shift_opnum != 0
11546 && (attr_type == TYPE_ALU_SHIFT_IMM
11547 || attr_type == TYPE_ALUS_SHIFT_IMM
11548 || attr_type == TYPE_LOGIC_SHIFT_IMM
11549 || attr_type == TYPE_LOGICS_SHIFT_IMM
11550 || attr_type == TYPE_ALU_SHIFT_REG
11551 || attr_type == TYPE_ALUS_SHIFT_REG
11552 || attr_type == TYPE_LOGIC_SHIFT_REG
11553 || attr_type == TYPE_LOGICS_SHIFT_REG
11554 || attr_type == TYPE_MOV_SHIFT
11555 || attr_type == TYPE_MVN_SHIFT
11556 || attr_type == TYPE_MOV_SHIFT_REG
11557 || attr_type == TYPE_MVN_SHIFT_REG))
11558 {
11559 rtx shifted_operand;
11560 int opno;
11561
11562 /* Get the shifted operand. */
11563 extract_insn (insn);
11564 shifted_operand = recog_data.operand[shift_opnum];
11565
11566 /* Iterate over all the operands in DEP. If we write an operand
11567 that overlaps with SHIFTED_OPERAND, then we have to increase the
11568 cost of this dependency. */
11569 extract_insn (dep);
11570 preprocess_constraints (dep);
11571 for (opno = 0; opno < recog_data.n_operands; opno++)
11572 {
11573 /* We can ignore strict inputs. */
11574 if (recog_data.operand_type[opno] == OP_IN)
11575 continue;
11576
11577 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11578 shifted_operand))
11579 {
11580 *cost = 2;
11581 return false;
11582 }
11583 }
11584 }
11585 }
11586 return true;
11587 }
11588
11589 /* Adjust cost hook for Cortex A9. */
11590 static bool
11591 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11592 {
11593 switch (REG_NOTE_KIND (link))
11594 {
11595 case REG_DEP_ANTI:
11596 *cost = 0;
11597 return false;
11598
11599 case REG_DEP_TRUE:
11600 case REG_DEP_OUTPUT:
11601 if (recog_memoized (insn) >= 0
11602 && recog_memoized (dep) >= 0)
11603 {
11604 if (GET_CODE (PATTERN (insn)) == SET)
11605 {
11606 if (GET_MODE_CLASS
11607 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11608 || GET_MODE_CLASS
11609 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11610 {
11611 enum attr_type attr_type_insn = get_attr_type (insn);
11612 enum attr_type attr_type_dep = get_attr_type (dep);
11613
11614 /* By default all dependencies of the form
11615 s0 = s0 <op> s1
11616 s0 = s0 <op> s2
11617 have an extra latency of 1 cycle because
11618 of the input and output dependency in this
11619 case. However this gets modeled as a true
11620 dependency, hence all these checks. */
11621 if (REG_P (SET_DEST (PATTERN (insn)))
11622 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11623 {
11624 /* FMACS is a special case where the dependent
11625 instruction can be issued 3 cycles before
11626 the normal latency in case of an output
11627 dependency. */
11628 if ((attr_type_insn == TYPE_FMACS
11629 || attr_type_insn == TYPE_FMACD)
11630 && (attr_type_dep == TYPE_FMACS
11631 || attr_type_dep == TYPE_FMACD))
11632 {
11633 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11634 *cost = insn_default_latency (dep) - 3;
11635 else
11636 *cost = insn_default_latency (dep);
11637 return false;
11638 }
11639 else
11640 {
11641 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11642 *cost = insn_default_latency (dep) + 1;
11643 else
11644 *cost = insn_default_latency (dep);
11645 }
11646 return false;
11647 }
11648 }
11649 }
11650 }
11651 break;
11652
11653 default:
11654 gcc_unreachable ();
11655 }
11656
11657 return true;
11658 }
11659
11660 /* Adjust cost hook for FA726TE. */
11661 static bool
11662 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11663 {
11664 /* For FA726TE, a true dependency on the CPSR (i.e. a flag-setting instruction
11665 followed by a predicated one) has a penalty of 3. */
11666 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11667 && recog_memoized (insn) >= 0
11668 && recog_memoized (dep) >= 0
11669 && get_attr_conds (dep) == CONDS_SET)
11670 {
11671 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11672 if (get_attr_conds (insn) == CONDS_USE
11673 && get_attr_type (insn) != TYPE_BRANCH)
11674 {
11675 *cost = 3;
11676 return false;
11677 }
11678
11679 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11680 || get_attr_conds (insn) == CONDS_USE)
11681 {
11682 *cost = 0;
11683 return false;
11684 }
11685 }
11686
11687 return true;
11688 }
11689
11690 /* Implement TARGET_REGISTER_MOVE_COST.
11691
11692 Moves between VFP_REGS and GENERAL_REGS are a single insn, but such a
11693 move is typically more expensive than a single memory access. We set
11694 the cost to less than two memory accesses so that floating
11695 point to integer conversion does not go through memory. */
11696
11697 int
11698 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11699 reg_class_t from, reg_class_t to)
11700 {
11701 if (TARGET_32BIT)
11702 {
11703 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11704 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11705 return 15;
11706 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11707 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11708 return 4;
11709 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11710 return 20;
11711 else
11712 return 2;
11713 }
11714 else
11715 {
11716 if (from == HI_REGS || to == HI_REGS)
11717 return 4;
11718 else
11719 return 2;
11720 }
11721 }
11722
11723 /* Implement TARGET_MEMORY_MOVE_COST. */
11724
11725 int
11726 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11727 bool in ATTRIBUTE_UNUSED)
11728 {
11729 if (TARGET_32BIT)
11730 return 10;
11731 else
11732 {
11733 if (GET_MODE_SIZE (mode) < 4)
11734 return 8;
11735 else
11736 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11737 }
11738 }
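/* Note on the numbers above (illustrative only): with TARGET_32BIT a
   memory move costs 10, so bouncing a value through memory costs 20,
   while a VFP<->core transfer is costed at 15 by arm_register_move_cost,
   which is what keeps float-to-integer conversions out of memory.  On
   Thumb-1 an SImode spill is costed at 2 * 4 = 8 for LO_REGS and 16
   otherwise, with sub-word values at a flat 8.  */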
11739
11740 /* Vectorizer cost model implementation. */
11741
11742 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11743 static int
11744 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11745 tree vectype,
11746 int misalign ATTRIBUTE_UNUSED)
11747 {
11748 unsigned elements;
11749
11750 switch (type_of_cost)
11751 {
11752 case scalar_stmt:
11753 return current_tune->vec_costs->scalar_stmt_cost;
11754
11755 case scalar_load:
11756 return current_tune->vec_costs->scalar_load_cost;
11757
11758 case scalar_store:
11759 return current_tune->vec_costs->scalar_store_cost;
11760
11761 case vector_stmt:
11762 return current_tune->vec_costs->vec_stmt_cost;
11763
11764 case vector_load:
11765 return current_tune->vec_costs->vec_align_load_cost;
11766
11767 case vector_store:
11768 return current_tune->vec_costs->vec_store_cost;
11769
11770 case vec_to_scalar:
11771 return current_tune->vec_costs->vec_to_scalar_cost;
11772
11773 case scalar_to_vec:
11774 return current_tune->vec_costs->scalar_to_vec_cost;
11775
11776 case unaligned_load:
11777 return current_tune->vec_costs->vec_unalign_load_cost;
11778
11779 case unaligned_store:
11780 return current_tune->vec_costs->vec_unalign_store_cost;
11781
11782 case cond_branch_taken:
11783 return current_tune->vec_costs->cond_taken_branch_cost;
11784
11785 case cond_branch_not_taken:
11786 return current_tune->vec_costs->cond_not_taken_branch_cost;
11787
11788 case vec_perm:
11789 case vec_promote_demote:
11790 return current_tune->vec_costs->vec_stmt_cost;
11791
11792 case vec_construct:
11793 elements = TYPE_VECTOR_SUBPARTS (vectype);
11794 return elements / 2 + 1;
11795
11796 default:
11797 gcc_unreachable ();
11798 }
11799 }
11800
11801 /* Implement targetm.vectorize.add_stmt_cost. */
11802
11803 static unsigned
11804 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11805 struct _stmt_vec_info *stmt_info, int misalign,
11806 enum vect_cost_model_location where)
11807 {
11808 unsigned *cost = (unsigned *) data;
11809 unsigned retval = 0;
11810
11811 if (flag_vect_cost_model)
11812 {
11813 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11814 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11815
11816 /* Statements in an inner loop relative to the loop being
11817 vectorized are weighted more heavily. The value here is
11818 arbitrary and could potentially be improved with analysis. */
11819 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11820 count *= 50; /* FIXME. */
11821
11822 retval = (unsigned) (count * stmt_cost);
11823 cost[where] += retval;
11824 }
11825
11826 return retval;
11827 }
11828
11829 /* Return true if and only if this insn can dual-issue only as older. */
11830 static bool
11831 cortexa7_older_only (rtx_insn *insn)
11832 {
11833 if (recog_memoized (insn) < 0)
11834 return false;
11835
11836 switch (get_attr_type (insn))
11837 {
11838 case TYPE_ALU_DSP_REG:
11839 case TYPE_ALU_SREG:
11840 case TYPE_ALUS_SREG:
11841 case TYPE_LOGIC_REG:
11842 case TYPE_LOGICS_REG:
11843 case TYPE_ADC_REG:
11844 case TYPE_ADCS_REG:
11845 case TYPE_ADR:
11846 case TYPE_BFM:
11847 case TYPE_REV:
11848 case TYPE_MVN_REG:
11849 case TYPE_SHIFT_IMM:
11850 case TYPE_SHIFT_REG:
11851 case TYPE_LOAD_BYTE:
11852 case TYPE_LOAD1:
11853 case TYPE_STORE1:
11854 case TYPE_FFARITHS:
11855 case TYPE_FADDS:
11856 case TYPE_FFARITHD:
11857 case TYPE_FADDD:
11858 case TYPE_FMOV:
11859 case TYPE_F_CVT:
11860 case TYPE_FCMPS:
11861 case TYPE_FCMPD:
11862 case TYPE_FCONSTS:
11863 case TYPE_FCONSTD:
11864 case TYPE_FMULS:
11865 case TYPE_FMACS:
11866 case TYPE_FMULD:
11867 case TYPE_FMACD:
11868 case TYPE_FDIVS:
11869 case TYPE_FDIVD:
11870 case TYPE_F_MRC:
11871 case TYPE_F_MRRC:
11872 case TYPE_F_FLAG:
11873 case TYPE_F_LOADS:
11874 case TYPE_F_STORES:
11875 return true;
11876 default:
11877 return false;
11878 }
11879 }
11880
11881 /* Return true if and only if this insn can dual-issue as younger. */
11882 static bool
11883 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11884 {
11885 if (recog_memoized (insn) < 0)
11886 {
11887 if (verbose > 5)
11888 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11889 return false;
11890 }
11891
11892 switch (get_attr_type (insn))
11893 {
11894 case TYPE_ALU_IMM:
11895 case TYPE_ALUS_IMM:
11896 case TYPE_LOGIC_IMM:
11897 case TYPE_LOGICS_IMM:
11898 case TYPE_EXTEND:
11899 case TYPE_MVN_IMM:
11900 case TYPE_MOV_IMM:
11901 case TYPE_MOV_REG:
11902 case TYPE_MOV_SHIFT:
11903 case TYPE_MOV_SHIFT_REG:
11904 case TYPE_BRANCH:
11905 case TYPE_CALL:
11906 return true;
11907 default:
11908 return false;
11909 }
11910 }
11911
11912
11913 /* Look for an instruction that can dual issue only as an older
11914 instruction, and move it in front of any instructions that can
11915 dual-issue as younger, while preserving the relative order of all
11916 other instructions in the ready list. This is a heuristic to help
11917 dual-issue in later cycles, by postponing issue of more flexible
11918 instructions. This heuristic may affect dual issue opportunities
11919 in the current cycle. */
11920 static void
11921 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11922 int *n_readyp, int clock)
11923 {
11924 int i;
11925 int first_older_only = -1, first_younger = -1;
11926
11927 if (verbose > 5)
11928 fprintf (file,
11929 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11930 clock,
11931 *n_readyp);
11932
11933 /* Traverse the ready list from the head (the instruction to issue
11934 first), looking for the first instruction that can issue as
11935 younger and the first instruction that can dual-issue only as
11936 older. */
11937 for (i = *n_readyp - 1; i >= 0; i--)
11938 {
11939 rtx_insn *insn = ready[i];
11940 if (cortexa7_older_only (insn))
11941 {
11942 first_older_only = i;
11943 if (verbose > 5)
11944 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11945 break;
11946 }
11947 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11948 first_younger = i;
11949 }
11950
11951 /* Nothing to reorder because either no younger insn was found, or the
11952 insn that can dual-issue only as older already appears before any
11953 insn that can dual-issue as younger. */
11954 if (first_younger == -1)
11955 {
11956 if (verbose > 5)
11957 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11958 return;
11959 }
11960
11961 /* Nothing to reorder because no older-only insn in the ready list. */
11962 if (first_older_only == -1)
11963 {
11964 if (verbose > 5)
11965 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11966 return;
11967 }
11968
11969 /* Move first_older_only insn before first_younger. */
11970 if (verbose > 5)
11971 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11972 INSN_UID (ready[first_older_only]),
11973 INSN_UID (ready[first_younger]));
11974 rtx_insn *first_older_only_insn = ready[first_older_only];
11975 for (i = first_older_only; i < first_younger; i++)
11976 {
11977 ready[i] = ready[i+1];
11978 }
11979
11980 ready[i] = first_older_only_insn;
11981 return;
11982 }
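/* For instance (illustrative only), if the head of the ready list holds
   an add-with-immediate (TYPE_ALU_IMM, "younger") followed by a load
   (TYPE_LOAD1, "older" only), the load is rotated in front of the add,
   so the inflexible load issues first and the add stays available to
   pair as the younger half of a later dual issue.  */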
11983
11984 /* Implement TARGET_SCHED_REORDER. */
11985 static int
11986 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11987 int clock)
11988 {
11989 switch (arm_tune)
11990 {
11991 case cortexa7:
11992 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11993 break;
11994 default:
11995 /* Do nothing for other cores. */
11996 break;
11997 }
11998
11999 return arm_issue_rate ();
12000 }
12001
12002 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
12003 It corrects the value of COST based on the relationship between
12004 INSN and DEP through the dependence LINK. It returns the new
12005 value. There is a per-core adjust_cost hook to adjust scheduler costs
12006 and the per-core hook can choose to completely override the generic
12007 adjust_cost function. Only put bits of code into arm_adjust_cost that
12008 are common across all cores. */
12009 static int
12010 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
12011 {
12012 rtx i_pat, d_pat;
12013
12014 /* When generating Thumb-1 code, we want to place flag-setting operations
12015 close to a conditional branch which depends on them, so that we can
12016 omit the comparison. */
12017 if (TARGET_THUMB1
12018 && REG_NOTE_KIND (link) == 0
12019 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
12020 && recog_memoized (dep) >= 0
12021 && get_attr_conds (dep) == CONDS_SET)
12022 return 0;
12023
12024 if (current_tune->sched_adjust_cost != NULL)
12025 {
12026 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
12027 return cost;
12028 }
12029
12030 /* XXX Is this strictly true? */
12031 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
12032 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
12033 return 0;
12034
12035 /* Call insns don't incur a stall, even if they follow a load. */
12036 if (REG_NOTE_KIND (link) == 0
12037 && CALL_P (insn))
12038 return 1;
12039
12040 if ((i_pat = single_set (insn)) != NULL
12041 && MEM_P (SET_SRC (i_pat))
12042 && (d_pat = single_set (dep)) != NULL
12043 && MEM_P (SET_DEST (d_pat)))
12044 {
12045 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
12046 /* This is a load after a store, there is no conflict if the load reads
12047 from a cached area. Assume that loads from the stack, and from the
12048 constant pool are cached, and that others will miss. This is a
12049 hack. */
12050
12051 if ((GET_CODE (src_mem) == SYMBOL_REF
12052 && CONSTANT_POOL_ADDRESS_P (src_mem))
12053 || reg_mentioned_p (stack_pointer_rtx, src_mem)
12054 || reg_mentioned_p (frame_pointer_rtx, src_mem)
12055 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
12056 return 1;
12057 }
12058
12059 return cost;
12060 }
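/* For example (illustrative only), a load whose address involves the
   stack or frame pointer, or a literal-pool reference, scheduled right
   after the store it depends on, is given a cost of 1 on the assumption
   that it hits the cache; other load-after-store pairs keep the cost
   passed in.  */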
12061
12062 int
12063 arm_max_conditional_execute (void)
12064 {
12065 return max_insns_skipped;
12066 }
12067
12068 static int
12069 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12070 {
12071 if (TARGET_32BIT)
12072 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12073 else
12074 return (optimize > 0) ? 2 : 0;
12075 }
12076
12077 static int
12078 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12079 {
12080 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12081 }
12082
12083 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12084 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12085 sequences of non-executed instructions in IT blocks probably take the same
12086 amount of time as executed instructions (and the IT instruction itself takes
12087 space in icache). This function was experimentally determined to give good
12088 results on a popular embedded benchmark. */
12089
12090 static int
12091 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12092 {
12093 return (TARGET_32BIT && speed_p) ? 1
12094 : arm_default_branch_cost (speed_p, predictable_p);
12095 }
12096
12097 static int
12098 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12099 {
12100 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12101 }
12102
12103 static bool fp_consts_inited = false;
12104
12105 static REAL_VALUE_TYPE value_fp0;
12106
12107 static void
12108 init_fp_table (void)
12109 {
12110 REAL_VALUE_TYPE r;
12111
12112 r = REAL_VALUE_ATOF ("0", DFmode);
12113 value_fp0 = r;
12114 fp_consts_inited = true;
12115 }
12116
12117 /* Return TRUE if rtx X is a valid immediate FP constant. */
12118 int
12119 arm_const_double_rtx (rtx x)
12120 {
12121 const REAL_VALUE_TYPE *r;
12122
12123 if (!fp_consts_inited)
12124 init_fp_table ();
12125
12126 r = CONST_DOUBLE_REAL_VALUE (x);
12127 if (REAL_VALUE_MINUS_ZERO (*r))
12128 return 0;
12129
12130 if (real_equal (r, &value_fp0))
12131 return 1;
12132
12133 return 0;
12134 }
12135
12136 /* VFPv3 has a fairly wide range of representable immediates, formed from
12137 "quarter-precision" floating-point values. These can be evaluated using this
12138 formula (with ^ for exponentiation):
12139
12140 -1^s * n * 2^-r
12141
12142 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12143 16 <= n <= 31 and 0 <= r <= 7.
12144
12145 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12146
12147 - A (most-significant) is the sign bit.
12148 - BCD are the exponent (encoded as r XOR 3).
12149 - EFGH are the mantissa (encoded as n - 16).
12150 */
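/* Illustrative sketch only, kept out of the build: decode an ABCDEFGH
   index (as produced by vfp3_const_double_index below) back into the
   value it represents, following the formula above.  The helper name is
   made up for illustration.  For example, 1.0 = -1^0 * 16 * 2^-4, so
   s = 0, n = 16, r = 4 and the encoded index is ((4 ^ 3) << 4) | (16 - 16),
   i.e. 0x70.  */
#if 0
static double
vfp3_quarter_precision_value (unsigned char index)
{
  int sign = (index >> 7) & 1;        /* A: sign bit.  */
  int r = ((index >> 4) & 7) ^ 3;     /* BCD: exponent, encoded as r XOR 3.  */
  int n = (index & 0xf) + 16;         /* EFGH: mantissa, encoded as n - 16.  */

  return (sign ? -1.0 : 1.0) * n / (double) (1 << r);
}
#endif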
12151
12152 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12153 fconst[sd] instruction, or -1 if X isn't suitable. */
12154 static int
12155 vfp3_const_double_index (rtx x)
12156 {
12157 REAL_VALUE_TYPE r, m;
12158 int sign, exponent;
12159 unsigned HOST_WIDE_INT mantissa, mant_hi;
12160 unsigned HOST_WIDE_INT mask;
12161 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12162 bool fail;
12163
12164 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12165 return -1;
12166
12167 r = *CONST_DOUBLE_REAL_VALUE (x);
12168
12169 /* We can't represent these things, so detect them first. */
12170 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12171 return -1;
12172
12173 /* Extract sign, exponent and mantissa. */
12174 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12175 r = real_value_abs (&r);
12176 exponent = REAL_EXP (&r);
12177 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12178 highest (sign) bit, with a fixed binary point at bit point_pos.
12179 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12180 bits for the mantissa, this may fail (low bits would be lost). */
12181 real_ldexp (&m, &r, point_pos - exponent);
12182 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12183 mantissa = w.elt (0);
12184 mant_hi = w.elt (1);
12185
12186 /* If there are bits set in the low part of the mantissa, we can't
12187 represent this value. */
12188 if (mantissa != 0)
12189 return -1;
12190
12191 /* Now make it so that mantissa contains the most-significant bits, and move
12192 the point_pos to indicate that the least-significant bits have been
12193 discarded. */
12194 point_pos -= HOST_BITS_PER_WIDE_INT;
12195 mantissa = mant_hi;
12196
12197 /* We can permit four significant bits of mantissa only, plus a high bit
12198 which is always 1. */
12199 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12200 if ((mantissa & mask) != 0)
12201 return -1;
12202
12203 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12204 mantissa >>= point_pos - 5;
12205
12206 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12207 floating-point immediate zero with Neon using an integer-zero load, but
12208 that case is handled elsewhere.) */
12209 if (mantissa == 0)
12210 return -1;
12211
12212 gcc_assert (mantissa >= 16 && mantissa <= 31);
12213
12214 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12215 normalized significands are in the range [1, 2). (Our mantissa is shifted
12216 left 4 places at this point relative to normalized IEEE754 values). GCC
12217 internally uses [0.5, 1) (see real.c), so the exponent returned from
12218 REAL_EXP must be altered. */
12219 exponent = 5 - exponent;
12220
12221 if (exponent < 0 || exponent > 7)
12222 return -1;
12223
12224 /* Sign, mantissa and exponent are now in the correct form to plug into the
12225 formula described in the comment above. */
12226 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12227 }
12228
12229 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12230 int
12231 vfp3_const_double_rtx (rtx x)
12232 {
12233 if (!TARGET_VFP3)
12234 return 0;
12235
12236 return vfp3_const_double_index (x) != -1;
12237 }
12238
12239 /* Recognize immediates which can be used in various Neon instructions. Legal
12240 immediates are described by the following table (for VMVN variants, the
12241 bitwise inverse of the constant shown is recognized. In either case, VMOV
12242 is output and the correct instruction to use for a given constant is chosen
12243 by the assembler). The constant shown is replicated across all elements of
12244 the destination vector.
12245
12246 insn elems variant constant (binary)
12247 ---- ----- ------- -----------------
12248 vmov i32 0 00000000 00000000 00000000 abcdefgh
12249 vmov i32 1 00000000 00000000 abcdefgh 00000000
12250 vmov i32 2 00000000 abcdefgh 00000000 00000000
12251 vmov i32 3 abcdefgh 00000000 00000000 00000000
12252 vmov i16 4 00000000 abcdefgh
12253 vmov i16 5 abcdefgh 00000000
12254 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12255 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12256 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12257 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12258 vmvn i16 10 00000000 abcdefgh
12259 vmvn i16 11 abcdefgh 00000000
12260 vmov i32 12 00000000 00000000 abcdefgh 11111111
12261 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12262 vmov i32 14 00000000 abcdefgh 11111111 11111111
12263 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12264 vmov i8 16 abcdefgh
12265 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12266 eeeeeeee ffffffff gggggggg hhhhhhhh
12267 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12268 vmov f32 19 00000000 00000000 00000000 00000000
12269
12270 For case 18, B = !b. Representable values are exactly those accepted by
12271 vfp3_const_double_index, but are output as floating-point numbers rather
12272 than indices.
12273
12274 For case 19, we will change it to vmov.i32 when assembling.
12275
12276 Variants 0-5 (inclusive) may also be used as immediates for the second
12277 operand of VORR/VBIC instructions.
12278
12279 The INVERSE argument causes the bitwise inverse of the given operand to be
12280 recognized instead (used for recognizing legal immediates for the VAND/VORN
12281 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12282 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12283 output, rather than the real insns vbic/vorr).
12284
12285 INVERSE makes no difference to the recognition of float vectors.
12286
12287 The return value is the variant of immediate as shown in the above table, or
12288 -1 if the given value doesn't match any of the listed patterns.
12289 */
12290 static int
12291 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12292 rtx *modconst, int *elementwidth)
12293 {
12294 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12295 matches = 1; \
12296 for (i = 0; i < idx; i += (STRIDE)) \
12297 if (!(TEST)) \
12298 matches = 0; \
12299 if (matches) \
12300 { \
12301 immtype = (CLASS); \
12302 elsize = (ELSIZE); \
12303 break; \
12304 }
12305
12306 unsigned int i, elsize = 0, idx = 0, n_elts;
12307 unsigned int innersize;
12308 unsigned char bytes[16];
12309 int immtype = -1, matches;
12310 unsigned int invmask = inverse ? 0xff : 0;
12311 bool vector = GET_CODE (op) == CONST_VECTOR;
12312
12313 if (vector)
12314 n_elts = CONST_VECTOR_NUNITS (op);
12315 else
12316 {
12317 n_elts = 1;
12318 if (mode == VOIDmode)
12319 mode = DImode;
12320 }
12321
12322 innersize = GET_MODE_UNIT_SIZE (mode);
12323
12324 /* Vectors of float constants. */
12325 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12326 {
12327 rtx el0 = CONST_VECTOR_ELT (op, 0);
12328 const REAL_VALUE_TYPE *r0;
12329
12330 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12331 return -1;
12332
12333 r0 = CONST_DOUBLE_REAL_VALUE (el0);
12334
12335 for (i = 1; i < n_elts; i++)
12336 {
12337 rtx elt = CONST_VECTOR_ELT (op, i);
12338 if (!real_equal (r0, CONST_DOUBLE_REAL_VALUE (elt)))
12339 return -1;
12340 }
12341
12342 if (modconst)
12343 *modconst = CONST_VECTOR_ELT (op, 0);
12344
12345 if (elementwidth)
12346 *elementwidth = 0;
12347
12348 if (el0 == CONST0_RTX (GET_MODE (el0)))
12349 return 19;
12350 else
12351 return 18;
12352 }
12353
12354 /* Splat vector constant out into a byte vector. */
12355 for (i = 0; i < n_elts; i++)
12356 {
12357 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12358 unsigned HOST_WIDE_INT elpart;
12359
12360 gcc_assert (CONST_INT_P (el));
12361 elpart = INTVAL (el);
12362
12363 for (unsigned int byte = 0; byte < innersize; byte++)
12364 {
12365 bytes[idx++] = (elpart & 0xff) ^ invmask;
12366 elpart >>= BITS_PER_UNIT;
12367 }
12368 }
12369
12370 /* Sanity check. */
12371 gcc_assert (idx == GET_MODE_SIZE (mode));
12372
12373 do
12374 {
12375 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12376 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12377
12378 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12379 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12380
12381 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12382 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12383
12384 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12385 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12386
12387 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12388
12389 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12390
12391 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12392 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12393
12394 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12395 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12396
12397 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12398 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12399
12400 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12401 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12402
12403 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12404
12405 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12406
12407 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12408 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12409
12410 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12411 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12412
12413 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12414 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12415
12416 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12417 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12418
12419 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12420
12421 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12422 && bytes[i] == bytes[(i + 8) % idx]);
12423 }
12424 while (0);
12425
12426 if (immtype == -1)
12427 return -1;
12428
12429 if (elementwidth)
12430 *elementwidth = elsize;
12431
12432 if (modconst)
12433 {
12434 unsigned HOST_WIDE_INT imm = 0;
12435
12436 /* Un-invert bytes of recognized vector, if necessary. */
12437 if (invmask != 0)
12438 for (i = 0; i < idx; i++)
12439 bytes[i] ^= invmask;
12440
12441 if (immtype == 17)
12442 {
12443 /* FIXME: Broken on 32-bit H_W_I hosts. */
12444 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12445
12446 for (i = 0; i < 8; i++)
12447 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12448 << (i * BITS_PER_UNIT);
12449
12450 *modconst = GEN_INT (imm);
12451 }
12452 else
12453 {
12454 unsigned HOST_WIDE_INT imm = 0;
12455
12456 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12457 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12458
12459 *modconst = GEN_INT (imm);
12460 }
12461 }
12462
12463 return immtype;
12464 #undef CHECK
12465 }
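/* For instance (following the table above), a V4SImode constant with
   every element equal to 0x0000ab00 splats to the byte pattern
   { 00, ab, 00, 00, ... }, matches variant 1 with an element width of
   32, and *MODCONST is set to 0xab00.  */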
12466
12467 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12468 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12469 float elements), and a modified constant (whatever should be output for a
12470 VMOV) in *MODCONST. */
12471
12472 int
12473 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12474 rtx *modconst, int *elementwidth)
12475 {
12476 rtx tmpconst;
12477 int tmpwidth;
12478 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12479
12480 if (retval == -1)
12481 return 0;
12482
12483 if (modconst)
12484 *modconst = tmpconst;
12485
12486 if (elementwidth)
12487 *elementwidth = tmpwidth;
12488
12489 return 1;
12490 }
12491
12492 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12493 the immediate is valid, write a constant suitable for using as an operand
12494 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12495 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12496
12497 int
12498 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12499 rtx *modconst, int *elementwidth)
12500 {
12501 rtx tmpconst;
12502 int tmpwidth;
12503 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12504
12505 if (retval < 0 || retval > 5)
12506 return 0;
12507
12508 if (modconst)
12509 *modconst = tmpconst;
12510
12511 if (elementwidth)
12512 *elementwidth = tmpwidth;
12513
12514 return 1;
12515 }
12516
12517 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12518 the immediate is valid, write a constant suitable for using as an operand
12519 to VSHR/VSHL to *MODCONST and the corresponding element width to
12520 *ELEMENTWIDTH. ISLEFTSHIFT says whether the shift is a left or a right
12521 shift, because the two have different limits. */
12522
12523 int
12524 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12525 rtx *modconst, int *elementwidth,
12526 bool isleftshift)
12527 {
12528 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12529 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12530 unsigned HOST_WIDE_INT last_elt = 0;
12531 unsigned HOST_WIDE_INT maxshift;
12532
12533 /* All elements of the vector constant must be the same. */
12534 for (i = 0; i < n_elts; i++)
12535 {
12536 rtx el = CONST_VECTOR_ELT (op, i);
12537 unsigned HOST_WIDE_INT elpart;
12538
12539 if (CONST_INT_P (el))
12540 elpart = INTVAL (el);
12541 else if (CONST_DOUBLE_P (el))
12542 return 0;
12543 else
12544 gcc_unreachable ();
12545
12546 if (i != 0 && elpart != last_elt)
12547 return 0;
12548
12549 last_elt = elpart;
12550 }
12551
12552 /* Shift less than element size. */
12553 maxshift = innersize * 8;
12554
12555 if (isleftshift)
12556 {
12557 /* Left shift immediate value can be from 0 to <size>-1. */
12558 if (last_elt >= maxshift)
12559 return 0;
12560 }
12561 else
12562 {
12563 /* Right shift immediate value can be from 1 to <size>. */
12564 if (last_elt == 0 || last_elt > maxshift)
12565 return 0;
12566 }
12567
12568 if (elementwidth)
12569 *elementwidth = innersize * 8;
12570
12571 if (modconst)
12572 *modconst = CONST_VECTOR_ELT (op, 0);
12573
12574 return 1;
12575 }
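/* For instance, a V8HImode constant with every element equal to 16 has
   an element size of 16 bits, so 16 is accepted as a right-shift count
   (valid range 1 to 16) but rejected as a left-shift count (valid range
   0 to 15).  */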
12576
12577 /* Return a string suitable for output of Neon immediate logic operation
12578 MNEM. */
12579
12580 char *
12581 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12582 int inverse, int quad)
12583 {
12584 int width, is_valid;
12585 static char templ[40];
12586
12587 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12588
12589 gcc_assert (is_valid != 0);
12590
12591 if (quad)
12592 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12593 else
12594 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12595
12596 return templ;
12597 }
12598
12599 /* Return a string suitable for output of Neon immediate shift operation
12600 (VSHR or VSHL) MNEM. */
12601
12602 char *
12603 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12604 machine_mode mode, int quad,
12605 bool isleftshift)
12606 {
12607 int width, is_valid;
12608 static char templ[40];
12609
12610 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12611 gcc_assert (is_valid != 0);
12612
12613 if (quad)
12614 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12615 else
12616 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12617
12618 return templ;
12619 }
12620
12621 /* Output a sequence of pairwise operations to implement a reduction.
12622 NOTE: We do "too much work" here, because pairwise operations work on two
12623 registers-worth of operands in one go. Unfortunately we can't exploit those
12624 extra calculations to do the full operation in fewer steps, I don't think.
12625 Although all vector elements of the result but the first are ignored, we
12626 actually calculate the same result in each of the elements. An alternative
12627 such as initially loading a vector with zero to use as each of the second
12628 operands would use up an additional register and take an extra instruction,
12629 for no particular gain. */
12630
12631 void
12632 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12633 rtx (*reduc) (rtx, rtx, rtx))
12634 {
12635 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12636 rtx tmpsum = op1;
12637
12638 for (i = parts / 2; i >= 1; i /= 2)
12639 {
12640 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12641 emit_insn (reduc (dest, tmpsum, tmpsum));
12642 tmpsum = dest;
12643 }
12644 }
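/* For a V4SImode operand (parts == 4) the loop above emits two pairwise
   operations: the first folds { a, b, c, d } into adjacent partial
   results in a scratch register, and the second folds those again into
   OP0, so every lane of the result, including lane 0, holds the full
   reduction.  */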
12645
12646 /* If VALS is a vector constant that can be loaded into a register
12647 using VDUP, generate instructions to do so and return an RTX to
12648 assign to the register. Otherwise return NULL_RTX. */
12649
12650 static rtx
12651 neon_vdup_constant (rtx vals)
12652 {
12653 machine_mode mode = GET_MODE (vals);
12654 machine_mode inner_mode = GET_MODE_INNER (mode);
12655 rtx x;
12656
12657 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12658 return NULL_RTX;
12659
12660 if (!const_vec_duplicate_p (vals, &x))
12661 /* The elements are not all the same. We could handle repeating
12662 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12663 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12664 vdup.i16). */
12665 return NULL_RTX;
12666
12667 /* We can load this constant by using VDUP and a constant in a
12668 single ARM register. This will be cheaper than a vector
12669 load. */
12670
12671 x = copy_to_mode_reg (inner_mode, x);
12672 return gen_rtx_VEC_DUPLICATE (mode, x);
12673 }
12674
12675 /* Generate code to load VALS, which is a PARALLEL containing only
12676 constants (for vec_init) or CONST_VECTOR, efficiently into a
12677 register. Returns an RTX to copy into the register, or NULL_RTX
12678 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12679
12680 rtx
12681 neon_make_constant (rtx vals)
12682 {
12683 machine_mode mode = GET_MODE (vals);
12684 rtx target;
12685 rtx const_vec = NULL_RTX;
12686 int n_elts = GET_MODE_NUNITS (mode);
12687 int n_const = 0;
12688 int i;
12689
12690 if (GET_CODE (vals) == CONST_VECTOR)
12691 const_vec = vals;
12692 else if (GET_CODE (vals) == PARALLEL)
12693 {
12694 /* A CONST_VECTOR must contain only CONST_INTs and
12695 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12696 Only store valid constants in a CONST_VECTOR. */
12697 for (i = 0; i < n_elts; ++i)
12698 {
12699 rtx x = XVECEXP (vals, 0, i);
12700 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12701 n_const++;
12702 }
12703 if (n_const == n_elts)
12704 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12705 }
12706 else
12707 gcc_unreachable ();
12708
12709 if (const_vec != NULL
12710 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12711 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12712 return const_vec;
12713 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12714 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12715 pipeline cycle; creating the constant takes one or two ARM
12716 pipeline cycles. */
12717 return target;
12718 else if (const_vec != NULL_RTX)
12719 /* Load from constant pool. On Cortex-A8 this takes two cycles
12720 (for either double or quad vectors). We can not take advantage
12721 of single-cycle VLD1 because we need a PC-relative addressing
12722 mode. */
12723 return const_vec;
12724 else
12725 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12726 We can not construct an initializer. */
12727 return NULL_RTX;
12728 }
12729
12730 /* Initialize vector TARGET to VALS. */
12731
12732 void
12733 neon_expand_vector_init (rtx target, rtx vals)
12734 {
12735 machine_mode mode = GET_MODE (target);
12736 machine_mode inner_mode = GET_MODE_INNER (mode);
12737 int n_elts = GET_MODE_NUNITS (mode);
12738 int n_var = 0, one_var = -1;
12739 bool all_same = true;
12740 rtx x, mem;
12741 int i;
12742
12743 for (i = 0; i < n_elts; ++i)
12744 {
12745 x = XVECEXP (vals, 0, i);
12746 if (!CONSTANT_P (x))
12747 ++n_var, one_var = i;
12748
12749 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12750 all_same = false;
12751 }
12752
12753 if (n_var == 0)
12754 {
12755 rtx constant = neon_make_constant (vals);
12756 if (constant != NULL_RTX)
12757 {
12758 emit_move_insn (target, constant);
12759 return;
12760 }
12761 }
12762
12763 /* Splat a single non-constant element if we can. */
12764 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12765 {
12766 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12767 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12768 return;
12769 }
12770
12771 /* One field is non-constant. Load constant then overwrite varying
12772 field. This is more efficient than using the stack. */
12773 if (n_var == 1)
12774 {
12775 rtx copy = copy_rtx (vals);
12776 rtx index = GEN_INT (one_var);
12777
12778 /* Load constant part of vector, substitute neighboring value for
12779 varying element. */
12780 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12781 neon_expand_vector_init (target, copy);
12782
12783 /* Insert variable. */
12784 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12785 switch (mode)
12786 {
12787 case V8QImode:
12788 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12789 break;
12790 case V16QImode:
12791 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12792 break;
12793 case V4HImode:
12794 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12795 break;
12796 case V8HImode:
12797 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12798 break;
12799 case V2SImode:
12800 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12801 break;
12802 case V4SImode:
12803 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12804 break;
12805 case V2SFmode:
12806 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12807 break;
12808 case V4SFmode:
12809 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12810 break;
12811 case V2DImode:
12812 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12813 break;
12814 default:
12815 gcc_unreachable ();
12816 }
12817 return;
12818 }
12819
12820 /* Construct the vector in memory one field at a time
12821 and load the whole vector. */
12822 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12823 for (i = 0; i < n_elts; i++)
12824 emit_move_insn (adjust_address_nv (mem, inner_mode,
12825 i * GET_MODE_SIZE (inner_mode)),
12826 XVECEXP (vals, 0, i));
12827 emit_move_insn (target, mem);
12828 }
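
/* Worked example (hypothetical values, for illustration only): initializing
   a V4SI vector to {a, 1, 2, 3}, where only A is non-constant, takes the
   n_var == 1 path above: the constant copy {1, 1, 2, 3} is built first (the
   varying lane 0 is temporarily filled with its neighbour, lane 1), and then
   A is inserted with gen_neon_vset_lanev4si. An all-equal non-constant
   initializer such as {a, a, a, a} is instead handled by the earlier
   VEC_DUPLICATE (vdup) path. */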
12829
12830 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Report
12831 an error, using DESC to describe the operand, if it doesn't. EXP indicates
12832 the source location, which includes the inlining history for intrinsics. */
12833
12834 static void
12835 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12836 const_tree exp, const char *desc)
12837 {
12838 HOST_WIDE_INT lane;
12839
12840 gcc_assert (CONST_INT_P (operand));
12841
12842 lane = INTVAL (operand);
12843
12844 if (lane < low || lane >= high)
12845 {
12846 if (exp)
12847 error ("%K%s %wd out of range %wd - %wd",
12848 exp, desc, lane, low, high - 1);
12849 else
12850 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12851 }
12852 }
12853
12854 /* Bounds-check lanes. */
12855
12856 void
12857 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12858 const_tree exp)
12859 {
12860 bounds_check (operand, low, high, exp, "lane");
12861 }
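
/* For example (illustrative only): a lane operand of 4 for a four-lane
   vector checked with neon_lane_bounds (op, 0, 4, exp) is rejected as
   "lane 4 out of range 0 - 3", following the format string used in
   bounds_check above. */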
12862
12863 /* Bounds-check constants. */
12864
12865 void
12866 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12867 {
12868 bounds_check (operand, low, high, NULL_TREE, "constant");
12869 }
12870
12871 HOST_WIDE_INT
12872 neon_element_bits (machine_mode mode)
12873 {
12874 return GET_MODE_UNIT_BITSIZE (mode);
12875 }
12876
12877 \f
12878 /* Predicates for `match_operand' and `match_operator'. */
12879
12880 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12881 WB is true if full writeback address modes are allowed; if it is false,
12882 only the limited writeback address modes (POST_INC and PRE_DEC) are
12883 allowed. */
12884
12885 int
12886 arm_coproc_mem_operand (rtx op, bool wb)
12887 {
12888 rtx ind;
12889
12890 /* Reject eliminable registers. */
12891 if (! (reload_in_progress || reload_completed || lra_in_progress)
12892 && ( reg_mentioned_p (frame_pointer_rtx, op)
12893 || reg_mentioned_p (arg_pointer_rtx, op)
12894 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12895 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12896 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12897 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12898 return FALSE;
12899
12900 /* Constants are converted into offsets from labels. */
12901 if (!MEM_P (op))
12902 return FALSE;
12903
12904 ind = XEXP (op, 0);
12905
12906 if (reload_completed
12907 && (GET_CODE (ind) == LABEL_REF
12908 || (GET_CODE (ind) == CONST
12909 && GET_CODE (XEXP (ind, 0)) == PLUS
12910 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12911 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12912 return TRUE;
12913
12914 /* Match: (mem (reg)). */
12915 if (REG_P (ind))
12916 return arm_address_register_rtx_p (ind, 0);
12917
12918 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12919 acceptable in any case (subject to verification by
12920 arm_address_register_rtx_p). We need WB to be true to accept
12921 PRE_INC and POST_DEC. */
12922 if (GET_CODE (ind) == POST_INC
12923 || GET_CODE (ind) == PRE_DEC
12924 || (wb
12925 && (GET_CODE (ind) == PRE_INC
12926 || GET_CODE (ind) == POST_DEC)))
12927 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12928
12929 if (wb
12930 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12931 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12932 && GET_CODE (XEXP (ind, 1)) == PLUS
12933 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12934 ind = XEXP (ind, 1);
12935
12936 /* Match:
12937 (plus (reg)
12938 (const)). */
12939 if (GET_CODE (ind) == PLUS
12940 && REG_P (XEXP (ind, 0))
12941 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12942 && CONST_INT_P (XEXP (ind, 1))
12943 && INTVAL (XEXP (ind, 1)) > -1024
12944 && INTVAL (XEXP (ind, 1)) < 1024
12945 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12946 return TRUE;
12947
12948 return FALSE;
12949 }
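
/* Illustrative examples of the forms accepted above (register names chosen
   arbitrarily): [r4], [r4, #-1020] and [r4, #1020] match the
   (plus (reg) (const)) case, since those offsets are multiples of 4 with
   magnitude below 1024, whereas [r4, #1022] does not. POST_INC and PRE_DEC
   addresses are accepted regardless of WB; PRE_INC and POST_DEC require
   WB. */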
12950
12951 /* Return TRUE if OP is a memory operand which we can use to load or store
12952 a vector. TYPE is one of the following values:
12953 0 - Vector load/store (vldr)
12954 1 - Core registers (ldm)
12955 2 - Element/structure loads (vld1)
12956 */
12957 int
12958 neon_vector_mem_operand (rtx op, int type, bool strict)
12959 {
12960 rtx ind;
12961
12962 /* Reject eliminable registers. */
12963 if (! (reload_in_progress || reload_completed)
12964 && ( reg_mentioned_p (frame_pointer_rtx, op)
12965 || reg_mentioned_p (arg_pointer_rtx, op)
12966 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12967 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12968 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12969 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12970 return !strict;
12971
12972 /* Constants are converted into offsets from labels. */
12973 if (!MEM_P (op))
12974 return FALSE;
12975
12976 ind = XEXP (op, 0);
12977
12978 if (reload_completed
12979 && (GET_CODE (ind) == LABEL_REF
12980 || (GET_CODE (ind) == CONST
12981 && GET_CODE (XEXP (ind, 0)) == PLUS
12982 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12983 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12984 return TRUE;
12985
12986 /* Match: (mem (reg)). */
12987 if (REG_P (ind))
12988 return arm_address_register_rtx_p (ind, 0);
12989
12990 /* Allow post-increment with Neon registers. */
12991 if ((type != 1 && GET_CODE (ind) == POST_INC)
12992 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12993 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12994
12995 /* Allow post-increment by register for VLDn. */
12996 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12997 && GET_CODE (XEXP (ind, 1)) == PLUS
12998 && REG_P (XEXP (XEXP (ind, 1), 1)))
12999 return true;
13000
13001 /* Match:
13002 (plus (reg)
13003 (const)). */
13004 if (type == 0
13005 && GET_CODE (ind) == PLUS
13006 && REG_P (XEXP (ind, 0))
13007 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
13008 && CONST_INT_P (XEXP (ind, 1))
13009 && INTVAL (XEXP (ind, 1)) > -1024
13010 /* For quad modes, we restrict the constant offset to be slightly less
13011 than what the instruction format permits. We have no such constraint
13012 on double mode offsets. (This must match arm_legitimate_index_p.) */
13013 && (INTVAL (XEXP (ind, 1))
13014 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
13015 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
13016 return TRUE;
13017
13018 return FALSE;
13019 }
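
/* For instance (offsets derived from the checks above): with TYPE == 0 a
   double-word vector may use offsets from [rn, #-1020] up to [rn, #1020],
   while a quad-word vector is limited to [rn, #1012] at the top end because
   of the 1016 bound noted above. TYPE == 2 additionally accepts the
   register post-increment form used by VLDn, e.g. [rn], rm. */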
13020
13021 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
13022 type. */
13023 int
13024 neon_struct_mem_operand (rtx op)
13025 {
13026 rtx ind;
13027
13028 /* Reject eliminable registers. */
13029 if (! (reload_in_progress || reload_completed)
13030 && ( reg_mentioned_p (frame_pointer_rtx, op)
13031 || reg_mentioned_p (arg_pointer_rtx, op)
13032 || reg_mentioned_p (virtual_incoming_args_rtx, op)
13033 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13034 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13035 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13036 return FALSE;
13037
13038 /* Constants are converted into offsets from labels. */
13039 if (!MEM_P (op))
13040 return FALSE;
13041
13042 ind = XEXP (op, 0);
13043
13044 if (reload_completed
13045 && (GET_CODE (ind) == LABEL_REF
13046 || (GET_CODE (ind) == CONST
13047 && GET_CODE (XEXP (ind, 0)) == PLUS
13048 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13049 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13050 return TRUE;
13051
13052 /* Match: (mem (reg)). */
13053 if (REG_P (ind))
13054 return arm_address_register_rtx_p (ind, 0);
13055
13056 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13057 if (GET_CODE (ind) == POST_INC
13058 || GET_CODE (ind) == PRE_DEC)
13059 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13060
13061 return FALSE;
13062 }
13063
13064 /* Return true if X is a register that will be eliminated later on. */
13065 int
13066 arm_eliminable_register (rtx x)
13067 {
13068 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13069 || REGNO (x) == ARG_POINTER_REGNUM
13070 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13071 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13072 }
13073
13074 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13075 coprocessor registers. Otherwise return NO_REGS. */
13076
13077 enum reg_class
13078 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13079 {
13080 if (mode == HFmode)
13081 {
13082 if (!TARGET_NEON_FP16)
13083 return GENERAL_REGS;
13084 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13085 return NO_REGS;
13086 return GENERAL_REGS;
13087 }
13088
13089 /* The neon move patterns handle all legitimate vector and struct
13090 addresses. */
13091 if (TARGET_NEON
13092 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13093 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13094 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13095 || VALID_NEON_STRUCT_MODE (mode)))
13096 return NO_REGS;
13097
13098 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13099 return NO_REGS;
13100
13101 return GENERAL_REGS;
13102 }
13103
13104 /* Values which must be returned in the most-significant end of the return
13105 register. */
13106
13107 static bool
13108 arm_return_in_msb (const_tree valtype)
13109 {
13110 return (TARGET_AAPCS_BASED
13111 && BYTES_BIG_ENDIAN
13112 && (AGGREGATE_TYPE_P (valtype)
13113 || TREE_CODE (valtype) == COMPLEX_TYPE
13114 || FIXED_POINT_TYPE_P (valtype)));
13115 }
13116
13117 /* Return TRUE if X references a SYMBOL_REF. */
13118 int
13119 symbol_mentioned_p (rtx x)
13120 {
13121 const char * fmt;
13122 int i;
13123
13124 if (GET_CODE (x) == SYMBOL_REF)
13125 return 1;
13126
13127 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13128 are constant offsets, not symbols. */
13129 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13130 return 0;
13131
13132 fmt = GET_RTX_FORMAT (GET_CODE (x));
13133
13134 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13135 {
13136 if (fmt[i] == 'E')
13137 {
13138 int j;
13139
13140 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13141 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13142 return 1;
13143 }
13144 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13145 return 1;
13146 }
13147
13148 return 0;
13149 }
13150
13151 /* Return TRUE if X references a LABEL_REF. */
13152 int
13153 label_mentioned_p (rtx x)
13154 {
13155 const char * fmt;
13156 int i;
13157
13158 if (GET_CODE (x) == LABEL_REF)
13159 return 1;
13160
13161 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13162 instruction, but they are constant offsets, not symbols. */
13163 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13164 return 0;
13165
13166 fmt = GET_RTX_FORMAT (GET_CODE (x));
13167 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13168 {
13169 if (fmt[i] == 'E')
13170 {
13171 int j;
13172
13173 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13174 if (label_mentioned_p (XVECEXP (x, i, j)))
13175 return 1;
13176 }
13177 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13178 return 1;
13179 }
13180
13181 return 0;
13182 }
13183
13184 int
13185 tls_mentioned_p (rtx x)
13186 {
13187 switch (GET_CODE (x))
13188 {
13189 case CONST:
13190 return tls_mentioned_p (XEXP (x, 0));
13191
13192 case UNSPEC:
13193 if (XINT (x, 1) == UNSPEC_TLS)
13194 return 1;
13195
13196 default:
13197 return 0;
13198 }
13199 }
13200
13201 /* Must not copy any rtx that uses a pc-relative address. */
13202
13203 static bool
13204 arm_cannot_copy_insn_p (rtx_insn *insn)
13205 {
13206 /* The tls call insn cannot be copied, as it is paired with a data
13207 word. */
13208 if (recog_memoized (insn) == CODE_FOR_tlscall)
13209 return true;
13210
13211 subrtx_iterator::array_type array;
13212 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13213 {
13214 const_rtx x = *iter;
13215 if (GET_CODE (x) == UNSPEC
13216 && (XINT (x, 1) == UNSPEC_PIC_BASE
13217 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13218 return true;
13219 }
13220 return false;
13221 }
13222
13223 enum rtx_code
13224 minmax_code (rtx x)
13225 {
13226 enum rtx_code code = GET_CODE (x);
13227
13228 switch (code)
13229 {
13230 case SMAX:
13231 return GE;
13232 case SMIN:
13233 return LE;
13234 case UMIN:
13235 return LEU;
13236 case UMAX:
13237 return GEU;
13238 default:
13239 gcc_unreachable ();
13240 }
13241 }
13242
13243 /* Match pair of min/max operators that can be implemented via usat/ssat. */
13244
13245 bool
13246 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13247 int *mask, bool *signed_sat)
13248 {
13249 /* The high bound must be a power of two minus one. */
13250 int log = exact_log2 (INTVAL (hi_bound) + 1);
13251 if (log == -1)
13252 return false;
13253
13254 /* The low bound is either zero (for usat) or one less than the
13255 negation of the high bound (for ssat). */
13256 if (INTVAL (lo_bound) == 0)
13257 {
13258 if (mask)
13259 *mask = log;
13260 if (signed_sat)
13261 *signed_sat = false;
13262
13263 return true;
13264 }
13265
13266 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13267 {
13268 if (mask)
13269 *mask = log + 1;
13270 if (signed_sat)
13271 *signed_sat = true;
13272
13273 return true;
13274 }
13275
13276 return false;
13277 }
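
/* Worked example (bounds chosen for illustration): clamping to [0, 255]
   gives exact_log2 (256) == 8 with a zero low bound, so *MASK is 8 and
   *SIGNED_SAT is false (a usat-style range); clamping to [-256, 255] gives
   the same log but a low bound of -(255) - 1, so *MASK is 9 and *SIGNED_SAT
   is true (an ssat-style range). */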
13278
13279 /* Return 1 if memory locations are adjacent. */
13280 int
13281 adjacent_mem_locations (rtx a, rtx b)
13282 {
13283 /* We don't guarantee to preserve the order of these memory refs. */
13284 if (volatile_refs_p (a) || volatile_refs_p (b))
13285 return 0;
13286
13287 if ((REG_P (XEXP (a, 0))
13288 || (GET_CODE (XEXP (a, 0)) == PLUS
13289 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13290 && (REG_P (XEXP (b, 0))
13291 || (GET_CODE (XEXP (b, 0)) == PLUS
13292 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13293 {
13294 HOST_WIDE_INT val0 = 0, val1 = 0;
13295 rtx reg0, reg1;
13296 int val_diff;
13297
13298 if (GET_CODE (XEXP (a, 0)) == PLUS)
13299 {
13300 reg0 = XEXP (XEXP (a, 0), 0);
13301 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13302 }
13303 else
13304 reg0 = XEXP (a, 0);
13305
13306 if (GET_CODE (XEXP (b, 0)) == PLUS)
13307 {
13308 reg1 = XEXP (XEXP (b, 0), 0);
13309 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13310 }
13311 else
13312 reg1 = XEXP (b, 0);
13313
13314 /* Don't accept any offset that will require multiple
13315 instructions to handle, since this would cause the
13316 arith_adjacentmem pattern to output an overlong sequence. */
13317 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13318 return 0;
13319
13320 /* Don't allow an eliminable register: register elimination can make
13321 the offset too large. */
13322 if (arm_eliminable_register (reg0))
13323 return 0;
13324
13325 val_diff = val1 - val0;
13326
13327 if (arm_ld_sched)
13328 {
13329 /* If the target has load delay slots, then there's no benefit
13330 to using an ldm instruction unless the offset is zero and
13331 we are optimizing for size. */
13332 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13333 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13334 && (val_diff == 4 || val_diff == -4));
13335 }
13336
13337 return ((REGNO (reg0) == REGNO (reg1))
13338 && (val_diff == 4 || val_diff == -4));
13339 }
13340
13341 return 0;
13342 }
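
/* For example (registers and offsets are illustrative): accesses at
   [r3, #4] and [r3, #8] share a base register and differ by exactly 4, so
   they are reported as adjacent, subject to the arm_ld_sched restriction
   above; [r3, #4] and [r3, #12] are not. */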
13343
13344 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13345 for load operations, false for store operations. CONSECUTIVE is true
13346 if the register numbers in the operation must be consecutive in the register
13347 bank. RETURN_PC is true if the value is to be loaded into PC.
13348 The pattern we are trying to match for load is:
13349 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13350 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13351 :
13352 :
13353 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13354 ]
13355 where
13356 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13357 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13358 3. If consecutive is TRUE, then for kth register being loaded,
13359 REGNO (R_dk) = REGNO (R_d0) + k.
13360 The pattern for store is similar. */
13361 bool
13362 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13363 bool consecutive, bool return_pc)
13364 {
13365 HOST_WIDE_INT count = XVECLEN (op, 0);
13366 rtx reg, mem, addr;
13367 unsigned regno;
13368 unsigned first_regno;
13369 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13370 rtx elt;
13371 bool addr_reg_in_reglist = false;
13372 bool update = false;
13373 int reg_increment;
13374 int offset_adj;
13375 int regs_per_val;
13376
13377 /* If not in SImode, then registers must be consecutive
13378 (e.g., VLDM instructions for DFmode). */
13379 gcc_assert ((mode == SImode) || consecutive);
13380 /* Setting return_pc for stores is illegal. */
13381 gcc_assert (!return_pc || load);
13382
13383 /* Set up the increments and the regs per val based on the mode. */
13384 reg_increment = GET_MODE_SIZE (mode);
13385 regs_per_val = reg_increment / 4;
13386 offset_adj = return_pc ? 1 : 0;
13387
13388 if (count <= 1
13389 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13390 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13391 return false;
13392
13393 /* Check if this is a write-back. */
13394 elt = XVECEXP (op, 0, offset_adj);
13395 if (GET_CODE (SET_SRC (elt)) == PLUS)
13396 {
13397 i++;
13398 base = 1;
13399 update = true;
13400
13401 /* The offset adjustment must be the number of registers being
13402 popped times the size of a single register. */
13403 if (!REG_P (SET_DEST (elt))
13404 || !REG_P (XEXP (SET_SRC (elt), 0))
13405 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13406 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13407 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13408 ((count - 1 - offset_adj) * reg_increment))
13409 return false;
13410 }
13411
13412 i = i + offset_adj;
13413 base = base + offset_adj;
13414 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13415 success depends on the type: VLDM can do just one reg,
13416 LDM must do at least two. */
13417 if ((count <= i) && (mode == SImode))
13418 return false;
13419
13420 elt = XVECEXP (op, 0, i - 1);
13421 if (GET_CODE (elt) != SET)
13422 return false;
13423
13424 if (load)
13425 {
13426 reg = SET_DEST (elt);
13427 mem = SET_SRC (elt);
13428 }
13429 else
13430 {
13431 reg = SET_SRC (elt);
13432 mem = SET_DEST (elt);
13433 }
13434
13435 if (!REG_P (reg) || !MEM_P (mem))
13436 return false;
13437
13438 regno = REGNO (reg);
13439 first_regno = regno;
13440 addr = XEXP (mem, 0);
13441 if (GET_CODE (addr) == PLUS)
13442 {
13443 if (!CONST_INT_P (XEXP (addr, 1)))
13444 return false;
13445
13446 offset = INTVAL (XEXP (addr, 1));
13447 addr = XEXP (addr, 0);
13448 }
13449
13450 if (!REG_P (addr))
13451 return false;
13452
13453 /* Don't allow SP to be loaded unless it is also the base register. It
13454 guarantees that SP is reset correctly when an LDM instruction
13455 is interrupted. Otherwise, we might end up with a corrupt stack. */
13456 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13457 return false;
13458
13459 for (; i < count; i++)
13460 {
13461 elt = XVECEXP (op, 0, i);
13462 if (GET_CODE (elt) != SET)
13463 return false;
13464
13465 if (load)
13466 {
13467 reg = SET_DEST (elt);
13468 mem = SET_SRC (elt);
13469 }
13470 else
13471 {
13472 reg = SET_SRC (elt);
13473 mem = SET_DEST (elt);
13474 }
13475
13476 if (!REG_P (reg)
13477 || GET_MODE (reg) != mode
13478 || REGNO (reg) <= regno
13479 || (consecutive
13480 && (REGNO (reg) !=
13481 (unsigned int) (first_regno + regs_per_val * (i - base))))
13482 /* Don't allow SP to be loaded unless it is also the base register. It
13483 guarantees that SP is reset correctly when an LDM instruction
13484 is interrupted. Otherwise, we might end up with a corrupt stack. */
13485 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13486 || !MEM_P (mem)
13487 || GET_MODE (mem) != mode
13488 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13489 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13490 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13491 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13492 offset + (i - base) * reg_increment))
13493 && (!REG_P (XEXP (mem, 0))
13494 || offset + (i - base) * reg_increment != 0)))
13495 return false;
13496
13497 regno = REGNO (reg);
13498 if (regno == REGNO (addr))
13499 addr_reg_in_reglist = true;
13500 }
13501
13502 if (load)
13503 {
13504 if (update && addr_reg_in_reglist)
13505 return false;
13506
13507 /* For Thumb-1, the address register is always modified, either by
13508 write-back or by an explicit load. If the pattern does not describe an
13509 update, then the address register must be in the list of loaded registers. */
13510 if (TARGET_THUMB1)
13511 return update || addr_reg_in_reglist;
13512 }
13513
13514 return true;
13515 }
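
/* As a rough illustration of the pattern documented above (register numbers
   chosen arbitrarily), an ldmia of two registers with no write-back would be
   presented as:
   (parallel [(set (reg:SI r4) (mem:SI (reg:SI r0)))
   (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))])
   with the register numbers ascending and each offset growing by
   <reg_increment> (4 for SImode). */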
13516
13517 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13518 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13519 instruction. ADD_OFFSET is nonzero if the base address register needs
13520 to be modified with an add instruction before we can use it. */
13521
13522 static bool
13523 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13524 int nops, HOST_WIDE_INT add_offset)
13525 {
13526 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13527 if the offset isn't small enough. The reason 2 ldrs are faster
13528 is because these ARMs are able to do more than one cache access
13529 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13530 whilst the ARM8 has a double bandwidth cache. This means that
13531 these cores can do both an instruction fetch and a data fetch in
13532 a single cycle, so the trick of calculating the address into a
13533 scratch register (one of the result regs) and then doing a load
13534 multiple actually becomes slower (and no smaller in code size).
13535 That is the transformation
13536
13537 ldr rd1, [rbase + offset]
13538 ldr rd2, [rbase + offset + 4]
13539
13540 to
13541
13542 add rd1, rbase, offset
13543 ldmia rd1, {rd1, rd2}
13544
13545 produces worse code -- '3 cycles + any stalls on rd2' instead of
13546 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13547 access per cycle, the first sequence could never complete in less
13548 than 6 cycles, whereas the ldm sequence would only take 5 and
13549 would make better use of sequential accesses if not hitting the
13550 cache.
13551
13552 We cheat here and test 'arm_ld_sched' which we currently know to
13553 only be true for the ARM8, ARM9 and StrongARM. If this ever
13554 changes, then the test below needs to be reworked. */
13555 if (nops == 2 && arm_ld_sched && add_offset != 0)
13556 return false;
13557
13558 /* XScale has load-store double instructions, but they have stricter
13559 alignment requirements than load-store multiple, so we cannot
13560 use them.
13561
13562 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13563 the pipeline until completion.
13564
13565 NREGS CYCLES
13566 1 3
13567 2 4
13568 3 5
13569 4 6
13570
13571 An ldr instruction takes 1-3 cycles, but does not block the
13572 pipeline.
13573
13574 NREGS CYCLES
13575 1 1-3
13576 2 2-6
13577 3 3-9
13578 4 4-12
13579
13580 Best case ldr will always win. However, the more ldr instructions
13581 we issue, the less likely we are to be able to schedule them well.
13582 Using ldr instructions also increases code size.
13583
13584 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13585 for counts of 3 or 4 regs. */
13586 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13587 return false;
13588 return true;
13589 }
13590
13591 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13592 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13593 an array ORDER which describes the sequence to use when accessing the
13594 offsets that produces an ascending order. In this sequence, each
13595 offset must be larger by exactly 4 than the previous one. ORDER[0]
13596 must have been filled in with the lowest offset by the caller.
13597 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13598 we use to verify that ORDER produces an ascending order of registers.
13599 Return true if it was possible to construct such an order, false if
13600 not. */
13601
13602 static bool
13603 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13604 int *unsorted_regs)
13605 {
13606 int i;
13607 for (i = 1; i < nops; i++)
13608 {
13609 int j;
13610
13611 order[i] = order[i - 1];
13612 for (j = 0; j < nops; j++)
13613 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13614 {
13615 /* We must find exactly one offset that is higher than the
13616 previous one by 4. */
13617 if (order[i] != order[i - 1])
13618 return false;
13619 order[i] = j;
13620 }
13621 if (order[i] == order[i - 1])
13622 return false;
13623 /* The register numbers must be ascending. */
13624 if (unsorted_regs != NULL
13625 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13626 return false;
13627 }
13628 return true;
13629 }
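
/* Worked example (offsets invented for illustration): with
   UNSORTED_OFFSETS = {4, 12, 0, 8} the caller presets ORDER[0] = 2 (the
   index of offset 0); the loop above then finds offsets 4, 8 and 12 in
   turn, producing ORDER = {2, 0, 3, 1}. If UNSORTED_REGS were {1, 3, 0, 2},
   the registers visited in that order (0, 1, 2, 3) ascend, so the function
   returns true. */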
13630
13631 /* Used to determine in a peephole whether a sequence of load
13632 instructions can be changed into a load-multiple instruction.
13633 NOPS is the number of separate load instructions we are examining. The
13634 first NOPS entries in OPERANDS are the destination registers, the
13635 next NOPS entries are memory operands. If this function is
13636 successful, *BASE is set to the common base register of the memory
13637 accesses; *LOAD_OFFSET is set to the first memory location's offset
13638 from that base register.
13639 REGS is an array filled in with the destination register numbers.
13640 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13641 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13642 the sequence of registers in REGS matches the loads from ascending memory
13643 locations, and the function verifies that the register numbers are
13644 themselves ascending. If CHECK_REGS is false, the register numbers
13645 are stored in the order they are found in the operands. */
13646 static int
13647 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13648 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13649 {
13650 int unsorted_regs[MAX_LDM_STM_OPS];
13651 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13652 int order[MAX_LDM_STM_OPS];
13653 rtx base_reg_rtx = NULL;
13654 int base_reg = -1;
13655 int i, ldm_case;
13656
13657 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13658 easily extended if required. */
13659 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13660
13661 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13662
13663 /* Loop over the operands and check that the memory references are
13664 suitable (i.e. immediate offsets from the same base register). At
13665 the same time, extract the target register, and the memory
13666 offsets. */
13667 for (i = 0; i < nops; i++)
13668 {
13669 rtx reg;
13670 rtx offset;
13671
13672 /* Convert a subreg of a mem into the mem itself. */
13673 if (GET_CODE (operands[nops + i]) == SUBREG)
13674 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13675
13676 gcc_assert (MEM_P (operands[nops + i]));
13677
13678 /* Don't reorder volatile memory references; it doesn't seem worth
13679 looking for the case where the order is ok anyway. */
13680 if (MEM_VOLATILE_P (operands[nops + i]))
13681 return 0;
13682
13683 offset = const0_rtx;
13684
13685 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13686 || (GET_CODE (reg) == SUBREG
13687 && REG_P (reg = SUBREG_REG (reg))))
13688 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13689 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13690 || (GET_CODE (reg) == SUBREG
13691 && REG_P (reg = SUBREG_REG (reg))))
13692 && (CONST_INT_P (offset
13693 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13694 {
13695 if (i == 0)
13696 {
13697 base_reg = REGNO (reg);
13698 base_reg_rtx = reg;
13699 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13700 return 0;
13701 }
13702 else if (base_reg != (int) REGNO (reg))
13703 /* Not addressed from the same base register. */
13704 return 0;
13705
13706 unsorted_regs[i] = (REG_P (operands[i])
13707 ? REGNO (operands[i])
13708 : REGNO (SUBREG_REG (operands[i])));
13709
13710 /* If it isn't an integer register, or if it overwrites the
13711 base register but isn't the last insn in the list, then
13712 we can't do this. */
13713 if (unsorted_regs[i] < 0
13714 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13715 || unsorted_regs[i] > 14
13716 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13717 return 0;
13718
13719 /* Don't allow SP to be loaded unless it is also the base
13720 register. It guarantees that SP is reset correctly when
13721 an LDM instruction is interrupted. Otherwise, we might
13722 end up with a corrupt stack. */
13723 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13724 return 0;
13725
13726 unsorted_offsets[i] = INTVAL (offset);
13727 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13728 order[0] = i;
13729 }
13730 else
13731 /* Not a suitable memory address. */
13732 return 0;
13733 }
13734
13735 /* All the useful information has now been extracted from the
13736 operands into unsorted_regs and unsorted_offsets; additionally,
13737 order[0] has been set to the lowest offset in the list. Sort
13738 the offsets into order, verifying that they are adjacent, and
13739 check that the register numbers are ascending. */
13740 if (!compute_offset_order (nops, unsorted_offsets, order,
13741 check_regs ? unsorted_regs : NULL))
13742 return 0;
13743
13744 if (saved_order)
13745 memcpy (saved_order, order, sizeof order);
13746
13747 if (base)
13748 {
13749 *base = base_reg;
13750
13751 for (i = 0; i < nops; i++)
13752 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13753
13754 *load_offset = unsorted_offsets[order[0]];
13755 }
13756
13757 if (TARGET_THUMB1
13758 && !peep2_reg_dead_p (nops, base_reg_rtx))
13759 return 0;
13760
13761 if (unsorted_offsets[order[0]] == 0)
13762 ldm_case = 1; /* ldmia */
13763 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13764 ldm_case = 2; /* ldmib */
13765 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13766 ldm_case = 3; /* ldmda */
13767 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13768 ldm_case = 4; /* ldmdb */
13769 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13770 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13771 ldm_case = 5;
13772 else
13773 return 0;
13774
13775 if (!multiple_operation_profitable_p (false, nops,
13776 ldm_case == 5
13777 ? unsorted_offsets[order[0]] : 0))
13778 return 0;
13779
13780 return ldm_case;
13781 }
13782
13783 /* Used to determine in a peephole whether a sequence of store instructions can
13784 be changed into a store-multiple instruction.
13785 NOPS is the number of separate store instructions we are examining.
13786 NOPS_TOTAL is the total number of instructions recognized by the peephole
13787 pattern.
13788 The first NOPS entries in OPERANDS are the source registers, the next
13789 NOPS entries are memory operands. If this function is successful, *BASE is
13790 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13791 to the first memory location's offset from that base register. REGS is an
13792 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13793 likewise filled with the corresponding rtx's.
13794 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13795 numbers to an ascending order of stores.
13796 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13797 from ascending memory locations, and the function verifies that the register
13798 numbers are themselves ascending. If CHECK_REGS is false, the register
13799 numbers are stored in the order they are found in the operands. */
13800 static int
13801 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13802 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13803 HOST_WIDE_INT *load_offset, bool check_regs)
13804 {
13805 int unsorted_regs[MAX_LDM_STM_OPS];
13806 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13807 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13808 int order[MAX_LDM_STM_OPS];
13809 int base_reg = -1;
13810 rtx base_reg_rtx = NULL;
13811 int i, stm_case;
13812
13813 /* Write back of base register is currently only supported for Thumb 1. */
13814 int base_writeback = TARGET_THUMB1;
13815
13816 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13817 easily extended if required. */
13818 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13819
13820 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13821
13822 /* Loop over the operands and check that the memory references are
13823 suitable (i.e. immediate offsets from the same base register). At
13824 the same time, extract the target register, and the memory
13825 offsets. */
13826 for (i = 0; i < nops; i++)
13827 {
13828 rtx reg;
13829 rtx offset;
13830
13831 /* Convert a subreg of a mem into the mem itself. */
13832 if (GET_CODE (operands[nops + i]) == SUBREG)
13833 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13834
13835 gcc_assert (MEM_P (operands[nops + i]));
13836
13837 /* Don't reorder volatile memory references; it doesn't seem worth
13838 looking for the case where the order is ok anyway. */
13839 if (MEM_VOLATILE_P (operands[nops + i]))
13840 return 0;
13841
13842 offset = const0_rtx;
13843
13844 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13845 || (GET_CODE (reg) == SUBREG
13846 && REG_P (reg = SUBREG_REG (reg))))
13847 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13848 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13849 || (GET_CODE (reg) == SUBREG
13850 && REG_P (reg = SUBREG_REG (reg))))
13851 && (CONST_INT_P (offset
13852 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13853 {
13854 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13855 ? operands[i] : SUBREG_REG (operands[i]));
13856 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13857
13858 if (i == 0)
13859 {
13860 base_reg = REGNO (reg);
13861 base_reg_rtx = reg;
13862 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13863 return 0;
13864 }
13865 else if (base_reg != (int) REGNO (reg))
13866 /* Not addressed from the same base register. */
13867 return 0;
13868
13869 /* If it isn't an integer register, then we can't do this. */
13870 if (unsorted_regs[i] < 0
13871 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13872 /* The effects are unpredictable if the base register is
13873 both updated and stored. */
13874 || (base_writeback && unsorted_regs[i] == base_reg)
13875 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13876 || unsorted_regs[i] > 14)
13877 return 0;
13878
13879 unsorted_offsets[i] = INTVAL (offset);
13880 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13881 order[0] = i;
13882 }
13883 else
13884 /* Not a suitable memory address. */
13885 return 0;
13886 }
13887
13888 /* All the useful information has now been extracted from the
13889 operands into unsorted_regs and unsorted_offsets; additionally,
13890 order[0] has been set to the lowest offset in the list. Sort
13891 the offsets into order, verifying that they are adjacent, and
13892 check that the register numbers are ascending. */
13893 if (!compute_offset_order (nops, unsorted_offsets, order,
13894 check_regs ? unsorted_regs : NULL))
13895 return 0;
13896
13897 if (saved_order)
13898 memcpy (saved_order, order, sizeof order);
13899
13900 if (base)
13901 {
13902 *base = base_reg;
13903
13904 for (i = 0; i < nops; i++)
13905 {
13906 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13907 if (reg_rtxs)
13908 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13909 }
13910
13911 *load_offset = unsorted_offsets[order[0]];
13912 }
13913
13914 if (TARGET_THUMB1
13915 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13916 return 0;
13917
13918 if (unsorted_offsets[order[0]] == 0)
13919 stm_case = 1; /* stmia */
13920 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13921 stm_case = 2; /* stmib */
13922 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13923 stm_case = 3; /* stmda */
13924 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13925 stm_case = 4; /* stmdb */
13926 else
13927 return 0;
13928
13929 if (!multiple_operation_profitable_p (false, nops, 0))
13930 return 0;
13931
13932 return stm_case;
13933 }
13934 \f
13935 /* Routines for use in generating RTL. */
13936
13937 /* Generate a load-multiple instruction. COUNT is the number of loads in
13938 the instruction; REGS and MEMS are arrays containing the operands.
13939 BASEREG is the base register to be used in addressing the memory operands.
13940 WBACK_OFFSET is nonzero if the instruction should update the base
13941 register. */
13942
13943 static rtx
13944 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13945 HOST_WIDE_INT wback_offset)
13946 {
13947 int i = 0, j;
13948 rtx result;
13949
13950 if (!multiple_operation_profitable_p (false, count, 0))
13951 {
13952 rtx seq;
13953
13954 start_sequence ();
13955
13956 for (i = 0; i < count; i++)
13957 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13958
13959 if (wback_offset != 0)
13960 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13961
13962 seq = get_insns ();
13963 end_sequence ();
13964
13965 return seq;
13966 }
13967
13968 result = gen_rtx_PARALLEL (VOIDmode,
13969 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13970 if (wback_offset != 0)
13971 {
13972 XVECEXP (result, 0, 0)
13973 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13974 i = 1;
13975 count++;
13976 }
13977
13978 for (j = 0; i < count; i++, j++)
13979 XVECEXP (result, 0, i)
13980 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13981
13982 return result;
13983 }
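
/* Sketch of the RTL produced above for COUNT == 2 with a write-back offset
   of 8 (registers chosen arbitrarily): a PARALLEL whose first element sets
   the base register to base + 8 and whose remaining elements are
   (set (reg:SI r4) MEMS[0]) and (set (reg:SI r5) MEMS[1]). When the
   operation is not profitable, the same loads are instead emitted as a
   plain sequence of moves. */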
13984
13985 /* Generate a store-multiple instruction. COUNT is the number of stores in
13986 the instruction; REGS and MEMS are arrays containing the operands.
13987 BASEREG is the base register to be used in addressing the memory operands.
13988 WBACK_OFFSET is nonzero if the instruction should update the base
13989 register. */
13990
13991 static rtx
13992 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13993 HOST_WIDE_INT wback_offset)
13994 {
13995 int i = 0, j;
13996 rtx result;
13997
13998 if (GET_CODE (basereg) == PLUS)
13999 basereg = XEXP (basereg, 0);
14000
14001 if (!multiple_operation_profitable_p (false, count, 0))
14002 {
14003 rtx seq;
14004
14005 start_sequence ();
14006
14007 for (i = 0; i < count; i++)
14008 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
14009
14010 if (wback_offset != 0)
14011 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
14012
14013 seq = get_insns ();
14014 end_sequence ();
14015
14016 return seq;
14017 }
14018
14019 result = gen_rtx_PARALLEL (VOIDmode,
14020 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
14021 if (wback_offset != 0)
14022 {
14023 XVECEXP (result, 0, 0)
14024 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
14025 i = 1;
14026 count++;
14027 }
14028
14029 for (j = 0; i < count; i++, j++)
14030 XVECEXP (result, 0, i)
14031 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
14032
14033 return result;
14034 }
14035
14036 /* Generate either a load-multiple or a store-multiple instruction. This
14037 function can be used in situations where we can start with a single MEM
14038 rtx and adjust its address upwards.
14039 COUNT is the number of operations in the instruction, not counting a
14040 possible update of the base register. REGS is an array containing the
14041 register operands.
14042 BASEREG is the base register to be used in addressing the memory operands,
14043 which are constructed from BASEMEM.
14044 WRITE_BACK specifies whether the generated instruction should include an
14045 update of the base register.
14046 OFFSETP is used to pass an offset to and from this function; this offset
14047 is not used when constructing the address (instead BASEMEM should have an
14048 appropriate offset in its address); it is used only for setting
14049 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14050
14051 static rtx
14052 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14053 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14054 {
14055 rtx mems[MAX_LDM_STM_OPS];
14056 HOST_WIDE_INT offset = *offsetp;
14057 int i;
14058
14059 gcc_assert (count <= MAX_LDM_STM_OPS);
14060
14061 if (GET_CODE (basereg) == PLUS)
14062 basereg = XEXP (basereg, 0);
14063
14064 for (i = 0; i < count; i++)
14065 {
14066 rtx addr = plus_constant (Pmode, basereg, i * 4);
14067 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14068 offset += 4;
14069 }
14070
14071 if (write_back)
14072 *offsetp = offset;
14073
14074 if (is_load)
14075 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14076 write_back ? 4 * count : 0);
14077 else
14078 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14079 write_back ? 4 * count : 0);
14080 }
14081
14082 rtx
14083 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14084 rtx basemem, HOST_WIDE_INT *offsetp)
14085 {
14086 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14087 offsetp);
14088 }
14089
14090 rtx
14091 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14092 rtx basemem, HOST_WIDE_INT *offsetp)
14093 {
14094 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14095 offsetp);
14096 }
14097
14098 /* Called from a peephole2 expander to turn a sequence of loads into an
14099 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14100 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14101 is true if we can reorder the registers because they are subsequently used
14102 commutatively.
14103 Returns true iff we could generate a new instruction. */
14104
14105 bool
14106 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14107 {
14108 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14109 rtx mems[MAX_LDM_STM_OPS];
14110 int i, j, base_reg;
14111 rtx base_reg_rtx;
14112 HOST_WIDE_INT offset;
14113 int write_back = FALSE;
14114 int ldm_case;
14115 rtx addr;
14116
14117 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14118 &base_reg, &offset, !sort_regs);
14119
14120 if (ldm_case == 0)
14121 return false;
14122
14123 if (sort_regs)
14124 for (i = 0; i < nops - 1; i++)
14125 for (j = i + 1; j < nops; j++)
14126 if (regs[i] > regs[j])
14127 {
14128 int t = regs[i];
14129 regs[i] = regs[j];
14130 regs[j] = t;
14131 }
14132 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14133
14134 if (TARGET_THUMB1)
14135 {
14136 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14137 gcc_assert (ldm_case == 1 || ldm_case == 5);
14138 write_back = TRUE;
14139 }
14140
14141 if (ldm_case == 5)
14142 {
14143 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14144 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14145 offset = 0;
14146 if (!TARGET_THUMB1)
14147 {
14148 base_reg = regs[0];
14149 base_reg_rtx = newbase;
14150 }
14151 }
14152
14153 for (i = 0; i < nops; i++)
14154 {
14155 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14156 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14157 SImode, addr, 0);
14158 }
14159 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14160 write_back ? offset + i * 4 : 0));
14161 return true;
14162 }
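
/* For instance (an illustrative peephole input, not a quotation from the
   machine description): the two loads
   ldr r1, [r0]
   ldr r2, [r0, #4]
   can be combined by this function into ldmia r0, {r1, r2}, provided
   load_multiple_sequence accepts the operands and the ordering checks above
   succeed. */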
14163
14164 /* Called from a peephole2 expander to turn a sequence of stores into an
14165 STM instruction. OPERANDS are the operands found by the peephole matcher;
14166 NOPS indicates how many separate stores we are trying to combine.
14167 Returns true iff we could generate a new instruction. */
14168
14169 bool
14170 gen_stm_seq (rtx *operands, int nops)
14171 {
14172 int i;
14173 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14174 rtx mems[MAX_LDM_STM_OPS];
14175 int base_reg;
14176 rtx base_reg_rtx;
14177 HOST_WIDE_INT offset;
14178 int write_back = FALSE;
14179 int stm_case;
14180 rtx addr;
14181 bool base_reg_dies;
14182
14183 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14184 mem_order, &base_reg, &offset, true);
14185
14186 if (stm_case == 0)
14187 return false;
14188
14189 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14190
14191 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14192 if (TARGET_THUMB1)
14193 {
14194 gcc_assert (base_reg_dies);
14195 write_back = TRUE;
14196 }
14197
14198 if (stm_case == 5)
14199 {
14200 gcc_assert (base_reg_dies);
14201 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14202 offset = 0;
14203 }
14204
14205 addr = plus_constant (Pmode, base_reg_rtx, offset);
14206
14207 for (i = 0; i < nops; i++)
14208 {
14209 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14210 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14211 SImode, addr, 0);
14212 }
14213 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14214 write_back ? offset + i * 4 : 0));
14215 return true;
14216 }
14217
14218 /* Called from a peephole2 expander to turn a sequence of stores that are
14219 preceded by constant loads into an STM instruction. OPERANDS are the
14220 operands found by the peephole matcher; NOPS indicates how many
14221 separate stores we are trying to combine; there are 2 * NOPS
14222 instructions in the peephole.
14223 Returns true iff we could generate a new instruction. */
14224
14225 bool
14226 gen_const_stm_seq (rtx *operands, int nops)
14227 {
14228 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14229 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14230 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14231 rtx mems[MAX_LDM_STM_OPS];
14232 int base_reg;
14233 rtx base_reg_rtx;
14234 HOST_WIDE_INT offset;
14235 int write_back = FALSE;
14236 int stm_case;
14237 rtx addr;
14238 bool base_reg_dies;
14239 int i, j;
14240 HARD_REG_SET allocated;
14241
14242 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14243 mem_order, &base_reg, &offset, false);
14244
14245 if (stm_case == 0)
14246 return false;
14247
14248 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14249
14250 /* If the same register is used more than once, try to find a free
14251 register. */
14252 CLEAR_HARD_REG_SET (allocated);
14253 for (i = 0; i < nops; i++)
14254 {
14255 for (j = i + 1; j < nops; j++)
14256 if (regs[i] == regs[j])
14257 {
14258 rtx t = peep2_find_free_register (0, nops * 2,
14259 TARGET_THUMB1 ? "l" : "r",
14260 SImode, &allocated);
14261 if (t == NULL_RTX)
14262 return false;
14263 reg_rtxs[i] = t;
14264 regs[i] = REGNO (t);
14265 }
14266 }
14267
14268 /* Compute an ordering that maps the register numbers to an ascending
14269 sequence. */
14270 reg_order[0] = 0;
14271 for (i = 0; i < nops; i++)
14272 if (regs[i] < regs[reg_order[0]])
14273 reg_order[0] = i;
14274
14275 for (i = 1; i < nops; i++)
14276 {
14277 int this_order = reg_order[i - 1];
14278 for (j = 0; j < nops; j++)
14279 if (regs[j] > regs[reg_order[i - 1]]
14280 && (this_order == reg_order[i - 1]
14281 || regs[j] < regs[this_order]))
14282 this_order = j;
14283 reg_order[i] = this_order;
14284 }
14285
14286 /* Ensure that registers that must be live after the instruction end
14287 up with the correct value. */
14288 for (i = 0; i < nops; i++)
14289 {
14290 int this_order = reg_order[i];
14291 if ((this_order != mem_order[i]
14292 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14293 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14294 return false;
14295 }
14296
14297 /* Load the constants. */
14298 for (i = 0; i < nops; i++)
14299 {
14300 rtx op = operands[2 * nops + mem_order[i]];
14301 sorted_regs[i] = regs[reg_order[i]];
14302 emit_move_insn (reg_rtxs[reg_order[i]], op);
14303 }
14304
14305 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14306
14307 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14308 if (TARGET_THUMB1)
14309 {
14310 gcc_assert (base_reg_dies);
14311 write_back = TRUE;
14312 }
14313
14314 if (stm_case == 5)
14315 {
14316 gcc_assert (base_reg_dies);
14317 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14318 offset = 0;
14319 }
14320
14321 addr = plus_constant (Pmode, base_reg_rtx, offset);
14322
14323 for (i = 0; i < nops; i++)
14324 {
14325 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14326 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14327 SImode, addr, 0);
14328 }
14329 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14330 write_back ? offset + i * 4 : 0));
14331 return true;
14332 }
14333
14334 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14335 unaligned copies on processors which support unaligned semantics for those
14336 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14337 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14338 An interleave factor of 1 (the minimum) will perform no interleaving.
14339 Load/store multiple are used for aligned addresses where possible. */
14340
14341 static void
14342 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14343 HOST_WIDE_INT length,
14344 unsigned int interleave_factor)
14345 {
14346 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14347 int *regnos = XALLOCAVEC (int, interleave_factor);
14348 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14349 HOST_WIDE_INT i, j;
14350 HOST_WIDE_INT remaining = length, words;
14351 rtx halfword_tmp = NULL, byte_tmp = NULL;
14352 rtx dst, src;
14353 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14354 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14355 HOST_WIDE_INT srcoffset, dstoffset;
14356 HOST_WIDE_INT src_autoinc, dst_autoinc;
14357 rtx mem, addr;
14358
14359 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14360
14361 /* Use hard registers if we have aligned source or destination so we can use
14362 load/store multiple with contiguous registers. */
14363 if (dst_aligned || src_aligned)
14364 for (i = 0; i < interleave_factor; i++)
14365 regs[i] = gen_rtx_REG (SImode, i);
14366 else
14367 for (i = 0; i < interleave_factor; i++)
14368 regs[i] = gen_reg_rtx (SImode);
14369
14370 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14371 src = copy_addr_to_reg (XEXP (srcbase, 0));
14372
14373 srcoffset = dstoffset = 0;
14374
14375 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14376 For copying the last bytes we want to subtract this offset again. */
14377 src_autoinc = dst_autoinc = 0;
14378
14379 for (i = 0; i < interleave_factor; i++)
14380 regnos[i] = i;
14381
14382 /* Copy BLOCK_SIZE_BYTES chunks. */
14383
14384 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14385 {
14386 /* Load words. */
14387 if (src_aligned && interleave_factor > 1)
14388 {
14389 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14390 TRUE, srcbase, &srcoffset));
14391 src_autoinc += UNITS_PER_WORD * interleave_factor;
14392 }
14393 else
14394 {
14395 for (j = 0; j < interleave_factor; j++)
14396 {
14397 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14398 - src_autoinc));
14399 mem = adjust_automodify_address (srcbase, SImode, addr,
14400 srcoffset + j * UNITS_PER_WORD);
14401 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14402 }
14403 srcoffset += block_size_bytes;
14404 }
14405
14406 /* Store words. */
14407 if (dst_aligned && interleave_factor > 1)
14408 {
14409 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14410 TRUE, dstbase, &dstoffset));
14411 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14412 }
14413 else
14414 {
14415 for (j = 0; j < interleave_factor; j++)
14416 {
14417 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14418 - dst_autoinc));
14419 mem = adjust_automodify_address (dstbase, SImode, addr,
14420 dstoffset + j * UNITS_PER_WORD);
14421 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14422 }
14423 dstoffset += block_size_bytes;
14424 }
14425
14426 remaining -= block_size_bytes;
14427 }
14428
14429 /* Copy any whole words left (note these aren't interleaved with any
14430 subsequent halfword/byte load/stores in the interests of simplicity). */
14431
14432 words = remaining / UNITS_PER_WORD;
14433
14434 gcc_assert (words < interleave_factor);
14435
14436 if (src_aligned && words > 1)
14437 {
14438 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14439 &srcoffset));
14440 src_autoinc += UNITS_PER_WORD * words;
14441 }
14442 else
14443 {
14444 for (j = 0; j < words; j++)
14445 {
14446 addr = plus_constant (Pmode, src,
14447 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14448 mem = adjust_automodify_address (srcbase, SImode, addr,
14449 srcoffset + j * UNITS_PER_WORD);
14450 if (src_aligned)
14451 emit_move_insn (regs[j], mem);
14452 else
14453 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14454 }
14455 srcoffset += words * UNITS_PER_WORD;
14456 }
14457
14458 if (dst_aligned && words > 1)
14459 {
14460 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14461 &dstoffset));
14462 dst_autoinc += words * UNITS_PER_WORD;
14463 }
14464 else
14465 {
14466 for (j = 0; j < words; j++)
14467 {
14468 addr = plus_constant (Pmode, dst,
14469 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14470 mem = adjust_automodify_address (dstbase, SImode, addr,
14471 dstoffset + j * UNITS_PER_WORD);
14472 if (dst_aligned)
14473 emit_move_insn (mem, regs[j]);
14474 else
14475 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14476 }
14477 dstoffset += words * UNITS_PER_WORD;
14478 }
14479
14480 remaining -= words * UNITS_PER_WORD;
14481
14482 gcc_assert (remaining < 4);
14483
14484 /* Copy a halfword if necessary. */
14485
14486 if (remaining >= 2)
14487 {
14488 halfword_tmp = gen_reg_rtx (SImode);
14489
14490 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14491 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14492 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14493
14494 /* Either write out immediately, or delay until we've loaded the last
14495 byte, depending on interleave factor. */
14496 if (interleave_factor == 1)
14497 {
14498 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14499 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14500 emit_insn (gen_unaligned_storehi (mem,
14501 gen_lowpart (HImode, halfword_tmp)));
14502 halfword_tmp = NULL;
14503 dstoffset += 2;
14504 }
14505
14506 remaining -= 2;
14507 srcoffset += 2;
14508 }
14509
14510 gcc_assert (remaining < 2);
14511
14512 /* Copy last byte. */
14513
14514 if ((remaining & 1) != 0)
14515 {
14516 byte_tmp = gen_reg_rtx (SImode);
14517
14518 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14519 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14520 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14521
14522 if (interleave_factor == 1)
14523 {
14524 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14525 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14526 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14527 byte_tmp = NULL;
14528 dstoffset++;
14529 }
14530
14531 remaining--;
14532 srcoffset++;
14533 }
14534
14535 /* Store last halfword if we haven't done so already. */
14536
14537 if (halfword_tmp)
14538 {
14539 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14540 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14541 emit_insn (gen_unaligned_storehi (mem,
14542 gen_lowpart (HImode, halfword_tmp)));
14543 dstoffset += 2;
14544 }
14545
14546 /* Likewise for last byte. */
14547
14548 if (byte_tmp)
14549 {
14550 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14551 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14552 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14553 dstoffset++;
14554 }
14555
14556 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14557 }
14558
14559 /* From mips_adjust_block_mem:
14560
14561 Helper function for doing a loop-based block operation on memory
14562 reference MEM. Each iteration of the loop will operate on LENGTH
14563 bytes of MEM.
14564
14565 Create a new base register for use within the loop and point it to
14566 the start of MEM. Create a new memory reference that uses this
14567 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
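/* As a small illustration: if MEM is known to be 8-byte aligned and LENGTH
   is 4, the loop reference created below is marked with only 4-byte (32-bit)
   alignment, since that is all each iteration can rely on.  */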
14568
14569 static void
14570 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14571 rtx *loop_mem)
14572 {
14573 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14574
14575 /* Although the new mem does not refer to a known location,
14576 it does keep up to LENGTH bytes of alignment. */
14577 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14578 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14579 }
14580
14581 /* From mips_block_move_loop:
14582
14583 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14584 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14585 the memory regions do not overlap. */
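/* For example, with LENGTH == 70 and BYTES_PER_ITER == 16 the loop emitted
   below copies 64 bytes in four iterations, and the remaining 6 bytes are
   handled by the straight-line helper at the end.  */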
14586
14587 static void
14588 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14589 unsigned int interleave_factor,
14590 HOST_WIDE_INT bytes_per_iter)
14591 {
14592 rtx src_reg, dest_reg, final_src, test;
14593 HOST_WIDE_INT leftover;
14594
14595 leftover = length % bytes_per_iter;
14596 length -= leftover;
14597
14598 /* Create registers and memory references for use within the loop. */
14599 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14600 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14601
14602 /* Calculate the value that SRC_REG should have after the last iteration of
14603 the loop. */
14604 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14605 0, 0, OPTAB_WIDEN);
14606
14607 /* Emit the start of the loop. */
14608 rtx_code_label *label = gen_label_rtx ();
14609 emit_label (label);
14610
14611 /* Emit the loop body. */
14612 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14613 interleave_factor);
14614
14615 /* Move on to the next block. */
14616 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14617 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14618
14619 /* Emit the loop condition. */
14620 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14621 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14622
14623 /* Mop up any left-over bytes. */
14624 if (leftover)
14625 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14626 }
14627
14628 /* Emit a block move when either the source or destination is unaligned (not
14629 aligned to a four-byte boundary). This may need further tuning depending on
14630 core type, optimize_size setting, etc. */
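/* For a rough feel for the thresholds used below: when not optimizing for
   size, a 40-byte copy (> 32) is emitted as a loop moving 16 bytes per
   iteration, while a 24-byte copy is expanded straight-line with an
   interleave factor of 4.  */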
14631
14632 static int
14633 arm_movmemqi_unaligned (rtx *operands)
14634 {
14635 HOST_WIDE_INT length = INTVAL (operands[2]);
14636
14637 if (optimize_size)
14638 {
14639 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14640 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14641 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14642 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14643 or dst_aligned though: allow more interleaving in those cases since the
14644 resulting code can be smaller. */
14645 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14646 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14647
14648 if (length > 12)
14649 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14650 interleave_factor, bytes_per_iter);
14651 else
14652 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14653 interleave_factor);
14654 }
14655 else
14656 {
14657 /* Note that the loop created by arm_block_move_unaligned_loop may be
14658 subject to loop unrolling, which makes tuning this condition a little
14659 redundant. */
14660 if (length > 32)
14661 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14662 else
14663 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14664 }
14665
14666 return 1;
14667 }
14668
14669 int
14670 arm_gen_movmemqi (rtx *operands)
14671 {
14672 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14673 HOST_WIDE_INT srcoffset, dstoffset;
14674 int i;
14675 rtx src, dst, srcbase, dstbase;
14676 rtx part_bytes_reg = NULL;
14677 rtx mem;
14678
14679 if (!CONST_INT_P (operands[2])
14680 || !CONST_INT_P (operands[3])
14681 || INTVAL (operands[2]) > 64)
14682 return 0;
14683
14684 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14685 return arm_movmemqi_unaligned (operands);
14686
14687 if (INTVAL (operands[3]) & 3)
14688 return 0;
14689
14690 dstbase = operands[0];
14691 srcbase = operands[1];
14692
14693 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14694 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14695
14696 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14697 out_words_to_go = INTVAL (operands[2]) / 4;
14698 last_bytes = INTVAL (operands[2]) & 3;
14699 dstoffset = srcoffset = 0;
14700
14701 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14702 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14703
14704 for (i = 0; in_words_to_go >= 2; i += 4)
14705 {
14706 if (in_words_to_go > 4)
14707 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14708 TRUE, srcbase, &srcoffset));
14709 else
14710 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14711 src, FALSE, srcbase,
14712 &srcoffset));
14713
14714 if (out_words_to_go)
14715 {
14716 if (out_words_to_go > 4)
14717 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14718 TRUE, dstbase, &dstoffset));
14719 else if (out_words_to_go != 1)
14720 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14721 out_words_to_go, dst,
14722 (last_bytes == 0
14723 ? FALSE : TRUE),
14724 dstbase, &dstoffset));
14725 else
14726 {
14727 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14728 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14729 if (last_bytes != 0)
14730 {
14731 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14732 dstoffset += 4;
14733 }
14734 }
14735 }
14736
14737 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14738 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14739 }
14740
14741 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14742 if (out_words_to_go)
14743 {
14744 rtx sreg;
14745
14746 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14747 sreg = copy_to_reg (mem);
14748
14749 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14750 emit_move_insn (mem, sreg);
14751 in_words_to_go--;
14752
14753 gcc_assert (!in_words_to_go); /* Sanity check */
14754 }
14755
14756 if (in_words_to_go)
14757 {
14758 gcc_assert (in_words_to_go > 0);
14759
14760 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14761 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14762 }
14763
14764 gcc_assert (!last_bytes || part_bytes_reg);
14765
14766 if (BYTES_BIG_ENDIAN && last_bytes)
14767 {
14768 rtx tmp = gen_reg_rtx (SImode);
14769
14770 /* The bytes we want are in the top end of the word. */
14771 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14772 GEN_INT (8 * (4 - last_bytes))));
14773 part_bytes_reg = tmp;
14774
14775 while (last_bytes)
14776 {
14777 mem = adjust_automodify_address (dstbase, QImode,
14778 plus_constant (Pmode, dst,
14779 last_bytes - 1),
14780 dstoffset + last_bytes - 1);
14781 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14782
14783 if (--last_bytes)
14784 {
14785 tmp = gen_reg_rtx (SImode);
14786 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14787 part_bytes_reg = tmp;
14788 }
14789 }
14790
14791 }
14792 else
14793 {
14794 if (last_bytes > 1)
14795 {
14796 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14797 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14798 last_bytes -= 2;
14799 if (last_bytes)
14800 {
14801 rtx tmp = gen_reg_rtx (SImode);
14802 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14803 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14804 part_bytes_reg = tmp;
14805 dstoffset += 2;
14806 }
14807 }
14808
14809 if (last_bytes)
14810 {
14811 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14812 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14813 }
14814 }
14815
14816 return 1;
14817 }
14818
14819 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14820 by mode size. */
14821 inline static rtx
14822 next_consecutive_mem (rtx mem)
14823 {
14824 machine_mode mode = GET_MODE (mem);
14825 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14826 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14827
14828 return adjust_automodify_address (mem, mode, addr, offset);
14829 }
14830
14831 /* Copy using LDRD/STRD instructions whenever possible.
14832 Returns true upon success. */
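/* As an illustrative example, a 14-byte copy between word-aligned buffers
   is expanded below into one doubleword move, one word move and one
   halfword move, falling back to the unaligned_load/store patterns for
   whichever side is not known to be word-aligned.  */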
14833 bool
14834 gen_movmem_ldrd_strd (rtx *operands)
14835 {
14836 unsigned HOST_WIDE_INT len;
14837 HOST_WIDE_INT align;
14838 rtx src, dst, base;
14839 rtx reg0;
14840 bool src_aligned, dst_aligned;
14841 bool src_volatile, dst_volatile;
14842
14843 gcc_assert (CONST_INT_P (operands[2]));
14844 gcc_assert (CONST_INT_P (operands[3]));
14845
14846 len = UINTVAL (operands[2]);
14847 if (len > 64)
14848 return false;
14849
14850 /* Maximum alignment we can assume for both src and dst buffers. */
14851 align = INTVAL (operands[3]);
14852
14853 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14854 return false;
14855
14856 /* Place src and dst addresses in registers
14857 and update the corresponding mem rtx. */
14858 dst = operands[0];
14859 dst_volatile = MEM_VOLATILE_P (dst);
14860 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14861 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14862 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14863
14864 src = operands[1];
14865 src_volatile = MEM_VOLATILE_P (src);
14866 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14867 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14868 src = adjust_automodify_address (src, VOIDmode, base, 0);
14869
14870 if (!unaligned_access && !(src_aligned && dst_aligned))
14871 return false;
14872
14873 if (src_volatile || dst_volatile)
14874 return false;
14875
14876 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14877 if (!(dst_aligned || src_aligned))
14878 return arm_gen_movmemqi (operands);
14879
14880 src = adjust_address (src, DImode, 0);
14881 dst = adjust_address (dst, DImode, 0);
14882 while (len >= 8)
14883 {
14884 len -= 8;
14885 reg0 = gen_reg_rtx (DImode);
14886 if (src_aligned)
14887 emit_move_insn (reg0, src);
14888 else
14889 emit_insn (gen_unaligned_loaddi (reg0, src));
14890
14891 if (dst_aligned)
14892 emit_move_insn (dst, reg0);
14893 else
14894 emit_insn (gen_unaligned_storedi (dst, reg0));
14895
14896 src = next_consecutive_mem (src);
14897 dst = next_consecutive_mem (dst);
14898 }
14899
14900 gcc_assert (len < 8);
14901 if (len >= 4)
14902 {
14903 /* At least a word but less than a doubleword left to copy.  Copy a word. */
14904 reg0 = gen_reg_rtx (SImode);
14905 src = adjust_address (src, SImode, 0);
14906 dst = adjust_address (dst, SImode, 0);
14907 if (src_aligned)
14908 emit_move_insn (reg0, src);
14909 else
14910 emit_insn (gen_unaligned_loadsi (reg0, src));
14911
14912 if (dst_aligned)
14913 emit_move_insn (dst, reg0);
14914 else
14915 emit_insn (gen_unaligned_storesi (dst, reg0));
14916
14917 src = next_consecutive_mem (src);
14918 dst = next_consecutive_mem (dst);
14919 len -= 4;
14920 }
14921
14922 if (len == 0)
14923 return true;
14924
14925 /* Copy the remaining bytes. */
14926 if (len >= 2)
14927 {
14928 dst = adjust_address (dst, HImode, 0);
14929 src = adjust_address (src, HImode, 0);
14930 reg0 = gen_reg_rtx (SImode);
14931 if (src_aligned)
14932 emit_insn (gen_zero_extendhisi2 (reg0, src));
14933 else
14934 emit_insn (gen_unaligned_loadhiu (reg0, src));
14935
14936 if (dst_aligned)
14937 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
14938 else
14939 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14940
14941 src = next_consecutive_mem (src);
14942 dst = next_consecutive_mem (dst);
14943 if (len == 2)
14944 return true;
14945 }
14946
14947 dst = adjust_address (dst, QImode, 0);
14948 src = adjust_address (src, QImode, 0);
14949 reg0 = gen_reg_rtx (QImode);
14950 emit_move_insn (reg0, src);
14951 emit_move_insn (dst, reg0);
14952 return true;
14953 }
14954
14955 /* Select a dominance comparison mode if possible for a test of the general
14956 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14957 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14958 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14959 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14960 In all cases OP will be either EQ or NE, but we don't need to know which
14961 here. If we are unable to support a dominance comparison we return
14962 CC mode. This will then fail to match for the RTL expressions that
14963 generate this call. */
14964 machine_mode
14965 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14966 {
14967 enum rtx_code cond1, cond2;
14968 int swapped = 0;
14969
14970 /* Currently we will probably get the wrong result if the individual
14971 comparisons are not simple. This also ensures that it is safe to
14972 reverse a comparison if necessary. */
14973 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14974 != CCmode)
14975 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14976 != CCmode))
14977 return CCmode;
14978
14979 /* The if_then_else variant of this tests the second condition if the
14980 first passes, but is true if the first fails. Reverse the first
14981 condition to get a true "inclusive-or" expression. */
14982 if (cond_or == DOM_CC_NX_OR_Y)
14983 cond1 = reverse_condition (cond1);
14984
14985 /* If the comparisons are not equal, and one doesn't dominate the other,
14986 then we can't do this. */
14987 if (cond1 != cond2
14988 && !comparison_dominates_p (cond1, cond2)
14989 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14990 return CCmode;
14991
14992 if (swapped)
14993 std::swap (cond1, cond2);
14994
14995 switch (cond1)
14996 {
14997 case EQ:
14998 if (cond_or == DOM_CC_X_AND_Y)
14999 return CC_DEQmode;
15000
15001 switch (cond2)
15002 {
15003 case EQ: return CC_DEQmode;
15004 case LE: return CC_DLEmode;
15005 case LEU: return CC_DLEUmode;
15006 case GE: return CC_DGEmode;
15007 case GEU: return CC_DGEUmode;
15008 default: gcc_unreachable ();
15009 }
15010
15011 case LT:
15012 if (cond_or == DOM_CC_X_AND_Y)
15013 return CC_DLTmode;
15014
15015 switch (cond2)
15016 {
15017 case LT:
15018 return CC_DLTmode;
15019 case LE:
15020 return CC_DLEmode;
15021 case NE:
15022 return CC_DNEmode;
15023 default:
15024 gcc_unreachable ();
15025 }
15026
15027 case GT:
15028 if (cond_or == DOM_CC_X_AND_Y)
15029 return CC_DGTmode;
15030
15031 switch (cond2)
15032 {
15033 case GT:
15034 return CC_DGTmode;
15035 case GE:
15036 return CC_DGEmode;
15037 case NE:
15038 return CC_DNEmode;
15039 default:
15040 gcc_unreachable ();
15041 }
15042
15043 case LTU:
15044 if (cond_or == DOM_CC_X_AND_Y)
15045 return CC_DLTUmode;
15046
15047 switch (cond2)
15048 {
15049 case LTU:
15050 return CC_DLTUmode;
15051 case LEU:
15052 return CC_DLEUmode;
15053 case NE:
15054 return CC_DNEmode;
15055 default:
15056 gcc_unreachable ();
15057 }
15058
15059 case GTU:
15060 if (cond_or == DOM_CC_X_AND_Y)
15061 return CC_DGTUmode;
15062
15063 switch (cond2)
15064 {
15065 case GTU:
15066 return CC_DGTUmode;
15067 case GEU:
15068 return CC_DGEUmode;
15069 case NE:
15070 return CC_DNEmode;
15071 default:
15072 gcc_unreachable ();
15073 }
15074
15075 /* The remaining cases only occur when both comparisons are the
15076 same. */
15077 case NE:
15078 gcc_assert (cond1 == cond2);
15079 return CC_DNEmode;
15080
15081 case LE:
15082 gcc_assert (cond1 == cond2);
15083 return CC_DLEmode;
15084
15085 case GE:
15086 gcc_assert (cond1 == cond2);
15087 return CC_DGEmode;
15088
15089 case LEU:
15090 gcc_assert (cond1 == cond2);
15091 return CC_DLEUmode;
15092
15093 case GEU:
15094 gcc_assert (cond1 == cond2);
15095 return CC_DGEUmode;
15096
15097 default:
15098 gcc_unreachable ();
15099 }
15100 }
15101
15102 machine_mode
15103 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15104 {
15105 /* All floating point compares return CCFP if it is an equality
15106 comparison, and CCFPE otherwise. */
15107 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15108 {
15109 switch (op)
15110 {
15111 case EQ:
15112 case NE:
15113 case UNORDERED:
15114 case ORDERED:
15115 case UNLT:
15116 case UNLE:
15117 case UNGT:
15118 case UNGE:
15119 case UNEQ:
15120 case LTGT:
15121 return CCFPmode;
15122
15123 case LT:
15124 case LE:
15125 case GT:
15126 case GE:
15127 return CCFPEmode;
15128
15129 default:
15130 gcc_unreachable ();
15131 }
15132 }
15133
15134 /* A compare with a shifted operand. Because of canonicalization, the
15135 comparison will have to be swapped when we emit the assembler. */
15136 if (GET_MODE (y) == SImode
15137 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15138 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15139 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15140 || GET_CODE (x) == ROTATERT))
15141 return CC_SWPmode;
15142
15143 /* This operation is performed swapped, but since we only rely on the Z
15144 flag we don't need an additional mode. */
15145 if (GET_MODE (y) == SImode
15146 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15147 && GET_CODE (x) == NEG
15148 && (op == EQ || op == NE))
15149 return CC_Zmode;
15150
15151 /* This is a special case that is used by combine to allow a
15152 comparison of a shifted byte load to be split into a zero-extend
15153 followed by a comparison of the shifted integer (only valid for
15154 equalities and unsigned inequalities). */
15155 if (GET_MODE (x) == SImode
15156 && GET_CODE (x) == ASHIFT
15157 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15158 && GET_CODE (XEXP (x, 0)) == SUBREG
15159 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15160 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15161 && (op == EQ || op == NE
15162 || op == GEU || op == GTU || op == LTU || op == LEU)
15163 && CONST_INT_P (y))
15164 return CC_Zmode;
15165
15166 /* A construct for a conditional compare: if the false arm contains
15167 0, then both conditions must be true; otherwise either condition
15168 must be true. Not all conditions are possible, so CCmode is
15169 returned if it can't be done. */
15170 if (GET_CODE (x) == IF_THEN_ELSE
15171 && (XEXP (x, 2) == const0_rtx
15172 || XEXP (x, 2) == const1_rtx)
15173 && COMPARISON_P (XEXP (x, 0))
15174 && COMPARISON_P (XEXP (x, 1)))
15175 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15176 INTVAL (XEXP (x, 2)));
15177
15178 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15179 if (GET_CODE (x) == AND
15180 && (op == EQ || op == NE)
15181 && COMPARISON_P (XEXP (x, 0))
15182 && COMPARISON_P (XEXP (x, 1)))
15183 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15184 DOM_CC_X_AND_Y);
15185
15186 if (GET_CODE (x) == IOR
15187 && (op == EQ || op == NE)
15188 && COMPARISON_P (XEXP (x, 0))
15189 && COMPARISON_P (XEXP (x, 1)))
15190 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15191 DOM_CC_X_OR_Y);
15192
15193 /* An operation (on Thumb) where we want to test for a single bit.
15194 This is done by shifting that bit up into the top bit of a
15195 scratch register; we can then branch on the sign bit. */
15196 if (TARGET_THUMB1
15197 && GET_MODE (x) == SImode
15198 && (op == EQ || op == NE)
15199 && GET_CODE (x) == ZERO_EXTRACT
15200 && XEXP (x, 1) == const1_rtx)
15201 return CC_Nmode;
15202
15203 /* For an operation that sets the condition codes as a side-effect, the
15204 V flag is not set correctly, so we can only use comparisons where
15205 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15206 instead.) */
15207 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15208 if (GET_MODE (x) == SImode
15209 && y == const0_rtx
15210 && (op == EQ || op == NE || op == LT || op == GE)
15211 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15212 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15213 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15214 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15215 || GET_CODE (x) == LSHIFTRT
15216 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15217 || GET_CODE (x) == ROTATERT
15218 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15219 return CC_NOOVmode;
15220
15221 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15222 return CC_Zmode;
15223
15224 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15225 && GET_CODE (x) == PLUS
15226 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15227 return CC_Cmode;
15228
15229 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15230 {
15231 switch (op)
15232 {
15233 case EQ:
15234 case NE:
15235 /* A DImode comparison against zero can be implemented by
15236 or'ing the two halves together. */
15237 if (y == const0_rtx)
15238 return CC_Zmode;
15239
15240 /* We can do an equality test in three Thumb instructions. */
15241 if (!TARGET_32BIT)
15242 return CC_Zmode;
15243
15244 /* FALLTHROUGH */
15245
15246 case LTU:
15247 case LEU:
15248 case GTU:
15249 case GEU:
15250 /* DImode unsigned comparisons can be implemented by cmp +
15251 cmpeq without a scratch register. Not worth doing in
15252 Thumb-2. */
15253 if (TARGET_32BIT)
15254 return CC_CZmode;
15255
15256 /* FALLTHROUGH */
15257
15258 case LT:
15259 case LE:
15260 case GT:
15261 case GE:
15262 /* DImode signed and unsigned comparisons can be implemented
15263 by cmp + sbcs with a scratch register, but that does not
15264 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15265 gcc_assert (op != EQ && op != NE);
15266 return CC_NCVmode;
15267
15268 default:
15269 gcc_unreachable ();
15270 }
15271 }
15272
15273 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15274 return GET_MODE (x);
15275
15276 return CCmode;
15277 }
15278
15279 /* X and Y are two things to compare using CODE. Emit the compare insn and
15280 return the rtx for register 0 in the proper mode. FP means this is a
15281 floating point compare: I don't think that it is needed on the arm. */
15282 rtx
15283 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15284 {
15285 machine_mode mode;
15286 rtx cc_reg;
15287 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15288
15289 /* We might have X as a constant, Y as a register because of the predicates
15290 used for cmpdi. If so, force X to a register here. */
15291 if (dimode_comparison && !REG_P (x))
15292 x = force_reg (DImode, x);
15293
15294 mode = SELECT_CC_MODE (code, x, y);
15295 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15296
15297 if (dimode_comparison
15298 && mode != CC_CZmode)
15299 {
15300 rtx clobber, set;
15301
15302 /* To compare two non-zero values for equality, XOR them and
15303 then compare against zero. Not used for ARM mode; there
15304 CC_CZmode is cheaper. */
15305 if (mode == CC_Zmode && y != const0_rtx)
15306 {
15307 gcc_assert (!reload_completed);
15308 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15309 y = const0_rtx;
15310 }
15311
15312 /* A scratch register is required. */
15313 if (reload_completed)
15314 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15315 else
15316 scratch = gen_rtx_SCRATCH (SImode);
15317
15318 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15319 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15320 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15321 }
15322 else
15323 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15324
15325 return cc_reg;
15326 }
15327
15328 /* Generate a sequence of insns that will generate the correct return
15329 address mask depending on the physical architecture that the program
15330 is running on. */
15331 rtx
15332 arm_gen_return_addr_mask (void)
15333 {
15334 rtx reg = gen_reg_rtx (Pmode);
15335
15336 emit_insn (gen_return_addr_mask (reg));
15337 return reg;
15338 }
15339
15340 void
15341 arm_reload_in_hi (rtx *operands)
15342 {
15343 rtx ref = operands[1];
15344 rtx base, scratch;
15345 HOST_WIDE_INT offset = 0;
15346
15347 if (GET_CODE (ref) == SUBREG)
15348 {
15349 offset = SUBREG_BYTE (ref);
15350 ref = SUBREG_REG (ref);
15351 }
15352
15353 if (REG_P (ref))
15354 {
15355 /* We have a pseudo which has been spilt onto the stack; there
15356 are two cases here: the first where there is a simple
15357 stack-slot replacement and a second where the stack-slot is
15358 out of range, or is used as a subreg. */
15359 if (reg_equiv_mem (REGNO (ref)))
15360 {
15361 ref = reg_equiv_mem (REGNO (ref));
15362 base = find_replacement (&XEXP (ref, 0));
15363 }
15364 else
15365 /* The slot is out of range, or was dressed up in a SUBREG. */
15366 base = reg_equiv_address (REGNO (ref));
15367 }
15368 else
15369 base = find_replacement (&XEXP (ref, 0));
15370
15371 /* Handle the case where the address is too complex to be offset by 1. */
15372 if (GET_CODE (base) == MINUS
15373 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15374 {
15375 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15376
15377 emit_set_insn (base_plus, base);
15378 base = base_plus;
15379 }
15380 else if (GET_CODE (base) == PLUS)
15381 {
15382 /* The addend must be CONST_INT, or we would have dealt with it above. */
15383 HOST_WIDE_INT hi, lo;
15384
15385 offset += INTVAL (XEXP (base, 1));
15386 base = XEXP (base, 0);
15387
15388 /* Rework the address into a legal sequence of insns. */
15389 /* Valid range for lo is -4095 -> 4095 */
15390 lo = (offset >= 0
15391 ? (offset & 0xfff)
15392 : -((-offset) & 0xfff));
15393
15394 /* Corner case: if lo is the max offset then we would be out of range
15395 once we have added the additional 1 below, so bump the msb into the
15396 pre-loading insn(s). */
15397 if (lo == 4095)
15398 lo &= 0x7ff;
15399
15400 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15401 ^ (HOST_WIDE_INT) 0x80000000)
15402 - (HOST_WIDE_INT) 0x80000000);
15403
15404 gcc_assert (hi + lo == offset);
15405
15406 if (hi != 0)
15407 {
15408 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15409
15410 /* Get the base address; addsi3 knows how to handle constants
15411 that require more than one insn. */
15412 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15413 base = base_plus;
15414 offset = lo;
15415 }
15416 }
15417
15418 /* Operands[2] may overlap operands[0] (though it won't overlap
15419 operands[1]); that is why we asked for a DImode reg -- so we can
15420 use the half that does not overlap.  */
15421 if (REGNO (operands[2]) == REGNO (operands[0]))
15422 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15423 else
15424 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15425
15426 emit_insn (gen_zero_extendqisi2 (scratch,
15427 gen_rtx_MEM (QImode,
15428 plus_constant (Pmode, base,
15429 offset))));
15430 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15431 gen_rtx_MEM (QImode,
15432 plus_constant (Pmode, base,
15433 offset + 1))));
15434 if (!BYTES_BIG_ENDIAN)
15435 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15436 gen_rtx_IOR (SImode,
15437 gen_rtx_ASHIFT
15438 (SImode,
15439 gen_rtx_SUBREG (SImode, operands[0], 0),
15440 GEN_INT (8)),
15441 scratch));
15442 else
15443 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15444 gen_rtx_IOR (SImode,
15445 gen_rtx_ASHIFT (SImode, scratch,
15446 GEN_INT (8)),
15447 gen_rtx_SUBREG (SImode, operands[0], 0)));
15448 }
15449
15450 /* Handle storing a half-word to memory during reload by synthesizing as two
15451 byte stores. Take care not to clobber the input values until after we
15452 have moved them somewhere safe. This code assumes that if the DImode
15453 scratch in operands[2] overlaps either the input value or output address
15454 in some way, then that value must die in this insn (we absolutely need
15455 two scratch registers for some corner cases). */
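/* A rough sketch of the little-endian expansion below, storing OUTVAL at
   BASE + OFFSET:
       strb  <outval>, [<base>, #offset]
       lsr   <scratch>, <outval>, #8
       strb  <scratch>, [<base>, #offset + 1]
   The big-endian variant simply swaps the two byte addresses.  */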
15456 void
15457 arm_reload_out_hi (rtx *operands)
15458 {
15459 rtx ref = operands[0];
15460 rtx outval = operands[1];
15461 rtx base, scratch;
15462 HOST_WIDE_INT offset = 0;
15463
15464 if (GET_CODE (ref) == SUBREG)
15465 {
15466 offset = SUBREG_BYTE (ref);
15467 ref = SUBREG_REG (ref);
15468 }
15469
15470 if (REG_P (ref))
15471 {
15472 /* We have a pseudo which has been spilt onto the stack; there
15473 are two cases here: the first where there is a simple
15474 stack-slot replacement and a second where the stack-slot is
15475 out of range, or is used as a subreg. */
15476 if (reg_equiv_mem (REGNO (ref)))
15477 {
15478 ref = reg_equiv_mem (REGNO (ref));
15479 base = find_replacement (&XEXP (ref, 0));
15480 }
15481 else
15482 /* The slot is out of range, or was dressed up in a SUBREG. */
15483 base = reg_equiv_address (REGNO (ref));
15484 }
15485 else
15486 base = find_replacement (&XEXP (ref, 0));
15487
15488 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15489
15490 /* Handle the case where the address is too complex to be offset by 1. */
15491 if (GET_CODE (base) == MINUS
15492 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15493 {
15494 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15495
15496 /* Be careful not to destroy OUTVAL. */
15497 if (reg_overlap_mentioned_p (base_plus, outval))
15498 {
15499 /* Updating base_plus might destroy outval, see if we can
15500 swap the scratch and base_plus. */
15501 if (!reg_overlap_mentioned_p (scratch, outval))
15502 std::swap (scratch, base_plus);
15503 else
15504 {
15505 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15506
15507 /* Be conservative and copy OUTVAL into the scratch now;
15508 this should only be necessary if outval is a subreg
15509 of something larger than a word. */
15510 /* XXX Might this clobber base? I can't see how it can,
15511 since scratch is known to overlap with OUTVAL, and
15512 must be wider than a word. */
15513 emit_insn (gen_movhi (scratch_hi, outval));
15514 outval = scratch_hi;
15515 }
15516 }
15517
15518 emit_set_insn (base_plus, base);
15519 base = base_plus;
15520 }
15521 else if (GET_CODE (base) == PLUS)
15522 {
15523 /* The addend must be CONST_INT, or we would have dealt with it above. */
15524 HOST_WIDE_INT hi, lo;
15525
15526 offset += INTVAL (XEXP (base, 1));
15527 base = XEXP (base, 0);
15528
15529 /* Rework the address into a legal sequence of insns. */
15530 /* Valid range for lo is -4095 -> 4095 */
15531 lo = (offset >= 0
15532 ? (offset & 0xfff)
15533 : -((-offset) & 0xfff));
15534
15535 /* Corner case: if lo is the max offset then we would be out of range
15536 once we have added the additional 1 below, so bump the msb into the
15537 pre-loading insn(s). */
15538 if (lo == 4095)
15539 lo &= 0x7ff;
15540
15541 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15542 ^ (HOST_WIDE_INT) 0x80000000)
15543 - (HOST_WIDE_INT) 0x80000000);
15544
15545 gcc_assert (hi + lo == offset);
15546
15547 if (hi != 0)
15548 {
15549 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15550
15551 /* Be careful not to destroy OUTVAL. */
15552 if (reg_overlap_mentioned_p (base_plus, outval))
15553 {
15554 /* Updating base_plus might destroy outval, see if we
15555 can swap the scratch and base_plus. */
15556 if (!reg_overlap_mentioned_p (scratch, outval))
15557 std::swap (scratch, base_plus);
15558 else
15559 {
15560 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15561
15562 /* Be conservative and copy outval into scratch now;
15563 this should only be necessary if outval is a
15564 subreg of something larger than a word. */
15565 /* XXX Might this clobber base? I can't see how it
15566 can, since scratch is known to overlap with
15567 outval. */
15568 emit_insn (gen_movhi (scratch_hi, outval));
15569 outval = scratch_hi;
15570 }
15571 }
15572
15573 /* Get the base address; addsi3 knows how to handle constants
15574 that require more than one insn. */
15575 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15576 base = base_plus;
15577 offset = lo;
15578 }
15579 }
15580
15581 if (BYTES_BIG_ENDIAN)
15582 {
15583 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15584 plus_constant (Pmode, base,
15585 offset + 1)),
15586 gen_lowpart (QImode, outval)));
15587 emit_insn (gen_lshrsi3 (scratch,
15588 gen_rtx_SUBREG (SImode, outval, 0),
15589 GEN_INT (8)));
15590 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15591 offset)),
15592 gen_lowpart (QImode, scratch)));
15593 }
15594 else
15595 {
15596 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15597 offset)),
15598 gen_lowpart (QImode, outval)));
15599 emit_insn (gen_lshrsi3 (scratch,
15600 gen_rtx_SUBREG (SImode, outval, 0),
15601 GEN_INT (8)));
15602 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15603 plus_constant (Pmode, base,
15604 offset + 1)),
15605 gen_lowpart (QImode, scratch)));
15606 }
15607 }
15608
15609 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15610 (padded to the size of a word) should be passed in a register. */
15611
15612 static bool
15613 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15614 {
15615 if (TARGET_AAPCS_BASED)
15616 return must_pass_in_stack_var_size (mode, type);
15617 else
15618 return must_pass_in_stack_var_size_or_pad (mode, type);
15619 }
15620
15621
15622 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15623 Return true if an argument passed on the stack should be padded upwards,
15624 i.e. if the least-significant byte has useful data.
15625 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15626 aggregate types are placed in the lowest memory address. */
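/* For instance, on a big-endian AAPCS target an integral type such as
   'short' makes this return false, whereas a four-byte aggregate still
   returns true.  */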
15627
15628 bool
15629 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15630 {
15631 if (!TARGET_AAPCS_BASED)
15632 return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;
15633
15634 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15635 return false;
15636
15637 return true;
15638 }
15639
15640
15641 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15642 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15643 register has useful data, and return the opposite if the most
15644 significant byte does. */
15645
15646 bool
15647 arm_pad_reg_upward (machine_mode mode,
15648 tree type, int first ATTRIBUTE_UNUSED)
15649 {
15650 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15651 {
15652 /* For AAPCS, small aggregates, small fixed-point types,
15653 and small complex types are always padded upwards. */
15654 if (type)
15655 {
15656 if ((AGGREGATE_TYPE_P (type)
15657 || TREE_CODE (type) == COMPLEX_TYPE
15658 || FIXED_POINT_TYPE_P (type))
15659 && int_size_in_bytes (type) <= 4)
15660 return true;
15661 }
15662 else
15663 {
15664 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15665 && GET_MODE_SIZE (mode) <= 4)
15666 return true;
15667 }
15668 }
15669
15670 /* Otherwise, use default padding. */
15671 return !BYTES_BIG_ENDIAN;
15672 }
15673
15674 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15675 assuming that the address in the base register is word aligned. */
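/* For example, an offset of 256 is accepted in Thumb-2 (a multiple of 4
   within +/-1020) but rejected in ARM state, where the limit is +/-255;
   an offset of 6 is rejected in Thumb-2 because it is not word-aligned.  */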
15676 bool
15677 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15678 {
15679 HOST_WIDE_INT max_offset;
15680
15681 /* Offset must be a multiple of 4 in Thumb-2 mode. */
15682 if (TARGET_THUMB2 && ((offset & 3) != 0))
15683 return false;
15684
15685 if (TARGET_THUMB2)
15686 max_offset = 1020;
15687 else if (TARGET_ARM)
15688 max_offset = 255;
15689 else
15690 return false;
15691
15692 return ((offset <= max_offset) && (offset >= -max_offset));
15693 }
15694
15695 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15696 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15697 Assumes that the address in the base register RN is word aligned.  The pattern
15698 guarantees that both memory accesses use the same base register,
15699 the offsets are constants within the range, and the gap between the offsets is 4.
15700 If reload is complete then check that registers are legal.  WBACK indicates whether
15701 address is updated. LOAD indicates whether memory access is load or store. */
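/* For instance, ARM state requires an even/odd consecutive register pair
   such as {r4, r5}, so RT = r1 or RT2 != RT + 1 is rejected below, while
   Thumb-2 rejects SP or PC as a transfer register.  */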
15702 bool
15703 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15704 bool wback, bool load)
15705 {
15706 unsigned int t, t2, n;
15707
15708 if (!reload_completed)
15709 return true;
15710
15711 if (!offset_ok_for_ldrd_strd (offset))
15712 return false;
15713
15714 t = REGNO (rt);
15715 t2 = REGNO (rt2);
15716 n = REGNO (rn);
15717
15718 if ((TARGET_THUMB2)
15719 && ((wback && (n == t || n == t2))
15720 || (t == SP_REGNUM)
15721 || (t == PC_REGNUM)
15722 || (t2 == SP_REGNUM)
15723 || (t2 == PC_REGNUM)
15724 || (!load && (n == PC_REGNUM))
15725 || (load && (t == t2))
15726 /* Triggers Cortex-M3 LDRD errata. */
15727 || (!wback && load && fix_cm3_ldrd && (n == t))))
15728 return false;
15729
15730 if ((TARGET_ARM)
15731 && ((wback && (n == t || n == t2))
15732 || (t2 == PC_REGNUM)
15733 || (t % 2 != 0) /* First destination register is not even. */
15734 || (t2 != t + 1)
15735 /* PC can be used as a base register (for offset addressing only),
15736 but it is deprecated. */
15737 || (n == PC_REGNUM)))
15738 return false;
15739
15740 return true;
15741 }
15742
15743 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15744 operand MEM's address contains an immediate offset from the base
15745 register and has no side effects, in which case it sets BASE and
15746 OFFSET accordingly. */
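/* As an illustration, an address of the form (plus (reg) (const_int 8))
   yields *BASE = the register and *OFFSET = 8, while an auto-modified
   address such as a POST_INC is rejected by the side-effects check.  */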
15747 static bool
15748 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15749 {
15750 rtx addr;
15751
15752 gcc_assert (base != NULL && offset != NULL);
15753
15754 /* TODO: Handle more general memory operand patterns, such as
15755 PRE_DEC and PRE_INC. */
15756
15757 if (side_effects_p (mem))
15758 return false;
15759
15760 /* Can't deal with subregs. */
15761 if (GET_CODE (mem) == SUBREG)
15762 return false;
15763
15764 gcc_assert (MEM_P (mem));
15765
15766 *offset = const0_rtx;
15767
15768 addr = XEXP (mem, 0);
15769
15770 /* If addr isn't valid for DImode, then we can't handle it. */
15771 if (!arm_legitimate_address_p (DImode, addr,
15772 reload_in_progress || reload_completed))
15773 return false;
15774
15775 if (REG_P (addr))
15776 {
15777 *base = addr;
15778 return true;
15779 }
15780 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15781 {
15782 *base = XEXP (addr, 0);
15783 *offset = XEXP (addr, 1);
15784 return (REG_P (*base) && CONST_INT_P (*offset));
15785 }
15786
15787 return false;
15788 }
15789
15790 /* Called from a peephole2 to replace two word-size accesses with a
15791 single LDRD/STRD instruction. Returns true iff we can generate a
15792 new instruction sequence. That is, both accesses use the same base
15793 register and the gap between constant offsets is 4. This function
15794 may reorder its operands to match ldrd/strd RTL templates.
15795 OPERANDS are the operands found by the peephole matcher;
15796 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15797 corresponding memory operands.  LOAD indicates whether the access
15798 is a load or a store.  CONST_STORE indicates a store of constant
15799 integer values held in OPERANDS[4,5] and assumes that the pattern
15800 is four insns long, for the purpose of checking dead registers.
15801 COMMUTE indicates that register operands may be reordered. */
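/* As an illustrative sketch (Thumb-2 syntax), a successful match rewrites
       ldr   r0, [r2]
       ldr   r1, [r2, #4]
   as
       ldrd  r0, r1, [r2]
   once the base, offset and register checks below have succeeded.  */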
15802 bool
15803 gen_operands_ldrd_strd (rtx *operands, bool load,
15804 bool const_store, bool commute)
15805 {
15806 int nops = 2;
15807 HOST_WIDE_INT offsets[2], offset;
15808 rtx base = NULL_RTX;
15809 rtx cur_base, cur_offset, tmp;
15810 int i, gap;
15811 HARD_REG_SET regset;
15812
15813 gcc_assert (!const_store || !load);
15814 /* Check that the memory references are immediate offsets from the
15815 same base register. Extract the base register, the destination
15816 registers, and the corresponding memory offsets. */
15817 for (i = 0; i < nops; i++)
15818 {
15819 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15820 return false;
15821
15822 if (i == 0)
15823 base = cur_base;
15824 else if (REGNO (base) != REGNO (cur_base))
15825 return false;
15826
15827 offsets[i] = INTVAL (cur_offset);
15828 if (GET_CODE (operands[i]) == SUBREG)
15829 {
15830 tmp = SUBREG_REG (operands[i]);
15831 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15832 operands[i] = tmp;
15833 }
15834 }
15835
15836 /* Make sure there is no dependency between the individual loads. */
15837 if (load && REGNO (operands[0]) == REGNO (base))
15838 return false; /* RAW */
15839
15840 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15841 return false; /* WAW */
15842
15843 /* If the same input register is used in both stores
15844 when storing different constants, try to find a free register.
15845 For example, the code
15846 mov r0, 0
15847 str r0, [r2]
15848 mov r0, 1
15849 str r0, [r2, #4]
15850 can be transformed into
15851 mov r1, 0
15852 strd r1, r0, [r2]
15853 in Thumb mode assuming that r1 is free. */
15854 if (const_store
15855 && REGNO (operands[0]) == REGNO (operands[1])
15856 && INTVAL (operands[4]) != INTVAL (operands[5]))
15857 {
15858 if (TARGET_THUMB2)
15859 {
15860 CLEAR_HARD_REG_SET (regset);
15861 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15862 if (tmp == NULL_RTX)
15863 return false;
15864
15865 /* Use the new register in the first load to ensure that
15866 if the original input register is not dead after peephole,
15867 then it will have the correct constant value. */
15868 operands[0] = tmp;
15869 }
15870 else if (TARGET_ARM)
15871 {
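          /* Note: the unconditional return below disables the ARM-mode
             handling that follows, so the remainder of this block is
             currently unreachable.  */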
15872 return false;
15873 int regno = REGNO (operands[0]);
15874 if (!peep2_reg_dead_p (4, operands[0]))
15875 {
15876 /* When the input register is even and is not dead after the
15877 pattern, it has to hold the second constant but we cannot
15878 form a legal STRD in ARM mode with this register as the second
15879 register. */
15880 if (regno % 2 == 0)
15881 return false;
15882
15883 /* Is regno-1 free? */
15884 SET_HARD_REG_SET (regset);
15885 CLEAR_HARD_REG_BIT (regset, regno - 1);
15886 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15887 if (tmp == NULL_RTX)
15888 return false;
15889
15890 operands[0] = tmp;
15891 }
15892 else
15893 {
15894 /* Find a DImode register. */
15895 CLEAR_HARD_REG_SET (regset);
15896 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15897 if (tmp != NULL_RTX)
15898 {
15899 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15900 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15901 }
15902 else
15903 {
15904 /* Can we use the input register to form a DI register? */
15905 SET_HARD_REG_SET (regset);
15906 CLEAR_HARD_REG_BIT (regset,
15907 regno % 2 == 0 ? regno + 1 : regno - 1);
15908 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15909 if (tmp == NULL_RTX)
15910 return false;
15911 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15912 }
15913 }
15914
15915 gcc_assert (operands[0] != NULL_RTX);
15916 gcc_assert (operands[1] != NULL_RTX);
15917 gcc_assert (REGNO (operands[0]) % 2 == 0);
15918 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15919 }
15920 }
15921
15922 /* Make sure the instructions are ordered with lower memory access first. */
15923 if (offsets[0] > offsets[1])
15924 {
15925 gap = offsets[0] - offsets[1];
15926 offset = offsets[1];
15927
15928 /* Swap the instructions such that lower memory is accessed first. */
15929 std::swap (operands[0], operands[1]);
15930 std::swap (operands[2], operands[3]);
15931 if (const_store)
15932 std::swap (operands[4], operands[5]);
15933 }
15934 else
15935 {
15936 gap = offsets[1] - offsets[0];
15937 offset = offsets[0];
15938 }
15939
15940 /* Make sure accesses are to consecutive memory locations. */
15941 if (gap != 4)
15942 return false;
15943
15944 /* Make sure we generate legal instructions. */
15945 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15946 false, load))
15947 return true;
15948
15949 /* In Thumb state the register constraints are almost nonexistent, so
15950 there is little hope of fixing a rejected pair by reordering registers.  */
15951 if (TARGET_THUMB2)
15952 return false;
15953
15954 if (load && commute)
15955 {
15956 /* Try reordering registers. */
15957 std::swap (operands[0], operands[1]);
15958 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15959 false, load))
15960 return true;
15961 }
15962
15963 if (const_store)
15964 {
15965 /* If input registers are dead after this pattern, they can be
15966 reordered or replaced by other registers that are free in the
15967 current pattern. */
15968 if (!peep2_reg_dead_p (4, operands[0])
15969 || !peep2_reg_dead_p (4, operands[1]))
15970 return false;
15971
15972 /* Try to reorder the input registers. */
15973 /* For example, the code
15974 mov r0, 0
15975 mov r1, 1
15976 str r1, [r2]
15977 str r0, [r2, #4]
15978 can be transformed into
15979 mov r1, 0
15980 mov r0, 1
15981 strd r0, [r2]
15982 */
15983 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15984 false, false))
15985 {
15986 std::swap (operands[0], operands[1]);
15987 return true;
15988 }
15989
15990 /* Try to find a free DI register. */
15991 CLEAR_HARD_REG_SET (regset);
15992 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15993 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15994 while (true)
15995 {
15996 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15997 if (tmp == NULL_RTX)
15998 return false;
15999
16000 /* DREG must be an even-numbered register in DImode.
16001 Split it into SI registers. */
16002 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
16003 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
16004 gcc_assert (operands[0] != NULL_RTX);
16005 gcc_assert (operands[1] != NULL_RTX);
16006 gcc_assert (REGNO (operands[0]) % 2 == 0);
16007 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
16008
16009 return (operands_ok_ldrd_strd (operands[0], operands[1],
16010 base, offset,
16011 false, load));
16012 }
16013 }
16014
16015 return false;
16016 }
16017
16018
16019
16020 \f
16021 /* Print a symbolic form of X to the debug file, F. */
16022 static void
16023 arm_print_value (FILE *f, rtx x)
16024 {
16025 switch (GET_CODE (x))
16026 {
16027 case CONST_INT:
16028 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
16029 return;
16030
16031 case CONST_DOUBLE:
16032 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
16033 return;
16034
16035 case CONST_VECTOR:
16036 {
16037 int i;
16038
16039 fprintf (f, "<");
16040 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16041 {
16042 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16043 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16044 fputc (',', f);
16045 }
16046 fprintf (f, ">");
16047 }
16048 return;
16049
16050 case CONST_STRING:
16051 fprintf (f, "\"%s\"", XSTR (x, 0));
16052 return;
16053
16054 case SYMBOL_REF:
16055 fprintf (f, "`%s'", XSTR (x, 0));
16056 return;
16057
16058 case LABEL_REF:
16059 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16060 return;
16061
16062 case CONST:
16063 arm_print_value (f, XEXP (x, 0));
16064 return;
16065
16066 case PLUS:
16067 arm_print_value (f, XEXP (x, 0));
16068 fprintf (f, "+");
16069 arm_print_value (f, XEXP (x, 1));
16070 return;
16071
16072 case PC:
16073 fprintf (f, "pc");
16074 return;
16075
16076 default:
16077 fprintf (f, "????");
16078 return;
16079 }
16080 }
16081 \f
16082 /* Routines for manipulation of the constant pool. */
16083
16084 /* Arm instructions cannot load a large constant directly into a
16085 register; they have to come from a pc relative load. The constant
16086 must therefore be placed in the addressable range of the pc
16087 relative load. Depending on the precise pc relative load
16088 instruction the range is somewhere between 256 bytes and 4k. This
16089 means that we often have to dump a constant inside a function, and
16090 generate code to branch around it.
16091
16092 It is important to minimize this, since the branches will slow
16093 things down and make the code larger.
16094
16095 Normally we can hide the table after an existing unconditional
16096 branch so that there is no interruption of the flow, but in the
16097 worst case the code looks like this:
16098
16099 ldr rn, L1
16100 ...
16101 b L2
16102 align
16103 L1: .long value
16104 L2:
16105 ...
16106
16107 ldr rn, L3
16108 ...
16109 b L4
16110 align
16111 L3: .long value
16112 L4:
16113 ...
16114
16115 We fix this by performing a scan after scheduling, which notices
16116 which instructions need to have their operands fetched from the
16117 constant table and builds the table.
16118
16119 The algorithm starts by building a table of all the constants that
16120 need fixing up and all the natural barriers in the function (places
16121 where a constant table can be dropped without breaking the flow).
16122 For each fixup we note how far the pc-relative replacement will be
16123 able to reach and the offset of the instruction into the function.
16124
16125 Having built the table we then group the fixes together to form
16126 tables that are as large as possible (subject to addressing
16127 constraints) and emit each table of constants after the last
16128 barrier that is within range of all the instructions in the group.
16129 If a group does not contain a barrier, then we forcibly create one
16130 by inserting a jump instruction into the flow. Once the table has
16131 been inserted, the insns are then modified to reference the
16132 relevant entry in the pool.
16133
16134 Possible enhancements to the algorithm (not implemented) are:
16135
16136 1) For some processors and object formats, there may be benefit in
16137 aligning the pools to the start of cache lines; this alignment
16138 would need to be taken into account when calculating addressability
16139 of a pool. */
16140
16141 /* These typedefs are located at the start of this file, so that
16142 they can be used in the prototypes there. This comment is to
16143 remind readers of that fact so that the following structures
16144 can be understood more easily.
16145
16146 typedef struct minipool_node Mnode;
16147 typedef struct minipool_fixup Mfix; */
16148
16149 struct minipool_node
16150 {
16151 /* Doubly linked chain of entries. */
16152 Mnode * next;
16153 Mnode * prev;
16154 /* The maximum offset into the code at which this entry can be placed. While
16155 pushing fixes for forward references, all entries are sorted in order
16156 of increasing max_address. */
16157 HOST_WIDE_INT max_address;
16158 /* Similarly for an entry inserted for a backwards ref. */
16159 HOST_WIDE_INT min_address;
16160 /* The number of fixes referencing this entry. This can become zero
16161 if we "unpush" an entry. In this case we ignore the entry when we
16162 come to emit the code. */
16163 int refcount;
16164 /* The offset from the start of the minipool. */
16165 HOST_WIDE_INT offset;
16166 /* The value in the table. */
16167 rtx value;
16168 /* The mode of value. */
16169 machine_mode mode;
16170 /* The size of the value. With iWMMXt enabled
16171 sizes > 4 also imply an alignment of 8 bytes. */
16172 int fix_size;
16173 };
16174
16175 struct minipool_fixup
16176 {
16177 Mfix * next;
16178 rtx_insn * insn;
16179 HOST_WIDE_INT address;
16180 rtx * loc;
16181 machine_mode mode;
16182 int fix_size;
16183 rtx value;
16184 Mnode * minipool;
16185 HOST_WIDE_INT forwards;
16186 HOST_WIDE_INT backwards;
16187 };
16188
16189 /* Fixes less than a word need padding out to a word boundary. */
16190 #define MINIPOOL_FIX_SIZE(mode) \
16191 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
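/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (padded up to a word),
   while MINIPOOL_FIX_SIZE (DImode) is 8.  */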
16192
16193 static Mnode * minipool_vector_head;
16194 static Mnode * minipool_vector_tail;
16195 static rtx_code_label *minipool_vector_label;
16196 static int minipool_pad;
16197
16198 /* The linked list of all minipool fixes required for this function. */
16199 Mfix * minipool_fix_head;
16200 Mfix * minipool_fix_tail;
16201 /* The fix entry for the current minipool, once it has been placed. */
16202 Mfix * minipool_barrier;
16203
16204 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16205 #define JUMP_TABLES_IN_TEXT_SECTION 0
16206 #endif
16207
16208 static HOST_WIDE_INT
16209 get_jump_table_size (rtx_jump_table_data *insn)
16210 {
16211 /* ADDR_VECs only take room if read-only data goes into the text
16212 section. */
16213 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16214 {
16215 rtx body = PATTERN (insn);
16216 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16217 HOST_WIDE_INT size;
16218 HOST_WIDE_INT modesize;
16219
16220 modesize = GET_MODE_SIZE (GET_MODE (body));
16221 size = modesize * XVECLEN (body, elt);
16222 switch (modesize)
16223 {
16224 case 1:
16225 /* Round up size of TBB table to a halfword boundary. */
16226 size = (size + 1) & ~(HOST_WIDE_INT)1;
16227 break;
16228 case 2:
16229 /* No padding necessary for TBH. */
16230 break;
16231 case 4:
16232 /* Add two bytes for alignment on Thumb. */
16233 if (TARGET_THUMB)
16234 size += 2;
16235 break;
16236 default:
16237 gcc_unreachable ();
16238 }
16239 return size;
16240 }
16241
16242 return 0;
16243 }
16244
16245 /* Return the maximum amount of padding that will be inserted before
16246 label LABEL. */
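/* For example, a label aligned to 8 bytes can be preceded by up to 6 bytes
   of padding on Thumb (8 minus the 2-byte minimum insn size) and by up to
   4 bytes on ARM.  */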
16247
16248 static HOST_WIDE_INT
16249 get_label_padding (rtx label)
16250 {
16251 HOST_WIDE_INT align, min_insn_size;
16252
16253 align = 1 << label_to_alignment (label);
16254 min_insn_size = TARGET_THUMB ? 2 : 4;
16255 return align > min_insn_size ? align - min_insn_size : 0;
16256 }
16257
16258 /* Move a minipool fix MP from its current location to before MAX_MP.
16259 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16260 constraints may need updating. */
16261 static Mnode *
16262 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16263 HOST_WIDE_INT max_address)
16264 {
16265 /* The code below assumes these are different. */
16266 gcc_assert (mp != max_mp);
16267
16268 if (max_mp == NULL)
16269 {
16270 if (max_address < mp->max_address)
16271 mp->max_address = max_address;
16272 }
16273 else
16274 {
16275 if (max_address > max_mp->max_address - mp->fix_size)
16276 mp->max_address = max_mp->max_address - mp->fix_size;
16277 else
16278 mp->max_address = max_address;
16279
16280 /* Unlink MP from its current position. Since max_mp is non-null,
16281 mp->prev must be non-null. */
16282 mp->prev->next = mp->next;
16283 if (mp->next != NULL)
16284 mp->next->prev = mp->prev;
16285 else
16286 minipool_vector_tail = mp->prev;
16287
16288 /* Re-insert it before MAX_MP. */
16289 mp->next = max_mp;
16290 mp->prev = max_mp->prev;
16291 max_mp->prev = mp;
16292
16293 if (mp->prev != NULL)
16294 mp->prev->next = mp;
16295 else
16296 minipool_vector_head = mp;
16297 }
16298
16299 /* Save the new entry. */
16300 max_mp = mp;
16301
16302 /* Scan over the preceding entries and adjust their addresses as
16303 required. */
16304 while (mp->prev != NULL
16305 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16306 {
16307 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16308 mp = mp->prev;
16309 }
16310
16311 return max_mp;
16312 }
16313
16314 /* Add a constant to the minipool for a forward reference. Returns the
16315 node added or NULL if the constant will not fit in this pool. */
16316 static Mnode *
16317 add_minipool_forward_ref (Mfix *fix)
16318 {
16319 /* If set, max_mp is the first pool_entry that has a lower
16320 constraint than the one we are trying to add. */
16321 Mnode * max_mp = NULL;
16322 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16323 Mnode * mp;
16324
16325 /* If the minipool starts before the end of FIX->INSN then this FIX
16326 can not be placed into the current pool. Furthermore, adding the
16327 new constant pool entry may cause the pool to start FIX_SIZE bytes
16328 earlier. */
16329 if (minipool_vector_head
16330 && (fix->address + get_attr_length (fix->insn)
16331 >= minipool_vector_head->max_address - fix->fix_size))
16332 return NULL;
16333
16334 /* Scan the pool to see if a constant with the same value has
16335 already been added. While we are doing this, also note the
16336 location where we must insert the constant if it doesn't already
16337 exist. */
16338 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16339 {
16340 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16341 && fix->mode == mp->mode
16342 && (!LABEL_P (fix->value)
16343 || (CODE_LABEL_NUMBER (fix->value)
16344 == CODE_LABEL_NUMBER (mp->value)))
16345 && rtx_equal_p (fix->value, mp->value))
16346 {
16347 /* More than one fix references this entry. */
16348 mp->refcount++;
16349 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16350 }
16351
16352 /* Note the insertion point if necessary. */
16353 if (max_mp == NULL
16354 && mp->max_address > max_address)
16355 max_mp = mp;
16356
16357 /* If we are inserting an 8-byte aligned quantity and
16358 we have not already found an insertion point, then
16359 make sure that all such 8-byte aligned quantities are
16360 placed at the start of the pool. */
16361 if (ARM_DOUBLEWORD_ALIGN
16362 && max_mp == NULL
16363 && fix->fix_size >= 8
16364 && mp->fix_size < 8)
16365 {
16366 max_mp = mp;
16367 max_address = mp->max_address;
16368 }
16369 }
16370
16371 /* The value is not currently in the minipool, so we need to create
16372 a new entry for it. If MAX_MP is NULL, the entry will be put on
16373 the end of the list since the placement is less constrained than
16374 any existing entry. Otherwise, we insert the new fix before
16375 MAX_MP and, if necessary, adjust the constraints on the other
16376 entries. */
16377 mp = XNEW (Mnode);
16378 mp->fix_size = fix->fix_size;
16379 mp->mode = fix->mode;
16380 mp->value = fix->value;
16381 mp->refcount = 1;
16382 /* Not yet required for a backwards ref. */
16383 mp->min_address = -65536;
16384
16385 if (max_mp == NULL)
16386 {
16387 mp->max_address = max_address;
16388 mp->next = NULL;
16389 mp->prev = minipool_vector_tail;
16390
16391 if (mp->prev == NULL)
16392 {
16393 minipool_vector_head = mp;
16394 minipool_vector_label = gen_label_rtx ();
16395 }
16396 else
16397 mp->prev->next = mp;
16398
16399 minipool_vector_tail = mp;
16400 }
16401 else
16402 {
16403 if (max_address > max_mp->max_address - mp->fix_size)
16404 mp->max_address = max_mp->max_address - mp->fix_size;
16405 else
16406 mp->max_address = max_address;
16407
16408 mp->next = max_mp;
16409 mp->prev = max_mp->prev;
16410 max_mp->prev = mp;
16411 if (mp->prev != NULL)
16412 mp->prev->next = mp;
16413 else
16414 minipool_vector_head = mp;
16415 }
16416
16417 /* Save the new entry. */
16418 max_mp = mp;
16419
16420 /* Scan over the preceding entries and adjust their addresses as
16421 required. */
16422 while (mp->prev != NULL
16423 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16424 {
16425 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16426 mp = mp->prev;
16427 }
16428
16429 return max_mp;
16430 }
16431
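/* Move a minipool fix MP from its current location to after MIN_MP.
   If MIN_MP is NULL then MP doesn't need moving, but its minimum-address
   constraint may still need tightening.  Returns the new MIN_MP.  */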
16432 static Mnode *
16433 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16434 HOST_WIDE_INT min_address)
16435 {
16436 HOST_WIDE_INT offset;
16437
16438 /* The code below assumes these are different. */
16439 gcc_assert (mp != min_mp);
16440
16441 if (min_mp == NULL)
16442 {
16443 if (min_address > mp->min_address)
16444 mp->min_address = min_address;
16445 }
16446 else
16447 {
16448 /* We will adjust this below if it is too loose. */
16449 mp->min_address = min_address;
16450
16451 /* Unlink MP from its current position. Since min_mp is non-null,
16452 mp->next must be non-null. */
16453 mp->next->prev = mp->prev;
16454 if (mp->prev != NULL)
16455 mp->prev->next = mp->next;
16456 else
16457 minipool_vector_head = mp->next;
16458
16459 /* Reinsert it after MIN_MP. */
16460 mp->prev = min_mp;
16461 mp->next = min_mp->next;
16462 min_mp->next = mp;
16463 if (mp->next != NULL)
16464 mp->next->prev = mp;
16465 else
16466 minipool_vector_tail = mp;
16467 }
16468
16469 min_mp = mp;
16470
16471 offset = 0;
16472 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16473 {
16474 mp->offset = offset;
16475 if (mp->refcount > 0)
16476 offset += mp->fix_size;
16477
16478 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16479 mp->next->min_address = mp->min_address + mp->fix_size;
16480 }
16481
16482 return min_mp;
16483 }
16484
16485 /* Add a constant to the minipool for a backward reference. Returns the
16486 node added or NULL if the constant will not fit in this pool.
16487
16488 Note that the code for insertion for a backwards reference can be
16489 somewhat confusing because the calculated offsets for each fix do
16490 not take into account the size of the pool (which is still under
16491 construction). */
16492 static Mnode *
16493 add_minipool_backward_ref (Mfix *fix)
16494 {
16495 /* If set, min_mp is the last pool_entry that has a lower constraint
16496 than the one we are trying to add. */
16497 Mnode *min_mp = NULL;
16498 /* This can be negative, since it is only a constraint. */
16499 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16500 Mnode *mp;
16501
16502 /* If we can't reach the current pool from this insn, or if we can't
16503 insert this entry at the end of the pool without pushing other
16504 fixes out of range, then we don't try. This ensures that we
16505 can't fail later on. */
16506 if (min_address >= minipool_barrier->address
16507 || (minipool_vector_tail->min_address + fix->fix_size
16508 >= minipool_barrier->address))
16509 return NULL;
16510
16511 /* Scan the pool to see if a constant with the same value has
16512 already been added. While we are doing this, also note the
16513 location where we must insert the constant if it doesn't already
16514 exist. */
16515 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16516 {
16517 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16518 && fix->mode == mp->mode
16519 && (!LABEL_P (fix->value)
16520 || (CODE_LABEL_NUMBER (fix->value)
16521 == CODE_LABEL_NUMBER (mp->value)))
16522 && rtx_equal_p (fix->value, mp->value)
16523 /* Check that there is enough slack to move this entry to the
16524 end of the table (this is conservative). */
16525 && (mp->max_address
16526 > (minipool_barrier->address
16527 + minipool_vector_tail->offset
16528 + minipool_vector_tail->fix_size)))
16529 {
16530 mp->refcount++;
16531 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16532 }
16533
16534 if (min_mp != NULL)
16535 mp->min_address += fix->fix_size;
16536 else
16537 {
16538 /* Note the insertion point if necessary. */
16539 if (mp->min_address < min_address)
16540 {
16541 /* For now, we do not allow the insertion of nodes requiring
16542 8-byte alignment anywhere but at the start of the pool. */
16543 if (ARM_DOUBLEWORD_ALIGN
16544 && fix->fix_size >= 8 && mp->fix_size < 8)
16545 return NULL;
16546 else
16547 min_mp = mp;
16548 }
16549 else if (mp->max_address
16550 < minipool_barrier->address + mp->offset + fix->fix_size)
16551 {
16552 /* Inserting before this entry would push the fix beyond
16553 its maximum address (which can happen if we have
16554 re-located a forwards fix); force the new fix to come
16555 after it. */
16556 if (ARM_DOUBLEWORD_ALIGN
16557 && fix->fix_size >= 8 && mp->fix_size < 8)
16558 return NULL;
16559 else
16560 {
16561 min_mp = mp;
16562 min_address = mp->min_address + fix->fix_size;
16563 }
16564 }
16565 /* Do not insert a non-8-byte aligned quantity before 8-byte
16566 aligned quantities. */
16567 else if (ARM_DOUBLEWORD_ALIGN
16568 && fix->fix_size < 8
16569 && mp->fix_size >= 8)
16570 {
16571 min_mp = mp;
16572 min_address = mp->min_address + fix->fix_size;
16573 }
16574 }
16575 }
16576
16577 /* We need to create a new entry. */
16578 mp = XNEW (Mnode);
16579 mp->fix_size = fix->fix_size;
16580 mp->mode = fix->mode;
16581 mp->value = fix->value;
16582 mp->refcount = 1;
16583 mp->max_address = minipool_barrier->address + 65536;
16584
16585 mp->min_address = min_address;
16586
16587 if (min_mp == NULL)
16588 {
16589 mp->prev = NULL;
16590 mp->next = minipool_vector_head;
16591
16592 if (mp->next == NULL)
16593 {
16594 minipool_vector_tail = mp;
16595 minipool_vector_label = gen_label_rtx ();
16596 }
16597 else
16598 mp->next->prev = mp;
16599
16600 minipool_vector_head = mp;
16601 }
16602 else
16603 {
16604 mp->next = min_mp->next;
16605 mp->prev = min_mp;
16606 min_mp->next = mp;
16607
16608 if (mp->next != NULL)
16609 mp->next->prev = mp;
16610 else
16611 minipool_vector_tail = mp;
16612 }
16613
16614 /* Save the new entry. */
16615 min_mp = mp;
16616
16617 if (mp->prev)
16618 mp = mp->prev;
16619 else
16620 mp->offset = 0;
16621
16622 /* Scan over the following entries and adjust their offsets. */
16623 while (mp->next != NULL)
16624 {
16625 if (mp->next->min_address < mp->min_address + mp->fix_size)
16626 mp->next->min_address = mp->min_address + mp->fix_size;
16627
16628 if (mp->refcount)
16629 mp->next->offset = mp->offset + mp->fix_size;
16630 else
16631 mp->next->offset = mp->offset;
16632
16633 mp = mp->next;
16634 }
16635
16636 return min_mp;
16637 }
16638
16639 static void
16640 assign_minipool_offsets (Mfix *barrier)
16641 {
16642 HOST_WIDE_INT offset = 0;
16643 Mnode *mp;
16644
16645 minipool_barrier = barrier;
16646
16647 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16648 {
16649 mp->offset = offset;
16650
16651 if (mp->refcount > 0)
16652 offset += mp->fix_size;
16653 }
16654 }
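
/* For instance, for pool entries of sizes 4, 8 and 4 bytes where the
   8-byte entry has a zero refcount, the offsets assigned above are
   0, 4 and 4: dead entries keep an offset but contribute no space.  */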
16655
16656 /* Output the literal table. */
16657 static void
16658 dump_minipool (rtx_insn *scan)
16659 {
16660 Mnode * mp;
16661 Mnode * nmp;
16662 int align64 = 0;
16663
16664 if (ARM_DOUBLEWORD_ALIGN)
16665 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16666 if (mp->refcount > 0 && mp->fix_size >= 8)
16667 {
16668 align64 = 1;
16669 break;
16670 }
16671
16672 if (dump_file)
16673 fprintf (dump_file,
16674 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16675 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16676
16677 scan = emit_label_after (gen_label_rtx (), scan);
16678 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16679 scan = emit_label_after (minipool_vector_label, scan);
16680
16681 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16682 {
16683 if (mp->refcount > 0)
16684 {
16685 if (dump_file)
16686 {
16687 fprintf (dump_file,
16688 ";; Offset %u, min %ld, max %ld ",
16689 (unsigned) mp->offset, (unsigned long) mp->min_address,
16690 (unsigned long) mp->max_address);
16691 arm_print_value (dump_file, mp->value);
16692 fputc ('\n', dump_file);
16693 }
16694
16695 switch (GET_MODE_SIZE (mp->mode))
16696 {
16697 #ifdef HAVE_consttable_1
16698 case 1:
16699 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16700 break;
16701
16702 #endif
16703 #ifdef HAVE_consttable_2
16704 case 2:
16705 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16706 break;
16707
16708 #endif
16709 #ifdef HAVE_consttable_4
16710 case 4:
16711 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16712 break;
16713
16714 #endif
16715 #ifdef HAVE_consttable_8
16716 case 8:
16717 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16718 break;
16719
16720 #endif
16721 #ifdef HAVE_consttable_16
16722 case 16:
16723 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16724 break;
16725
16726 #endif
16727 default:
16728 gcc_unreachable ();
16729 }
16730 }
16731
16732 nmp = mp->next;
16733 free (mp);
16734 }
16735
16736 minipool_vector_head = minipool_vector_tail = NULL;
16737 scan = emit_insn_after (gen_consttable_end (), scan);
16738 scan = emit_barrier_after (scan);
16739 }
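
/* In the assembly output the dumped pool looks roughly like this
   (label names and constants are illustrative):

	.align	2
   .Lpool:			@ minipool_vector_label
	.word	305419896	@ 0x12345678
	.word	.Lsym		@ e.g. the address of a code label

   followed by a consttable_end marker and a barrier so that the pool
   itself is never reached by execution.  */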
16740
16741 /* Return the cost of forcibly inserting a barrier after INSN. */
16742 static int
16743 arm_barrier_cost (rtx_insn *insn)
16744 {
16745 /* Basing the location of the pool on the loop depth is preferable,
16746 but at the moment, the basic block information seems to be
16747 corrupt by this stage of the compilation. */
16748 int base_cost = 50;
16749 rtx_insn *next = next_nonnote_insn (insn);
16750
16751 if (next != NULL && LABEL_P (next))
16752 base_cost -= 20;
16753
16754 switch (GET_CODE (insn))
16755 {
16756 case CODE_LABEL:
16757 /* It will always be better to place the table before the label, rather
16758 than after it. */
16759 return 50;
16760
16761 case INSN:
16762 case CALL_INSN:
16763 return base_cost;
16764
16765 case JUMP_INSN:
16766 return base_cost - 10;
16767
16768 default:
16769 return base_cost + 10;
16770 }
16771 }
16772
16773 /* Find the best place in the insn stream in the range
16774 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16775 Create the barrier by inserting a jump and add a new fix entry for
16776 it. */
16777 static Mfix *
16778 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16779 {
16780 HOST_WIDE_INT count = 0;
16781 rtx_barrier *barrier;
16782 rtx_insn *from = fix->insn;
16783 /* The instruction after which we will insert the jump. */
16784 rtx_insn *selected = NULL;
16785 int selected_cost;
16786 /* The address at which the jump instruction will be placed. */
16787 HOST_WIDE_INT selected_address;
16788 Mfix * new_fix;
16789 HOST_WIDE_INT max_count = max_address - fix->address;
16790 rtx_code_label *label = gen_label_rtx ();
16791
16792 selected_cost = arm_barrier_cost (from);
16793 selected_address = fix->address;
16794
16795 while (from && count < max_count)
16796 {
16797 rtx_jump_table_data *tmp;
16798 int new_cost;
16799
16800 /* This code shouldn't have been called if there was a natural barrier
16801 within range. */
16802 gcc_assert (!BARRIER_P (from));
16803
16804 /* Count the length of this insn. This must stay in sync with the
16805 code that pushes minipool fixes. */
16806 if (LABEL_P (from))
16807 count += get_label_padding (from);
16808 else
16809 count += get_attr_length (from);
16810
16811 /* If there is a jump table, add its length. */
16812 if (tablejump_p (from, NULL, &tmp))
16813 {
16814 count += get_jump_table_size (tmp);
16815
16816 /* Jump tables aren't in a basic block, so base the cost on
16817 the dispatch insn. If we select this location, we will
16818 still put the pool after the table. */
16819 new_cost = arm_barrier_cost (from);
16820
16821 if (count < max_count
16822 && (!selected || new_cost <= selected_cost))
16823 {
16824 selected = tmp;
16825 selected_cost = new_cost;
16826 selected_address = fix->address + count;
16827 }
16828
16829 /* Continue after the dispatch table. */
16830 from = NEXT_INSN (tmp);
16831 continue;
16832 }
16833
16834 new_cost = arm_barrier_cost (from);
16835
16836 if (count < max_count
16837 && (!selected || new_cost <= selected_cost))
16838 {
16839 selected = from;
16840 selected_cost = new_cost;
16841 selected_address = fix->address + count;
16842 }
16843
16844 from = NEXT_INSN (from);
16845 }
16846
16847 /* Make sure that we found a place to insert the jump. */
16848 gcc_assert (selected);
16849
16850 /* Make sure we do not split a call and its corresponding
16851 CALL_ARG_LOCATION note. */
16852 if (CALL_P (selected))
16853 {
16854 rtx_insn *next = NEXT_INSN (selected);
16855 if (next && NOTE_P (next)
16856 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16857 selected = next;
16858 }
16859
16860 /* Create a new JUMP_INSN that branches around a barrier. */
16861 from = emit_jump_insn_after (gen_jump (label), selected);
16862 JUMP_LABEL (from) = label;
16863 barrier = emit_barrier_after (from);
16864 emit_label_after (label, barrier);
16865
16866 /* Create a minipool barrier entry for the new barrier. */
16867 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16868 new_fix->insn = barrier;
16869 new_fix->address = selected_address;
16870 new_fix->next = fix->next;
16871 fix->next = new_fix;
16872
16873 return new_fix;
16874 }
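
/* The net effect on the insn stream is a branch around the spot where
   the pool will later be dumped, roughly:

	b	.Lskip		@ the new jump insn
	@ barrier -- dump_minipool will emit the literal table here
   .Lskip:
 */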
16875
16876 /* Record that there is a natural barrier in the insn stream at
16877 ADDRESS. */
16878 static void
16879 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16880 {
16881 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16882
16883 fix->insn = insn;
16884 fix->address = address;
16885
16886 fix->next = NULL;
16887 if (minipool_fix_head != NULL)
16888 minipool_fix_tail->next = fix;
16889 else
16890 minipool_fix_head = fix;
16891
16892 minipool_fix_tail = fix;
16893 }
16894
16895 /* Record INSN, which will need fixing up to load a value from the
16896 minipool. ADDRESS is the offset of the insn from the start of the
16897 function; LOC is a pointer to the part of the insn which requires
16898 fixing; VALUE is the constant that must be loaded, which is of type
16899 MODE. */
16900 static void
16901 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16902 machine_mode mode, rtx value)
16903 {
16904 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16905
16906 fix->insn = insn;
16907 fix->address = address;
16908 fix->loc = loc;
16909 fix->mode = mode;
16910 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16911 fix->value = value;
16912 fix->forwards = get_attr_pool_range (insn);
16913 fix->backwards = get_attr_neg_pool_range (insn);
16914 fix->minipool = NULL;
16915
16916 /* If an insn doesn't have a range defined for it, then it isn't
16917 expecting to be reworked by this code. Better to stop now than
16918 to generate duff assembly code. */
16919 gcc_assert (fix->forwards || fix->backwards);
16920
16921 /* If an entry requires 8-byte alignment then assume all constant pools
16922 require 4 bytes of padding. Trying to do this later on a per-pool
16923 basis is awkward because existing pool entries have to be modified. */
16924 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16925 minipool_pad = 4;
16926
16927 if (dump_file)
16928 {
16929 fprintf (dump_file,
16930 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16931 GET_MODE_NAME (mode),
16932 INSN_UID (insn), (unsigned long) address,
16933 -1 * (long)fix->backwards, (long)fix->forwards);
16934 arm_print_value (dump_file, fix->value);
16935 fprintf (dump_file, "\n");
16936 }
16937
16938 /* Add it to the chain of fixes. */
16939 fix->next = NULL;
16940
16941 if (minipool_fix_head != NULL)
16942 minipool_fix_tail->next = fix;
16943 else
16944 minipool_fix_head = fix;
16945
16946 minipool_fix_tail = fix;
16947 }
16948
16949 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16950 constant inline. Returns 99 if we always want the value synthesized
16951 rather than placed in a literal pool. */
16952 int
16953 arm_max_const_double_inline_cost ()
16954 {
16955 /* Let the value get synthesized to avoid the use of literal pools. */
16956 if (arm_disable_literal_pool)
16957 return 99;
16958
16959 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16960 }
16961
16962 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16963 Returns the number of insns needed, or 99 if we don't know how to
16964 do it. */
16965 int
16966 arm_const_double_inline_cost (rtx val)
16967 {
16968 rtx lowpart, highpart;
16969 machine_mode mode;
16970
16971 mode = GET_MODE (val);
16972
16973 if (mode == VOIDmode)
16974 mode = DImode;
16975
16976 gcc_assert (GET_MODE_SIZE (mode) == 8);
16977
16978 lowpart = gen_lowpart (SImode, val);
16979 highpart = gen_highpart_mode (SImode, mode, val);
16980
16981 gcc_assert (CONST_INT_P (lowpart));
16982 gcc_assert (CONST_INT_P (highpart));
16983
16984 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16985 NULL_RTX, NULL_RTX, 0, 0)
16986 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16987 NULL_RTX, NULL_RTX, 0, 0));
16988 }
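
/* For example, the DImode value 0x0000001200000034 splits into a high
   part of 0x12 and a low part of 0x34; each can be built with a single
   mov, so the cost returned is 2.  */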
16989
16990 /* Return the cost of synthesizing the SImode constant VAL for operation CODE. */
16991 static inline int
16992 arm_const_inline_cost (enum rtx_code code, rtx val)
16993 {
16994 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16995 NULL_RTX, NULL_RTX, 1, 0);
16996 }
16997
16998 /* Return true if it is worthwhile to split a 64-bit constant into two
16999 32-bit operations. This is the case if optimizing for size, or
17000 if we have load delay slots, or if one 32-bit part can be done with
17001 a single data operation. */
17002 bool
17003 arm_const_double_by_parts (rtx val)
17004 {
17005 machine_mode mode = GET_MODE (val);
17006 rtx part;
17007
17008 if (optimize_size || arm_ld_sched)
17009 return true;
17010
17011 if (mode == VOIDmode)
17012 mode = DImode;
17013
17014 part = gen_highpart_mode (SImode, mode, val);
17015
17016 gcc_assert (CONST_INT_P (part));
17017
17018 if (const_ok_for_arm (INTVAL (part))
17019 || const_ok_for_arm (~INTVAL (part)))
17020 return true;
17021
17022 part = gen_lowpart (SImode, val);
17023
17024 gcc_assert (CONST_INT_P (part));
17025
17026 if (const_ok_for_arm (INTVAL (part))
17027 || const_ok_for_arm (~INTVAL (part)))
17028 return true;
17029
17030 return false;
17031 }
17032
17033 /* Return true if it is possible to inline both the high and low parts
17034 of a 64-bit constant into 32-bit data processing instructions. */
17035 bool
17036 arm_const_double_by_immediates (rtx val)
17037 {
17038 machine_mode mode = GET_MODE (val);
17039 rtx part;
17040
17041 if (mode == VOIDmode)
17042 mode = DImode;
17043
17044 part = gen_highpart_mode (SImode, mode, val);
17045
17046 gcc_assert (CONST_INT_P (part));
17047
17048 if (!const_ok_for_arm (INTVAL (part)))
17049 return false;
17050
17051 part = gen_lowpart (SImode, val);
17052
17053 gcc_assert (CONST_INT_P (part));
17054
17055 if (!const_ok_for_arm (INTVAL (part)))
17056 return false;
17057
17058 return true;
17059 }
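
/* E.g. 0x0000001200000034 qualifies, since both 0x12 and 0x34 are valid
   ARM immediates, whereas 0x1234567800000001 does not, because
   0x12345678 cannot be encoded as a rotated 8-bit immediate.  */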
17060
17061 /* Scan INSN and note any of its operands that need fixing.
17062 If DO_PUSHES is false we do not actually push any of the fixups
17063 needed. */
17064 static void
17065 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17066 {
17067 int opno;
17068
17069 extract_constrain_insn (insn);
17070
17071 if (recog_data.n_alternatives == 0)
17072 return;
17073
17074 /* Fill in recog_op_alt with information about the constraints of
17075 this insn. */
17076 preprocess_constraints (insn);
17077
17078 const operand_alternative *op_alt = which_op_alt ();
17079 for (opno = 0; opno < recog_data.n_operands; opno++)
17080 {
17081 /* Things we need to fix can only occur in inputs. */
17082 if (recog_data.operand_type[opno] != OP_IN)
17083 continue;
17084
17085 /* If this alternative is a memory reference, then any mention
17086 of constants in this alternative is really to fool reload
17087 into allowing us to accept one there. We need to fix them up
17088 now so that we output the right code. */
17089 if (op_alt[opno].memory_ok)
17090 {
17091 rtx op = recog_data.operand[opno];
17092
17093 if (CONSTANT_P (op))
17094 {
17095 if (do_pushes)
17096 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17097 recog_data.operand_mode[opno], op);
17098 }
17099 else if (MEM_P (op)
17100 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17101 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17102 {
17103 if (do_pushes)
17104 {
17105 rtx cop = avoid_constant_pool_reference (op);
17106
17107 /* Casting the address of something to a mode narrower
17108 than a word can cause avoid_constant_pool_reference()
17109 to return the pool reference itself. That's no good to
17110 us here. Let's just hope that we can use the
17111 constant pool value directly. */
17112 if (op == cop)
17113 cop = get_pool_constant (XEXP (op, 0));
17114
17115 push_minipool_fix (insn, address,
17116 recog_data.operand_loc[opno],
17117 recog_data.operand_mode[opno], cop);
17118 }
17119
17120 }
17121 }
17122 }
17123
17124 return;
17125 }
17126
17127 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17128 be useful in the next conditional jump insn. */
17129
17130 static void
17131 thumb1_reorg (void)
17132 {
17133 basic_block bb;
17134
17135 FOR_EACH_BB_FN (bb, cfun)
17136 {
17137 rtx dest, src;
17138 rtx pat, op0, set = NULL;
17139 rtx_insn *prev, *insn = BB_END (bb);
17140 bool insn_clobbered = false;
17141
17142 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17143 insn = PREV_INSN (insn);
17144
17145 /* Find the last cbranchsi4_insn in basic block BB. */
17146 if (insn == BB_HEAD (bb)
17147 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17148 continue;
17149
17150 /* Get the register with which we are comparing. */
17151 pat = PATTERN (insn);
17152 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17153
17154 /* Find the first flag setting insn before INSN in basic block BB. */
17155 gcc_assert (insn != BB_HEAD (bb));
17156 for (prev = PREV_INSN (insn);
17157 (!insn_clobbered
17158 && prev != BB_HEAD (bb)
17159 && (NOTE_P (prev)
17160 || DEBUG_INSN_P (prev)
17161 || ((set = single_set (prev)) != NULL
17162 && get_attr_conds (prev) == CONDS_NOCOND)));
17163 prev = PREV_INSN (prev))
17164 {
17165 if (reg_set_p (op0, prev))
17166 insn_clobbered = true;
17167 }
17168
17169 /* Skip if op0 is clobbered by an insn other than PREV. */
17170 if (insn_clobbered)
17171 continue;
17172
17173 if (!set)
17174 continue;
17175
17176 dest = SET_DEST (set);
17177 src = SET_SRC (set);
17178 if (!low_register_operand (dest, SImode)
17179 || !low_register_operand (src, SImode))
17180 continue;
17181
17182 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17183 in INSN. Both src and dest of the move insn are checked. */
17184 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17185 {
17186 dest = copy_rtx (dest);
17187 src = copy_rtx (src);
17188 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17189 PATTERN (prev) = gen_rtx_SET (dest, src);
17190 INSN_CODE (prev) = -1;
17191 /* Set test register in INSN to dest. */
17192 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17193 INSN_CODE (insn) = -1;
17194 }
17195 }
17196 }
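
/* As an illustration (register numbers arbitrary), a block that ends
   with a move of r2 into r3 followed by a conditional branch comparing
   r2 with zero has the move rewritten as

	subs	r3, r2, #0

   and the branch retargeted to test r3; since the subtract sets the
   condition flags, the compare inside the cbranch can be omitted when
   the insn is finally output.  */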
17197
17198 /* Convert instructions to their cc-clobbering variant if possible, since
17199 that allows us to use smaller encodings. */
17200
17201 static void
17202 thumb2_reorg (void)
17203 {
17204 basic_block bb;
17205 regset_head live;
17206
17207 INIT_REG_SET (&live);
17208
17209 /* We are freeing block_for_insn in the toplev to keep compatibility
17210 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17211 compute_bb_for_insn ();
17212 df_analyze ();
17213
17214 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17215
17216 FOR_EACH_BB_FN (bb, cfun)
17217 {
17218 if ((current_tune->disparage_flag_setting_t16_encodings
17219 == tune_params::DISPARAGE_FLAGS_ALL)
17220 && optimize_bb_for_speed_p (bb))
17221 continue;
17222
17223 rtx_insn *insn;
17224 Convert_Action action = SKIP;
17225 Convert_Action action_for_partial_flag_setting
17226 = ((current_tune->disparage_flag_setting_t16_encodings
17227 != tune_params::DISPARAGE_FLAGS_NEITHER)
17228 && optimize_bb_for_speed_p (bb))
17229 ? SKIP : CONV;
17230
17231 COPY_REG_SET (&live, DF_LR_OUT (bb));
17232 df_simulate_initialize_backwards (bb, &live);
17233 FOR_BB_INSNS_REVERSE (bb, insn)
17234 {
17235 if (NONJUMP_INSN_P (insn)
17236 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17237 && GET_CODE (PATTERN (insn)) == SET)
17238 {
17239 action = SKIP;
17240 rtx pat = PATTERN (insn);
17241 rtx dst = XEXP (pat, 0);
17242 rtx src = XEXP (pat, 1);
17243 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17244
17245 if (UNARY_P (src) || BINARY_P (src))
17246 op0 = XEXP (src, 0);
17247
17248 if (BINARY_P (src))
17249 op1 = XEXP (src, 1);
17250
17251 if (low_register_operand (dst, SImode))
17252 {
17253 switch (GET_CODE (src))
17254 {
17255 case PLUS:
17256 /* Adding two registers and storing the result
17257 in the first source is already a 16-bit
17258 operation. */
17259 if (rtx_equal_p (dst, op0)
17260 && register_operand (op1, SImode))
17261 break;
17262
17263 if (low_register_operand (op0, SImode))
17264 {
17265 /* ADDS <Rd>,<Rn>,<Rm> */
17266 if (low_register_operand (op1, SImode))
17267 action = CONV;
17268 /* ADDS <Rdn>,#<imm8> */
17269 /* SUBS <Rdn>,#<imm8> */
17270 else if (rtx_equal_p (dst, op0)
17271 && CONST_INT_P (op1)
17272 && IN_RANGE (INTVAL (op1), -255, 255))
17273 action = CONV;
17274 /* ADDS <Rd>,<Rn>,#<imm3> */
17275 /* SUBS <Rd>,<Rn>,#<imm3> */
17276 else if (CONST_INT_P (op1)
17277 && IN_RANGE (INTVAL (op1), -7, 7))
17278 action = CONV;
17279 }
17280 /* ADCS <Rd>, <Rn> */
17281 else if (GET_CODE (XEXP (src, 0)) == PLUS
17282 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17283 && low_register_operand (XEXP (XEXP (src, 0), 1),
17284 SImode)
17285 && COMPARISON_P (op1)
17286 && cc_register (XEXP (op1, 0), VOIDmode)
17287 && maybe_get_arm_condition_code (op1) == ARM_CS
17288 && XEXP (op1, 1) == const0_rtx)
17289 action = CONV;
17290 break;
17291
17292 case MINUS:
17293 /* RSBS <Rd>,<Rn>,#0
17294 Not handled here: see NEG below. */
17295 /* SUBS <Rd>,<Rn>,#<imm3>
17296 SUBS <Rdn>,#<imm8>
17297 Not handled here: see PLUS above. */
17298 /* SUBS <Rd>,<Rn>,<Rm> */
17299 if (low_register_operand (op0, SImode)
17300 && low_register_operand (op1, SImode))
17301 action = CONV;
17302 break;
17303
17304 case MULT:
17305 /* MULS <Rdm>,<Rn>,<Rdm>
17306 As an exception to the rule, this is only used
17307 when optimizing for size since MULS is slow on all
17308 known implementations. We do not even want to use
17309 MULS in cold code, if optimizing for speed, so we
17310 test the global flag here. */
17311 if (!optimize_size)
17312 break;
17313 /* else fall through. */
17314 case AND:
17315 case IOR:
17316 case XOR:
17317 /* ANDS <Rdn>,<Rm> */
17318 if (rtx_equal_p (dst, op0)
17319 && low_register_operand (op1, SImode))
17320 action = action_for_partial_flag_setting;
17321 else if (rtx_equal_p (dst, op1)
17322 && low_register_operand (op0, SImode))
17323 action = action_for_partial_flag_setting == SKIP
17324 ? SKIP : SWAP_CONV;
17325 break;
17326
17327 case ASHIFTRT:
17328 case ASHIFT:
17329 case LSHIFTRT:
17330 /* ASRS <Rdn>,<Rm> */
17331 /* LSRS <Rdn>,<Rm> */
17332 /* LSLS <Rdn>,<Rm> */
17333 if (rtx_equal_p (dst, op0)
17334 && low_register_operand (op1, SImode))
17335 action = action_for_partial_flag_setting;
17336 /* ASRS <Rd>,<Rm>,#<imm5> */
17337 /* LSRS <Rd>,<Rm>,#<imm5> */
17338 /* LSLS <Rd>,<Rm>,#<imm5> */
17339 else if (low_register_operand (op0, SImode)
17340 && CONST_INT_P (op1)
17341 && IN_RANGE (INTVAL (op1), 0, 31))
17342 action = action_for_partial_flag_setting;
17343 break;
17344
17345 case ROTATERT:
17346 /* RORS <Rdn>,<Rm> */
17347 if (rtx_equal_p (dst, op0)
17348 && low_register_operand (op1, SImode))
17349 action = action_for_partial_flag_setting;
17350 break;
17351
17352 case NOT:
17353 /* MVNS <Rd>,<Rm> */
17354 if (low_register_operand (op0, SImode))
17355 action = action_for_partial_flag_setting;
17356 break;
17357
17358 case NEG:
17359 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17360 if (low_register_operand (op0, SImode))
17361 action = CONV;
17362 break;
17363
17364 case CONST_INT:
17365 /* MOVS <Rd>,#<imm8> */
17366 if (CONST_INT_P (src)
17367 && IN_RANGE (INTVAL (src), 0, 255))
17368 action = action_for_partial_flag_setting;
17369 break;
17370
17371 case REG:
17372 /* MOVS and MOV<c> with registers have different
17373 encodings, so are not relevant here. */
17374 break;
17375
17376 default:
17377 break;
17378 }
17379 }
17380
17381 if (action != SKIP)
17382 {
17383 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17384 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17385 rtvec vec;
17386
17387 if (action == SWAP_CONV)
17388 {
17389 src = copy_rtx (src);
17390 XEXP (src, 0) = op1;
17391 XEXP (src, 1) = op0;
17392 pat = gen_rtx_SET (dst, src);
17393 vec = gen_rtvec (2, pat, clobber);
17394 }
17395 else /* action == CONV */
17396 vec = gen_rtvec (2, pat, clobber);
17397
17398 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17399 INSN_CODE (insn) = -1;
17400 }
17401 }
17402
17403 if (NONDEBUG_INSN_P (insn))
17404 df_simulate_one_insn_backwards (bb, insn, &live);
17405 }
17406 }
17407
17408 CLEAR_REG_SET (&live);
17409 }
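
/* For example, provided the condition flags are dead at that point,

	add	r0, r1, r2	@ 32-bit encoding when it must not set flags

   is rewritten as the flag-setting form

	adds	r0, r1, r2	@ 16-bit encoding

   by wrapping the SET in a PARALLEL with a clobber of CC_REGNUM.  */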
17410
17411 /* GCC puts the pool in the wrong place for ARM, since we can only
17412 load addresses a limited distance around the pc. We do some
17413 special munging to move the constant pool values to the correct
17414 point in the code. */
17415 static void
17416 arm_reorg (void)
17417 {
17418 rtx_insn *insn;
17419 HOST_WIDE_INT address = 0;
17420 Mfix * fix;
17421
17422 if (TARGET_THUMB1)
17423 thumb1_reorg ();
17424 else if (TARGET_THUMB2)
17425 thumb2_reorg ();
17426
17427 /* Ensure all insns that must be split have been split at this point.
17428 Otherwise, the pool placement code below may compute incorrect
17429 insn lengths. Note that when optimizing, all insns have already
17430 been split at this point. */
17431 if (!optimize)
17432 split_all_insns_noflow ();
17433
17434 minipool_fix_head = minipool_fix_tail = NULL;
17435
17436 /* The first insn must always be a note, or the code below won't
17437 scan it properly. */
17438 insn = get_insns ();
17439 gcc_assert (NOTE_P (insn));
17440 minipool_pad = 0;
17441
17442 /* Scan all the insns and record the operands that will need fixing. */
17443 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17444 {
17445 if (BARRIER_P (insn))
17446 push_minipool_barrier (insn, address);
17447 else if (INSN_P (insn))
17448 {
17449 rtx_jump_table_data *table;
17450
17451 note_invalid_constants (insn, address, true);
17452 address += get_attr_length (insn);
17453
17454 /* If the insn is a vector jump, add the size of the table
17455 and skip the table. */
17456 if (tablejump_p (insn, NULL, &table))
17457 {
17458 address += get_jump_table_size (table);
17459 insn = table;
17460 }
17461 }
17462 else if (LABEL_P (insn))
17463 /* Add the worst-case padding due to alignment. We don't add
17464 the _current_ padding because the minipool insertions
17465 themselves might change it. */
17466 address += get_label_padding (insn);
17467 }
17468
17469 fix = minipool_fix_head;
17470
17471 /* Now scan the fixups and perform the required changes. */
17472 while (fix)
17473 {
17474 Mfix * ftmp;
17475 Mfix * fdel;
17476 Mfix * last_added_fix;
17477 Mfix * last_barrier = NULL;
17478 Mfix * this_fix;
17479
17480 /* Skip any further barriers before the next fix. */
17481 while (fix && BARRIER_P (fix->insn))
17482 fix = fix->next;
17483
17484 /* No more fixes. */
17485 if (fix == NULL)
17486 break;
17487
17488 last_added_fix = NULL;
17489
17490 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17491 {
17492 if (BARRIER_P (ftmp->insn))
17493 {
17494 if (ftmp->address >= minipool_vector_head->max_address)
17495 break;
17496
17497 last_barrier = ftmp;
17498 }
17499 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17500 break;
17501
17502 last_added_fix = ftmp; /* Keep track of the last fix added. */
17503 }
17504
17505 /* If we found a barrier, drop back to that; any fixes that we
17506 could have reached but come after the barrier will now go in
17507 the next mini-pool. */
17508 if (last_barrier != NULL)
17509 {
17510 /* Reduce the refcount for those fixes that won't go into this
17511 pool after all. */
17512 for (fdel = last_barrier->next;
17513 fdel && fdel != ftmp;
17514 fdel = fdel->next)
17515 {
17516 fdel->minipool->refcount--;
17517 fdel->minipool = NULL;
17518 }
17519
17520 ftmp = last_barrier;
17521 }
17522 else
17523 {
17524 /* FTMP is the first fix that we can't fit into this pool and
17525 there are no natural barriers that we could use. Insert a
17526 new barrier in the code somewhere between the previous
17527 fix and this one, and arrange to jump around it. */
17528 HOST_WIDE_INT max_address;
17529
17530 /* The last item on the list of fixes must be a barrier, so
17531 we can never run off the end of the list of fixes without
17532 last_barrier being set. */
17533 gcc_assert (ftmp);
17534
17535 max_address = minipool_vector_head->max_address;
17536 /* Check that there isn't another fix that is in range that
17537 we couldn't fit into this pool because the pool was
17538 already too large: we need to put the pool before such an
17539 instruction. The pool itself may come just after the
17540 fix because create_fix_barrier also allows space for a
17541 jump instruction. */
17542 if (ftmp->address < max_address)
17543 max_address = ftmp->address + 1;
17544
17545 last_barrier = create_fix_barrier (last_added_fix, max_address);
17546 }
17547
17548 assign_minipool_offsets (last_barrier);
17549
17550 while (ftmp)
17551 {
17552 if (!BARRIER_P (ftmp->insn)
17553 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17554 == NULL))
17555 break;
17556
17557 ftmp = ftmp->next;
17558 }
17559
17560 /* Scan over the fixes we have identified for this pool, fixing them
17561 up and adding the constants to the pool itself. */
17562 for (this_fix = fix; this_fix && ftmp != this_fix;
17563 this_fix = this_fix->next)
17564 if (!BARRIER_P (this_fix->insn))
17565 {
17566 rtx addr
17567 = plus_constant (Pmode,
17568 gen_rtx_LABEL_REF (VOIDmode,
17569 minipool_vector_label),
17570 this_fix->minipool->offset);
17571 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17572 }
17573
17574 dump_minipool (last_barrier->insn);
17575 fix = ftmp;
17576 }
17577
17578 /* From now on we must synthesize any constants that we can't handle
17579 directly. This can happen if the RTL gets split during final
17580 instruction generation. */
17581 cfun->machine->after_arm_reorg = 1;
17582
17583 /* Free the minipool memory. */
17584 obstack_free (&minipool_obstack, minipool_startobj);
17585 }
17586 \f
17587 /* Routines to output assembly language. */
17588
17589 /* Return string representation of passed in real value. */
17590 static const char *
17591 fp_const_from_val (REAL_VALUE_TYPE *r)
17592 {
17593 if (!fp_consts_inited)
17594 init_fp_table ();
17595
17596 gcc_assert (real_equal (r, &value_fp0));
17597 return "0";
17598 }
17599
17600 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17601 OPERANDS[1] is the base register, RETURN_PC is true iff a return insn
17602 is in the list, and UPDATE is true iff the list contains an explicit
17603 update of the base register. */
17604 void
17605 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17606 bool update)
17607 {
17608 int i;
17609 char pattern[100];
17610 int offset;
17611 const char *conditional;
17612 int num_saves = XVECLEN (operands[0], 0);
17613 unsigned int regno;
17614 unsigned int regno_base = REGNO (operands[1]);
17615
17616 offset = 0;
17617 offset += update ? 1 : 0;
17618 offset += return_pc ? 1 : 0;
17619
17620 /* Is the base register in the list? */
17621 for (i = offset; i < num_saves; i++)
17622 {
17623 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17624 /* If SP is in the list, then the base register must be SP. */
17625 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17626 /* If base register is in the list, there must be no explicit update. */
17627 if (regno == regno_base)
17628 gcc_assert (!update);
17629 }
17630
17631 conditional = reverse ? "%?%D0" : "%?%d0";
17632 if ((regno_base == SP_REGNUM) && update)
17633 {
17634 sprintf (pattern, "pop%s\t{", conditional);
17635 }
17636 else
17637 {
17638 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17639 It's just a convention; their semantics are identical. */
17640 if (regno_base == SP_REGNUM)
17641 /* update is never true here, hence there is no need to handle
17642 pop here. */
17643 sprintf (pattern, "ldmfd%s", conditional);
17644
17645 if (update)
17646 sprintf (pattern, "ldmia%s\t", conditional);
17647 else
17648 sprintf (pattern, "ldm%s\t", conditional);
17649
17650 strcat (pattern, reg_names[regno_base]);
17651 if (update)
17652 strcat (pattern, "!, {");
17653 else
17654 strcat (pattern, ", {");
17655 }
17656
17657 /* Output the first destination register. */
17658 strcat (pattern,
17659 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17660
17661 /* Output the rest of the destination registers. */
17662 for (i = offset + 1; i < num_saves; i++)
17663 {
17664 strcat (pattern, ", ");
17665 strcat (pattern,
17666 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17667 }
17668
17669 strcat (pattern, "}");
17670
17671 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17672 strcat (pattern, "^");
17673
17674 output_asm_insn (pattern, &cond);
17675 }
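
/* Typical output, with illustrative registers:

	pop	{r4, r5}	@ base register is SP, with writeback
	ldmia	r3!, {r4, r5}	@ other base register, explicit update
	ldm	r3, {r4, r5}	@ other base register, no update

   pc may appear in the register list for a return, and a trailing "^"
   is appended when returning from an interrupt handler.  */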
17676
17677
17678 /* Output the assembly for a store multiple. */
17679
17680 const char *
17681 vfp_output_vstmd (rtx * operands)
17682 {
17683 char pattern[100];
17684 int p;
17685 int base;
17686 int i;
17687 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17688 ? XEXP (operands[0], 0)
17689 : XEXP (XEXP (operands[0], 0), 0);
17690 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17691
17692 if (push_p)
17693 strcpy (pattern, "vpush%?.64\t{%P1");
17694 else
17695 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17696
17697 p = strlen (pattern);
17698
17699 gcc_assert (REG_P (operands[1]));
17700
17701 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17702 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17703 {
17704 p += sprintf (&pattern[p], ", d%d", base + i);
17705 }
17706 strcpy (&pattern[p], "}");
17707
17708 output_asm_insn (pattern, operands);
17709 return "";
17710 }
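
/* E.g. for a two-register store (illustrative register numbers) this
   emits either

	vpush.64	{d8, d9}	@ the address is the stack pointer
   or
	vstmdb.64	r4!, {d8, d9}	@ any other base register
 */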
17711
17712
17713 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17714 number of bytes pushed. */
17715
17716 static int
17717 vfp_emit_fstmd (int base_reg, int count)
17718 {
17719 rtx par;
17720 rtx dwarf;
17721 rtx tmp, reg;
17722 int i;
17723
17724 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17725 register pairs are stored by a store multiple insn. We avoid this
17726 by pushing an extra pair. */
17727 if (count == 2 && !arm_arch6)
17728 {
17729 if (base_reg == LAST_VFP_REGNUM - 3)
17730 base_reg -= 2;
17731 count++;
17732 }
17733
17734 /* FSTMD may not store more than 16 doubleword registers at once. Split
17735 larger stores into multiple parts (up to a maximum of two, in
17736 practice). */
17737 if (count > 16)
17738 {
17739 int saved;
17740 /* NOTE: base_reg is an internal register number, so each D register
17741 counts as 2. */
17742 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17743 saved += vfp_emit_fstmd (base_reg, 16);
17744 return saved;
17745 }
17746
17747 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17748 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17749
17750 reg = gen_rtx_REG (DFmode, base_reg);
17751 base_reg += 2;
17752
17753 XVECEXP (par, 0, 0)
17754 = gen_rtx_SET (gen_frame_mem
17755 (BLKmode,
17756 gen_rtx_PRE_MODIFY (Pmode,
17757 stack_pointer_rtx,
17758 plus_constant
17759 (Pmode, stack_pointer_rtx,
17760 - (count * 8)))
17761 ),
17762 gen_rtx_UNSPEC (BLKmode,
17763 gen_rtvec (1, reg),
17764 UNSPEC_PUSH_MULT));
17765
17766 tmp = gen_rtx_SET (stack_pointer_rtx,
17767 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17768 RTX_FRAME_RELATED_P (tmp) = 1;
17769 XVECEXP (dwarf, 0, 0) = tmp;
17770
17771 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17772 RTX_FRAME_RELATED_P (tmp) = 1;
17773 XVECEXP (dwarf, 0, 1) = tmp;
17774
17775 for (i = 1; i < count; i++)
17776 {
17777 reg = gen_rtx_REG (DFmode, base_reg);
17778 base_reg += 2;
17779 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17780
17781 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17782 plus_constant (Pmode,
17783 stack_pointer_rtx,
17784 i * 8)),
17785 reg);
17786 RTX_FRAME_RELATED_P (tmp) = 1;
17787 XVECEXP (dwarf, 0, i + 1) = tmp;
17788 }
17789
17790 par = emit_insn (par);
17791 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17792 RTX_FRAME_RELATED_P (par) = 1;
17793
17794 return count * 8;
17795 }
17796
17797 /* Emit a call instruction with pattern PAT. ADDR is the address of
17798 the call target. */
17799
17800 void
17801 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17802 {
17803 rtx insn;
17804
17805 insn = emit_call_insn (pat);
17806
17807 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17808 If the call might use such an entry, add a use of the PIC register
17809 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17810 if (TARGET_VXWORKS_RTP
17811 && flag_pic
17812 && !sibcall
17813 && GET_CODE (addr) == SYMBOL_REF
17814 && (SYMBOL_REF_DECL (addr)
17815 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17816 : !SYMBOL_REF_LOCAL_P (addr)))
17817 {
17818 require_pic_register ();
17819 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17820 }
17821
17822 if (TARGET_AAPCS_BASED)
17823 {
17824 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17825 linker. We need to add an IP clobber to allow setting
17826 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17827 is not needed since it's a fixed register. */
17828 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17829 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17830 }
17831 }
17832
17833 /* Output a 'call' insn. */
17834 const char *
17835 output_call (rtx *operands)
17836 {
17837 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17838
17839 /* Handle calls to lr using ip (which may be clobbered in the subroutine anyway). */
17840 if (REGNO (operands[0]) == LR_REGNUM)
17841 {
17842 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17843 output_asm_insn ("mov%?\t%0, %|lr", operands);
17844 }
17845
17846 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17847
17848 if (TARGET_INTERWORK || arm_arch4t)
17849 output_asm_insn ("bx%?\t%0", operands);
17850 else
17851 output_asm_insn ("mov%?\t%|pc, %0", operands);
17852
17853 return "";
17854 }
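
/* On the pre-ARMv5 targets this supports, the emitted sequence is one
   of (r0 standing for the call-target register):

	mov	lr, pc
	bx	r0		@ TARGET_INTERWORK or arm_arch4t
   or
	mov	lr, pc
	mov	pc, r0		@ otherwise

   with the target first copied from lr to ip when the call is through lr.  */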
17855
17856 /* Output a 'call' insn that is a reference in memory. This is
17857 disabled for ARMv5, where we prefer blx instead, because otherwise
17858 there's a significant performance overhead. */
17859 const char *
17860 output_call_mem (rtx *operands)
17861 {
17862 gcc_assert (!arm_arch5);
17863 if (TARGET_INTERWORK)
17864 {
17865 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17866 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17867 output_asm_insn ("bx%?\t%|ip", operands);
17868 }
17869 else if (regno_use_in (LR_REGNUM, operands[0]))
17870 {
17871 /* LR is used in the memory address. We load the address in the
17872 first instruction. It's safe to use IP as the target of the
17873 load since the call will kill it anyway. */
17874 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17875 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17876 if (arm_arch4t)
17877 output_asm_insn ("bx%?\t%|ip", operands);
17878 else
17879 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17880 }
17881 else
17882 {
17883 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17884 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17885 }
17886
17887 return "";
17888 }
17889
17890
17891 /* Output a move of a long double from ARM registers to ARM registers.
17892 OPERANDS[0] is the destination.
17893 OPERANDS[1] is the source. */
17894 const char *
17895 output_mov_long_double_arm_from_arm (rtx *operands)
17896 {
17897 /* We have to be careful here because the two might overlap. */
17898 int dest_start = REGNO (operands[0]);
17899 int src_start = REGNO (operands[1]);
17900 rtx ops[2];
17901 int i;
17902
17903 if (dest_start < src_start)
17904 {
17905 for (i = 0; i < 3; i++)
17906 {
17907 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17908 ops[1] = gen_rtx_REG (SImode, src_start + i);
17909 output_asm_insn ("mov%?\t%0, %1", ops);
17910 }
17911 }
17912 else
17913 {
17914 for (i = 2; i >= 0; i--)
17915 {
17916 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17917 ops[1] = gen_rtx_REG (SImode, src_start + i);
17918 output_asm_insn ("mov%?\t%0, %1", ops);
17919 }
17920 }
17921
17922 return "";
17923 }
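
/* E.g. moving {r2, r3, r4} into {r1, r2, r3} copies r2->r1, r3->r2 and
   r4->r3 in ascending order, whereas moving {r1, r2, r3} into
   {r2, r3, r4} copies r3->r4 first, so that no source register is
   overwritten before it has been read.  */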
17924
17925 void
17926 arm_emit_movpair (rtx dest, rtx src)
17927 {
17928 rtx insn;
17929
17930 /* If the src is an immediate, simplify it. */
17931 if (CONST_INT_P (src))
17932 {
17933 HOST_WIDE_INT val = INTVAL (src);
17934 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17935 if ((val >> 16) & 0x0000ffff)
17936 {
17937 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17938 GEN_INT (16)),
17939 GEN_INT ((val >> 16) & 0x0000ffff));
17940 insn = get_last_insn ();
17941 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17942 }
17943 return;
17944 }
17945 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17946 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17947 insn = get_last_insn ();
17948 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17949 }
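
/* For an immediate SRC this expands to a movw/movt pair; e.g. loading
   0x12345678 into r0 (illustrative) gives roughly

	movw	r0, #22136	@ 0x5678
	movt	r0, #4660	@ 0x1234

   with the movt omitted when the upper 16 bits are zero.  For a
   symbolic SRC the HIGH/LO_SUM pair is emitted instead.  */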
17950
17951 /* Output a move between double words. It must be REG<-MEM
17952 or MEM<-REG. */
17953 const char *
17954 output_move_double (rtx *operands, bool emit, int *count)
17955 {
17956 enum rtx_code code0 = GET_CODE (operands[0]);
17957 enum rtx_code code1 = GET_CODE (operands[1]);
17958 rtx otherops[3];
17959 if (count)
17960 *count = 1;
17961
17962 /* The only case when this might happen is when
17963 you are looking at the length of a DImode instruction
17964 that has an invalid constant in it. */
17965 if (code0 == REG && code1 != MEM)
17966 {
17967 gcc_assert (!emit);
17968 *count = 2;
17969 return "";
17970 }
17971
17972 if (code0 == REG)
17973 {
17974 unsigned int reg0 = REGNO (operands[0]);
17975
17976 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17977
17978 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17979
17980 switch (GET_CODE (XEXP (operands[1], 0)))
17981 {
17982 case REG:
17983
17984 if (emit)
17985 {
17986 if (TARGET_LDRD
17987 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
17988 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17989 else
17990 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17991 }
17992 break;
17993
17994 case PRE_INC:
17995 gcc_assert (TARGET_LDRD);
17996 if (emit)
17997 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17998 break;
17999
18000 case PRE_DEC:
18001 if (emit)
18002 {
18003 if (TARGET_LDRD)
18004 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18005 else
18006 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18007 }
18008 break;
18009
18010 case POST_INC:
18011 if (emit)
18012 {
18013 if (TARGET_LDRD)
18014 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18015 else
18016 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18017 }
18018 break;
18019
18020 case POST_DEC:
18021 gcc_assert (TARGET_LDRD);
18022 if (emit)
18023 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18024 break;
18025
18026 case PRE_MODIFY:
18027 case POST_MODIFY:
18028 /* Autoincrement addressing modes should never have overlapping
18029 base and destination registers, and overlapping index registers
18030 are already prohibited, so this doesn't need to worry about
18031 fix_cm3_ldrd. */
18032 otherops[0] = operands[0];
18033 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18034 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18035
18036 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18037 {
18038 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18039 {
18040 /* Registers overlap so split out the increment. */
18041 if (emit)
18042 {
18043 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18044 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18045 }
18046 if (count)
18047 *count = 2;
18048 }
18049 else
18050 {
18051 /* Use a single insn if we can.
18052 FIXME: IWMMXT allows offsets larger than ldrd can
18053 handle, fix these up with a pair of ldr. */
18054 if (TARGET_THUMB2
18055 || !CONST_INT_P (otherops[2])
18056 || (INTVAL (otherops[2]) > -256
18057 && INTVAL (otherops[2]) < 256))
18058 {
18059 if (emit)
18060 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18061 }
18062 else
18063 {
18064 if (emit)
18065 {
18066 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18067 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18068 }
18069 if (count)
18070 *count = 2;
18071
18072 }
18073 }
18074 }
18075 else
18076 {
18077 /* Use a single insn if we can.
18078 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18079 fix these up with a pair of ldr. */
18080 if (TARGET_THUMB2
18081 || !CONST_INT_P (otherops[2])
18082 || (INTVAL (otherops[2]) > -256
18083 && INTVAL (otherops[2]) < 256))
18084 {
18085 if (emit)
18086 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18087 }
18088 else
18089 {
18090 if (emit)
18091 {
18092 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18093 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18094 }
18095 if (count)
18096 *count = 2;
18097 }
18098 }
18099 break;
18100
18101 case LABEL_REF:
18102 case CONST:
18103 /* We might be able to use ldrd %0, %1 here. However, the range is
18104 different from that of ldr/adr, and it is broken on some ARMv7-M
18105 implementations. */
18106 /* Use the second register of the pair to avoid problematic
18107 overlap. */
18108 otherops[1] = operands[1];
18109 if (emit)
18110 output_asm_insn ("adr%?\t%0, %1", otherops);
18111 operands[1] = otherops[0];
18112 if (emit)
18113 {
18114 if (TARGET_LDRD)
18115 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18116 else
18117 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18118 }
18119
18120 if (count)
18121 *count = 2;
18122 break;
18123
18124 /* ??? This needs checking for thumb2. */
18125 default:
18126 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18127 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18128 {
18129 otherops[0] = operands[0];
18130 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18131 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18132
18133 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18134 {
18135 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18136 {
18137 switch ((int) INTVAL (otherops[2]))
18138 {
18139 case -8:
18140 if (emit)
18141 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18142 return "";
18143 case -4:
18144 if (TARGET_THUMB2)
18145 break;
18146 if (emit)
18147 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18148 return "";
18149 case 4:
18150 if (TARGET_THUMB2)
18151 break;
18152 if (emit)
18153 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18154 return "";
18155 }
18156 }
18157 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
18158 operands[1] = otherops[0];
18159 if (TARGET_LDRD
18160 && (REG_P (otherops[2])
18161 || TARGET_THUMB2
18162 || (CONST_INT_P (otherops[2])
18163 && INTVAL (otherops[2]) > -256
18164 && INTVAL (otherops[2]) < 256)))
18165 {
18166 if (reg_overlap_mentioned_p (operands[0],
18167 otherops[2]))
18168 {
18169 /* Swap base and index registers over to
18170 avoid a conflict. */
18171 std::swap (otherops[1], otherops[2]);
18172 }
18173 /* If both registers conflict, it will usually
18174 have been fixed by a splitter. */
18175 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18176 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18177 {
18178 if (emit)
18179 {
18180 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18181 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18182 }
18183 if (count)
18184 *count = 2;
18185 }
18186 else
18187 {
18188 otherops[0] = operands[0];
18189 if (emit)
18190 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18191 }
18192 return "";
18193 }
18194
18195 if (CONST_INT_P (otherops[2]))
18196 {
18197 if (emit)
18198 {
18199 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18200 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18201 else
18202 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18203 }
18204 }
18205 else
18206 {
18207 if (emit)
18208 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18209 }
18210 }
18211 else
18212 {
18213 if (emit)
18214 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18215 }
18216
18217 if (count)
18218 *count = 2;
18219
18220 if (TARGET_LDRD)
18221 return "ldrd%?\t%0, [%1]";
18222
18223 return "ldmia%?\t%1, %M0";
18224 }
18225 else
18226 {
18227 otherops[1] = adjust_address (operands[1], SImode, 4);
18228 /* Take care of overlapping base/data reg. */
18229 if (reg_mentioned_p (operands[0], operands[1]))
18230 {
18231 if (emit)
18232 {
18233 output_asm_insn ("ldr%?\t%0, %1", otherops);
18234 output_asm_insn ("ldr%?\t%0, %1", operands);
18235 }
18236 if (count)
18237 *count = 2;
18238
18239 }
18240 else
18241 {
18242 if (emit)
18243 {
18244 output_asm_insn ("ldr%?\t%0, %1", operands);
18245 output_asm_insn ("ldr%?\t%0, %1", otherops);
18246 }
18247 if (count)
18248 *count = 2;
18249 }
18250 }
18251 }
18252 }
18253 else
18254 {
18255 /* Constraints should ensure this. */
18256 gcc_assert (code0 == MEM && code1 == REG);
18257 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18258 || (TARGET_ARM && TARGET_LDRD));
18259
18260 switch (GET_CODE (XEXP (operands[0], 0)))
18261 {
18262 case REG:
18263 if (emit)
18264 {
18265 if (TARGET_LDRD)
18266 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18267 else
18268 output_asm_insn ("stm%?\t%m0, %M1", operands);
18269 }
18270 break;
18271
18272 case PRE_INC:
18273 gcc_assert (TARGET_LDRD);
18274 if (emit)
18275 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18276 break;
18277
18278 case PRE_DEC:
18279 if (emit)
18280 {
18281 if (TARGET_LDRD)
18282 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18283 else
18284 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18285 }
18286 break;
18287
18288 case POST_INC:
18289 if (emit)
18290 {
18291 if (TARGET_LDRD)
18292 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18293 else
18294 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18295 }
18296 break;
18297
18298 case POST_DEC:
18299 gcc_assert (TARGET_LDRD);
18300 if (emit)
18301 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18302 break;
18303
18304 case PRE_MODIFY:
18305 case POST_MODIFY:
18306 otherops[0] = operands[1];
18307 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18308 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18309
18310 /* IWMMXT allows offsets larger than strd can handle,
18311 fix these up with a pair of str. */
18312 if (!TARGET_THUMB2
18313 && CONST_INT_P (otherops[2])
18314 && (INTVAL (otherops[2]) <= -256
18315 || INTVAL (otherops[2]) >= 256))
18316 {
18317 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18318 {
18319 if (emit)
18320 {
18321 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18322 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18323 }
18324 if (count)
18325 *count = 2;
18326 }
18327 else
18328 {
18329 if (emit)
18330 {
18331 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18332 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18333 }
18334 if (count)
18335 *count = 2;
18336 }
18337 }
18338 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18339 {
18340 if (emit)
18341 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18342 }
18343 else
18344 {
18345 if (emit)
18346 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18347 }
18348 break;
18349
18350 case PLUS:
18351 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18352 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18353 {
18354 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18355 {
18356 case -8:
18357 if (emit)
18358 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18359 return "";
18360
18361 case -4:
18362 if (TARGET_THUMB2)
18363 break;
18364 if (emit)
18365 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18366 return "";
18367
18368 case 4:
18369 if (TARGET_THUMB2)
18370 break;
18371 if (emit)
18372 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18373 return "";
18374 }
18375 }
18376 if (TARGET_LDRD
18377 && (REG_P (otherops[2])
18378 || TARGET_THUMB2
18379 || (CONST_INT_P (otherops[2])
18380 && INTVAL (otherops[2]) > -256
18381 && INTVAL (otherops[2]) < 256)))
18382 {
18383 otherops[0] = operands[1];
18384 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18385 if (emit)
18386 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18387 return "";
18388 }
18389 /* Fall through */
18390
18391 default:
18392 otherops[0] = adjust_address (operands[0], SImode, 4);
18393 otherops[1] = operands[1];
18394 if (emit)
18395 {
18396 output_asm_insn ("str%?\t%1, %0", operands);
18397 output_asm_insn ("str%?\t%H1, %0", otherops);
18398 }
18399 if (count)
18400 *count = 2;
18401 }
18402 }
18403
18404 return "";
18405 }
18406
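/* Worked example (register numbers here are illustrative assumptions): on a
   target without LDRD, a DImode store of the pair {r2, r3} to [r4, #-8]
   reaches the PLUS case above with a constant offset of -8 and is emitted as
   "stmdb r4, {r2, r3}", which places r2 at r4-8 and r3 at r4-4, matching what
   the equivalent pair of str instructions would do.  */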
18407 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18408 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18409
18410 const char *
18411 output_move_quad (rtx *operands)
18412 {
18413 if (REG_P (operands[0]))
18414 {
18415 /* Load, or reg->reg move. */
18416
18417 if (MEM_P (operands[1]))
18418 {
18419 switch (GET_CODE (XEXP (operands[1], 0)))
18420 {
18421 case REG:
18422 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18423 break;
18424
18425 case LABEL_REF:
18426 case CONST:
18427 output_asm_insn ("adr%?\t%0, %1", operands);
18428 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18429 break;
18430
18431 default:
18432 gcc_unreachable ();
18433 }
18434 }
18435 else
18436 {
18437 rtx ops[2];
18438 int dest, src, i;
18439
18440 gcc_assert (REG_P (operands[1]));
18441
18442 dest = REGNO (operands[0]);
18443 src = REGNO (operands[1]);
18444
18445 /* This seems pretty dumb, but hopefully GCC won't try to do it
18446 very often. */
18447 if (dest < src)
18448 for (i = 0; i < 4; i++)
18449 {
18450 ops[0] = gen_rtx_REG (SImode, dest + i);
18451 ops[1] = gen_rtx_REG (SImode, src + i);
18452 output_asm_insn ("mov%?\t%0, %1", ops);
18453 }
18454 else
18455 for (i = 3; i >= 0; i--)
18456 {
18457 ops[0] = gen_rtx_REG (SImode, dest + i);
18458 ops[1] = gen_rtx_REG (SImode, src + i);
18459 output_asm_insn ("mov%?\t%0, %1", ops);
18460 }
18461 }
18462 }
18463 else
18464 {
18465 gcc_assert (MEM_P (operands[0]));
18466 gcc_assert (REG_P (operands[1]));
18467 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18468
18469 switch (GET_CODE (XEXP (operands[0], 0)))
18470 {
18471 case REG:
18472 output_asm_insn ("stm%?\t%m0, %M1", operands);
18473 break;
18474
18475 default:
18476 gcc_unreachable ();
18477 }
18478 }
18479
18480 return "";
18481 }
18482
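/* Worked example (register numbers here are illustrative assumptions): a
   reg->reg quad move from {r2-r5} into {r0-r3} has dest < src, so the loop
   above copies in ascending order, "mov r0, r2; mov r1, r3; mov r2, r4;
   mov r3, r5", ensuring each source register is read before a later move
   overwrites it; when dest > src the descending loop serves the same
   purpose.  */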
18483 /* Output a VFP load or store instruction. */
18484
18485 const char *
18486 output_move_vfp (rtx *operands)
18487 {
18488 rtx reg, mem, addr, ops[2];
18489 int load = REG_P (operands[0]);
18490 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18491 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18492 const char *templ;
18493 char buff[50];
18494 machine_mode mode;
18495
18496 reg = operands[!load];
18497 mem = operands[load];
18498
18499 mode = GET_MODE (reg);
18500
18501 gcc_assert (REG_P (reg));
18502 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18503 gcc_assert (mode == SFmode
18504 || mode == DFmode
18505 || mode == SImode
18506 || mode == DImode
18507 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18508 gcc_assert (MEM_P (mem));
18509
18510 addr = XEXP (mem, 0);
18511
18512 switch (GET_CODE (addr))
18513 {
18514 case PRE_DEC:
18515 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18516 ops[0] = XEXP (addr, 0);
18517 ops[1] = reg;
18518 break;
18519
18520 case POST_INC:
18521 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18522 ops[0] = XEXP (addr, 0);
18523 ops[1] = reg;
18524 break;
18525
18526 default:
18527 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18528 ops[0] = reg;
18529 ops[1] = mem;
18530 break;
18531 }
18532
18533 sprintf (buff, templ,
18534 load ? "ld" : "st",
18535 dp ? "64" : "32",
18536 dp ? "P" : "",
18537 integer_p ? "\t%@ int" : "");
18538 output_asm_insn (buff, ops);
18539
18540 return "";
18541 }
18542
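/* As a rough illustration of the template expansion above: a DFmode load
   whose address is a POST_INC expands the template to
   "vldmia%?.64\t%0!, {%P1}", while an SFmode store with a plain register or
   offset address expands to "vstr%?.32\t%0, %1".  The trailing "%@ int"
   comment is appended only when an integer mode is being held in a VFP
   register.  */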
18543 /* Output a Neon double-word or quad-word load or store, or a load
18544 or store for larger structure modes.
18545
18546 WARNING: The ordering of elements is weird in big-endian mode,
18547 because the EABI requires that vectors stored in memory appear
18548 as though they were stored by a VSTM instruction.
18549 GCC RTL defines element ordering based on in-memory order.
18550 This can be different from the architectural ordering of elements
18551 within a NEON register. The intrinsics defined in arm_neon.h use the
18552 NEON register element ordering, not the GCC RTL element ordering.
18553
18554 For example, the in-memory ordering of a big-endian quadword
18555 vector with 16-bit elements when stored from register pair {d0,d1}
18556 will be (lowest address first, d0[N] is NEON register element N):
18557
18558 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18559
18560 When necessary, quadword registers (dN, dN+1) are moved to ARM
18561 registers from rN in the order:
18562
18563 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18564
18565 This ensures that STM/LDM can be used on vectors in ARM registers,
18566 and that the same memory layout results as if VSTM/VLDM were used.
18567
18568 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18569 possible, which allows use of appropriate alignment tags.
18570 Note that the choice of "64" is independent of the actual vector
18571 element size; this size simply ensures that the behavior is
18572 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18573
18574 Due to limitations of those instructions, use of VST1.64/VLD1.64
18575 is not possible if:
18576 - the address contains PRE_DEC, or
18577 - the mode refers to more than 4 double-word registers
18578
18579 In those cases, it would be possible to replace VSTM/VLDM by a
18580 sequence of instructions; this is not currently implemented since
18581 this is not certain to actually improve performance. */
18582
18583 const char *
18584 output_move_neon (rtx *operands)
18585 {
18586 rtx reg, mem, addr, ops[2];
18587 int regno, nregs, load = REG_P (operands[0]);
18588 const char *templ;
18589 char buff[50];
18590 machine_mode mode;
18591
18592 reg = operands[!load];
18593 mem = operands[load];
18594
18595 mode = GET_MODE (reg);
18596
18597 gcc_assert (REG_P (reg));
18598 regno = REGNO (reg);
18599 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18600 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18601 || NEON_REGNO_OK_FOR_QUAD (regno));
18602 gcc_assert (VALID_NEON_DREG_MODE (mode)
18603 || VALID_NEON_QREG_MODE (mode)
18604 || VALID_NEON_STRUCT_MODE (mode));
18605 gcc_assert (MEM_P (mem));
18606
18607 addr = XEXP (mem, 0);
18608
18609 /* Strip off const from addresses like (const (plus (...))). */
18610 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18611 addr = XEXP (addr, 0);
18612
18613 switch (GET_CODE (addr))
18614 {
18615 case POST_INC:
18616 /* We have to use vldm / vstm for too-large modes. */
18617 if (nregs > 4)
18618 {
18619 templ = "v%smia%%?\t%%0!, %%h1";
18620 ops[0] = XEXP (addr, 0);
18621 }
18622 else
18623 {
18624 templ = "v%s1.64\t%%h1, %%A0";
18625 ops[0] = mem;
18626 }
18627 ops[1] = reg;
18628 break;
18629
18630 case PRE_DEC:
18631 /* We have to use vldm / vstm in this case, since there is no
18632 pre-decrement form of the vld1 / vst1 instructions. */
18633 templ = "v%smdb%%?\t%%0!, %%h1";
18634 ops[0] = XEXP (addr, 0);
18635 ops[1] = reg;
18636 break;
18637
18638 case POST_MODIFY:
18639 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18640 gcc_unreachable ();
18641
18642 case REG:
18643 /* We have to use vldm / vstm for too-large modes. */
18644 if (nregs > 1)
18645 {
18646 if (nregs > 4)
18647 templ = "v%smia%%?\t%%m0, %%h1";
18648 else
18649 templ = "v%s1.64\t%%h1, %%A0";
18650
18651 ops[0] = mem;
18652 ops[1] = reg;
18653 break;
18654 }
18655 /* Fall through. */
18656 case LABEL_REF:
18657 case PLUS:
18658 {
18659 int i;
18660 int overlap = -1;
18661 for (i = 0; i < nregs; i++)
18662 {
18663 /* We're only using DImode here because it's a convenient size. */
18664 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18665 ops[1] = adjust_address (mem, DImode, 8 * i);
18666 if (reg_overlap_mentioned_p (ops[0], mem))
18667 {
18668 gcc_assert (overlap == -1);
18669 overlap = i;
18670 }
18671 else
18672 {
18673 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18674 output_asm_insn (buff, ops);
18675 }
18676 }
18677 if (overlap != -1)
18678 {
18679 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18680 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18681 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18682 output_asm_insn (buff, ops);
18683 }
18684
18685 return "";
18686 }
18687
18688 default:
18689 gcc_unreachable ();
18690 }
18691
18692 sprintf (buff, templ, load ? "ld" : "st");
18693 output_asm_insn (buff, ops);
18694
18695 return "";
18696 }
18697
18698 /* Compute and return the length of neon_mov<mode>, where <mode> is
18699 one of VSTRUCT modes: EI, OI, CI or XI. */
18700 int
18701 arm_attr_length_move_neon (rtx_insn *insn)
18702 {
18703 rtx reg, mem, addr;
18704 int load;
18705 machine_mode mode;
18706
18707 extract_insn_cached (insn);
18708
18709 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18710 {
18711 mode = GET_MODE (recog_data.operand[0]);
18712 switch (mode)
18713 {
18714 case EImode:
18715 case OImode:
18716 return 8;
18717 case CImode:
18718 return 12;
18719 case XImode:
18720 return 16;
18721 default:
18722 gcc_unreachable ();
18723 }
18724 }
18725
18726 load = REG_P (recog_data.operand[0]);
18727 reg = recog_data.operand[!load];
18728 mem = recog_data.operand[load];
18729
18730 gcc_assert (MEM_P (mem));
18731
18732 mode = GET_MODE (reg);
18733 addr = XEXP (mem, 0);
18734
18735 /* Strip off const from addresses like (const (plus (...))). */
18736 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18737 addr = XEXP (addr, 0);
18738
18739 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18740 {
18741 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18742 return insns * 4;
18743 }
18744 else
18745 return 4;
18746 }
18747
18748 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18749 return zero. */
18750
18751 int
18752 arm_address_offset_is_imm (rtx_insn *insn)
18753 {
18754 rtx mem, addr;
18755
18756 extract_insn_cached (insn);
18757
18758 if (REG_P (recog_data.operand[0]))
18759 return 0;
18760
18761 mem = recog_data.operand[0];
18762
18763 gcc_assert (MEM_P (mem));
18764
18765 addr = XEXP (mem, 0);
18766
18767 if (REG_P (addr)
18768 || (GET_CODE (addr) == PLUS
18769 && REG_P (XEXP (addr, 0))
18770 && CONST_INT_P (XEXP (addr, 1))))
18771 return 1;
18772 else
18773 return 0;
18774 }
18775
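/* For instance (illustrative register numbers): when operand 0 is a MEM,
   this returns 1 for addresses of the form (reg) or (plus (reg) (const_int)),
   i.e. [r3] or [r3, #8], and 0 for register-offset forms such as
   (plus (reg) (reg)), i.e. [r3, r4].  When operand 0 is a register the
   function returns 0 without examining the address at all.  */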
18776 /* Output an ADD r, s, #n where n may be too big for one instruction.
18777 If adding zero to one register, output nothing. */
18778 const char *
18779 output_add_immediate (rtx *operands)
18780 {
18781 HOST_WIDE_INT n = INTVAL (operands[2]);
18782
18783 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18784 {
18785 if (n < 0)
18786 output_multi_immediate (operands,
18787 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18788 -n);
18789 else
18790 output_multi_immediate (operands,
18791 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18792 n);
18793 }
18794
18795 return "";
18796 }
18797
18798 /* Output a multiple immediate operation.
18799 OPERANDS is the vector of operands referred to in the output patterns.
18800 INSTR1 is the output pattern to use for the first constant.
18801 INSTR2 is the output pattern to use for subsequent constants.
18802 IMMED_OP is the index of the constant slot in OPERANDS.
18803 N is the constant value. */
18804 static const char *
18805 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18806 int immed_op, HOST_WIDE_INT n)
18807 {
18808 #if HOST_BITS_PER_WIDE_INT > 32
18809 n &= 0xffffffff;
18810 #endif
18811
18812 if (n == 0)
18813 {
18814 /* Quick and easy output. */
18815 operands[immed_op] = const0_rtx;
18816 output_asm_insn (instr1, operands);
18817 }
18818 else
18819 {
18820 int i;
18821 const char * instr = instr1;
18822
18823 /* Note that n is never zero here (which would give no output). */
18824 for (i = 0; i < 32; i += 2)
18825 {
18826 if (n & (3 << i))
18827 {
18828 operands[immed_op] = GEN_INT (n & (255 << i));
18829 output_asm_insn (instr, operands);
18830 instr = instr2;
18831 i += 6;
18832 }
18833 }
18834 }
18835
18836 return "";
18837 }
18838
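/* Worked example (illustrative constant): output_add_immediate with
   n = 0x10001 splits the constant into the byte-sized, evenly aligned chunks
   0x1 and 0x10000, both valid ARM rotated immediates, and emits
   "add %0, %1, #1" followed by "add %0, %0, #65536".  A negative n is
   handled the same way, but using the sub patterns with -n.  */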
18839 /* Return the name of a shifter operation. */
18840 static const char *
18841 arm_shift_nmem (enum rtx_code code)
18842 {
18843 switch (code)
18844 {
18845 case ASHIFT:
18846 return ARM_LSL_NAME;
18847
18848 case ASHIFTRT:
18849 return "asr";
18850
18851 case LSHIFTRT:
18852 return "lsr";
18853
18854 case ROTATERT:
18855 return "ror";
18856
18857 default:
18858 abort ();
18859 }
18860 }
18861
18862 /* Return the appropriate ARM instruction for the operation code.
18863 The returned result should not be overwritten. OP is the rtx of the
18864 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18865 was shifted. */
18866 const char *
18867 arithmetic_instr (rtx op, int shift_first_arg)
18868 {
18869 switch (GET_CODE (op))
18870 {
18871 case PLUS:
18872 return "add";
18873
18874 case MINUS:
18875 return shift_first_arg ? "rsb" : "sub";
18876
18877 case IOR:
18878 return "orr";
18879
18880 case XOR:
18881 return "eor";
18882
18883 case AND:
18884 return "and";
18885
18886 case ASHIFT:
18887 case ASHIFTRT:
18888 case LSHIFTRT:
18889 case ROTATERT:
18890 return arm_shift_nmem (GET_CODE (op));
18891
18892 default:
18893 gcc_unreachable ();
18894 }
18895 }
18896
18897 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18898 for the operation code. The returned result should not be overwritten.
18899 OP is the rtx code of the shift.
18900 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18901 constant shift amount otherwise. */
18902 static const char *
18903 shift_op (rtx op, HOST_WIDE_INT *amountp)
18904 {
18905 const char * mnem;
18906 enum rtx_code code = GET_CODE (op);
18907
18908 switch (code)
18909 {
18910 case ROTATE:
18911 if (!CONST_INT_P (XEXP (op, 1)))
18912 {
18913 output_operand_lossage ("invalid shift operand");
18914 return NULL;
18915 }
18916
18917 code = ROTATERT;
18918 *amountp = 32 - INTVAL (XEXP (op, 1));
18919 mnem = "ror";
18920 break;
18921
18922 case ASHIFT:
18923 case ASHIFTRT:
18924 case LSHIFTRT:
18925 case ROTATERT:
18926 mnem = arm_shift_nmem (code);
18927 if (CONST_INT_P (XEXP (op, 1)))
18928 {
18929 *amountp = INTVAL (XEXP (op, 1));
18930 }
18931 else if (REG_P (XEXP (op, 1)))
18932 {
18933 *amountp = -1;
18934 return mnem;
18935 }
18936 else
18937 {
18938 output_operand_lossage ("invalid shift operand");
18939 return NULL;
18940 }
18941 break;
18942
18943 case MULT:
18944 /* We never have to worry about the amount being other than a
18945 power of 2, since this case can never be reloaded from a reg. */
18946 if (!CONST_INT_P (XEXP (op, 1)))
18947 {
18948 output_operand_lossage ("invalid shift operand");
18949 return NULL;
18950 }
18951
18952 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18953
18954 /* Amount must be a power of two. */
18955 if (*amountp & (*amountp - 1))
18956 {
18957 output_operand_lossage ("invalid shift operand");
18958 return NULL;
18959 }
18960
18961 *amountp = int_log2 (*amountp);
18962 return ARM_LSL_NAME;
18963
18964 default:
18965 output_operand_lossage ("invalid shift operand");
18966 return NULL;
18967 }
18968
18969 /* This is not 100% correct, but follows from the desire to merge
18970 multiplication by a power of 2 with the recognizer for a
18971 shift. >=32 is not a valid shift for "lsl", so we must try to
18972 output a shift that produces the correct arithmetical result.
18973 Using lsr #32 is identical except for the fact that the carry bit
18974 is not set correctly if we set the flags; but we never use the
18975 carry bit from such an operation, so we can ignore that. */
18976 if (code == ROTATERT)
18977 /* Rotate is just modulo 32. */
18978 *amountp &= 31;
18979 else if (*amountp != (*amountp & 31))
18980 {
18981 if (code == ASHIFT)
18982 mnem = "lsr";
18983 *amountp = 32;
18984 }
18985
18986 /* Shifts of 0 are no-ops. */
18987 if (*amountp == 0)
18988 return NULL;
18989
18990 return mnem;
18991 }
18992
18993 /* Obtain the shift count from POWER, which must be a power of two. */
18994
18995 static HOST_WIDE_INT
18996 int_log2 (HOST_WIDE_INT power)
18997 {
18998 HOST_WIDE_INT shift = 0;
18999
19000 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
19001 {
19002 gcc_assert (shift <= 31);
19003 shift++;
19004 }
19005
19006 return shift;
19007 }
19008
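/* Worked example: for the rtx (mult x 8), shift_op returns ARM_LSL_NAME with
   *amountp = int_log2 (8) = 3, so the multiplication is printed as a left
   shift by 3.  An over-long shift such as (ashift x 33) is instead printed
   as "lsr #32", which produces the same all-zero result without relying on
   the carry flag.  */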
19009 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19010 because /bin/as is horribly restrictive. The judgement about
19011 whether or not each character is 'printable' (and can be output as
19012 is) or not (and must be printed with an octal escape) must be made
19013 with reference to the *host* character set -- the situation is
19014 similar to that discussed in the comments above pp_c_char in
19015 c-pretty-print.c. */
19016
19017 #define MAX_ASCII_LEN 51
19018
19019 void
19020 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19021 {
19022 int i;
19023 int len_so_far = 0;
19024
19025 fputs ("\t.ascii\t\"", stream);
19026
19027 for (i = 0; i < len; i++)
19028 {
19029 int c = p[i];
19030
19031 if (len_so_far >= MAX_ASCII_LEN)
19032 {
19033 fputs ("\"\n\t.ascii\t\"", stream);
19034 len_so_far = 0;
19035 }
19036
19037 if (ISPRINT (c))
19038 {
19039 if (c == '\\' || c == '\"')
19040 {
19041 putc ('\\', stream);
19042 len_so_far++;
19043 }
19044 putc (c, stream);
19045 len_so_far++;
19046 }
19047 else
19048 {
19049 fprintf (stream, "\\%03o", c);
19050 len_so_far += 4;
19051 }
19052 }
19053
19054 fputs ("\"\n", stream);
19055 }
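/* For example, the three input bytes 'a', '"' and '\n' are emitted as
   .ascii "a\"\012"
   with printable characters copied through (quote and backslash gaining an
   extra backslash) and everything else written as a three-digit octal
   escape; a new .ascii directive is started once the current one reaches
   MAX_ASCII_LEN characters.  */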
19056 \f
19057 /* Whether a register is callee saved or not. This is necessary because high
19058 registers are marked as caller saved when optimizing for size on Thumb-1
19059 targets, despite being callee saved, in order to avoid using them. */
19060 #define callee_saved_reg_p(reg) \
19061 (!call_used_regs[reg] \
19062 || (TARGET_THUMB1 && optimize_size \
19063 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19064
19065 /* Compute the register save mask for registers 0 through 12
19066 inclusive. This code is used by arm_compute_save_reg_mask. */
19067
19068 static unsigned long
19069 arm_compute_save_reg0_reg12_mask (void)
19070 {
19071 unsigned long func_type = arm_current_func_type ();
19072 unsigned long save_reg_mask = 0;
19073 unsigned int reg;
19074
19075 if (IS_INTERRUPT (func_type))
19076 {
19077 unsigned int max_reg;
19078 /* Interrupt functions must not corrupt any registers,
19079 even call clobbered ones. If this is a leaf function
19080 we can just examine the registers used by the RTL, but
19081 otherwise we have to assume that whatever function is
19082 called might clobber anything, and so we have to save
19083 all the call-clobbered registers as well. */
19084 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19085 /* FIQ handlers have registers r8 - r12 banked, so
19086 we only need to check r0 - r7. Normal ISRs only
19087 bank r14 and r15, so we must check up to r12.
19088 r13 is the stack pointer which is always preserved,
19089 so we do not need to consider it here. */
19090 max_reg = 7;
19091 else
19092 max_reg = 12;
19093
19094 for (reg = 0; reg <= max_reg; reg++)
19095 if (df_regs_ever_live_p (reg)
19096 || (! crtl->is_leaf && call_used_regs[reg]))
19097 save_reg_mask |= (1 << reg);
19098
19099 /* Also save the pic base register if necessary. */
19100 if (flag_pic
19101 && !TARGET_SINGLE_PIC_BASE
19102 && arm_pic_register != INVALID_REGNUM
19103 && crtl->uses_pic_offset_table)
19104 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19105 }
19106 else if (IS_VOLATILE (func_type))
19107 {
19108 /* For noreturn functions we historically omitted register saves
19109 altogether. However this really messes up debugging. As a
19110 compromise, save just the frame pointers. Combined with the link
19111 register saved elsewhere this should be sufficient to get
19112 a backtrace. */
19113 if (frame_pointer_needed)
19114 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19115 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19116 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19117 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19118 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19119 }
19120 else
19121 {
19122 /* In the normal case we only need to save those registers
19123 which are call saved and which are used by this function. */
19124 for (reg = 0; reg <= 11; reg++)
19125 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19126 save_reg_mask |= (1 << reg);
19127
19128 /* Handle the frame pointer as a special case. */
19129 if (frame_pointer_needed)
19130 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19131
19132 /* If we aren't loading the PIC register,
19133 don't stack it even though it may be live. */
19134 if (flag_pic
19135 && !TARGET_SINGLE_PIC_BASE
19136 && arm_pic_register != INVALID_REGNUM
19137 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19138 || crtl->uses_pic_offset_table))
19139 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19140
19141 /* The prologue will copy SP into R0, so save it. */
19142 if (IS_STACKALIGN (func_type))
19143 save_reg_mask |= 1;
19144 }
19145
19146 /* Save registers so the exception handler can modify them. */
19147 if (crtl->calls_eh_return)
19148 {
19149 unsigned int i;
19150
19151 for (i = 0; ; i++)
19152 {
19153 reg = EH_RETURN_DATA_REGNO (i);
19154 if (reg == INVALID_REGNUM)
19155 break;
19156 save_reg_mask |= 1 << reg;
19157 }
19158 }
19159
19160 return save_reg_mask;
19161 }
19162
19163 /* Return true if r3 is live at the start of the function. */
19164
19165 static bool
19166 arm_r3_live_at_start_p (void)
19167 {
19168 /* Just look at cfg info, which is still close enough to correct at this
19169 point. This gives false positives for broken functions that might use
19170 uninitialized data that happens to be allocated in r3, but who cares? */
19171 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19172 }
19173
19174 /* Compute the number of bytes used to store the static chain register on the
19175 stack, above the stack frame. We need to know this accurately to get the
19176 alignment of the rest of the stack frame correct. */
19177
19178 static int
19179 arm_compute_static_chain_stack_bytes (void)
19180 {
19181 /* See the defining assertion in arm_expand_prologue. */
19182 if (IS_NESTED (arm_current_func_type ())
19183 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19184 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19185 && !df_regs_ever_live_p (LR_REGNUM)))
19186 && arm_r3_live_at_start_p ()
19187 && crtl->args.pretend_args_size == 0)
19188 return 4;
19189
19190 return 0;
19191 }
19192
19193 /* Compute a bit mask of which registers need to be
19194 saved on the stack for the current function.
19195 This is used by arm_get_frame_offsets, which may add extra registers. */
19196
19197 static unsigned long
19198 arm_compute_save_reg_mask (void)
19199 {
19200 unsigned int save_reg_mask = 0;
19201 unsigned long func_type = arm_current_func_type ();
19202 unsigned int reg;
19203
19204 if (IS_NAKED (func_type))
19205 /* This should never really happen. */
19206 return 0;
19207
19208 /* If we are creating a stack frame, then we must save the frame pointer,
19209 IP (which will hold the old stack pointer), LR and the PC. */
19210 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19211 save_reg_mask |=
19212 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19213 | (1 << IP_REGNUM)
19214 | (1 << LR_REGNUM)
19215 | (1 << PC_REGNUM);
19216
19217 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19218
19219 /* Decide if we need to save the link register.
19220 Interrupt routines have their own banked link register,
19221 so they never need to save it.
19222 Otherwise if we do not use the link register we do not need to save
19223 it. If we are pushing other registers onto the stack however, we
19224 can save an instruction in the epilogue by pushing the link register
19225 now and then popping it back into the PC. This incurs extra memory
19226 accesses though, so we only do it when optimizing for size, and only
19227 if we know that we will not need a fancy return sequence. */
19228 if (df_regs_ever_live_p (LR_REGNUM)
19229 || (save_reg_mask
19230 && optimize_size
19231 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19232 && !crtl->tail_call_emit
19233 && !crtl->calls_eh_return))
19234 save_reg_mask |= 1 << LR_REGNUM;
19235
19236 if (cfun->machine->lr_save_eliminated)
19237 save_reg_mask &= ~ (1 << LR_REGNUM);
19238
19239 if (TARGET_REALLY_IWMMXT
19240 && ((bit_count (save_reg_mask)
19241 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19242 arm_compute_static_chain_stack_bytes())
19243 ) % 2) != 0)
19244 {
19245 /* The total number of registers that are going to be pushed
19246 onto the stack is odd. We need to ensure that the stack
19247 is 64-bit aligned before we start to save iWMMXt registers,
19248 and also before we start to create locals. (A local variable
19249 might be a double or long long which we will load/store using
19250 an iWMMXt instruction). Therefore we need to push another
19251 ARM register, so that the stack will be 64-bit aligned. We
19252 try to avoid using the arg registers (r0 - r3) as they might be
19253 used to pass values in a tail call. */
19254 for (reg = 4; reg <= 12; reg++)
19255 if ((save_reg_mask & (1 << reg)) == 0)
19256 break;
19257
19258 if (reg <= 12)
19259 save_reg_mask |= (1 << reg);
19260 else
19261 {
19262 cfun->machine->sibcall_blocked = 1;
19263 save_reg_mask |= (1 << 3);
19264 }
19265 }
19266
19267 /* We may need to push an additional register for use initializing the
19268 PIC base register. */
19269 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19270 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19271 {
19272 reg = thumb_find_work_register (1 << 4);
19273 if (!call_used_regs[reg])
19274 save_reg_mask |= (1 << reg);
19275 }
19276
19277 return save_reg_mask;
19278 }
19279
19280 /* Compute a bit mask of which registers need to be
19281 saved on the stack for the current function. */
19282 static unsigned long
19283 thumb1_compute_save_reg_mask (void)
19284 {
19285 unsigned long mask;
19286 unsigned reg;
19287
19288 mask = 0;
19289 for (reg = 0; reg < 12; reg ++)
19290 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19291 mask |= 1 << reg;
19292
19293 if (flag_pic
19294 && !TARGET_SINGLE_PIC_BASE
19295 && arm_pic_register != INVALID_REGNUM
19296 && crtl->uses_pic_offset_table)
19297 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19298
19299 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19300 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19301 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19302
19303 /* LR will also be pushed if any lo regs are pushed. */
19304 if (mask & 0xff || thumb_force_lr_save ())
19305 mask |= (1 << LR_REGNUM);
19306
19307 /* Make sure we have a low work register if we need one.
19308 We will need one if we are going to push a high register,
19309 but we are not currently intending to push a low register. */
19310 if ((mask & 0xff) == 0
19311 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19312 {
19313 /* Use thumb_find_work_register to choose which register
19314 we will use. If the register is live then we will
19315 have to push it. Use LAST_LO_REGNUM as our fallback
19316 choice for the register to select. */
19317 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19318 /* Make sure the register returned by thumb_find_work_register is
19319 not part of the return value. */
19320 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19321 reg = LAST_LO_REGNUM;
19322
19323 if (callee_saved_reg_p (reg))
19324 mask |= 1 << reg;
19325 }
19326
19327 /* The 504 below is 8 bytes less than 512 because there are two possible
19328 alignment words. We can't tell here if they will be present or not so we
19329 have to play it safe and assume that they are. */
19330 if ((CALLER_INTERWORKING_SLOT_SIZE +
19331 ROUND_UP_WORD (get_frame_size ()) +
19332 crtl->outgoing_args_size) >= 504)
19333 {
19334 /* This is the same as the code in thumb1_expand_prologue() which
19335 determines which register to use for stack decrement. */
19336 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19337 if (mask & (1 << reg))
19338 break;
19339
19340 if (reg > LAST_LO_REGNUM)
19341 {
19342 /* Make sure we have a register available for stack decrement. */
19343 mask |= 1 << LAST_LO_REGNUM;
19344 }
19345 }
19346
19347 return mask;
19348 }
19349
19350
19351 /* Return the number of bytes required to save VFP registers. */
19352 static int
19353 arm_get_vfp_saved_size (void)
19354 {
19355 unsigned int regno;
19356 int count;
19357 int saved;
19358
19359 saved = 0;
19360 /* Space for saved VFP registers. */
19361 if (TARGET_HARD_FLOAT && TARGET_VFP)
19362 {
19363 count = 0;
19364 for (regno = FIRST_VFP_REGNUM;
19365 regno < LAST_VFP_REGNUM;
19366 regno += 2)
19367 {
19368 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19369 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19370 {
19371 if (count > 0)
19372 {
19373 /* Workaround ARM10 VFPr1 bug. */
19374 if (count == 2 && !arm_arch6)
19375 count++;
19376 saved += count * 8;
19377 }
19378 count = 0;
19379 }
19380 else
19381 count++;
19382 }
19383 if (count > 0)
19384 {
19385 if (count == 2 && !arm_arch6)
19386 count++;
19387 saved += count * 8;
19388 }
19389 }
19390 return saved;
19391 }
19392
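/* Rough example: if d8-d11 are the only live call-saved VFP registers, the
   loop above sees a single run with count == 4 and the function returns 32
   bytes.  On pre-ARMv6 cores the VFPr1 workaround pads any run of exactly
   two D registers to three, so a function using only d8-d9 would be charged
   24 bytes rather than 16.  */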
19393
19394 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19395 everything bar the final return instruction. If simple_return is true,
19396 then do not output the epilogue, because it has already been emitted in RTL. */
19397 const char *
19398 output_return_instruction (rtx operand, bool really_return, bool reverse,
19399 bool simple_return)
19400 {
19401 char conditional[10];
19402 char instr[100];
19403 unsigned reg;
19404 unsigned long live_regs_mask;
19405 unsigned long func_type;
19406 arm_stack_offsets *offsets;
19407
19408 func_type = arm_current_func_type ();
19409
19410 if (IS_NAKED (func_type))
19411 return "";
19412
19413 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19414 {
19415 /* If this function was declared non-returning, and we have
19416 found a tail call, then we have to trust that the called
19417 function won't return. */
19418 if (really_return)
19419 {
19420 rtx ops[2];
19421
19422 /* Otherwise, trap an attempted return by aborting. */
19423 ops[0] = operand;
19424 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19425 : "abort");
19426 assemble_external_libcall (ops[1]);
19427 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19428 }
19429
19430 return "";
19431 }
19432
19433 gcc_assert (!cfun->calls_alloca || really_return);
19434
19435 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19436
19437 cfun->machine->return_used_this_function = 1;
19438
19439 offsets = arm_get_frame_offsets ();
19440 live_regs_mask = offsets->saved_regs_mask;
19441
19442 if (!simple_return && live_regs_mask)
19443 {
19444 const char * return_reg;
19445
19446 /* If we do not have any special requirements for function exit
19447 (e.g. interworking) then we can load the return address
19448 directly into the PC. Otherwise we must load it into LR. */
19449 if (really_return
19450 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19451 return_reg = reg_names[PC_REGNUM];
19452 else
19453 return_reg = reg_names[LR_REGNUM];
19454
19455 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19456 {
19457 /* There are three possible reasons for the IP register
19458 being saved: 1) a stack frame was created, in which case
19459 IP contains the old stack pointer, or 2) an ISR routine
19460 corrupted it, or 3) it was saved to align the stack on
19461 iWMMXt. In case 1, restore IP into SP, otherwise just
19462 restore IP. */
19463 if (frame_pointer_needed)
19464 {
19465 live_regs_mask &= ~ (1 << IP_REGNUM);
19466 live_regs_mask |= (1 << SP_REGNUM);
19467 }
19468 else
19469 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19470 }
19471
19472 /* On some ARM architectures it is faster to use LDR rather than
19473 LDM to load a single register. On other architectures, the
19474 cost is the same. In 26 bit mode, or for exception handlers,
19475 we have to use LDM to load the PC so that the CPSR is also
19476 restored. */
19477 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19478 if (live_regs_mask == (1U << reg))
19479 break;
19480
19481 if (reg <= LAST_ARM_REGNUM
19482 && (reg != LR_REGNUM
19483 || ! really_return
19484 || ! IS_INTERRUPT (func_type)))
19485 {
19486 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19487 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19488 }
19489 else
19490 {
19491 char *p;
19492 int first = 1;
19493
19494 /* Generate the load multiple instruction to restore the
19495 registers. Note we can get here, even if
19496 frame_pointer_needed is true, but only if sp already
19497 points to the base of the saved core registers. */
19498 if (live_regs_mask & (1 << SP_REGNUM))
19499 {
19500 unsigned HOST_WIDE_INT stack_adjust;
19501
19502 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19503 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19504
19505 if (stack_adjust && arm_arch5 && TARGET_ARM)
19506 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19507 else
19508 {
19509 /* If we can't use ldmib (SA110 bug),
19510 then try to pop r3 instead. */
19511 if (stack_adjust)
19512 live_regs_mask |= 1 << 3;
19513
19514 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19515 }
19516 }
19517 else
19518 sprintf (instr, "pop%s\t{", conditional);
19519
19520 p = instr + strlen (instr);
19521
19522 for (reg = 0; reg <= SP_REGNUM; reg++)
19523 if (live_regs_mask & (1 << reg))
19524 {
19525 int l = strlen (reg_names[reg]);
19526
19527 if (first)
19528 first = 0;
19529 else
19530 {
19531 memcpy (p, ", ", 2);
19532 p += 2;
19533 }
19534
19535 memcpy (p, "%|", 2);
19536 memcpy (p + 2, reg_names[reg], l);
19537 p += l + 2;
19538 }
19539
19540 if (live_regs_mask & (1 << LR_REGNUM))
19541 {
19542 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19543 /* If returning from an interrupt, restore the CPSR. */
19544 if (IS_INTERRUPT (func_type))
19545 strcat (p, "^");
19546 }
19547 else
19548 strcpy (p, "}");
19549 }
19550
19551 output_asm_insn (instr, & operand);
19552
19553 /* See if we need to generate an extra instruction to
19554 perform the actual function return. */
19555 if (really_return
19556 && func_type != ARM_FT_INTERWORKED
19557 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19558 {
19559 /* The return has already been handled
19560 by loading the LR into the PC. */
19561 return "";
19562 }
19563 }
19564
19565 if (really_return)
19566 {
19567 switch ((int) ARM_FUNC_TYPE (func_type))
19568 {
19569 case ARM_FT_ISR:
19570 case ARM_FT_FIQ:
19571 /* ??? This is wrong for unified assembly syntax. */
19572 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19573 break;
19574
19575 case ARM_FT_INTERWORKED:
19576 sprintf (instr, "bx%s\t%%|lr", conditional);
19577 break;
19578
19579 case ARM_FT_EXCEPTION:
19580 /* ??? This is wrong for unified assembly syntax. */
19581 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19582 break;
19583
19584 default:
19585 /* Use bx if it's available. */
19586 if (arm_arch5 || arm_arch4t)
19587 sprintf (instr, "bx%s\t%%|lr", conditional);
19588 else
19589 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19590 break;
19591 }
19592
19593 output_asm_insn (instr, & operand);
19594 }
19595
19596 return "";
19597 }
19598
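/* Sketch of a typical epilogue produced here (illustrative registers
   assumed): with saved_regs_mask covering just {r4, lr} and no interworking
   requirement, the code above selects the PC as the return register and
   emits "pop {r4, pc}", so no separate return instruction is needed
   afterwards.  */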
19599 /* Write the function name into the code section, directly preceding
19600 the function prologue.
19601
19602 Code will be output similar to this:
19603 t0
19604 .ascii "arm_poke_function_name", 0
19605 .align
19606 t1
19607 .word 0xff000000 + (t1 - t0)
19608 arm_poke_function_name
19609 mov ip, sp
19610 stmfd sp!, {fp, ip, lr, pc}
19611 sub fp, ip, #4
19612
19613 When performing a stack backtrace, code can inspect the value
19614 of 'pc' stored at 'fp' + 0. If the trace function then looks
19615 at location pc - 12 and the top 8 bits are set, then we know
19616 that there is a function name embedded immediately preceding this
19617 location, and that its length is ((pc[-3]) & ~0xff000000).
19618
19619 We assume that pc is declared as a pointer to an unsigned long.
19620
19621 It is of no benefit to output the function name if we are assembling
19622 a leaf function. These function types will not contain a stack
19623 backtrace structure, therefore it is not possible to determine the
19624 function name. */
19625 void
19626 arm_poke_function_name (FILE *stream, const char *name)
19627 {
19628 unsigned long alignlength;
19629 unsigned long length;
19630 rtx x;
19631
19632 length = strlen (name) + 1;
19633 alignlength = ROUND_UP_WORD (length);
19634
19635 ASM_OUTPUT_ASCII (stream, name, length);
19636 ASM_OUTPUT_ALIGN (stream, 2);
19637 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19638 assemble_aligned_integer (UNITS_PER_WORD, x);
19639 }
19640
19641 /* Place some comments into the assembler stream
19642 describing the current function. */
19643 static void
19644 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19645 {
19646 unsigned long func_type;
19647
19648 /* ??? Do we want to print some of the below anyway? */
19649 if (TARGET_THUMB1)
19650 return;
19651
19652 /* Sanity check. */
19653 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19654
19655 func_type = arm_current_func_type ();
19656
19657 switch ((int) ARM_FUNC_TYPE (func_type))
19658 {
19659 default:
19660 case ARM_FT_NORMAL:
19661 break;
19662 case ARM_FT_INTERWORKED:
19663 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19664 break;
19665 case ARM_FT_ISR:
19666 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19667 break;
19668 case ARM_FT_FIQ:
19669 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19670 break;
19671 case ARM_FT_EXCEPTION:
19672 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19673 break;
19674 }
19675
19676 if (IS_NAKED (func_type))
19677 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19678
19679 if (IS_VOLATILE (func_type))
19680 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19681
19682 if (IS_NESTED (func_type))
19683 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19684 if (IS_STACKALIGN (func_type))
19685 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19686
19687 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19688 crtl->args.size,
19689 crtl->args.pretend_args_size, frame_size);
19690
19691 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19692 frame_pointer_needed,
19693 cfun->machine->uses_anonymous_args);
19694
19695 if (cfun->machine->lr_save_eliminated)
19696 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19697
19698 if (crtl->calls_eh_return)
19699 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19700
19701 }
19702
19703 static void
19704 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19705 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19706 {
19707 arm_stack_offsets *offsets;
19708
19709 if (TARGET_THUMB1)
19710 {
19711 int regno;
19712
19713 /* Emit any call-via-reg trampolines that are needed for v4t support
19714 of call_reg and call_value_reg type insns. */
19715 for (regno = 0; regno < LR_REGNUM; regno++)
19716 {
19717 rtx label = cfun->machine->call_via[regno];
19718
19719 if (label != NULL)
19720 {
19721 switch_to_section (function_section (current_function_decl));
19722 targetm.asm_out.internal_label (asm_out_file, "L",
19723 CODE_LABEL_NUMBER (label));
19724 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19725 }
19726 }
19727
19728 /* ??? Probably not safe to set this here, since it assumes that a
19729 function will be emitted as assembly immediately after we generate
19730 RTL for it. This does not happen for inline functions. */
19731 cfun->machine->return_used_this_function = 0;
19732 }
19733 else /* TARGET_32BIT */
19734 {
19735 /* We need to take into account any stack-frame rounding. */
19736 offsets = arm_get_frame_offsets ();
19737
19738 gcc_assert (!use_return_insn (FALSE, NULL)
19739 || (cfun->machine->return_used_this_function != 0)
19740 || offsets->saved_regs == offsets->outgoing_args
19741 || frame_pointer_needed);
19742 }
19743 }
19744
19745 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19746 STR and STRD. If an even number of registers are being pushed, one
19747 or more STRD patterns are created for each register pair. If an
19748 odd number of registers are pushed, emit an initial STR followed by
19749 as many STRD instructions as are needed. This works best when the
19750 stack is initially 64-bit aligned (the normal case), since it
19751 ensures that each STRD is also 64-bit aligned. */
19752 static void
19753 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19754 {
19755 int num_regs = 0;
19756 int i;
19757 int regno;
19758 rtx par = NULL_RTX;
19759 rtx dwarf = NULL_RTX;
19760 rtx tmp;
19761 bool first = true;
19762
19763 num_regs = bit_count (saved_regs_mask);
19764
19765 /* Must be at least one register to save, and can't save SP or PC. */
19766 gcc_assert (num_regs > 0 && num_regs <= 14);
19767 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19768 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19769
19770 /* Create sequence for DWARF info. All the frame-related data for
19771 debugging is held in this wrapper. */
19772 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19773
19774 /* Describe the stack adjustment. */
19775 tmp = gen_rtx_SET (stack_pointer_rtx,
19776 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19777 RTX_FRAME_RELATED_P (tmp) = 1;
19778 XVECEXP (dwarf, 0, 0) = tmp;
19779
19780 /* Find the first register. */
19781 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19782 ;
19783
19784 i = 0;
19785
19786 /* If there's an odd number of registers to push, start off by
19787 pushing a single register. This ensures that subsequent strd
19788 operations are dword aligned (assuming that SP was originally
19789 64-bit aligned). */
19790 if ((num_regs & 1) != 0)
19791 {
19792 rtx reg, mem, insn;
19793
19794 reg = gen_rtx_REG (SImode, regno);
19795 if (num_regs == 1)
19796 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19797 stack_pointer_rtx));
19798 else
19799 mem = gen_frame_mem (Pmode,
19800 gen_rtx_PRE_MODIFY
19801 (Pmode, stack_pointer_rtx,
19802 plus_constant (Pmode, stack_pointer_rtx,
19803 -4 * num_regs)));
19804
19805 tmp = gen_rtx_SET (mem, reg);
19806 RTX_FRAME_RELATED_P (tmp) = 1;
19807 insn = emit_insn (tmp);
19808 RTX_FRAME_RELATED_P (insn) = 1;
19809 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19810 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19811 RTX_FRAME_RELATED_P (tmp) = 1;
19812 i++;
19813 regno++;
19814 XVECEXP (dwarf, 0, i) = tmp;
19815 first = false;
19816 }
19817
19818 while (i < num_regs)
19819 if (saved_regs_mask & (1 << regno))
19820 {
19821 rtx reg1, reg2, mem1, mem2;
19822 rtx tmp0, tmp1, tmp2;
19823 int regno2;
19824
19825 /* Find the register to pair with this one. */
19826 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19827 regno2++)
19828 ;
19829
19830 reg1 = gen_rtx_REG (SImode, regno);
19831 reg2 = gen_rtx_REG (SImode, regno2);
19832
19833 if (first)
19834 {
19835 rtx insn;
19836
19837 first = false;
19838 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19839 stack_pointer_rtx,
19840 -4 * num_regs));
19841 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19842 stack_pointer_rtx,
19843 -4 * (num_regs - 1)));
19844 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19845 plus_constant (Pmode, stack_pointer_rtx,
19846 -4 * (num_regs)));
19847 tmp1 = gen_rtx_SET (mem1, reg1);
19848 tmp2 = gen_rtx_SET (mem2, reg2);
19849 RTX_FRAME_RELATED_P (tmp0) = 1;
19850 RTX_FRAME_RELATED_P (tmp1) = 1;
19851 RTX_FRAME_RELATED_P (tmp2) = 1;
19852 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19853 XVECEXP (par, 0, 0) = tmp0;
19854 XVECEXP (par, 0, 1) = tmp1;
19855 XVECEXP (par, 0, 2) = tmp2;
19856 insn = emit_insn (par);
19857 RTX_FRAME_RELATED_P (insn) = 1;
19858 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19859 }
19860 else
19861 {
19862 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19863 stack_pointer_rtx,
19864 4 * i));
19865 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19866 stack_pointer_rtx,
19867 4 * (i + 1)));
19868 tmp1 = gen_rtx_SET (mem1, reg1);
19869 tmp2 = gen_rtx_SET (mem2, reg2);
19870 RTX_FRAME_RELATED_P (tmp1) = 1;
19871 RTX_FRAME_RELATED_P (tmp2) = 1;
19872 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19873 XVECEXP (par, 0, 0) = tmp1;
19874 XVECEXP (par, 0, 1) = tmp2;
19875 emit_insn (par);
19876 }
19877
19878 /* Create unwind information. This is an approximation. */
19879 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19880 plus_constant (Pmode,
19881 stack_pointer_rtx,
19882 4 * i)),
19883 reg1);
19884 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19885 plus_constant (Pmode,
19886 stack_pointer_rtx,
19887 4 * (i + 1))),
19888 reg2);
19889
19890 RTX_FRAME_RELATED_P (tmp1) = 1;
19891 RTX_FRAME_RELATED_P (tmp2) = 1;
19892 XVECEXP (dwarf, 0, i + 1) = tmp1;
19893 XVECEXP (dwarf, 0, i + 2) = tmp2;
19894 i += 2;
19895 regno = regno2 + 1;
19896 }
19897 else
19898 regno++;
19899
19900 return;
19901 }
19902
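/* Rough illustration: pushing {r4, r5, r6} (an odd count) first emits a
   single store of r4 with a -12 writeback of the stack pointer, then one
   strd-style parallel storing r5 and r6 at [sp, #4] and [sp, #8].  Pushing
   {r4, r5, r6, r7} emits two such parallels, the first of which also
   performs the whole -16 stack adjustment.  */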
19903 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19904 whenever possible, otherwise it emits single-word stores. The first store
19905 also allocates stack space for all saved registers, using writeback with
19906 post-addressing mode. All other stores use offset addressing. If no STRD
19907 can be emitted, this function emits a sequence of single-word stores,
19908 and not an STM as before, because single-word stores give the scheduler
19909 more freedom and can be turned into an STM by peephole optimizations. */
19910 static void
19911 arm_emit_strd_push (unsigned long saved_regs_mask)
19912 {
19913 int num_regs = 0;
19914 int i, j, dwarf_index = 0;
19915 int offset = 0;
19916 rtx dwarf = NULL_RTX;
19917 rtx insn = NULL_RTX;
19918 rtx tmp, mem;
19919
19920 /* TODO: More efficient code can be emitted by changing the
19921 layout, e.g., first push all pairs that can use STRD to keep the
19922 stack aligned, and then push all other registers. */
19923 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19924 if (saved_regs_mask & (1 << i))
19925 num_regs++;
19926
19927 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19928 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19929 gcc_assert (num_regs > 0);
19930
19931 /* Create sequence for DWARF info. */
19932 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19933
19934 /* For dwarf info, we generate explicit stack update. */
19935 tmp = gen_rtx_SET (stack_pointer_rtx,
19936 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19937 RTX_FRAME_RELATED_P (tmp) = 1;
19938 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19939
19940 /* Save registers. */
19941 offset = - 4 * num_regs;
19942 j = 0;
19943 while (j <= LAST_ARM_REGNUM)
19944 if (saved_regs_mask & (1 << j))
19945 {
19946 if ((j % 2 == 0)
19947 && (saved_regs_mask & (1 << (j + 1))))
19948 {
19949 /* The current register and the next register form a register
19950 pair for which STRD can be generated. */
19951 if (offset < 0)
19952 {
19953 /* Allocate stack space for all saved registers. */
19954 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19955 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19956 mem = gen_frame_mem (DImode, tmp);
19957 offset = 0;
19958 }
19959 else if (offset > 0)
19960 mem = gen_frame_mem (DImode,
19961 plus_constant (Pmode,
19962 stack_pointer_rtx,
19963 offset));
19964 else
19965 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19966
19967 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19968 RTX_FRAME_RELATED_P (tmp) = 1;
19969 tmp = emit_insn (tmp);
19970
19971 /* Record the first store insn. */
19972 if (dwarf_index == 1)
19973 insn = tmp;
19974
19975 /* Generate dwarf info. */
19976 mem = gen_frame_mem (SImode,
19977 plus_constant (Pmode,
19978 stack_pointer_rtx,
19979 offset));
19980 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19981 RTX_FRAME_RELATED_P (tmp) = 1;
19982 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19983
19984 mem = gen_frame_mem (SImode,
19985 plus_constant (Pmode,
19986 stack_pointer_rtx,
19987 offset + 4));
19988 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19989 RTX_FRAME_RELATED_P (tmp) = 1;
19990 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19991
19992 offset += 8;
19993 j += 2;
19994 }
19995 else
19996 {
19997 /* Emit a single word store. */
19998 if (offset < 0)
19999 {
20000 /* Allocate stack space for all saved registers. */
20001 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20002 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20003 mem = gen_frame_mem (SImode, tmp);
20004 offset = 0;
20005 }
20006 else if (offset > 0)
20007 mem = gen_frame_mem (SImode,
20008 plus_constant (Pmode,
20009 stack_pointer_rtx,
20010 offset));
20011 else
20012 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20013
20014 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20015 RTX_FRAME_RELATED_P (tmp) = 1;
20016 tmp = emit_insn (tmp);
20017
20018 /* Record the first store insn. */
20019 if (dwarf_index == 1)
20020 insn = tmp;
20021
20022 /* Generate dwarf info. */
20023 mem = gen_frame_mem (SImode,
20024 plus_constant (Pmode,
20025 stack_pointer_rtx,
20026 offset));
20027 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20028 RTX_FRAME_RELATED_P (tmp) = 1;
20029 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20030
20031 offset += 4;
20032 j += 1;
20033 }
20034 }
20035 else
20036 j++;
20037
20038 /* Attach dwarf info to the first insn we generate. */
20039 gcc_assert (insn != NULL_RTX);
20040 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20041 RTX_FRAME_RELATED_P (insn) = 1;
20042 }
20043
20044 /* Generate and emit an insn that we will recognize as a push_multi.
20045 Unfortunately, since this insn does not reflect very well the actual
20046 semantics of the operation, we need to annotate the insn for the benefit
20047 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20048 MASK for registers that should be annotated for DWARF2 frame unwind
20049 information. */
20050 static rtx
20051 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20052 {
20053 int num_regs = 0;
20054 int num_dwarf_regs = 0;
20055 int i, j;
20056 rtx par;
20057 rtx dwarf;
20058 int dwarf_par_index;
20059 rtx tmp, reg;
20060
20061 /* We don't record the PC in the dwarf frame information. */
20062 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20063
20064 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20065 {
20066 if (mask & (1 << i))
20067 num_regs++;
20068 if (dwarf_regs_mask & (1 << i))
20069 num_dwarf_regs++;
20070 }
20071
20072 gcc_assert (num_regs && num_regs <= 16);
20073 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20074
20075 /* For the body of the insn we are going to generate an UNSPEC in
20076 parallel with several USEs. This allows the insn to be recognized
20077 by the push_multi pattern in the arm.md file.
20078
20079 The body of the insn looks something like this:
20080
20081 (parallel [
20082 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20083 (const_int:SI <num>)))
20084 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20085 (use (reg:SI XX))
20086 (use (reg:SI YY))
20087 ...
20088 ])
20089
20090 For the frame note however, we try to be more explicit and actually
20091 show each register being stored into the stack frame, plus a (single)
20092 decrement of the stack pointer. We do it this way in order to be
20093 friendly to the stack unwinding code, which only wants to see a single
20094 stack decrement per instruction. The RTL we generate for the note looks
20095 something like this:
20096
20097 (sequence [
20098 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20099 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20100 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20101 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20102 ...
20103 ])
20104
20105 FIXME: In an ideal world the PRE_MODIFY would not exist and
20106 instead we'd have a parallel expression detailing all
20107 the stores to the various memory addresses so that debug
20108 information is more up-to-date. Remember however while writing
20109 this to take care of the constraints with the push instruction.
20110
20111 Note also that this has to be taken care of for the VFP registers.
20112
20113 For more see PR43399. */
20114
20115 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20116 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20117 dwarf_par_index = 1;
20118
20119 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20120 {
20121 if (mask & (1 << i))
20122 {
20123 reg = gen_rtx_REG (SImode, i);
20124
20125 XVECEXP (par, 0, 0)
20126 = gen_rtx_SET (gen_frame_mem
20127 (BLKmode,
20128 gen_rtx_PRE_MODIFY (Pmode,
20129 stack_pointer_rtx,
20130 plus_constant
20131 (Pmode, stack_pointer_rtx,
20132 -4 * num_regs))
20133 ),
20134 gen_rtx_UNSPEC (BLKmode,
20135 gen_rtvec (1, reg),
20136 UNSPEC_PUSH_MULT));
20137
20138 if (dwarf_regs_mask & (1 << i))
20139 {
20140 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20141 reg);
20142 RTX_FRAME_RELATED_P (tmp) = 1;
20143 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20144 }
20145
20146 break;
20147 }
20148 }
20149
20150 for (j = 1, i++; j < num_regs; i++)
20151 {
20152 if (mask & (1 << i))
20153 {
20154 reg = gen_rtx_REG (SImode, i);
20155
20156 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20157
20158 if (dwarf_regs_mask & (1 << i))
20159 {
20160 tmp
20161 = gen_rtx_SET (gen_frame_mem
20162 (SImode,
20163 plus_constant (Pmode, stack_pointer_rtx,
20164 4 * j)),
20165 reg);
20166 RTX_FRAME_RELATED_P (tmp) = 1;
20167 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20168 }
20169
20170 j++;
20171 }
20172 }
20173
20174 par = emit_insn (par);
20175
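  /* The first element of the DWARF note is the single stack-pointer
     decrement covering the whole push.  */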
20176 tmp = gen_rtx_SET (stack_pointer_rtx,
20177 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20178 RTX_FRAME_RELATED_P (tmp) = 1;
20179 XVECEXP (dwarf, 0, 0) = tmp;
20180
20181 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20182
20183 return par;
20184 }
20185
20186 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20187 SIZE is the offset to be adjusted.
20188 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20189 static void
20190 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20191 {
20192 rtx dwarf;
20193
20194 RTX_FRAME_RELATED_P (insn) = 1;
20195 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20196 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20197 }
20198
20199 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20200 SAVED_REGS_MASK shows which registers need to be restored.
20201
20202 Unfortunately, since this insn does not reflect very well the actual
20203 semantics of the operation, we need to annotate the insn for the benefit
20204 of DWARF2 frame unwind information. */
20205 static void
20206 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20207 {
20208 int num_regs = 0;
20209 int i, j;
20210 rtx par;
20211 rtx dwarf = NULL_RTX;
20212 rtx tmp, reg;
20213 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20214 int offset_adj;
20215 int emit_update;
20216
20217 offset_adj = return_in_pc ? 1 : 0;
20218 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20219 if (saved_regs_mask & (1 << i))
20220 num_regs++;
20221
20222 gcc_assert (num_regs && num_regs <= 16);
20223
20224 /* If SP is in the register list, then we don't emit an SP update insn. */
20225 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20226
20227 /* The parallel needs to hold num_regs SETs
20228 and one SET for the stack update. */
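  /* The body we build looks roughly like this (the leading RETURN appears
     only when PC is being popped):

       (parallel [(return)
                  (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 4*num_regs)))
                  (set (reg:SI XX) (mem:SI (reg:SI sp)))
                  (set (reg:SI YY) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
                  ...
       ])  */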
20229 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20230
20231 if (return_in_pc)
20232 XVECEXP (par, 0, 0) = ret_rtx;
20233
20234 if (emit_update)
20235 {
20236 /* Increment the stack pointer, based on there being
20237 num_regs 4-byte registers to restore. */
20238 tmp = gen_rtx_SET (stack_pointer_rtx,
20239 plus_constant (Pmode,
20240 stack_pointer_rtx,
20241 4 * num_regs));
20242 RTX_FRAME_RELATED_P (tmp) = 1;
20243 XVECEXP (par, 0, offset_adj) = tmp;
20244 }
20245
20246 /* Now restore every reg, which may include PC. */
20247 for (j = 0, i = 0; j < num_regs; i++)
20248 if (saved_regs_mask & (1 << i))
20249 {
20250 reg = gen_rtx_REG (SImode, i);
20251 if ((num_regs == 1) && emit_update && !return_in_pc)
20252 {
20253 /* Emit single load with writeback. */
20254 tmp = gen_frame_mem (SImode,
20255 gen_rtx_POST_INC (Pmode,
20256 stack_pointer_rtx));
20257 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20258 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20259 return;
20260 }
20261
20262 tmp = gen_rtx_SET (reg,
20263 gen_frame_mem
20264 (SImode,
20265 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20266 RTX_FRAME_RELATED_P (tmp) = 1;
20267 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20268
20269 /* We also need to build a chain of REG_CFA_RESTORE notes for the DWARF
20270 info. The DWARF info should not include PC, so skip it. */
20271 if (i != PC_REGNUM)
20272 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20273
20274 j++;
20275 }
20276
20277 if (return_in_pc)
20278 par = emit_jump_insn (par);
20279 else
20280 par = emit_insn (par);
20281
20282 REG_NOTES (par) = dwarf;
20283 if (!return_in_pc)
20284 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20285 stack_pointer_rtx, stack_pointer_rtx);
20286 }
20287
20288 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20289 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20290
20291 Unfortunately, since this insn does not reflect very well the actual
20292 semantics of the operation, we need to annotate the insn for the benefit
20293 of DWARF2 frame unwind information. */
20294 static void
20295 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20296 {
20297 int i, j;
20298 rtx par;
20299 rtx dwarf = NULL_RTX;
20300 rtx tmp, reg;
20301
20302 gcc_assert (num_regs && num_regs <= 32);
20303
20304 /* Work around the ARM10 VFPr1 bug. */
20305 if (num_regs == 2 && !arm_arch6)
20306 {
20307 if (first_reg == 15)
20308 first_reg--;
20309
20310 num_regs++;
20311 }
20312
20313 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20314 there could be up to 32 D-registers to restore.
20315 If there are more than 16 D-registers, make two recursive calls,
20316 each of which emits one pop_multi instruction. */
20317 if (num_regs > 16)
20318 {
20319 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20320 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20321 return;
20322 }
20323
20324 /* The parallel needs to hold num_regs SETs
20325 and one SET for the stack update. */
20326 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20327
20328 /* Increment the stack pointer, based on there being
20329 num_regs 8-byte registers to restore. */
20330 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20331 RTX_FRAME_RELATED_P (tmp) = 1;
20332 XVECEXP (par, 0, 0) = tmp;
20333
20334 /* Now show every reg that will be restored, using a SET for each. */
20335 for (j = 0, i=first_reg; j < num_regs; i += 2)
20336 {
20337 reg = gen_rtx_REG (DFmode, i);
20338
20339 tmp = gen_rtx_SET (reg,
20340 gen_frame_mem
20341 (DFmode,
20342 plus_constant (Pmode, base_reg, 8 * j)));
20343 RTX_FRAME_RELATED_P (tmp) = 1;
20344 XVECEXP (par, 0, j + 1) = tmp;
20345
20346 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20347
20348 j++;
20349 }
20350
20351 par = emit_insn (par);
20352 REG_NOTES (par) = dwarf;
20353
20354 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20355 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20356 {
20357 RTX_FRAME_RELATED_P (par) = 1;
20358 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20359 }
20360 else
20361 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20362 base_reg, base_reg);
20363 }
20364
20365 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
20366 an even number of registers is being popped, multiple LDRD patterns are
20367 created for all register pairs. If an odd number of registers is popped,
20368 the last register is loaded using an LDR pattern. */
20369 static void
20370 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20371 {
20372 int num_regs = 0;
20373 int i, j;
20374 rtx par = NULL_RTX;
20375 rtx dwarf = NULL_RTX;
20376 rtx tmp, reg, tmp1;
20377 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20378
20379 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20380 if (saved_regs_mask & (1 << i))
20381 num_regs++;
20382
20383 gcc_assert (num_regs && num_regs <= 16);
20384
20385 /* We cannot generate an LDRD for PC, so reduce the count if PC is to be
20386 popped. If num_regs was even it is now odd, and PC will be popped by a
20387 multi-register pop; if num_regs was odd it is now even, and an LDR with
20388 return can be generated for PC. */
20389 if (return_in_pc)
20390 num_regs--;
20391
20392 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20393
20394 /* Var j iterates over all the registers to gather all the registers in
20395 saved_regs_mask. Var i gives index of saved registers in stack frame.
20396 A PARALLEL RTX of register-pair is created here, so that pattern for
20397 LDRD can be matched. As PC is always last register to be popped, and
20398 we have already decremented num_regs if PC, we don't have to worry
20399 about PC in this loop. */
20400 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20401 if (saved_regs_mask & (1 << j))
20402 {
20403 /* Create RTX for memory load. */
20404 reg = gen_rtx_REG (SImode, j);
20405 tmp = gen_rtx_SET (reg,
20406 gen_frame_mem (SImode,
20407 plus_constant (Pmode,
20408 stack_pointer_rtx, 4 * i)));
20409 RTX_FRAME_RELATED_P (tmp) = 1;
20410
20411 if (i % 2 == 0)
20412 {
20413 /* When saved-register index (i) is even, the RTX to be emitted is
20414 yet to be created. Hence create it first. The LDRD pattern we
20415 are generating is :
20416 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20417 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20418 where target registers need not be consecutive. */
20419 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20420 dwarf = NULL_RTX;
20421 }
20422
20423 /* The ith register is added to the PARALLEL RTX: if i is even, reg_i is
20424 added as the 0th element, and if i is odd, as the 1st element of the
20425 LDRD pattern shown above. */
20426 XVECEXP (par, 0, (i % 2)) = tmp;
20427 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20428
20429 if ((i % 2) == 1)
20430 {
20431 /* When saved-register index (i) is odd, RTXs for both the registers
20432 to be loaded are generated in above given LDRD pattern, and the
20433 pattern can be emitted now. */
20434 par = emit_insn (par);
20435 REG_NOTES (par) = dwarf;
20436 RTX_FRAME_RELATED_P (par) = 1;
20437 }
20438
20439 i++;
20440 }
20441
20442 /* If the number of registers popped is odd and return_in_pc is false, or
20443 the number of registers is even and return_in_pc is true, the last
20444 register is popped using LDR. It can be PC as well. Hence, adjust the
20445 stack first and then LDR with post increment. */
20446
20447 /* Increment the stack pointer past the I registers (4 bytes each)
20448 restored so far by the LDRD pairs. */
20449 tmp = gen_rtx_SET (stack_pointer_rtx,
20450 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20451 RTX_FRAME_RELATED_P (tmp) = 1;
20452 tmp = emit_insn (tmp);
20453 if (!return_in_pc)
20454 {
20455 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20456 stack_pointer_rtx, stack_pointer_rtx);
20457 }
20458
20459 dwarf = NULL_RTX;
20460
20461 if (((num_regs % 2) == 1 && !return_in_pc)
20462 || ((num_regs % 2) == 0 && return_in_pc))
20463 {
20464 /* Scan for the single register to be popped. Skip until the saved
20465 register is found. */
20466 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20467
20468 /* Gen LDR with post increment here. */
20469 tmp1 = gen_rtx_MEM (SImode,
20470 gen_rtx_POST_INC (SImode,
20471 stack_pointer_rtx));
20472 set_mem_alias_set (tmp1, get_frame_alias_set ());
20473
20474 reg = gen_rtx_REG (SImode, j);
20475 tmp = gen_rtx_SET (reg, tmp1);
20476 RTX_FRAME_RELATED_P (tmp) = 1;
20477 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20478
20479 if (return_in_pc)
20480 {
20481 /* If return_in_pc, j must be PC_REGNUM. */
20482 gcc_assert (j == PC_REGNUM);
20483 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20484 XVECEXP (par, 0, 0) = ret_rtx;
20485 XVECEXP (par, 0, 1) = tmp;
20486 par = emit_jump_insn (par);
20487 }
20488 else
20489 {
20490 par = emit_insn (tmp);
20491 REG_NOTES (par) = dwarf;
20492 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20493 stack_pointer_rtx, stack_pointer_rtx);
20494 }
20495
20496 }
20497 else if ((num_regs % 2) == 1 && return_in_pc)
20498 {
20499 /* There are 2 registers to be popped. So, generate the pattern
20500 pop_multiple_with_stack_update_and_return to pop in PC. */
20501 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20502 }
20503
20504 return;
20505 }
20506
20507 /* LDRD in ARM mode needs consecutive registers as operands. This function
20508 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20509 offset addressing and then generates one separate stack update. This provides
20510 more scheduling freedom, compared to writeback on every load. However,
20511 if the function returns using load into PC directly
20512 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20513 before the last load. TODO: Add a peephole optimization to recognize
20514 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20515 peephole optimization to merge the load at stack-offset zero
20516 with the stack update instruction using load with writeback
20517 in post-index addressing mode. */
20518 static void
20519 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20520 {
20521 int j = 0;
20522 int offset = 0;
20523 rtx par = NULL_RTX;
20524 rtx dwarf = NULL_RTX;
20525 rtx tmp, mem;
20526
20527 /* Restore saved registers. */
20528 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20529 j = 0;
20530 while (j <= LAST_ARM_REGNUM)
20531 if (saved_regs_mask & (1 << j))
20532 {
20533 if ((j % 2) == 0
20534 && (saved_regs_mask & (1 << (j + 1)))
20535 && (j + 1) != PC_REGNUM)
20536 {
20537 /* Current register and next register form register pair for which
20538 LDRD can be generated. PC is always the last register popped, and
20539 we handle it separately. */
20540 if (offset > 0)
20541 mem = gen_frame_mem (DImode,
20542 plus_constant (Pmode,
20543 stack_pointer_rtx,
20544 offset));
20545 else
20546 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20547
20548 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20549 tmp = emit_insn (tmp);
20550 RTX_FRAME_RELATED_P (tmp) = 1;
20551
20552 /* Generate dwarf info. */
20553
20554 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20555 gen_rtx_REG (SImode, j),
20556 NULL_RTX);
20557 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20558 gen_rtx_REG (SImode, j + 1),
20559 dwarf);
20560
20561 REG_NOTES (tmp) = dwarf;
20562
20563 offset += 8;
20564 j += 2;
20565 }
20566 else if (j != PC_REGNUM)
20567 {
20568 /* Emit a single word load. */
20569 if (offset > 0)
20570 mem = gen_frame_mem (SImode,
20571 plus_constant (Pmode,
20572 stack_pointer_rtx,
20573 offset));
20574 else
20575 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20576
20577 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20578 tmp = emit_insn (tmp);
20579 RTX_FRAME_RELATED_P (tmp) = 1;
20580
20581 /* Generate dwarf info. */
20582 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20583 gen_rtx_REG (SImode, j),
20584 NULL_RTX);
20585
20586 offset += 4;
20587 j += 1;
20588 }
20589 else /* j == PC_REGNUM */
20590 j++;
20591 }
20592 else
20593 j++;
20594
20595 /* Update the stack. */
20596 if (offset > 0)
20597 {
20598 tmp = gen_rtx_SET (stack_pointer_rtx,
20599 plus_constant (Pmode,
20600 stack_pointer_rtx,
20601 offset));
20602 tmp = emit_insn (tmp);
20603 arm_add_cfa_adjust_cfa_note (tmp, offset,
20604 stack_pointer_rtx, stack_pointer_rtx);
20605 offset = 0;
20606 }
20607
20608 if (saved_regs_mask & (1 << PC_REGNUM))
20609 {
20610 /* Only PC is to be popped. */
20611 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20612 XVECEXP (par, 0, 0) = ret_rtx;
20613 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20614 gen_frame_mem (SImode,
20615 gen_rtx_POST_INC (SImode,
20616 stack_pointer_rtx)));
20617 RTX_FRAME_RELATED_P (tmp) = 1;
20618 XVECEXP (par, 0, 1) = tmp;
20619 par = emit_jump_insn (par);
20620
20621 /* Generate dwarf info. */
20622 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20623 gen_rtx_REG (SImode, PC_REGNUM),
20624 NULL_RTX);
20625 REG_NOTES (par) = dwarf;
20626 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20627 stack_pointer_rtx, stack_pointer_rtx);
20628 }
20629 }
20630
20631 /* Calculate the size of the return value that is passed in registers. */
20632 static unsigned
20633 arm_size_return_regs (void)
20634 {
20635 machine_mode mode;
20636
20637 if (crtl->return_rtx != 0)
20638 mode = GET_MODE (crtl->return_rtx);
20639 else
20640 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20641
20642 return GET_MODE_SIZE (mode);
20643 }
20644
20645 /* Return true if the current function needs to save/restore LR. */
20646 static bool
20647 thumb_force_lr_save (void)
20648 {
20649 return !cfun->machine->lr_save_eliminated
20650 && (!leaf_function_p ()
20651 || thumb_far_jump_used_p ()
20652 || df_regs_ever_live_p (LR_REGNUM));
20653 }
20654
20655 /* We do not know if r3 will be available because
20656 we do have an indirect tailcall happening in this
20657 particular case. */
20658 static bool
20659 is_indirect_tailcall_p (rtx call)
20660 {
20661 rtx pat = PATTERN (call);
20662
20663 /* Indirect tail call. */
20664 pat = XVECEXP (pat, 0, 0);
20665 if (GET_CODE (pat) == SET)
20666 pat = SET_SRC (pat);
20667
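  /* The call wraps a MEM whose address is either a SYMBOL_REF (direct call)
     or a register (indirect call).  */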
20668 pat = XEXP (XEXP (pat, 0), 0);
20669 return REG_P (pat);
20670 }
20671
20672 /* Return true if r3 is used by any of the tail call insns in the
20673 current function. */
20674 static bool
20675 any_sibcall_could_use_r3 (void)
20676 {
20677 edge_iterator ei;
20678 edge e;
20679
20680 if (!crtl->tail_call_emit)
20681 return false;
20682 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20683 if (e->flags & EDGE_SIBCALL)
20684 {
20685 rtx call = BB_END (e->src);
20686 if (!CALL_P (call))
20687 call = prev_nonnote_nondebug_insn (call);
20688 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
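      /* r3 is used if it is explicitly passed to the callee (recorded in the
	 call's CALL_INSN_FUNCTION_USAGE) or, conservatively, if the call
	 target address is held in a register.  */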
20689 if (find_regno_fusage (call, USE, 3)
20690 || is_indirect_tailcall_p (call))
20691 return true;
20692 }
20693 return false;
20694 }
20695
20696
20697 /* Compute the distance from register FROM to register TO.
20698 These can be the arg pointer (26), the soft frame pointer (25),
20699 the stack pointer (13) or the hard frame pointer (11).
20700 In thumb mode r7 is used as the soft frame pointer, if needed.
20701 Typical stack layout looks like this:
20702
20703        old stack pointer -> |    |
20704                               ----
20705                             |    | \
20706                             |    |   saved arguments for
20707                             |    |   vararg functions
20708                             |    | /
20709                               --
20710    hard FP & arg pointer -> |    | \
20711                             |    |   stack
20712                             |    |   frame
20713                             |    | /
20714                               --
20715                             |    | \
20716                             |    |   call saved
20717                             |    |   registers
20718       soft frame pointer -> |    | /
20719                               --
20720                             |    | \
20721                             |    |   local
20722                             |    |   variables
20723      locals base pointer -> |    | /
20724                               --
20725                             |    | \
20726                             |    |   outgoing
20727                             |    |   arguments
20728    current stack pointer -> |    | /
20729                               --
20730
20731 For a given function some or all of these stack components
20732 may not be needed, giving rise to the possibility of
20733 eliminating some of the registers.
20734
20735 The values returned by this function must reflect the behavior
20736 of arm_expand_prologue() and arm_compute_save_reg_mask().
20737
20738 The sign of the number returned reflects the direction of stack
20739 growth, so the values are positive for all eliminations except
20740 from the soft frame pointer to the hard frame pointer.
20741
20742 SFP may point just inside the local variables block to ensure correct
20743 alignment. */
20744
20745
20746 /* Calculate stack offsets. These are used to calculate register elimination
20747 offsets and in prologue/epilogue code. Also calculates which registers
20748 should be saved. */
20749
20750 static arm_stack_offsets *
20751 arm_get_frame_offsets (void)
20752 {
20753 struct arm_stack_offsets *offsets;
20754 unsigned long func_type;
20755 int leaf;
20756 int saved;
20757 int core_saved;
20758 HOST_WIDE_INT frame_size;
20759 int i;
20760
20761 offsets = &cfun->machine->stack_offsets;
20762
20763 /* We need to know if we are a leaf function. Unfortunately, it
20764 is possible to be called after start_sequence has been called,
20765 which causes get_insns to return the insns for the sequence,
20766 not the function, which will cause leaf_function_p to return
20767 the incorrect result.
20768
20769 To work around this we cache the computed offsets; we only need to know about leaf functions once reload has completed, and the
20770 frame size cannot be changed after that time, so we can safely
20771 use the cached value. */
20772
20773 if (reload_completed)
20774 return offsets;
20775
20776 /* Initially this is the size of the local variables. It will be translated
20777 into an offset once we have determined the size of preceding data. */
20778 frame_size = ROUND_UP_WORD (get_frame_size ());
20779
20780 leaf = leaf_function_p ();
20781
20782 /* Space for variadic functions. */
20783 offsets->saved_args = crtl->args.pretend_args_size;
20784
20785 /* In Thumb mode this is incorrect, but never used. */
20786 offsets->frame
20787 = (offsets->saved_args
20788 + arm_compute_static_chain_stack_bytes ()
20789 + (frame_pointer_needed ? 4 : 0));
20790
20791 if (TARGET_32BIT)
20792 {
20793 unsigned int regno;
20794
20795 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20796 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20797 saved = core_saved;
20798
20799 /* We know that SP will be doubleword aligned on entry, and we must
20800 preserve that condition at any subroutine call. We also require the
20801 soft frame pointer to be doubleword aligned. */
20802
20803 if (TARGET_REALLY_IWMMXT)
20804 {
20805 /* Check for the call-saved iWMMXt registers. */
20806 for (regno = FIRST_IWMMXT_REGNUM;
20807 regno <= LAST_IWMMXT_REGNUM;
20808 regno++)
20809 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20810 saved += 8;
20811 }
20812
20813 func_type = arm_current_func_type ();
20814 /* Space for saved VFP registers. */
20815 if (! IS_VOLATILE (func_type)
20816 && TARGET_HARD_FLOAT && TARGET_VFP)
20817 saved += arm_get_vfp_saved_size ();
20818 }
20819 else /* TARGET_THUMB1 */
20820 {
20821 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20822 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20823 saved = core_saved;
20824 if (TARGET_BACKTRACE)
20825 saved += 16;
20826 }
20827
20828 /* Saved registers include the stack frame. */
20829 offsets->saved_regs
20830 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20831 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20832
20833 /* A leaf function does not need any stack alignment if it has nothing
20834 on the stack. */
20835 if (leaf && frame_size == 0
20836 /* However if it calls alloca(), we have a dynamically allocated
20837 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20838 && ! cfun->calls_alloca)
20839 {
20840 offsets->outgoing_args = offsets->soft_frame;
20841 offsets->locals_base = offsets->soft_frame;
20842 return offsets;
20843 }
20844
20845 /* Ensure SFP has the correct alignment. */
20846 if (ARM_DOUBLEWORD_ALIGN
20847 && (offsets->soft_frame & 7))
20848 {
20849 offsets->soft_frame += 4;
20850 /* Try to align stack by pushing an extra reg. Don't bother doing this
20851 when there is a stack frame as the alignment will be rolled into
20852 the normal stack adjustment. */
20853 if (frame_size + crtl->outgoing_args_size == 0)
20854 {
20855 int reg = -1;
20856
20857 /* Register r3 is caller-saved. Normally it does not need to be
20858 saved on entry by the prologue. However if we choose to save
20859 it for padding then we may confuse the compiler into thinking
20860 a prologue sequence is required when in fact it is not. This
20861 will occur when shrink-wrapping if r3 is used as a scratch
20862 register and there are no other callee-saved writes.
20863
20864 This situation can be avoided when other callee-saved registers
20865 are available: r3 is then not mandatory, and we can choose a
20866 callee-saved register for padding instead. */
20867 bool prefer_callee_reg_p = false;
20868
20869 /* If it is safe to use r3, then do so. This sometimes
20870 generates better code on Thumb-2 by avoiding the need to
20871 use 32-bit push/pop instructions. */
20872 if (! any_sibcall_could_use_r3 ()
20873 && arm_size_return_regs () <= 12
20874 && (offsets->saved_regs_mask & (1 << 3)) == 0
20875 && (TARGET_THUMB2
20876 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20877 {
20878 reg = 3;
20879 if (!TARGET_THUMB2)
20880 prefer_callee_reg_p = true;
20881 }
20882 if (reg == -1
20883 || prefer_callee_reg_p)
20884 {
20885 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20886 {
20887 /* Avoid fixed registers; they may be changed at
20888 arbitrary times so it's unsafe to restore them
20889 during the epilogue. */
20890 if (!fixed_regs[i]
20891 && (offsets->saved_regs_mask & (1 << i)) == 0)
20892 {
20893 reg = i;
20894 break;
20895 }
20896 }
20897 }
20898
20899 if (reg != -1)
20900 {
20901 offsets->saved_regs += 4;
20902 offsets->saved_regs_mask |= (1 << reg);
20903 }
20904 }
20905 }
20906
20907 offsets->locals_base = offsets->soft_frame + frame_size;
20908 offsets->outgoing_args = (offsets->locals_base
20909 + crtl->outgoing_args_size);
20910
20911 if (ARM_DOUBLEWORD_ALIGN)
20912 {
20913 /* Ensure SP remains doubleword aligned. */
20914 if (offsets->outgoing_args & 7)
20915 offsets->outgoing_args += 4;
20916 gcc_assert (!(offsets->outgoing_args & 7));
20917 }
20918
20919 return offsets;
20920 }
20921
20922
20923 /* Calculate the relative offsets for the different stack pointers. Positive
20924 offsets are in the direction of stack growth. */
20925
20926 HOST_WIDE_INT
20927 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20928 {
20929 arm_stack_offsets *offsets;
20930
20931 offsets = arm_get_frame_offsets ();
20932
20933 /* OK, now we have enough information to compute the distances.
20934 There must be an entry in these switch tables for each pair
20935 of registers in ELIMINABLE_REGS, even if some of the entries
20936 seem to be redundant or useless. */
20937 switch (from)
20938 {
20939 case ARG_POINTER_REGNUM:
20940 switch (to)
20941 {
20942 case THUMB_HARD_FRAME_POINTER_REGNUM:
20943 return 0;
20944
20945 case FRAME_POINTER_REGNUM:
20946 /* This is the reverse of the soft frame pointer
20947 to hard frame pointer elimination below. */
20948 return offsets->soft_frame - offsets->saved_args;
20949
20950 case ARM_HARD_FRAME_POINTER_REGNUM:
20951 /* This is only non-zero in the case where the static chain register
20952 is stored above the frame. */
20953 return offsets->frame - offsets->saved_args - 4;
20954
20955 case STACK_POINTER_REGNUM:
20956 /* If nothing has been pushed on the stack at all
20957 then this will return -4. This *is* correct! */
20958 return offsets->outgoing_args - (offsets->saved_args + 4);
20959
20960 default:
20961 gcc_unreachable ();
20962 }
20963 gcc_unreachable ();
20964
20965 case FRAME_POINTER_REGNUM:
20966 switch (to)
20967 {
20968 case THUMB_HARD_FRAME_POINTER_REGNUM:
20969 return 0;
20970
20971 case ARM_HARD_FRAME_POINTER_REGNUM:
20972 /* The hard frame pointer points to the top entry in the
20973 stack frame. The soft frame pointer to the bottom entry
20974 in the stack frame. If there is no stack frame at all,
20975 then they are identical. */
20976
20977 return offsets->frame - offsets->soft_frame;
20978
20979 case STACK_POINTER_REGNUM:
20980 return offsets->outgoing_args - offsets->soft_frame;
20981
20982 default:
20983 gcc_unreachable ();
20984 }
20985 gcc_unreachable ();
20986
20987 default:
20988 /* You cannot eliminate from the stack pointer.
20989 In theory you could eliminate from the hard frame
20990 pointer to the stack pointer, but this will never
20991 happen, since if a stack frame is not needed the
20992 hard frame pointer will never be used. */
20993 gcc_unreachable ();
20994 }
20995 }
20996
20997 /* Given FROM and TO register numbers, say whether this elimination is
20998 allowed. Frame pointer elimination is automatically handled.
20999
21000 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21001 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21002 pointer, we must eliminate FRAME_POINTER_REGNUM into
21003 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21004 ARG_POINTER_REGNUM. */
21005
21006 bool
21007 arm_can_eliminate (const int from, const int to)
21008 {
21009 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21010 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21011 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21012 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21013 true);
21014 }
21015
21016 /* Emit RTL to save coprocessor registers on function entry. Returns the
21017 number of bytes pushed. */
21018
21019 static int
21020 arm_save_coproc_regs(void)
21021 {
21022 int saved_size = 0;
21023 unsigned reg;
21024 unsigned start_reg;
21025 rtx insn;
21026
21027 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21028 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21029 {
21030 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21031 insn = gen_rtx_MEM (V2SImode, insn);
21032 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21033 RTX_FRAME_RELATED_P (insn) = 1;
21034 saved_size += 8;
21035 }
21036
21037 if (TARGET_HARD_FLOAT && TARGET_VFP)
21038 {
21039 start_reg = FIRST_VFP_REGNUM;
21040
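      /* Emit one FSTMD per run of consecutive D registers that need saving;
	 START_REG marks the beginning of the current run.  */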
21041 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21042 {
21043 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21044 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21045 {
21046 if (start_reg != reg)
21047 saved_size += vfp_emit_fstmd (start_reg,
21048 (reg - start_reg) / 2);
21049 start_reg = reg + 2;
21050 }
21051 }
21052 if (start_reg != reg)
21053 saved_size += vfp_emit_fstmd (start_reg,
21054 (reg - start_reg) / 2);
21055 }
21056 return saved_size;
21057 }
21058
21059
21060 /* Set the Thumb frame pointer from the stack pointer. */
21061
21062 static void
21063 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21064 {
21065 HOST_WIDE_INT amount;
21066 rtx insn, dwarf;
21067
21068 amount = offsets->outgoing_args - offsets->locals_base;
21069 if (amount < 1024)
21070 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21071 stack_pointer_rtx, GEN_INT (amount)));
21072 else
21073 {
21074 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21075 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21076 expects the first two operands to be the same. */
21077 if (TARGET_THUMB2)
21078 {
21079 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21080 stack_pointer_rtx,
21081 hard_frame_pointer_rtx));
21082 }
21083 else
21084 {
21085 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21086 hard_frame_pointer_rtx,
21087 stack_pointer_rtx));
21088 }
21089 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21090 plus_constant (Pmode, stack_pointer_rtx, amount));
21091 RTX_FRAME_RELATED_P (dwarf) = 1;
21092 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21093 }
21094
21095 RTX_FRAME_RELATED_P (insn) = 1;
21096 }
21097
21098 struct scratch_reg {
21099 rtx reg;
21100 bool saved;
21101 };
21102
21103 /* Return a short-lived scratch register for use as a 2nd scratch register on
21104 function entry after the registers are saved in the prologue. This register
21105 must be released by means of release_scratch_register_on_entry. IP is not
21106 considered since it is always used as the 1st scratch register if available.
21107
21108 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21109 mask of live registers. */
21110
21111 static void
21112 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21113 unsigned long live_regs)
21114 {
21115 int regno = -1;
21116
21117 sr->saved = false;
21118
21119 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21120 regno = LR_REGNUM;
21121 else
21122 {
21123 unsigned int i;
21124
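      /* Prefer a low call-saved register that is already being saved by the
	 prologue; it can then be clobbered freely.  */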
21125 for (i = 4; i < 11; i++)
21126 if (regno1 != i && (live_regs & (1 << i)) != 0)
21127 {
21128 regno = i;
21129 break;
21130 }
21131
21132 if (regno < 0)
21133 {
21134 /* If IP is used as the 1st scratch register for a nested function,
21135 then either r3 wasn't available or is used to preserve IP. */
21136 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21137 regno1 = 3;
21138 regno = (regno1 == 3 ? 2 : 3);
21139 sr->saved
21140 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21141 regno);
21142 }
21143 }
21144
21145 sr->reg = gen_rtx_REG (SImode, regno);
21146 if (sr->saved)
21147 {
21148 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21149 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21150 rtx x = gen_rtx_SET (stack_pointer_rtx,
21151 plus_constant (Pmode, stack_pointer_rtx, -4));
21152 RTX_FRAME_RELATED_P (insn) = 1;
21153 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21154 }
21155 }
21156
21157 /* Release a scratch register obtained from the preceding function. */
21158
21159 static void
21160 release_scratch_register_on_entry (struct scratch_reg *sr)
21161 {
21162 if (sr->saved)
21163 {
21164 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21165 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21166 rtx x = gen_rtx_SET (stack_pointer_rtx,
21167 plus_constant (Pmode, stack_pointer_rtx, 4));
21168 RTX_FRAME_RELATED_P (insn) = 1;
21169 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21170 }
21171 }
21172
21173 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21174
21175 #if PROBE_INTERVAL > 4096
21176 #error Cannot use indexed addressing mode for stack probing
21177 #endif
21178
21179 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21180 inclusive. These are offsets from the current stack pointer. REGNO1
21181 is the index number of the 1st scratch register and LIVE_REGS is the
21182 mask of live registers. */
21183
21184 static void
21185 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21186 unsigned int regno1, unsigned long live_regs)
21187 {
21188 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21189
21190 /* See if we have a constant small number of probes to generate. If so,
21191 that's the easy case. */
21192 if (size <= PROBE_INTERVAL)
21193 {
21194 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21195 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21196 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21197 }
21198
21199 /* The run-time loop is made up of 10 insns in the generic case while the
21200 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21201 else if (size <= 5 * PROBE_INTERVAL)
21202 {
21203 HOST_WIDE_INT i, rem;
21204
21205 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21206 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21207 emit_stack_probe (reg1);
21208
21209 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21210 it exceeds SIZE. If only two probes are needed, this will not
21211 generate any code. Then probe at FIRST + SIZE. */
21212 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21213 {
21214 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21215 emit_stack_probe (reg1);
21216 }
21217
21218 rem = size - (i - PROBE_INTERVAL);
21219 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21220 {
21221 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21222 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21223 }
21224 else
21225 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21226 }
21227
21228 /* Otherwise, do the same as above, but in a loop. Note that we must be
21229 extra careful with variables wrapping around because we might be at
21230 the very top (or the very bottom) of the address space and we have
21231 to be able to handle this case properly; in particular, we use an
21232 equality test for the loop condition. */
21233 else
21234 {
21235 HOST_WIDE_INT rounded_size;
21236 struct scratch_reg sr;
21237
21238 get_scratch_register_on_entry (&sr, regno1, live_regs);
21239
21240 emit_move_insn (reg1, GEN_INT (first));
21241
21242
21243 /* Step 1: round SIZE to the previous multiple of the interval. */
21244
21245 rounded_size = size & -PROBE_INTERVAL;
21246 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21247
21248
21249 /* Step 2: compute initial and final value of the loop counter. */
21250
21251 /* TEST_ADDR = SP + FIRST. */
21252 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21253
21254 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21255 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21256
21257
21258 /* Step 3: the loop
21259
21260 do
21261 {
21262 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21263 probe at TEST_ADDR
21264 }
21265 while (TEST_ADDR != LAST_ADDR)
21266
21267 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21268 until it is equal to ROUNDED_SIZE. */
21269
21270 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21271
21272
21273 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21274 that SIZE is equal to ROUNDED_SIZE. */
21275
21276 if (size != rounded_size)
21277 {
21278 HOST_WIDE_INT rem = size - rounded_size;
21279
21280 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21281 {
21282 emit_set_insn (sr.reg,
21283 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21284 emit_stack_probe (plus_constant (Pmode, sr.reg,
21285 PROBE_INTERVAL - rem));
21286 }
21287 else
21288 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21289 }
21290
21291 release_scratch_register_on_entry (&sr);
21292 }
21293
21294 /* Make sure nothing is scheduled before we are done. */
21295 emit_insn (gen_blockage ());
21296 }
21297
21298 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21299 absolute addresses. */
21300
21301 const char *
21302 output_probe_stack_range (rtx reg1, rtx reg2)
21303 {
21304 static int labelno = 0;
21305 char loop_lab[32];
21306 rtx xops[2];
21307
21308 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21309
21310 /* Loop. */
21311 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21312
21313 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21314 xops[0] = reg1;
21315 xops[1] = GEN_INT (PROBE_INTERVAL);
21316 output_asm_insn ("sub\t%0, %0, %1", xops);
21317
21318 /* Probe at TEST_ADDR. */
21319 output_asm_insn ("str\tr0, [%0, #0]", xops);
21320
21321 /* Test if TEST_ADDR == LAST_ADDR. */
21322 xops[1] = reg2;
21323 output_asm_insn ("cmp\t%0, %1", xops);
21324
21325 /* Branch. */
21326 fputs ("\tbne\t", asm_out_file);
21327 assemble_name_raw (asm_out_file, loop_lab);
21328 fputc ('\n', asm_out_file);
21329
21330 return "";
21331 }
21332
21333 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21334 function. */
21335 void
21336 arm_expand_prologue (void)
21337 {
21338 rtx amount;
21339 rtx insn;
21340 rtx ip_rtx;
21341 unsigned long live_regs_mask;
21342 unsigned long func_type;
21343 int fp_offset = 0;
21344 int saved_pretend_args = 0;
21345 int saved_regs = 0;
21346 unsigned HOST_WIDE_INT args_to_push;
21347 HOST_WIDE_INT size;
21348 arm_stack_offsets *offsets;
21349 bool clobber_ip;
21350
21351 func_type = arm_current_func_type ();
21352
21353 /* Naked functions don't have prologues. */
21354 if (IS_NAKED (func_type))
21355 return;
21356
21357 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21358 args_to_push = crtl->args.pretend_args_size;
21359
21360 /* Compute which register we will have to save onto the stack. */
21361 offsets = arm_get_frame_offsets ();
21362 live_regs_mask = offsets->saved_regs_mask;
21363
21364 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21365
21366 if (IS_STACKALIGN (func_type))
21367 {
21368 rtx r0, r1;
21369
21370 /* Handle a word-aligned stack pointer. We generate the following:
21371
21372 mov r0, sp
21373 bic r1, r0, #7
21374 mov sp, r1
21375 <save and restore r0 in normal prologue/epilogue>
21376 mov sp, r0
21377 bx lr
21378
21379 The unwinder doesn't need to know about the stack realignment.
21380 Just tell it we saved SP in r0. */
21381 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21382
21383 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21384 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21385
21386 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21387 RTX_FRAME_RELATED_P (insn) = 1;
21388 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21389
21390 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21391
21392 /* ??? The CFA changes here, which may cause GDB to conclude that it
21393 has entered a different function. That said, the unwind info is
21394 correct, individually, before and after this instruction because
21395 we've described the save of SP, which will override the default
21396 handling of SP as restoring from the CFA. */
21397 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21398 }
21399
21400 /* The static chain register is the same as the IP register. If it is
21401 clobbered when creating the frame, we need to save and restore it. */
21402 clobber_ip = IS_NESTED (func_type)
21403 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21404 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21405 && !df_regs_ever_live_p (LR_REGNUM)
21406 && arm_r3_live_at_start_p ()));
21407
21408 /* Find somewhere to store IP whilst the frame is being created.
21409 We try the following places in order:
21410
21411 1. The last argument register r3 if it is available.
21412 2. A slot on the stack above the frame if there are no
21413 arguments to push onto the stack.
21414 3. Register r3 again, after pushing the argument registers
21415 onto the stack, if this is a varargs function.
21416 4. The last slot on the stack created for the arguments to
21417 push, if this isn't a varargs function.
21418
21419 Note - we only need to tell the dwarf2 backend about the SP
21420 adjustment in the second variant; the static chain register
21421 doesn't need to be unwound, as it doesn't contain a value
21422 inherited from the caller. */
21423 if (clobber_ip)
21424 {
21425 if (!arm_r3_live_at_start_p ())
21426 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21427 else if (args_to_push == 0)
21428 {
21429 rtx addr, dwarf;
21430
21431 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21432 saved_regs += 4;
21433
21434 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21435 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21436 fp_offset = 4;
21437
21438 /* Just tell the dwarf backend that we adjusted SP. */
21439 dwarf = gen_rtx_SET (stack_pointer_rtx,
21440 plus_constant (Pmode, stack_pointer_rtx,
21441 -fp_offset));
21442 RTX_FRAME_RELATED_P (insn) = 1;
21443 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21444 }
21445 else
21446 {
21447 /* Store the args on the stack. */
21448 if (cfun->machine->uses_anonymous_args)
21449 {
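	      /* ARGS_TO_PUSH is a multiple of 4; this mask selects the
		 highest ARGS_TO_PUSH / 4 of the argument registers r0-r3.  */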
21450 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21451 (0xf0 >> (args_to_push / 4)) & 0xf);
21452 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21453 saved_pretend_args = 1;
21454 }
21455 else
21456 {
21457 rtx addr, dwarf;
21458
21459 if (args_to_push == 4)
21460 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21461 else
21462 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21463 plus_constant (Pmode,
21464 stack_pointer_rtx,
21465 -args_to_push));
21466
21467 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21468
21469 /* Just tell the dwarf backend that we adjusted SP. */
21470 dwarf = gen_rtx_SET (stack_pointer_rtx,
21471 plus_constant (Pmode, stack_pointer_rtx,
21472 -args_to_push));
21473 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21474 }
21475
21476 RTX_FRAME_RELATED_P (insn) = 1;
21477 fp_offset = args_to_push;
21478 args_to_push = 0;
21479 }
21480 }
21481
21482 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21483 {
21484 if (IS_INTERRUPT (func_type))
21485 {
21486 /* Interrupt functions must not corrupt any registers.
21487 Creating a frame pointer however, corrupts the IP
21488 register, so we must push it first. */
21489 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21490
21491 /* Do not set RTX_FRAME_RELATED_P on this insn.
21492 The dwarf stack unwinding code only wants to see one
21493 stack decrement per function, and this is not it. If
21494 this instruction is labeled as being part of the frame
21495 creation sequence then dwarf2out_frame_debug_expr will
21496 die when it encounters the assignment of IP to FP
21497 later on, since the use of SP here establishes SP as
21498 the CFA register and not IP.
21499
21500 Anyway this instruction is not really part of the stack
21501 frame creation although it is part of the prologue. */
21502 }
21503
21504 insn = emit_set_insn (ip_rtx,
21505 plus_constant (Pmode, stack_pointer_rtx,
21506 fp_offset));
21507 RTX_FRAME_RELATED_P (insn) = 1;
21508 }
21509
21510 if (args_to_push)
21511 {
21512 /* Push the argument registers, or reserve space for them. */
21513 if (cfun->machine->uses_anonymous_args)
21514 insn = emit_multi_reg_push
21515 ((0xf0 >> (args_to_push / 4)) & 0xf,
21516 (0xf0 >> (args_to_push / 4)) & 0xf);
21517 else
21518 insn = emit_insn
21519 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21520 GEN_INT (- args_to_push)));
21521 RTX_FRAME_RELATED_P (insn) = 1;
21522 }
21523
21524 /* If this is an interrupt service routine, and the link register
21525 is going to be pushed, and we're not generating an extra
21526 push of IP (needed when a frame is needed and the frame layout is APCS),
21527 subtracting four from LR now will mean that the function return
21528 can be done with a single instruction. */
21529 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21530 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21531 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21532 && TARGET_ARM)
21533 {
21534 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21535
21536 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21537 }
21538
21539 if (live_regs_mask)
21540 {
21541 unsigned long dwarf_regs_mask = live_regs_mask;
21542
21543 saved_regs += bit_count (live_regs_mask) * 4;
21544 if (optimize_size && !frame_pointer_needed
21545 && saved_regs == offsets->saved_regs - offsets->saved_args)
21546 {
21547 /* If no coprocessor registers are being pushed and we don't have
21548 to worry about a frame pointer then push extra registers to
21549 create the stack frame. This is done in a way that does not
21550 alter the frame layout, so is independent of the epilogue. */
21551 int n;
21552 int frame;
21553 n = 0;
21554 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21555 n++;
21556 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21557 if (frame && n * 4 >= frame)
21558 {
21559 n = frame / 4;
21560 live_regs_mask |= (1 << n) - 1;
21561 saved_regs += frame;
21562 }
21563 }
21564
21565 if (TARGET_LDRD
21566 && current_tune->prefer_ldrd_strd
21567 && !optimize_function_for_size_p (cfun))
21568 {
21569 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21570 if (TARGET_THUMB2)
21571 thumb2_emit_strd_push (live_regs_mask);
21572 else if (TARGET_ARM
21573 && !TARGET_APCS_FRAME
21574 && !IS_INTERRUPT (func_type))
21575 arm_emit_strd_push (live_regs_mask);
21576 else
21577 {
21578 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21579 RTX_FRAME_RELATED_P (insn) = 1;
21580 }
21581 }
21582 else
21583 {
21584 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21585 RTX_FRAME_RELATED_P (insn) = 1;
21586 }
21587 }
21588
21589 if (! IS_VOLATILE (func_type))
21590 saved_regs += arm_save_coproc_regs ();
21591
21592 if (frame_pointer_needed && TARGET_ARM)
21593 {
21594 /* Create the new frame pointer. */
21595 if (TARGET_APCS_FRAME)
21596 {
21597 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21598 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21599 RTX_FRAME_RELATED_P (insn) = 1;
21600 }
21601 else
21602 {
21603 insn = GEN_INT (saved_regs - (4 + fp_offset));
21604 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21605 stack_pointer_rtx, insn));
21606 RTX_FRAME_RELATED_P (insn) = 1;
21607 }
21608 }
21609
21610 size = offsets->outgoing_args - offsets->saved_args;
21611 if (flag_stack_usage_info)
21612 current_function_static_stack_size = size;
21613
21614 /* If this isn't an interrupt service routine and we have a frame, then do
21615 stack checking. We use IP as the first scratch register, except for the
21616 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21617 if (!IS_INTERRUPT (func_type)
21618 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21619 {
21620 unsigned int regno;
21621
21622 if (!IS_NESTED (func_type) || clobber_ip)
21623 regno = IP_REGNUM;
21624 else if (df_regs_ever_live_p (LR_REGNUM))
21625 regno = LR_REGNUM;
21626 else
21627 regno = 3;
21628
21629 if (crtl->is_leaf && !cfun->calls_alloca)
21630 {
21631 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21632 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21633 size - STACK_CHECK_PROTECT,
21634 regno, live_regs_mask);
21635 }
21636 else if (size > 0)
21637 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21638 regno, live_regs_mask);
21639 }
21640
21641 /* Recover the static chain register. */
21642 if (clobber_ip)
21643 {
21644 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21645 insn = gen_rtx_REG (SImode, 3);
21646 else
21647 {
21648 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21649 insn = gen_frame_mem (SImode, insn);
21650 }
21651 emit_set_insn (ip_rtx, insn);
21652 emit_insn (gen_force_register_use (ip_rtx));
21653 }
21654
21655 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21656 {
21657 /* This add can produce multiple insns for a large constant, so we
21658 need to get tricky. */
21659 rtx_insn *last = get_last_insn ();
21660
21661 amount = GEN_INT (offsets->saved_args + saved_regs
21662 - offsets->outgoing_args);
21663
21664 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21665 amount));
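      /* Mark every insn emitted for this stack adjustment as frame-related.  */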
21666 do
21667 {
21668 last = last ? NEXT_INSN (last) : get_insns ();
21669 RTX_FRAME_RELATED_P (last) = 1;
21670 }
21671 while (last != insn);
21672
21673 /* If the frame pointer is needed, emit a special barrier that
21674 will prevent the scheduler from moving stores to the frame
21675 before the stack adjustment. */
21676 if (frame_pointer_needed)
21677 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21678 hard_frame_pointer_rtx));
21679 }
21680
21681
21682 if (frame_pointer_needed && TARGET_THUMB2)
21683 thumb_set_frame_pointer (offsets);
21684
21685 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21686 {
21687 unsigned long mask;
21688
21689 mask = live_regs_mask;
21690 mask &= THUMB2_WORK_REGS;
21691 if (!IS_NESTED (func_type))
21692 mask |= (1 << IP_REGNUM);
21693 arm_load_pic_register (mask);
21694 }
21695
21696 /* If we are profiling, make sure no instructions are scheduled before
21697 the call to mcount. Similarly if the user has requested no
21698 scheduling in the prolog. Similarly if we want non-call exceptions
21699 using the EABI unwinder, to prevent faulting instructions from being
21700 swapped with a stack adjustment. */
21701 if (crtl->profile || !TARGET_SCHED_PROLOG
21702 || (arm_except_unwind_info (&global_options) == UI_TARGET
21703 && cfun->can_throw_non_call_exceptions))
21704 emit_insn (gen_blockage ());
21705
21706 /* If the link register is being kept alive, with the return address in it,
21707 then make sure that it does not get reused by the ce2 pass. */
21708 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21709 cfun->machine->lr_save_eliminated = 1;
21710 }
21711 \f
21712 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21713 static void
21714 arm_print_condition (FILE *stream)
21715 {
21716 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21717 {
21718 /* Branch conversion is not implemented for Thumb-2. */
21719 if (TARGET_THUMB)
21720 {
21721 output_operand_lossage ("predicated Thumb instruction");
21722 return;
21723 }
21724 if (current_insn_predicate != NULL)
21725 {
21726 output_operand_lossage
21727 ("predicated instruction in conditional sequence");
21728 return;
21729 }
21730
21731 fputs (arm_condition_codes[arm_current_cc], stream);
21732 }
21733 else if (current_insn_predicate)
21734 {
21735 enum arm_cond_code code;
21736
21737 if (TARGET_THUMB1)
21738 {
21739 output_operand_lossage ("predicated Thumb instruction");
21740 return;
21741 }
21742
21743 code = get_arm_condition_code (current_insn_predicate);
21744 fputs (arm_condition_codes[code], stream);
21745 }
21746 }
21747
21748
21749 /* Globally reserved letters: acln
21750 Punctuation letters currently used: @_|?().!#
21751 Lower case letters currently used: bcdefhimpqtvwxyz
21752 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21753 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21754
21755 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21756
21757 If CODE is 'd', then the X is a condition operand and the instruction
21758 should only be executed if the condition is true.
21759 if CODE is 'D', then the X is a condition operand and the instruction
21760 should only be executed if the condition is false: however, if the mode
21761 of the comparison is CCFPEmode, then always execute the instruction -- we
21762 do this because in these circumstances !GE does not necessarily imply LT;
21763 in these cases the instruction pattern will take care to make sure that
21764 an instruction containing %d will follow, thereby undoing the effects of
21765 doing this instruction unconditionally.
21766 If CODE is 'N' then X is a floating point operand that must be negated
21767 before output.
21768 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21769 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21770 static void
21771 arm_print_operand (FILE *stream, rtx x, int code)
21772 {
21773 switch (code)
21774 {
21775 case '@':
21776 fputs (ASM_COMMENT_START, stream);
21777 return;
21778
21779 case '_':
21780 fputs (user_label_prefix, stream);
21781 return;
21782
21783 case '|':
21784 fputs (REGISTER_PREFIX, stream);
21785 return;
21786
21787 case '?':
21788 arm_print_condition (stream);
21789 return;
21790
21791 case '.':
21792 /* The current condition code for a condition code setting instruction.
21793 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21794 fputc('s', stream);
21795 arm_print_condition (stream);
21796 return;
21797
21798 case '!':
21799 /* If the instruction is conditionally executed then print
21800 the current condition code, otherwise print 's'. */
21801 gcc_assert (TARGET_THUMB2);
21802 if (current_insn_predicate)
21803 arm_print_condition (stream);
21804 else
21805 fputc('s', stream);
21806 break;
21807
21808 /* %# is a "break" sequence. It doesn't output anything, but is used to
21809 separate e.g. operand numbers from following text, if that text consists
21810 of further digits which we don't want to be part of the operand
21811 number. */
21812 case '#':
21813 return;
21814
21815 case 'N':
21816 {
21817 REAL_VALUE_TYPE r;
21818 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21819 fprintf (stream, "%s", fp_const_from_val (&r));
21820 }
21821 return;
21822
21823 /* An integer or symbol address without a preceding # sign. */
21824 case 'c':
21825 switch (GET_CODE (x))
21826 {
21827 case CONST_INT:
21828 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21829 break;
21830
21831 case SYMBOL_REF:
21832 output_addr_const (stream, x);
21833 break;
21834
21835 case CONST:
21836 if (GET_CODE (XEXP (x, 0)) == PLUS
21837 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21838 {
21839 output_addr_const (stream, x);
21840 break;
21841 }
21842 /* Fall through. */
21843
21844 default:
21845 output_operand_lossage ("Unsupported operand for code '%c'", code);
21846 }
21847 return;
21848
21849 /* An integer that we want to print in HEX. */
21850 case 'x':
21851 switch (GET_CODE (x))
21852 {
21853 case CONST_INT:
21854 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21855 break;
21856
21857 default:
21858 output_operand_lossage ("Unsupported operand for code '%c'", code);
21859 }
21860 return;
21861
21862 case 'B':
21863 if (CONST_INT_P (x))
21864 {
21865 HOST_WIDE_INT val;
21866 val = ARM_SIGN_EXTEND (~INTVAL (x));
21867 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21868 }
21869 else
21870 {
21871 putc ('~', stream);
21872 output_addr_const (stream, x);
21873 }
21874 return;
21875
21876 case 'b':
21877 /* Print the log2 of a CONST_INT. */
21878 {
21879 HOST_WIDE_INT val;
21880
21881 if (!CONST_INT_P (x)
21882 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21883 output_operand_lossage ("Unsupported operand for code '%c'", code);
21884 else
21885 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21886 }
21887 return;
21888
21889 case 'L':
21890 /* The low 16 bits of an immediate constant. */
21891 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21892 return;
21893
21894 case 'i':
21895 fprintf (stream, "%s", arithmetic_instr (x, 1));
21896 return;
21897
21898 case 'I':
21899 fprintf (stream, "%s", arithmetic_instr (x, 0));
21900 return;
21901
21902 case 'S':
21903 {
21904 HOST_WIDE_INT val;
21905 const char *shift;
21906
21907 shift = shift_op (x, &val);
21908
21909 if (shift)
21910 {
21911 fprintf (stream, ", %s ", shift);
21912 if (val == -1)
21913 arm_print_operand (stream, XEXP (x, 1), 0);
21914 else
21915 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21916 }
21917 }
21918 return;
21919
21920 /* An explanation of the 'Q', 'R' and 'H' register operands:
21921
21922 In a pair of registers containing a DI or DF value the 'Q'
21923 operand returns the register number of the register containing
21924 the least significant part of the value. The 'R' operand returns
21925 the register number of the register containing the most
21926 significant part of the value.
21927
21928 The 'H' operand returns the higher of the two register numbers.
21929 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21930 same as the 'Q' operand, since the most significant part of the
21931 value is held in the lower number register. The reverse is true
21932 on systems where WORDS_BIG_ENDIAN is false.
21933
21934 The purpose of these operands is to distinguish between cases
21935 where the endian-ness of the values is important (for example
21936 when they are added together), and cases where the endian-ness
21937 is irrelevant, but the order of register operations is important.
21938 For example when loading a value from memory into a register
21939 pair, the endian-ness does not matter. Provided that the value
21940 from the lower memory address is put into the lower numbered
21941 register, and the value from the higher address is put into the
21942 higher numbered register, the load will work regardless of whether
21943 the value being loaded is big-wordian or little-wordian. The
21944 order of the two register loads can matter however, if the address
21945 of the memory location is actually held in one of the registers
21946 being overwritten by the load.
21947
21948 The 'Q' and 'R' constraints are also available for 64-bit
21949 constants. */
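 /* A worked example, assuming little-endian word order: for a DImode
 value held in the register pair r2:r3, '%Q' prints r2 (the least
 significant word), '%R' prints r3 (the most significant word) and
 '%H' prints r3 (the higher register number).  With WORDS_BIG_ENDIAN
 the outputs of '%Q' and '%R' swap, while '%H' still prints r3.  */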
21950 case 'Q':
21951 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21952 {
21953 rtx part = gen_lowpart (SImode, x);
21954 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21955 return;
21956 }
21957
21958 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21959 {
21960 output_operand_lossage ("invalid operand for code '%c'", code);
21961 return;
21962 }
21963
21964 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21965 return;
21966
21967 case 'R':
21968 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21969 {
21970 machine_mode mode = GET_MODE (x);
21971 rtx part;
21972
21973 if (mode == VOIDmode)
21974 mode = DImode;
21975 part = gen_highpart_mode (SImode, mode, x);
21976 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21977 return;
21978 }
21979
21980 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21981 {
21982 output_operand_lossage ("invalid operand for code '%c'", code);
21983 return;
21984 }
21985
21986 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21987 return;
21988
21989 case 'H':
21990 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21991 {
21992 output_operand_lossage ("invalid operand for code '%c'", code);
21993 return;
21994 }
21995
21996 asm_fprintf (stream, "%r", REGNO (x) + 1);
21997 return;
21998
21999 case 'J':
22000 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22001 {
22002 output_operand_lossage ("invalid operand for code '%c'", code);
22003 return;
22004 }
22005
22006 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22007 return;
22008
22009 case 'K':
22010 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22011 {
22012 output_operand_lossage ("invalid operand for code '%c'", code);
22013 return;
22014 }
22015
22016 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22017 return;
22018
22019 case 'm':
22020 asm_fprintf (stream, "%r",
22021 REG_P (XEXP (x, 0))
22022 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22023 return;
22024
22025 case 'M':
22026 asm_fprintf (stream, "{%r-%r}",
22027 REGNO (x),
22028 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22029 return;
22030
22031 /* Like 'M', but writing doubleword vector registers, for use by Neon
22032 insns. */
22033 case 'h':
22034 {
22035 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22036 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22037 if (numregs == 1)
22038 asm_fprintf (stream, "{d%d}", regno);
22039 else
22040 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22041 }
22042 return;
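 /* For example, a 16-byte (quad-word) mode value starting in d8 (q4)
 prints as "{d8-d9}", while an 8-byte mode value in d8 prints as
 "{d8}".  */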
22043
22044 case 'd':
22045 /* CONST_TRUE_RTX means always -- that's the default. */
22046 if (x == const_true_rtx)
22047 return;
22048
22049 if (!COMPARISON_P (x))
22050 {
22051 output_operand_lossage ("invalid operand for code '%c'", code);
22052 return;
22053 }
22054
22055 fputs (arm_condition_codes[get_arm_condition_code (x)],
22056 stream);
22057 return;
22058
22059 case 'D':
22060 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22061 want to do that. */
22062 if (x == const_true_rtx)
22063 {
22064 output_operand_lossage ("instruction never executed");
22065 return;
22066 }
22067 if (!COMPARISON_P (x))
22068 {
22069 output_operand_lossage ("invalid operand for code '%c'", code);
22070 return;
22071 }
22072
22073 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22074 (get_arm_condition_code (x))],
22075 stream);
22076 return;
22077
22078 case 's':
22079 case 'V':
22080 case 'W':
22081 case 'X':
22082 case 'Y':
22083 case 'Z':
22084 /* Former Maverick support, removed after GCC-4.7. */
22085 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22086 return;
22087
22088 case 'U':
22089 if (!REG_P (x)
22090 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22091 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22092 /* Bad value for wCG register number. */
22093 {
22094 output_operand_lossage ("invalid operand for code '%c'", code);
22095 return;
22096 }
22097
22098 else
22099 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22100 return;
22101
22102 /* Print an iWMMXt control register name. */
22103 case 'w':
22104 if (!CONST_INT_P (x)
22105 || INTVAL (x) < 0
22106 || INTVAL (x) >= 16)
22107 /* Bad value for wC register number. */
22108 {
22109 output_operand_lossage ("invalid operand for code '%c'", code);
22110 return;
22111 }
22112
22113 else
22114 {
22115 static const char * wc_reg_names [16] =
22116 {
22117 "wCID", "wCon", "wCSSF", "wCASF",
22118 "wC4", "wC5", "wC6", "wC7",
22119 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22120 "wC12", "wC13", "wC14", "wC15"
22121 };
22122
22123 fputs (wc_reg_names [INTVAL (x)], stream);
22124 }
22125 return;
22126
22127 /* Print the high single-precision register of a VFP double-precision
22128 register. */
22129 case 'p':
22130 {
22131 machine_mode mode = GET_MODE (x);
22132 int regno;
22133
22134 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22135 {
22136 output_operand_lossage ("invalid operand for code '%c'", code);
22137 return;
22138 }
22139
22140 regno = REGNO (x);
22141 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22142 {
22143 output_operand_lossage ("invalid operand for code '%c'", code);
22144 return;
22145 }
22146
22147 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22148 }
22149 return;
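 /* For example, given d5 (which overlays s10 and s11), this prints
 "s11", the high single-precision half.  */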
22150
22151 /* Print a VFP/Neon double precision or quad precision register name. */
22152 case 'P':
22153 case 'q':
22154 {
22155 machine_mode mode = GET_MODE (x);
22156 int is_quad = (code == 'q');
22157 int regno;
22158
22159 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22160 {
22161 output_operand_lossage ("invalid operand for code '%c'", code);
22162 return;
22163 }
22164
22165 if (!REG_P (x)
22166 || !IS_VFP_REGNUM (REGNO (x)))
22167 {
22168 output_operand_lossage ("invalid operand for code '%c'", code);
22169 return;
22170 }
22171
22172 regno = REGNO (x);
22173 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22174 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22175 {
22176 output_operand_lossage ("invalid operand for code '%c'", code);
22177 return;
22178 }
22179
22180 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22181 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22182 }
22183 return;
22184
22185 /* These two codes print the low/high doubleword register of a Neon quad
22186 register, respectively. For pair-structure types, can also print
22187 low/high quadword registers. */
22188 case 'e':
22189 case 'f':
22190 {
22191 machine_mode mode = GET_MODE (x);
22192 int regno;
22193
22194 if ((GET_MODE_SIZE (mode) != 16
22195 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22196 {
22197 output_operand_lossage ("invalid operand for code '%c'", code);
22198 return;
22199 }
22200
22201 regno = REGNO (x);
22202 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22203 {
22204 output_operand_lossage ("invalid operand for code '%c'", code);
22205 return;
22206 }
22207
22208 if (GET_MODE_SIZE (mode) == 16)
22209 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22210 + (code == 'f' ? 1 : 0));
22211 else
22212 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22213 + (code == 'f' ? 1 : 0));
22214 }
22215 return;
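 /* For example, for a 16-byte value in q3 (d6/d7), '%e' prints "d6"
 and '%f' prints "d7"; for a 32-byte pair-structure value held in
 q2/q3, '%e' prints "q2" and '%f' prints "q3".  */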
22216
22217 /* Print a VFPv3 floating-point constant, represented as an integer
22218 index. */
22219 case 'G':
22220 {
22221 int index = vfp3_const_double_index (x);
22222 gcc_assert (index != -1);
22223 fprintf (stream, "%d", index);
22224 }
22225 return;
22226
22227 /* Print bits representing opcode features for Neon.
22228
22229 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22230 and polynomials as unsigned.
22231
22232 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22233
22234 Bit 2 is 1 for rounding functions, 0 otherwise. */
22235
22236 /* Identify the type as 's', 'u', 'p' or 'f'. */
22237 case 'T':
22238 {
22239 HOST_WIDE_INT bits = INTVAL (x);
22240 fputc ("uspf"[bits & 3], stream);
22241 }
22242 return;
22243
22244 /* Likewise, but signed and unsigned integers are both 'i'. */
22245 case 'F':
22246 {
22247 HOST_WIDE_INT bits = INTVAL (x);
22248 fputc ("iipf"[bits & 3], stream);
22249 }
22250 return;
22251
22252 /* As for 'T', but emit 'u' instead of 'p'. */
22253 case 't':
22254 {
22255 HOST_WIDE_INT bits = INTVAL (x);
22256 fputc ("usuf"[bits & 3], stream);
22257 }
22258 return;
22259
22260 /* Bit 2: rounding (vs none). */
22261 case 'O':
22262 {
22263 HOST_WIDE_INT bits = INTVAL (x);
22264 fputs ((bits & 4) != 0 ? "r" : "", stream);
22265 }
22266 return;
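 /* For instance, an operand of 2 (binary 010: unsigned polynomial,
 no rounding) prints 'p' for %T and %F but 'u' for %t, and nothing
 for %O; an operand of 7 (binary 111: float with rounding) prints
 'f' for %T, %F and %t, and "r" for %O.  */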
22267
22268 /* Memory operand for vld1/vst1 instruction. */
22269 case 'A':
22270 {
22271 rtx addr;
22272 bool postinc = FALSE;
22273 rtx postinc_reg = NULL;
22274 unsigned align, memsize, align_bits;
22275
22276 gcc_assert (MEM_P (x));
22277 addr = XEXP (x, 0);
22278 if (GET_CODE (addr) == POST_INC)
22279 {
22280 postinc = 1;
22281 addr = XEXP (addr, 0);
22282 }
22283 if (GET_CODE (addr) == POST_MODIFY)
22284 {
22285 postinc_reg = XEXP (XEXP (addr, 1), 1);
22286 addr = XEXP (addr, 0);
22287 }
22288 asm_fprintf (stream, "[%r", REGNO (addr));
22289
22290 /* We know the alignment of this access, so we can emit a hint in the
22291 instruction (for some alignments) as an aid to the memory subsystem
22292 of the target. */
22293 align = MEM_ALIGN (x) >> 3;
22294 memsize = MEM_SIZE (x);
22295
22296 /* Only certain alignment specifiers are supported by the hardware. */
22297 if (memsize == 32 && (align % 32) == 0)
22298 align_bits = 256;
22299 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22300 align_bits = 128;
22301 else if (memsize >= 8 && (align % 8) == 0)
22302 align_bits = 64;
22303 else
22304 align_bits = 0;
22305
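 /* For example, a 16-byte access to a 128-bit aligned object is
 printed as "[rN:128]", whereas an 8-byte access with only 4-byte
 alignment gets no alignment hint at all.  */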
22306 if (align_bits != 0)
22307 asm_fprintf (stream, ":%d", align_bits);
22308
22309 asm_fprintf (stream, "]");
22310
22311 if (postinc)
22312 fputs ("!", stream);
22313 if (postinc_reg)
22314 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22315 }
22316 return;
22317
22318 case 'C':
22319 {
22320 rtx addr;
22321
22322 gcc_assert (MEM_P (x));
22323 addr = XEXP (x, 0);
22324 gcc_assert (REG_P (addr));
22325 asm_fprintf (stream, "[%r]", REGNO (addr));
22326 }
22327 return;
22328
22329 /* Translate an S register number into a D register number and element index. */
22330 case 'y':
22331 {
22332 machine_mode mode = GET_MODE (x);
22333 int regno;
22334
22335 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22336 {
22337 output_operand_lossage ("invalid operand for code '%c'", code);
22338 return;
22339 }
22340
22341 regno = REGNO (x);
22342 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22343 {
22344 output_operand_lossage ("invalid operand for code '%c'", code);
22345 return;
22346 }
22347
22348 regno = regno - FIRST_VFP_REGNUM;
22349 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22350 }
22351 return;
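 /* For example, s5 is printed as "d2[1]" and s4 as "d2[0]".  */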
22352
22353 case 'v':
22354 gcc_assert (CONST_DOUBLE_P (x));
22355 int result;
22356 result = vfp3_const_double_for_fract_bits (x);
22357 if (result == 0)
22358 result = vfp3_const_double_for_bits (x);
22359 fprintf (stream, "#%d", result);
22360 return;
22361
22362 /* Register specifier for vld1.16/vst1.16. Translate the S register
22363 number into a D register number and element index. */
22364 case 'z':
22365 {
22366 machine_mode mode = GET_MODE (x);
22367 int regno;
22368
22369 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22370 {
22371 output_operand_lossage ("invalid operand for code '%c'", code);
22372 return;
22373 }
22374
22375 regno = REGNO (x);
22376 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22377 {
22378 output_operand_lossage ("invalid operand for code '%c'", code);
22379 return;
22380 }
22381
22382 regno = regno - FIRST_VFP_REGNUM;
22383 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22384 }
22385 return;
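 /* For example, s5 is printed as "d2[2]"; the element index here
 selects a 16-bit lane, so odd-numbered S registers map to lane 2
 of the containing D register and even-numbered ones to lane 0.  */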
22386
22387 default:
22388 if (x == 0)
22389 {
22390 output_operand_lossage ("missing operand");
22391 return;
22392 }
22393
22394 switch (GET_CODE (x))
22395 {
22396 case REG:
22397 asm_fprintf (stream, "%r", REGNO (x));
22398 break;
22399
22400 case MEM:
22401 output_address (GET_MODE (x), XEXP (x, 0));
22402 break;
22403
22404 case CONST_DOUBLE:
22405 {
22406 char fpstr[20];
22407 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22408 sizeof (fpstr), 0, 1);
22409 fprintf (stream, "#%s", fpstr);
22410 }
22411 break;
22412
22413 default:
22414 gcc_assert (GET_CODE (x) != NEG);
22415 fputc ('#', stream);
22416 if (GET_CODE (x) == HIGH)
22417 {
22418 fputs (":lower16:", stream);
22419 x = XEXP (x, 0);
22420 }
22421
22422 output_addr_const (stream, x);
22423 break;
22424 }
22425 }
22426 }
22427 \f
22428 /* Target hook for printing a memory address. */
22429 static void
22430 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22431 {
22432 if (TARGET_32BIT)
22433 {
22434 int is_minus = GET_CODE (x) == MINUS;
22435
22436 if (REG_P (x))
22437 asm_fprintf (stream, "[%r]", REGNO (x));
22438 else if (GET_CODE (x) == PLUS || is_minus)
22439 {
22440 rtx base = XEXP (x, 0);
22441 rtx index = XEXP (x, 1);
22442 HOST_WIDE_INT offset = 0;
22443 if (!REG_P (base)
22444 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22445 {
22446 /* Ensure that BASE is a register (one of them must be).
22447 Also ensure that the SP is not used as an
22448 index register.  */
22449 std::swap (base, index);
22450 }
22451 switch (GET_CODE (index))
22452 {
22453 case CONST_INT:
22454 offset = INTVAL (index);
22455 if (is_minus)
22456 offset = -offset;
22457 asm_fprintf (stream, "[%r, #%wd]",
22458 REGNO (base), offset);
22459 break;
22460
22461 case REG:
22462 asm_fprintf (stream, "[%r, %s%r]",
22463 REGNO (base), is_minus ? "-" : "",
22464 REGNO (index));
22465 break;
22466
22467 case MULT:
22468 case ASHIFTRT:
22469 case LSHIFTRT:
22470 case ASHIFT:
22471 case ROTATERT:
22472 {
22473 asm_fprintf (stream, "[%r, %s%r",
22474 REGNO (base), is_minus ? "-" : "",
22475 REGNO (XEXP (index, 0)));
22476 arm_print_operand (stream, index, 'S');
22477 fputs ("]", stream);
22478 break;
22479 }
22480
22481 default:
22482 gcc_unreachable ();
22483 }
22484 }
22485 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22486 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22487 {
22488 gcc_assert (REG_P (XEXP (x, 0)));
22489
22490 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22491 asm_fprintf (stream, "[%r, #%s%d]!",
22492 REGNO (XEXP (x, 0)),
22493 GET_CODE (x) == PRE_DEC ? "-" : "",
22494 GET_MODE_SIZE (mode));
22495 else
22496 asm_fprintf (stream, "[%r], #%s%d",
22497 REGNO (XEXP (x, 0)),
22498 GET_CODE (x) == POST_DEC ? "-" : "",
22499 GET_MODE_SIZE (mode));
22500 }
22501 else if (GET_CODE (x) == PRE_MODIFY)
22502 {
22503 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22504 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22505 asm_fprintf (stream, "#%wd]!",
22506 INTVAL (XEXP (XEXP (x, 1), 1)));
22507 else
22508 asm_fprintf (stream, "%r]!",
22509 REGNO (XEXP (XEXP (x, 1), 1)));
22510 }
22511 else if (GET_CODE (x) == POST_MODIFY)
22512 {
22513 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22514 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22515 asm_fprintf (stream, "#%wd",
22516 INTVAL (XEXP (XEXP (x, 1), 1)));
22517 else
22518 asm_fprintf (stream, "%r",
22519 REGNO (XEXP (XEXP (x, 1), 1)));
22520 }
22521 else output_addr_const (stream, x);
22522 }
22523 else
22524 {
22525 if (REG_P (x))
22526 asm_fprintf (stream, "[%r]", REGNO (x));
22527 else if (GET_CODE (x) == POST_INC)
22528 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22529 else if (GET_CODE (x) == PLUS)
22530 {
22531 gcc_assert (REG_P (XEXP (x, 0)));
22532 if (CONST_INT_P (XEXP (x, 1)))
22533 asm_fprintf (stream, "[%r, #%wd]",
22534 REGNO (XEXP (x, 0)),
22535 INTVAL (XEXP (x, 1)));
22536 else
22537 asm_fprintf (stream, "[%r, %r]",
22538 REGNO (XEXP (x, 0)),
22539 REGNO (XEXP (x, 1)));
22540 }
22541 else
22542 output_addr_const (stream, x);
22543 }
22544 }
22545 \f
22546 /* Target hook for indicating whether a punctuation character for
22547 TARGET_PRINT_OPERAND is valid. */
22548 static bool
22549 arm_print_operand_punct_valid_p (unsigned char code)
22550 {
22551 return (code == '@' || code == '|' || code == '.'
22552 || code == '(' || code == ')' || code == '#'
22553 || (TARGET_32BIT && (code == '?'))
22554 || (TARGET_THUMB2 && (code == '!'))
22555 || (TARGET_THUMB && (code == '_')));
22556 }
22557 \f
22558 /* Target hook for assembling integer objects. The ARM version needs to
22559 handle word-sized values specially. */
22560 static bool
22561 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22562 {
22563 machine_mode mode;
22564
22565 if (size == UNITS_PER_WORD && aligned_p)
22566 {
22567 fputs ("\t.word\t", asm_out_file);
22568 output_addr_const (asm_out_file, x);
22569
22570 /* Mark symbols as position independent. We only do this in the
22571 .text segment, not in the .data segment. */
22572 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22573 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22574 {
22575 /* See legitimize_pic_address for an explanation of the
22576 TARGET_VXWORKS_RTP check. */
22577 if (!arm_pic_data_is_text_relative
22578 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22579 fputs ("(GOT)", asm_out_file);
22580 else
22581 fputs ("(GOTOFF)", asm_out_file);
22582 }
22583 fputc ('\n', asm_out_file);
22584 return true;
22585 }
22586
22587 mode = GET_MODE (x);
22588
22589 if (arm_vector_mode_supported_p (mode))
22590 {
22591 int i, units;
22592
22593 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22594
22595 units = CONST_VECTOR_NUNITS (x);
22596 size = GET_MODE_UNIT_SIZE (mode);
22597
22598 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22599 for (i = 0; i < units; i++)
22600 {
22601 rtx elt = CONST_VECTOR_ELT (x, i);
22602 assemble_integer
22603 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22604 }
22605 else
22606 for (i = 0; i < units; i++)
22607 {
22608 rtx elt = CONST_VECTOR_ELT (x, i);
22609 assemble_real
22610 (*CONST_DOUBLE_REAL_VALUE (elt), GET_MODE_INNER (mode),
22611 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22612 }
22613
22614 return true;
22615 }
22616
22617 return default_assemble_integer (x, size, aligned_p);
22618 }
22619
22620 static void
22621 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22622 {
22623 section *s;
22624
22625 if (!TARGET_AAPCS_BASED)
22626 {
22627 (is_ctor ?
22628 default_named_section_asm_out_constructor
22629 : default_named_section_asm_out_destructor) (symbol, priority);
22630 return;
22631 }
22632
22633 /* Put these in the .init_array section, using a special relocation. */
22634 if (priority != DEFAULT_INIT_PRIORITY)
22635 {
22636 char buf[18];
22637 sprintf (buf, "%s.%.5u",
22638 is_ctor ? ".init_array" : ".fini_array",
22639 priority);
22640 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22641 }
22642 else if (is_ctor)
22643 s = ctors_section;
22644 else
22645 s = dtors_section;
22646
22647 switch_to_section (s);
22648 assemble_align (POINTER_SIZE);
22649 fputs ("\t.word\t", asm_out_file);
22650 output_addr_const (asm_out_file, symbol);
22651 fputs ("(target1)\n", asm_out_file);
22652 }
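 /* For example, on an AAPCS-based target a constructor with priority
 65 is placed in a section named ".init_array.00065", and the entry
 itself is emitted as a ".word <symbol>(target1)" directive.  */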
22653
22654 /* Add a function to the list of static constructors. */
22655
22656 static void
22657 arm_elf_asm_constructor (rtx symbol, int priority)
22658 {
22659 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22660 }
22661
22662 /* Add a function to the list of static destructors. */
22663
22664 static void
22665 arm_elf_asm_destructor (rtx symbol, int priority)
22666 {
22667 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22668 }
22669 \f
22670 /* A finite state machine takes care of noticing whether or not instructions
22671 can be conditionally executed, and thus decrease execution time and code
22672 size by deleting branch instructions. The fsm is controlled by
22673 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22674
22675 /* The states of the fsm controlling condition codes are:
22676 0: normal, do nothing special
22677 1: make ASM_OUTPUT_OPCODE not output this instruction
22678 2: make ASM_OUTPUT_OPCODE not output this instruction
22679 3: make instructions conditional
22680 4: make instructions conditional
22681
22682 State transitions (state->state by whom under condition):
22683 0 -> 1 final_prescan_insn if the `target' is a label
22684 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22685 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22686 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22687 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22688 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22689 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22690 (the target insn is arm_target_insn).
22691
22692 If the jump clobbers the conditions then we use states 2 and 4.
22693
22694 A similar thing can be done with conditional return insns.
22695
22696 XXX In case the `target' is an unconditional branch, this conditionalising
22697 of the instructions always reduces code size, but not always execution
22698 time. But then, I want to reduce the code size to somewhere near what
22699 /bin/cc produces. */
22700
22701 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22702 instructions. When a COND_EXEC instruction is seen the subsequent
22703 instructions are scanned so that multiple conditional instructions can be
22704 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22705 specify the length and true/false mask for the IT block. These will be
22706 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
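 /* As a concrete illustration of the branch-deletion fsm described
 above, a branch over a single insn such as

 cmp     r0, #0
 beq     .L1
 add     r1, r1, #1
 .L1:

 can be output with the branch deleted and the skipped insn
 conditionalized on the inverse condition:

 cmp     r0, #0
 addne   r1, r1, #1  */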
22707
22708 /* Returns the index of the ARM condition code string in
22709 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22710 COMPARISON should be an rtx like `(eq (...) (...))'. */
22711
22712 enum arm_cond_code
22713 maybe_get_arm_condition_code (rtx comparison)
22714 {
22715 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22716 enum arm_cond_code code;
22717 enum rtx_code comp_code = GET_CODE (comparison);
22718
22719 if (GET_MODE_CLASS (mode) != MODE_CC)
22720 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22721 XEXP (comparison, 1));
22722
22723 switch (mode)
22724 {
22725 case CC_DNEmode: code = ARM_NE; goto dominance;
22726 case CC_DEQmode: code = ARM_EQ; goto dominance;
22727 case CC_DGEmode: code = ARM_GE; goto dominance;
22728 case CC_DGTmode: code = ARM_GT; goto dominance;
22729 case CC_DLEmode: code = ARM_LE; goto dominance;
22730 case CC_DLTmode: code = ARM_LT; goto dominance;
22731 case CC_DGEUmode: code = ARM_CS; goto dominance;
22732 case CC_DGTUmode: code = ARM_HI; goto dominance;
22733 case CC_DLEUmode: code = ARM_LS; goto dominance;
22734 case CC_DLTUmode: code = ARM_CC;
22735
22736 dominance:
22737 if (comp_code == EQ)
22738 return ARM_INVERSE_CONDITION_CODE (code);
22739 if (comp_code == NE)
22740 return code;
22741 return ARM_NV;
22742
22743 case CC_NOOVmode:
22744 switch (comp_code)
22745 {
22746 case NE: return ARM_NE;
22747 case EQ: return ARM_EQ;
22748 case GE: return ARM_PL;
22749 case LT: return ARM_MI;
22750 default: return ARM_NV;
22751 }
22752
22753 case CC_Zmode:
22754 switch (comp_code)
22755 {
22756 case NE: return ARM_NE;
22757 case EQ: return ARM_EQ;
22758 default: return ARM_NV;
22759 }
22760
22761 case CC_Nmode:
22762 switch (comp_code)
22763 {
22764 case NE: return ARM_MI;
22765 case EQ: return ARM_PL;
22766 default: return ARM_NV;
22767 }
22768
22769 case CCFPEmode:
22770 case CCFPmode:
22771 /* We can handle all cases except UNEQ and LTGT. */
22772 switch (comp_code)
22773 {
22774 case GE: return ARM_GE;
22775 case GT: return ARM_GT;
22776 case LE: return ARM_LS;
22777 case LT: return ARM_MI;
22778 case NE: return ARM_NE;
22779 case EQ: return ARM_EQ;
22780 case ORDERED: return ARM_VC;
22781 case UNORDERED: return ARM_VS;
22782 case UNLT: return ARM_LT;
22783 case UNLE: return ARM_LE;
22784 case UNGT: return ARM_HI;
22785 case UNGE: return ARM_PL;
22786 /* UNEQ and LTGT do not have a representation. */
22787 case UNEQ: /* Fall through. */
22788 case LTGT: /* Fall through. */
22789 default: return ARM_NV;
22790 }
22791
22792 case CC_SWPmode:
22793 switch (comp_code)
22794 {
22795 case NE: return ARM_NE;
22796 case EQ: return ARM_EQ;
22797 case GE: return ARM_LE;
22798 case GT: return ARM_LT;
22799 case LE: return ARM_GE;
22800 case LT: return ARM_GT;
22801 case GEU: return ARM_LS;
22802 case GTU: return ARM_CC;
22803 case LEU: return ARM_CS;
22804 case LTU: return ARM_HI;
22805 default: return ARM_NV;
22806 }
22807
22808 case CC_Cmode:
22809 switch (comp_code)
22810 {
22811 case LTU: return ARM_CS;
22812 case GEU: return ARM_CC;
22813 default: return ARM_NV;
22814 }
22815
22816 case CC_CZmode:
22817 switch (comp_code)
22818 {
22819 case NE: return ARM_NE;
22820 case EQ: return ARM_EQ;
22821 case GEU: return ARM_CS;
22822 case GTU: return ARM_HI;
22823 case LEU: return ARM_LS;
22824 case LTU: return ARM_CC;
22825 default: return ARM_NV;
22826 }
22827
22828 case CC_NCVmode:
22829 switch (comp_code)
22830 {
22831 case GE: return ARM_GE;
22832 case LT: return ARM_LT;
22833 case GEU: return ARM_CS;
22834 case LTU: return ARM_CC;
22835 default: return ARM_NV;
22836 }
22837
22838 case CCmode:
22839 switch (comp_code)
22840 {
22841 case NE: return ARM_NE;
22842 case EQ: return ARM_EQ;
22843 case GE: return ARM_GE;
22844 case GT: return ARM_GT;
22845 case LE: return ARM_LE;
22846 case LT: return ARM_LT;
22847 case GEU: return ARM_CS;
22848 case GTU: return ARM_HI;
22849 case LEU: return ARM_LS;
22850 case LTU: return ARM_CC;
22851 default: return ARM_NV;
22852 }
22853
22854 default: gcc_unreachable ();
22855 }
22856 }
22857
22858 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22859 static enum arm_cond_code
22860 get_arm_condition_code (rtx comparison)
22861 {
22862 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22863 gcc_assert (code != ARM_NV);
22864 return code;
22865 }
22866
22867 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22868 instructions. */
22869 void
22870 thumb2_final_prescan_insn (rtx_insn *insn)
22871 {
22872 rtx_insn *first_insn = insn;
22873 rtx body = PATTERN (insn);
22874 rtx predicate;
22875 enum arm_cond_code code;
22876 int n;
22877 int mask;
22878 int max;
22879
22880 /* max_insns_skipped in the tune was already taken into account in the
22881 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22882 just emit the IT blocks as best we can.  It does not make sense to split
22883 the IT blocks. */
22884 max = MAX_INSN_PER_IT_BLOCK;
22885
22886 /* Remove the previous insn from the count of insns to be output. */
22887 if (arm_condexec_count)
22888 arm_condexec_count--;
22889
22890 /* Nothing to do if we are already inside a conditional block. */
22891 if (arm_condexec_count)
22892 return;
22893
22894 if (GET_CODE (body) != COND_EXEC)
22895 return;
22896
22897 /* Conditional jumps are implemented directly. */
22898 if (JUMP_P (insn))
22899 return;
22900
22901 predicate = COND_EXEC_TEST (body);
22902 arm_current_cc = get_arm_condition_code (predicate);
22903
22904 n = get_attr_ce_count (insn);
22905 arm_condexec_count = 1;
22906 arm_condexec_mask = (1 << n) - 1;
22907 arm_condexec_masklen = n;
22908 /* See if subsequent instructions can be combined into the same block. */
22909 for (;;)
22910 {
22911 insn = next_nonnote_insn (insn);
22912
22913 /* Jumping into the middle of an IT block is illegal, so a label or
22914 barrier terminates the block. */
22915 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22916 break;
22917
22918 body = PATTERN (insn);
22919 /* USE and CLOBBER aren't really insns, so just skip them. */
22920 if (GET_CODE (body) == USE
22921 || GET_CODE (body) == CLOBBER)
22922 continue;
22923
22924 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22925 if (GET_CODE (body) != COND_EXEC)
22926 break;
22927 /* Give up if adding this insn would exceed the maximum number of
22927 conditionally executed instructions in a block. */
22928 n = get_attr_ce_count (insn);
22929 if (arm_condexec_masklen + n > max)
22930 break;
22931
22932 predicate = COND_EXEC_TEST (body);
22933 code = get_arm_condition_code (predicate);
22934 mask = (1 << n) - 1;
22935 if (arm_current_cc == code)
22936 arm_condexec_mask |= (mask << arm_condexec_masklen);
22937 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22938 break;
22939
22940 arm_condexec_count++;
22941 arm_condexec_masklen += n;
22942
22943 /* A jump must be the last instruction in a conditional block. */
22944 if (JUMP_P (insn))
22945 break;
22946 }
22947 /* Restore recog_data (getting the attributes of other insns can
22948 destroy this array, but final.c assumes that it remains intact
22949 across this call). */
22950 extract_constrain_insn_cached (first_insn);
22951 }
22952
22953 void
22954 arm_final_prescan_insn (rtx_insn *insn)
22955 {
22956 /* BODY will hold the body of INSN. */
22957 rtx body = PATTERN (insn);
22958
22959 /* This will be 1 if trying to repeat the trick, and things need to be
22960 reversed if it appears to fail. */
22961 int reverse = 0;
22962
22963 /* If we start with a return insn, we only succeed if we find another one. */
22964 int seeking_return = 0;
22965 enum rtx_code return_code = UNKNOWN;
22966
22967 /* START_INSN will hold the insn from where we start looking. This is the
22968 first insn after the following code_label if REVERSE is true. */
22969 rtx_insn *start_insn = insn;
22970
22971 /* If in state 4, check if the target branch is reached, in order to
22972 change back to state 0. */
22973 if (arm_ccfsm_state == 4)
22974 {
22975 if (insn == arm_target_insn)
22976 {
22977 arm_target_insn = NULL;
22978 arm_ccfsm_state = 0;
22979 }
22980 return;
22981 }
22982
22983 /* If in state 3, it is possible to repeat the trick, if this insn is an
22984 unconditional branch to a label, and immediately following this branch
22985 is the previous target label which is only used once, and the label this
22986 branch jumps to is not too far off. */
22987 if (arm_ccfsm_state == 3)
22988 {
22989 if (simplejump_p (insn))
22990 {
22991 start_insn = next_nonnote_insn (start_insn);
22992 if (BARRIER_P (start_insn))
22993 {
22994 /* XXX Isn't this always a barrier? */
22995 start_insn = next_nonnote_insn (start_insn);
22996 }
22997 if (LABEL_P (start_insn)
22998 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22999 && LABEL_NUSES (start_insn) == 1)
23000 reverse = TRUE;
23001 else
23002 return;
23003 }
23004 else if (ANY_RETURN_P (body))
23005 {
23006 start_insn = next_nonnote_insn (start_insn);
23007 if (BARRIER_P (start_insn))
23008 start_insn = next_nonnote_insn (start_insn);
23009 if (LABEL_P (start_insn)
23010 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23011 && LABEL_NUSES (start_insn) == 1)
23012 {
23013 reverse = TRUE;
23014 seeking_return = 1;
23015 return_code = GET_CODE (body);
23016 }
23017 else
23018 return;
23019 }
23020 else
23021 return;
23022 }
23023
23024 gcc_assert (!arm_ccfsm_state || reverse);
23025 if (!JUMP_P (insn))
23026 return;
23027
23028 /* This jump might be paralleled with a clobber of the condition codes;
23029 the jump should always come first.  */
23030 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23031 body = XVECEXP (body, 0, 0);
23032
23033 if (reverse
23034 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23035 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23036 {
23037 int insns_skipped;
23038 int fail = FALSE, succeed = FALSE;
23039 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23040 int then_not_else = TRUE;
23041 rtx_insn *this_insn = start_insn;
23042 rtx label = 0;
23043
23044 /* Register the insn jumped to. */
23045 if (reverse)
23046 {
23047 if (!seeking_return)
23048 label = XEXP (SET_SRC (body), 0);
23049 }
23050 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23051 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23052 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23053 {
23054 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23055 then_not_else = FALSE;
23056 }
23057 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23058 {
23059 seeking_return = 1;
23060 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23061 }
23062 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23063 {
23064 seeking_return = 1;
23065 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23066 then_not_else = FALSE;
23067 }
23068 else
23069 gcc_unreachable ();
23070
23071 /* See how many insns this branch skips, and what kind of insns. If all
23072 insns are okay, and the label or unconditional branch to the same
23073 label is not too far away, succeed. */
23074 for (insns_skipped = 0;
23075 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23076 {
23077 rtx scanbody;
23078
23079 this_insn = next_nonnote_insn (this_insn);
23080 if (!this_insn)
23081 break;
23082
23083 switch (GET_CODE (this_insn))
23084 {
23085 case CODE_LABEL:
23086 /* Succeed if it is the target label, otherwise fail since
23087 control falls in from somewhere else. */
23088 if (this_insn == label)
23089 {
23090 arm_ccfsm_state = 1;
23091 succeed = TRUE;
23092 }
23093 else
23094 fail = TRUE;
23095 break;
23096
23097 case BARRIER:
23098 /* Succeed if the following insn is the target label.
23099 Otherwise fail.
23100 If return insns are used then the last insn in a function
23101 will be a barrier. */
23102 this_insn = next_nonnote_insn (this_insn);
23103 if (this_insn && this_insn == label)
23104 {
23105 arm_ccfsm_state = 1;
23106 succeed = TRUE;
23107 }
23108 else
23109 fail = TRUE;
23110 break;
23111
23112 case CALL_INSN:
23113 /* The AAPCS says that conditional calls should not be
23114 used since they make interworking inefficient (the
23115 linker can't transform BL<cond> into BLX). That's
23116 only a problem if the machine has BLX. */
23117 if (arm_arch5)
23118 {
23119 fail = TRUE;
23120 break;
23121 }
23122
23123 /* Succeed if the following insn is the target label, or
23124 if the following two insns are a barrier and the
23125 target label. */
23126 this_insn = next_nonnote_insn (this_insn);
23127 if (this_insn && BARRIER_P (this_insn))
23128 this_insn = next_nonnote_insn (this_insn);
23129
23130 if (this_insn && this_insn == label
23131 && insns_skipped < max_insns_skipped)
23132 {
23133 arm_ccfsm_state = 1;
23134 succeed = TRUE;
23135 }
23136 else
23137 fail = TRUE;
23138 break;
23139
23140 case JUMP_INSN:
23141 /* If this is an unconditional branch to the same label, succeed.
23142 If it is to another label, do nothing. If it is conditional,
23143 fail. */
23144 /* XXX Probably, the tests for SET and the PC are
23145 unnecessary. */
23146
23147 scanbody = PATTERN (this_insn);
23148 if (GET_CODE (scanbody) == SET
23149 && GET_CODE (SET_DEST (scanbody)) == PC)
23150 {
23151 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23152 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23153 {
23154 arm_ccfsm_state = 2;
23155 succeed = TRUE;
23156 }
23157 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23158 fail = TRUE;
23159 }
23160 /* Fail if a conditional return is undesirable (e.g. on a
23161 StrongARM), but still allow this if optimizing for size. */
23162 else if (GET_CODE (scanbody) == return_code
23163 && !use_return_insn (TRUE, NULL)
23164 && !optimize_size)
23165 fail = TRUE;
23166 else if (GET_CODE (scanbody) == return_code)
23167 {
23168 arm_ccfsm_state = 2;
23169 succeed = TRUE;
23170 }
23171 else if (GET_CODE (scanbody) == PARALLEL)
23172 {
23173 switch (get_attr_conds (this_insn))
23174 {
23175 case CONDS_NOCOND:
23176 break;
23177 default:
23178 fail = TRUE;
23179 break;
23180 }
23181 }
23182 else
23183 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23184
23185 break;
23186
23187 case INSN:
23188 /* Instructions using or affecting the condition codes make it
23189 fail. */
23190 scanbody = PATTERN (this_insn);
23191 if (!(GET_CODE (scanbody) == SET
23192 || GET_CODE (scanbody) == PARALLEL)
23193 || get_attr_conds (this_insn) != CONDS_NOCOND)
23194 fail = TRUE;
23195 break;
23196
23197 default:
23198 break;
23199 }
23200 }
23201 if (succeed)
23202 {
23203 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23204 arm_target_label = CODE_LABEL_NUMBER (label);
23205 else
23206 {
23207 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23208
23209 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23210 {
23211 this_insn = next_nonnote_insn (this_insn);
23212 gcc_assert (!this_insn
23213 || (!BARRIER_P (this_insn)
23214 && !LABEL_P (this_insn)));
23215 }
23216 if (!this_insn)
23217 {
23218 /* Oh dear!  We ran off the end; give up. */
23219 extract_constrain_insn_cached (insn);
23220 arm_ccfsm_state = 0;
23221 arm_target_insn = NULL;
23222 return;
23223 }
23224 arm_target_insn = this_insn;
23225 }
23226
23227 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23228 what it was. */
23229 if (!reverse)
23230 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23231
23232 if (reverse || then_not_else)
23233 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23234 }
23235
23236 /* Restore recog_data (getting the attributes of other insns can
23237 destroy this array, but final.c assumes that it remains intact
23238 across this call).  */
23239 extract_constrain_insn_cached (insn);
23240 }
23241 }
23242
23243 /* Output IT instructions. */
23244 void
23245 thumb2_asm_output_opcode (FILE * stream)
23246 {
23247 char buff[5];
23248 int n;
23249
23250 if (arm_condexec_mask)
23251 {
23252 for (n = 0; n < arm_condexec_masklen; n++)
23253 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23254 buff[n] = 0;
23255 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23256 arm_condition_codes[arm_current_cc]);
23257 arm_condexec_mask = 0;
23258 }
23259 }
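 /* For example, a block of three conditional insns where the first two
 execute on EQ and the third on NE gives arm_condexec_mask the value
 0b011, so the code above emits "itte	eq" before the first insn of
 the block.  */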
23260
23261 /* Returns true if REGNO is a valid register
23262 for holding a quantity of type MODE. */
23263 int
23264 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23265 {
23266 if (GET_MODE_CLASS (mode) == MODE_CC)
23267 return (regno == CC_REGNUM
23268 || (TARGET_HARD_FLOAT && TARGET_VFP
23269 && regno == VFPCC_REGNUM));
23270
23271 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23272 return false;
23273
23274 if (TARGET_THUMB1)
23275 /* For the Thumb we only allow values bigger than SImode in
23276 registers 0 - 6, so that there is always a second low
23277 register available to hold the upper part of the value.
23278 We probably ought to ensure that the register is the
23279 start of an even numbered register pair. */
23280 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
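 /* For example, a DImode value may start in any of r0-r6 (occupying
 that register and the next one), but not in r7, since there would
 be no second low register to hold the upper word.  */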
23281
23282 if (TARGET_HARD_FLOAT && TARGET_VFP
23283 && IS_VFP_REGNUM (regno))
23284 {
23285 if (mode == SFmode || mode == SImode)
23286 return VFP_REGNO_OK_FOR_SINGLE (regno);
23287
23288 if (mode == DFmode)
23289 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23290
23291 /* VFP registers can hold HFmode values, but there is no point in
23292 putting them there unless we have hardware conversion insns. */
23293 if (mode == HFmode)
23294 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23295
23296 if (TARGET_NEON)
23297 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23298 || (VALID_NEON_QREG_MODE (mode)
23299 && NEON_REGNO_OK_FOR_QUAD (regno))
23300 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23301 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23302 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23303 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23304 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23305
23306 return FALSE;
23307 }
23308
23309 if (TARGET_REALLY_IWMMXT)
23310 {
23311 if (IS_IWMMXT_GR_REGNUM (regno))
23312 return mode == SImode;
23313
23314 if (IS_IWMMXT_REGNUM (regno))
23315 return VALID_IWMMXT_REG_MODE (mode);
23316 }
23317
23318 /* We allow almost any value to be stored in the general registers.
23319 Restrict doubleword quantities to even register pairs in ARM state
23320 so that we can use ldrd. Do not allow very large Neon structure
23321 opaque modes in general registers; they would use too many. */
23322 if (regno <= LAST_ARM_REGNUM)
23323 {
23324 if (ARM_NUM_REGS (mode) > 4)
23325 return FALSE;
23326
23327 if (TARGET_THUMB2)
23328 return TRUE;
23329
23330 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23331 }
23332
23333 if (regno == FRAME_POINTER_REGNUM
23334 || regno == ARG_POINTER_REGNUM)
23335 /* We only allow integers in the fake hard registers. */
23336 return GET_MODE_CLASS (mode) == MODE_INT;
23337
23338 return FALSE;
23339 }
23340
23341 /* Implement MODES_TIEABLE_P. */
23342
23343 bool
23344 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23345 {
23346 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23347 return true;
23348
23349 /* We specifically want to allow elements of "structure" modes to
23350 be tieable to the structure. This more general condition allows
23351 other rarer situations too. */
23352 if (TARGET_NEON
23353 && (VALID_NEON_DREG_MODE (mode1)
23354 || VALID_NEON_QREG_MODE (mode1)
23355 || VALID_NEON_STRUCT_MODE (mode1))
23356 && (VALID_NEON_DREG_MODE (mode2)
23357 || VALID_NEON_QREG_MODE (mode2)
23358 || VALID_NEON_STRUCT_MODE (mode2)))
23359 return true;
23360
23361 return false;
23362 }
23363
23364 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23365 not used in arm mode. */
23366
23367 enum reg_class
23368 arm_regno_class (int regno)
23369 {
23370 if (regno == PC_REGNUM)
23371 return NO_REGS;
23372
23373 if (TARGET_THUMB1)
23374 {
23375 if (regno == STACK_POINTER_REGNUM)
23376 return STACK_REG;
23377 if (regno == CC_REGNUM)
23378 return CC_REG;
23379 if (regno < 8)
23380 return LO_REGS;
23381 return HI_REGS;
23382 }
23383
23384 if (TARGET_THUMB2 && regno < 8)
23385 return LO_REGS;
23386
23387 if ( regno <= LAST_ARM_REGNUM
23388 || regno == FRAME_POINTER_REGNUM
23389 || regno == ARG_POINTER_REGNUM)
23390 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23391
23392 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23393 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23394
23395 if (IS_VFP_REGNUM (regno))
23396 {
23397 if (regno <= D7_VFP_REGNUM)
23398 return VFP_D0_D7_REGS;
23399 else if (regno <= LAST_LO_VFP_REGNUM)
23400 return VFP_LO_REGS;
23401 else
23402 return VFP_HI_REGS;
23403 }
23404
23405 if (IS_IWMMXT_REGNUM (regno))
23406 return IWMMXT_REGS;
23407
23408 if (IS_IWMMXT_GR_REGNUM (regno))
23409 return IWMMXT_GR_REGS;
23410
23411 return NO_REGS;
23412 }
23413
23414 /* Handle a special case when computing the offset
23415 of an argument from the frame pointer. */
23416 int
23417 arm_debugger_arg_offset (int value, rtx addr)
23418 {
23419 rtx_insn *insn;
23420
23421 /* We are only interested if dbxout_parms() failed to compute the offset. */
23422 if (value != 0)
23423 return 0;
23424
23425 /* We can only cope with the case where the address is held in a register. */
23426 if (!REG_P (addr))
23427 return 0;
23428
23429 /* If we are using the frame pointer to point at the argument, then
23430 an offset of 0 is correct. */
23431 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23432 return 0;
23433
23434 /* If we are using the stack pointer to point at the
23435 argument, then an offset of 0 is correct. */
23436 /* ??? Check this is consistent with thumb2 frame layout. */
23437 if ((TARGET_THUMB || !frame_pointer_needed)
23438 && REGNO (addr) == SP_REGNUM)
23439 return 0;
23440
23441 /* Oh dear. The argument is pointed to by a register rather
23442 than being held in a register, or being stored at a known
23443 offset from the frame pointer. Since GDB only understands
23444 those two kinds of argument we must translate the address
23445 held in the register into an offset from the frame pointer.
23446 We do this by searching through the insns for the function
23447 looking to see where this register gets its value. If the
23448 register is initialized from the frame pointer plus an offset
23449 then we are in luck and we can continue, otherwise we give up.
23450
23451 This code is exercised by producing debugging information
23452 for a function with arguments like this:
23453
23454 double func (double a, double b, int c, double d) {return d;}
23455
23456 Without this code the stab for parameter 'd' will be set to
23457 an offset of 0 from the frame pointer, rather than 8. */
23458
23459 /* The if() statement says:
23460
23461 If the insn is a normal instruction
23462 and if the insn is setting the value in a register
23463 and if the register being set is the register holding the address of the argument
23464 and if the address is computed by an addition
23465 that involves adding to a register
23466 which is the frame pointer
23467 a constant integer
23468
23469 then... */
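 Concretely, the loop below is looking for an insn of the shape

 (set (reg Rn) (plus (reg fp) (const_int 8)))

 where Rn is the register holding the argument's address; the
 constant (8 is just an example value) then becomes the offset
 reported to the debugger.  */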
23470
23471 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23472 {
23473 if ( NONJUMP_INSN_P (insn)
23474 && GET_CODE (PATTERN (insn)) == SET
23475 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23476 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23477 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23478 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23479 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23480 )
23481 {
23482 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23483
23484 break;
23485 }
23486 }
23487
23488 if (value == 0)
23489 {
23490 debug_rtx (addr);
23491 warning (0, "unable to compute real location of stacked parameter");
23492 value = 8; /* XXX magic hack */
23493 }
23494
23495 return value;
23496 }
23497 \f
23498 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23499
23500 static const char *
23501 arm_invalid_parameter_type (const_tree t)
23502 {
23503 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23504 return N_("function parameters cannot have __fp16 type");
23505 return NULL;
23506 }
23507
23508 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23509
23510 static const char *
23511 arm_invalid_return_type (const_tree t)
23512 {
23513 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23514 return N_("functions cannot return __fp16 type");
23515 return NULL;
23516 }
23517
23518 /* Implement TARGET_PROMOTED_TYPE. */
23519
23520 static tree
23521 arm_promoted_type (const_tree t)
23522 {
23523 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23524 return float_type_node;
23525 return NULL_TREE;
23526 }
23527
23528 /* Implement TARGET_CONVERT_TO_TYPE.
23529 Specifically, this hook implements the peculiarity of the ARM
23530 half-precision floating-point C semantics that requires conversions between
23531 __fp16 to or from double to do an intermediate conversion to float. */
23532
23533 static tree
23534 arm_convert_to_type (tree type, tree expr)
23535 {
23536 tree fromtype = TREE_TYPE (expr);
23537 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23538 return NULL_TREE;
23539 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23540 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23541 return convert (type, convert (float_type_node, expr));
23542 return NULL_TREE;
23543 }
23544
23545 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23546 This simply adds HFmode as a supported mode; even though we don't
23547 implement arithmetic on this type directly, it's supported by
23548 optabs conversions, much the way the double-word arithmetic is
23549 special-cased in the default hook. */
23550
23551 static bool
23552 arm_scalar_mode_supported_p (machine_mode mode)
23553 {
23554 if (mode == HFmode)
23555 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23556 else if (ALL_FIXED_POINT_MODE_P (mode))
23557 return true;
23558 else
23559 return default_scalar_mode_supported_p (mode);
23560 }
23561
23562 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23563 void
23564 neon_reinterpret (rtx dest, rtx src)
23565 {
23566 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23567 }
23568
23569 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23570 not to early-clobber SRC registers in the process.
23571
23572 We assume that the operands described by SRC and DEST represent a
23573 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23574 number of components into which the copy has been decomposed. */
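 For example, when copying a two-register pair where DEST is
 {d2, d3} and SRC is {d1, d2}, the moves are emitted in reverse
 order (d3 = d2 first, then d2 = d1) so that the overlapping
 source register d2 is read before it is overwritten.  */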
23575 void
23576 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23577 {
23578 unsigned int i;
23579
23580 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23581 || REGNO (operands[0]) < REGNO (operands[1]))
23582 {
23583 for (i = 0; i < count; i++)
23584 {
23585 operands[2 * i] = dest[i];
23586 operands[2 * i + 1] = src[i];
23587 }
23588 }
23589 else
23590 {
23591 for (i = 0; i < count; i++)
23592 {
23593 operands[2 * i] = dest[count - i - 1];
23594 operands[2 * i + 1] = src[count - i - 1];
23595 }
23596 }
23597 }
23598
23599 /* Split operands into moves from op[1] + op[2] into op[0]. */
23600
23601 void
23602 neon_split_vcombine (rtx operands[3])
23603 {
23604 unsigned int dest = REGNO (operands[0]);
23605 unsigned int src1 = REGNO (operands[1]);
23606 unsigned int src2 = REGNO (operands[2]);
23607 machine_mode halfmode = GET_MODE (operands[1]);
23608 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23609 rtx destlo, desthi;
23610
23611 if (src1 == dest && src2 == dest + halfregs)
23612 {
23613 /* No-op move. Can't split to nothing; emit something. */
23614 emit_note (NOTE_INSN_DELETED);
23615 return;
23616 }
23617
23618 /* Preserve register attributes for variable tracking. */
23619 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23620 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23621 GET_MODE_SIZE (halfmode));
23622
23623 /* Special case of reversed high/low parts. Use VSWP. */
23624 if (src2 == dest && src1 == dest + halfregs)
23625 {
23626 rtx x = gen_rtx_SET (destlo, operands[1]);
23627 rtx y = gen_rtx_SET (desthi, operands[2]);
23628 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23629 return;
23630 }
23631
23632 if (!reg_overlap_mentioned_p (operands[2], destlo))
23633 {
23634 /* Try to avoid unnecessary moves if part of the result
23635 is in the right place already. */
23636 if (src1 != dest)
23637 emit_move_insn (destlo, operands[1]);
23638 if (src2 != dest + halfregs)
23639 emit_move_insn (desthi, operands[2]);
23640 }
23641 else
23642 {
23643 if (src2 != dest + halfregs)
23644 emit_move_insn (desthi, operands[2]);
23645 if (src1 != dest)
23646 emit_move_insn (destlo, operands[1]);
23647 }
23648 }
23649 \f
23650 /* Return the number (counting from 0) of
23651 the least significant set bit in MASK. */
23652
23653 inline static int
23654 number_of_first_bit_set (unsigned mask)
23655 {
23656 return ctz_hwi (mask);
23657 }
23658
23659 /* Like emit_multi_reg_push, but allowing for a different set of
23660 registers to be described as saved. MASK is the set of registers
23661 to be saved; REAL_REGS is the set of registers to be described as
23662 saved. If REAL_REGS is 0, only describe the stack adjustment. */
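 For instance, a Thumb-1 prologue that needs to save a high register
 may first copy it into a free low register and push that low
 register; MASK then names the low register actually pushed while
 REAL_REGS names the high register, so the unwind information
 describes the save of the high register.  */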
23663
23664 static rtx_insn *
23665 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23666 {
23667 unsigned long regno;
23668 rtx par[10], tmp, reg;
23669 rtx_insn *insn;
23670 int i, j;
23671
23672 /* Build the parallel of the registers actually being stored. */
23673 for (i = 0; mask; ++i, mask &= mask - 1)
23674 {
23675 regno = ctz_hwi (mask);
23676 reg = gen_rtx_REG (SImode, regno);
23677
23678 if (i == 0)
23679 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23680 else
23681 tmp = gen_rtx_USE (VOIDmode, reg);
23682
23683 par[i] = tmp;
23684 }
23685
23686 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23687 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23688 tmp = gen_frame_mem (BLKmode, tmp);
23689 tmp = gen_rtx_SET (tmp, par[0]);
23690 par[0] = tmp;
23691
23692 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23693 insn = emit_insn (tmp);
23694
23695 /* Always build the stack adjustment note for unwind info. */
23696 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23697 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23698 par[0] = tmp;
23699
23700 /* Build the parallel of the registers recorded as saved for unwind. */
23701 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23702 {
23703 regno = ctz_hwi (real_regs);
23704 reg = gen_rtx_REG (SImode, regno);
23705
23706 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23707 tmp = gen_frame_mem (SImode, tmp);
23708 tmp = gen_rtx_SET (tmp, reg);
23709 RTX_FRAME_RELATED_P (tmp) = 1;
23710 par[j + 1] = tmp;
23711 }
23712
23713 if (j == 0)
23714 tmp = par[0];
23715 else
23716 {
23717 RTX_FRAME_RELATED_P (par[0]) = 1;
23718 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23719 }
23720
23721 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23722
23723 return insn;
23724 }
23725
23726 /* Emit code to push or pop registers to or from the stack. F is the
23727 assembly file. MASK is the registers to pop. */
23728 static void
23729 thumb_pop (FILE *f, unsigned long mask)
23730 {
23731 int regno;
23732 int lo_mask = mask & 0xFF;
23733 int pushed_words = 0;
23734
23735 gcc_assert (mask);
23736
23737 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23738 {
23739 /* Special case.  Do not generate a POP PC statement here; do it in
23740 thumb_exit ().  */
23741 thumb_exit (f, -1);
23742 return;
23743 }
23744
23745 fprintf (f, "\tpop\t{");
23746
23747 /* Look at the low registers first. */
23748 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23749 {
23750 if (lo_mask & 1)
23751 {
23752 asm_fprintf (f, "%r", regno);
23753
23754 if ((lo_mask & ~1) != 0)
23755 fprintf (f, ", ");
23756
23757 pushed_words++;
23758 }
23759 }
23760
23761 if (mask & (1 << PC_REGNUM))
23762 {
23763 /* Catch popping the PC. */
23764 if (TARGET_INTERWORK || TARGET_BACKTRACE
23765 || crtl->calls_eh_return)
23766 {
23767 /* The PC is never popped directly; instead
23768 it is popped into r3 and then BX is used. */
23769 fprintf (f, "}\n");
23770
23771 thumb_exit (f, -1);
23772
23773 return;
23774 }
23775 else
23776 {
23777 if (mask & 0xFF)
23778 fprintf (f, ", ");
23779
23780 asm_fprintf (f, "%r", PC_REGNUM);
23781 }
23782 }
23783
23784 fprintf (f, "}\n");
23785 }
23786
23787 /* Generate code to return from a thumb function.
23788 If 'reg_containing_return_addr' is -1, then the return address is
23789 actually on the stack, at the stack pointer. */
23790 static void
23791 thumb_exit (FILE *f, int reg_containing_return_addr)
23792 {
23793 unsigned regs_available_for_popping;
23794 unsigned regs_to_pop;
23795 int pops_needed;
23796 unsigned available;
23797 unsigned required;
23798 machine_mode mode;
23799 int size;
23800 int restore_a4 = FALSE;
23801
23802 /* Compute the registers we need to pop. */
23803 regs_to_pop = 0;
23804 pops_needed = 0;
23805
23806 if (reg_containing_return_addr == -1)
23807 {
23808 regs_to_pop |= 1 << LR_REGNUM;
23809 ++pops_needed;
23810 }
23811
23812 if (TARGET_BACKTRACE)
23813 {
23814 /* Restore the (ARM) frame pointer and stack pointer. */
23815 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23816 pops_needed += 2;
23817 }
23818
23819 /* If there is nothing to pop then just emit the BX instruction and
23820 return. */
23821 if (pops_needed == 0)
23822 {
23823 if (crtl->calls_eh_return)
23824 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23825
23826 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23827 return;
23828 }
23829 /* Otherwise if we are not supporting interworking and we have not created
23830 a backtrace structure and the function was not entered in ARM mode then
23831 just pop the return address straight into the PC. */
23832 else if (!TARGET_INTERWORK
23833 && !TARGET_BACKTRACE
23834 && !is_called_in_ARM_mode (current_function_decl)
23835 && !crtl->calls_eh_return)
23836 {
23837 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23838 return;
23839 }
23840
23841 /* Find out how many of the (return) argument registers we can corrupt. */
23842 regs_available_for_popping = 0;
23843
23844 /* If returning via __builtin_eh_return, the bottom three registers
23845 all contain information needed for the return. */
23846 if (crtl->calls_eh_return)
23847 size = 12;
23848 else
23849 {
23850 /* We can deduce the registers used from the function's
23851 return value. This is more reliable than examining
23852 df_regs_ever_live_p () because that will be set if the register is
23853 ever used in the function, not just if the register is used
23854 to hold a return value. */
23855
23856 if (crtl->return_rtx != 0)
23857 mode = GET_MODE (crtl->return_rtx);
23858 else
23859 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23860
23861 size = GET_MODE_SIZE (mode);
23862
23863 if (size == 0)
23864 {
23865 /* In a void function we can use any argument register.
23866 In a function that returns a structure on the stack
23867 we can use the second and third argument registers. */
23868 if (mode == VOIDmode)
23869 regs_available_for_popping =
23870 (1 << ARG_REGISTER (1))
23871 | (1 << ARG_REGISTER (2))
23872 | (1 << ARG_REGISTER (3));
23873 else
23874 regs_available_for_popping =
23875 (1 << ARG_REGISTER (2))
23876 | (1 << ARG_REGISTER (3));
23877 }
23878 else if (size <= 4)
23879 regs_available_for_popping =
23880 (1 << ARG_REGISTER (2))
23881 | (1 << ARG_REGISTER (3));
23882 else if (size <= 8)
23883 regs_available_for_popping =
23884 (1 << ARG_REGISTER (3));
23885 }
23886
23887 /* Match registers to be popped with registers into which we pop them. */
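/* Each iteration below clears the lowest set bit of both masks (the
   x &= ~(x & -x) idiom), pairing off one register still to be popped
   with one register available to receive it. */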
23888 for (available = regs_available_for_popping,
23889 required = regs_to_pop;
23890 required != 0 && available != 0;
23891 available &= ~(available & - available),
23892 required &= ~(required & - required))
23893 -- pops_needed;
23894
23895 /* If we have any popping registers left over, remove them. */
23896 if (available > 0)
23897 regs_available_for_popping &= ~available;
23898
23899 /* Otherwise if we need another popping register we can use
23900 the fourth argument register. */
23901 else if (pops_needed)
23902 {
23903 /* If we have not found any free argument registers and
23904 reg a4 contains the return address, we must move it. */
23905 if (regs_available_for_popping == 0
23906 && reg_containing_return_addr == LAST_ARG_REGNUM)
23907 {
23908 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23909 reg_containing_return_addr = LR_REGNUM;
23910 }
23911 else if (size > 12)
23912 {
23913 /* Register a4 is being used to hold part of the return value,
23914 but we have dire need of a free, low register. */
23915 restore_a4 = TRUE;
23916
23917 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
23918 }
23919
23920 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23921 {
23922 /* The fourth argument register is available. */
23923 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23924
23925 --pops_needed;
23926 }
23927 }
23928
23929 /* Pop as many registers as we can. */
23930 thumb_pop (f, regs_available_for_popping);
23931
23932 /* Process the registers we popped. */
23933 if (reg_containing_return_addr == -1)
23934 {
23935 /* The return address was popped into the lowest numbered register. */
23936 regs_to_pop &= ~(1 << LR_REGNUM);
23937
23938 reg_containing_return_addr =
23939 number_of_first_bit_set (regs_available_for_popping);
23940
23941 /* Remove this register from the mask of available registers, so that
23942 the return address will not be corrupted by further pops. */
23943 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23944 }
23945
23946 /* If we popped other registers then handle them here. */
23947 if (regs_available_for_popping)
23948 {
23949 int frame_pointer;
23950
23951 /* Work out which register currently contains the frame pointer. */
23952 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23953
23954 /* Move it into the correct place. */
23955 asm_fprintf (f, "\tmov\t%r, %r\n",
23956 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23957
23958 /* (Temporarily) remove it from the mask of popped registers. */
23959 regs_available_for_popping &= ~(1 << frame_pointer);
23960 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23961
23962 if (regs_available_for_popping)
23963 {
23964 int stack_pointer;
23965
23966 /* We popped the stack pointer as well,
23967 find the register that contains it. */
23968 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23969
23970 /* Move it into the stack register. */
23971 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23972
23973 /* At this point we have popped all necessary registers, so
23974 do not worry about restoring regs_available_for_popping
23975 to its correct value:
23976
23977 assert (pops_needed == 0)
23978 assert (regs_available_for_popping == (1 << frame_pointer))
23979 assert (regs_to_pop == (1 << STACK_POINTER)) */
23980 }
23981 else
23982 {
23983 /* Since we have just moved the popped value into the frame
23984 pointer, the popping register is available for reuse, and
23985 we know that we still have the stack pointer left to pop. */
23986 regs_available_for_popping |= (1 << frame_pointer);
23987 }
23988 }
23989
23990 /* If we still have registers left on the stack, but we no longer have
23991 any registers into which we can pop them, then we must move the return
23992 address into the link register and make available the register that
23993 contained it. */
23994 if (regs_available_for_popping == 0 && pops_needed > 0)
23995 {
23996 regs_available_for_popping |= 1 << reg_containing_return_addr;
23997
23998 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23999 reg_containing_return_addr);
24000
24001 reg_containing_return_addr = LR_REGNUM;
24002 }
24003
24004 /* If we have registers left on the stack then pop some more.
24005 We know that at most we will want to pop FP and SP. */
24006 if (pops_needed > 0)
24007 {
24008 int popped_into;
24009 int move_to;
24010
24011 thumb_pop (f, regs_available_for_popping);
24012
24013 /* We have popped either FP or SP.
24014 Move whichever one it is into the correct register. */
24015 popped_into = number_of_first_bit_set (regs_available_for_popping);
24016 move_to = number_of_first_bit_set (regs_to_pop);
24017
24018 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24019
24020 regs_to_pop &= ~(1 << move_to);
24021
24022 --pops_needed;
24023 }
24024
24025 /* If we still have not popped everything then we must have only
24026 had one register available to us and we are now popping the SP. */
24027 if (pops_needed > 0)
24028 {
24029 int popped_into;
24030
24031 thumb_pop (f, regs_available_for_popping);
24032
24033 popped_into = number_of_first_bit_set (regs_available_for_popping);
24034
24035 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24036 /*
24037 assert (regs_to_pop == (1 << STACK_POINTER))
24038 assert (pops_needed == 1)
24039 */
24040 }
24041
24042 /* If necessary restore the a4 register. */
24043 if (restore_a4)
24044 {
24045 if (reg_containing_return_addr != LR_REGNUM)
24046 {
24047 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24048 reg_containing_return_addr = LR_REGNUM;
24049 }
24050
24051 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24052 }
24053
24054 if (crtl->calls_eh_return)
24055 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24056
24057 /* Return to caller. */
24058 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24059 }
24060 \f
24061 /* Scan INSN just before assembler is output for it.
24062 For Thumb-1, we track the status of the condition codes; this
24063 information is used in the cbranchsi4_insn pattern. */
24064 void
24065 thumb1_final_prescan_insn (rtx_insn *insn)
24066 {
24067 if (flag_print_asm_name)
24068 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24069 INSN_ADDRESSES (INSN_UID (insn)));
24070 /* Don't overwrite the previous setter when we get to a cbranch. */
24071 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24072 {
24073 enum attr_conds conds;
24074
24075 if (cfun->machine->thumb1_cc_insn)
24076 {
24077 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24078 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24079 CC_STATUS_INIT;
24080 }
24081 conds = get_attr_conds (insn);
24082 if (conds == CONDS_SET)
24083 {
24084 rtx set = single_set (insn);
24085 cfun->machine->thumb1_cc_insn = insn;
24086 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24087 cfun->machine->thumb1_cc_op1 = const0_rtx;
24088 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24089 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24090 {
24091 rtx src1 = XEXP (SET_SRC (set), 1);
24092 if (src1 == const0_rtx)
24093 cfun->machine->thumb1_cc_mode = CCmode;
24094 }
24095 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24096 {
24097 /* Record the src register operand instead of dest because
24098 cprop_hardreg pass propagates src. */
24099 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24100 }
24101 }
24102 else if (conds != CONDS_NOCOND)
24103 cfun->machine->thumb1_cc_insn = NULL_RTX;
24104 }
24105
24106 /* Check if an unexpected far jump is used. */
24107 if (cfun->machine->lr_save_eliminated
24108 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24109 internal_error("Unexpected thumb1 far jump");
24110 }
24111
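/* Return nonzero if VAL, truncated to 32 bits, is nonzero and fits
   entirely within an 8-bit field shifted left by 0 to 24 bits
   (e.g. 0x00ff0000 == 0xff << 16); return zero otherwise. */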
24112 int
24113 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24114 {
24115 unsigned HOST_WIDE_INT mask = 0xff;
24116 int i;
24117
24118 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24119 if (val == 0) /* XXX */
24120 return 0;
24121
24122 for (i = 0; i < 25; i++)
24123 if ((val & (mask << i)) == val)
24124 return 1;
24125
24126 return 0;
24127 }
24128
24129 /* Returns nonzero if the current function contains,
24130 or might contain, a far jump. */
24131 static int
24132 thumb_far_jump_used_p (void)
24133 {
24134 rtx_insn *insn;
24135 bool far_jump = false;
24136 unsigned int func_size = 0;
24137
24138 /* This test is only important for leaf functions. */
24139 /* assert (!leaf_function_p ()); */
24140
24141 /* If we have already decided that far jumps may be used,
24142 do not bother checking again, and always return true even if
24143 it turns out that they are not being used. Once we have made
24144 the decision that far jumps are present (and that hence the link
24145 register will be pushed onto the stack) we cannot go back on it. */
24146 if (cfun->machine->far_jump_used)
24147 return 1;
24148
24149 /* If this function is not being called from the prologue/epilogue
24150 generation code then it must be being called from the
24151 INITIAL_ELIMINATION_OFFSET macro. */
24152 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24153 {
24154 /* In this case we know that we are being asked about the elimination
24155 of the arg pointer register. If that register is not being used,
24156 then there are no arguments on the stack, and we do not have to
24157 worry that a far jump might force the prologue to push the link
24158 register, changing the stack offsets. In this case we can just
24159 return false, since the presence of far jumps in the function will
24160 not affect stack offsets.
24161
24162 If the arg pointer is live (or if it was live, but has now been
24163 eliminated and so set to dead) then we do have to test to see if
24164 the function might contain a far jump. This test can lead to some
24165 false positives, since before reload is completed the length of
24166 branch instructions is not known, so gcc defaults to returning their
24167 longest length, which in turn sets the far jump attribute to true.
24168
24169 A false positive will not result in bad code being generated, but it
24170 will result in a needless push and pop of the link register. We
24171 hope that this does not occur too often.
24172
24173 If we need doubleword stack alignment this could affect the other
24174 elimination offsets so we can't risk getting it wrong. */
24175 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24176 cfun->machine->arg_pointer_live = 1;
24177 else if (!cfun->machine->arg_pointer_live)
24178 return 0;
24179 }
24180
24181 /* We should not change far_jump_used during or after reload, as there is
24182 no chance to change stack frame layout. */
24183 if (reload_in_progress || reload_completed)
24184 return 0;
24185
24186 /* Check to see if the function contains a branch
24187 insn with the far jump attribute set. */
24188 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24189 {
24190 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24191 {
24192 far_jump = true;
24193 }
24194 func_size += get_attr_length (insn);
24195 }
24196
24197 /* The far_jump attribute is always true for thumb1 before the
24198 shorten_branch pass, so checking it before that pass is not
24199 very useful.
24200
24201 The following heuristic tries to estimate more accurately whether
24202 a far jump may finally be used. It is very conservative, as there
24203 is no chance to roll back a decision not to use a far jump.
24204
24205 The Thumb1 long branch offset range is -2048 to 2046. The worst case
24206 is each 2-byte insn being paired with a 4-byte constant pool entry.
24207 Using function size 2048/3 as the threshold is conservative enough. */
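/* Illustrative arithmetic (example numbers only): a function whose insn
   lengths sum to 684 bytes could, in the worst case just described, span
   684 * 3 == 2052 bytes once a 4-byte pool entry is counted against each
   2-byte insn, which is beyond the 2046-byte branch reach; the test below
   therefore treats it as needing far jumps. */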
24208 if (far_jump)
24209 {
24210 if ((func_size * 3) >= 2048)
24211 {
24212 /* Record the fact that we have decided that
24213 the function does use far jumps. */
24214 cfun->machine->far_jump_used = 1;
24215 return 1;
24216 }
24217 }
24218
24219 return 0;
24220 }
24221
24222 /* Return nonzero if FUNC must be entered in ARM mode. */
24223 static bool
24224 is_called_in_ARM_mode (tree func)
24225 {
24226 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24227
24228 /* Ignore the problem about functions whose address is taken. */
24229 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24230 return true;
24231
24232 #ifdef ARM_PE
24233 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24234 #else
24235 return false;
24236 #endif
24237 }
24238
24239 /* Given the stack offsets and register mask in OFFSETS, decide how
24240 many additional registers to push instead of subtracting a constant
24241 from SP. For epilogues the principle is the same except we use pop.
24242 FOR_PROLOGUE indicates which we're generating. */
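/* For illustration (hypothetical frame): if the prologue would emit
   "push {r6, r7, lr}" followed by "sub sp, #8" and r0/r1 are dead at
   entry, returning 2 here lets the caller emit
   "push {r0, r1, r6, r7, lr}" instead, folding the 8-byte allocation
   into the push; the epilogue pops the two extra words into
   call-clobbered registers and discards them. */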
24243 static int
24244 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24245 {
24246 HOST_WIDE_INT amount;
24247 unsigned long live_regs_mask = offsets->saved_regs_mask;
24248 /* Extract a mask of the ones we can give to the Thumb's push/pop
24249 instruction. */
24250 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24251 /* Then count how many other high registers will need to be pushed. */
24252 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24253 int n_free, reg_base, size;
24254
24255 if (!for_prologue && frame_pointer_needed)
24256 amount = offsets->locals_base - offsets->saved_regs;
24257 else
24258 amount = offsets->outgoing_args - offsets->saved_regs;
24259
24260 /* If the stack frame size is 512 exactly, we can save one load
24261 instruction, which should make this a win even when optimizing
24262 for speed. */
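/* With a 512-byte frame, pushing one extra register ((512 - 508) / 4 in
   the formula at the end of this function) leaves 508 bytes to allocate,
   the largest immediate a Thumb-1 SUB SP instruction accepts, so the
   literal-pool load that thumb1_expand_prologue otherwise emits for
   adjustments of 512 or more is avoided. */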
24263 if (!optimize_size && amount != 512)
24264 return 0;
24265
24266 /* Can't do this if there are high registers to push. */
24267 if (high_regs_pushed != 0)
24268 return 0;
24269
24270 /* Shouldn't do it in the prologue if no registers would normally
24271 be pushed at all. In the epilogue, also allow it if we'll have
24272 a pop insn for the PC. */
24273 if (l_mask == 0
24274 && (for_prologue
24275 || TARGET_BACKTRACE
24276 || (live_regs_mask & 1 << LR_REGNUM) == 0
24277 || TARGET_INTERWORK
24278 || crtl->args.pretend_args_size != 0))
24279 return 0;
24280
24281 /* Don't do this if thumb_expand_prologue wants to emit instructions
24282 between the push and the stack frame allocation. */
24283 if (for_prologue
24284 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24285 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24286 return 0;
24287
24288 reg_base = 0;
24289 n_free = 0;
24290 if (!for_prologue)
24291 {
24292 size = arm_size_return_regs ();
24293 reg_base = ARM_NUM_INTS (size);
24294 live_regs_mask >>= reg_base;
24295 }
24296
24297 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24298 && (for_prologue || call_used_regs[reg_base + n_free]))
24299 {
24300 live_regs_mask >>= 1;
24301 n_free++;
24302 }
24303
24304 if (n_free == 0)
24305 return 0;
24306 gcc_assert (amount / 4 * 4 == amount);
24307
24308 if (amount >= 512 && (amount - n_free * 4) < 512)
24309 return (amount - 508) / 4;
24310 if (amount <= n_free * 4)
24311 return amount / 4;
24312 return 0;
24313 }
24314
24315 /* The bits which aren't usefully expanded as rtl. */
24316 const char *
24317 thumb1_unexpanded_epilogue (void)
24318 {
24319 arm_stack_offsets *offsets;
24320 int regno;
24321 unsigned long live_regs_mask = 0;
24322 int high_regs_pushed = 0;
24323 int extra_pop;
24324 int had_to_push_lr;
24325 int size;
24326
24327 if (cfun->machine->return_used_this_function != 0)
24328 return "";
24329
24330 if (IS_NAKED (arm_current_func_type ()))
24331 return "";
24332
24333 offsets = arm_get_frame_offsets ();
24334 live_regs_mask = offsets->saved_regs_mask;
24335 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24336
24337 /* We can deduce the registers used from the function's return value.
24338 This is more reliable than examining df_regs_ever_live_p () because that
24339 will be set if the register is ever used in the function, not just if
24340 the register is used to hold a return value. */
24341 size = arm_size_return_regs ();
24342
24343 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24344 if (extra_pop > 0)
24345 {
24346 unsigned long extra_mask = (1 << extra_pop) - 1;
24347 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24348 }
24349
24350 /* The prolog may have pushed some high registers to use as
24351 work registers. e.g. the testsuite file:
24352 gcc/testsuite/gcc.c-torture/execute/complex-2.c
24353 compiles to produce:
24354 push {r4, r5, r6, r7, lr}
24355 mov r7, r9
24356 mov r6, r8
24357 push {r6, r7}
24358 as part of the prolog. We have to undo that pushing here. */
24359
24360 if (high_regs_pushed)
24361 {
24362 unsigned long mask = live_regs_mask & 0xff;
24363 int next_hi_reg;
24364
24365 /* The available low registers depend on the size of the value we are
24366 returning. */
24367 if (size <= 12)
24368 mask |= 1 << 3;
24369 if (size <= 8)
24370 mask |= 1 << 2;
24371
24372 if (mask == 0)
24373 /* Oh dear! We have no low registers into which we can pop
24374 high registers! */
24375 internal_error
24376 ("no low registers available for popping high registers");
24377
24378 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24379 if (live_regs_mask & (1 << next_hi_reg))
24380 break;
24381
24382 while (high_regs_pushed)
24383 {
24384 /* Find lo register(s) into which the high register(s) can
24385 be popped. */
24386 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24387 {
24388 if (mask & (1 << regno))
24389 high_regs_pushed--;
24390 if (high_regs_pushed == 0)
24391 break;
24392 }
24393
24394 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24395
24396 /* Pop the values into the low register(s). */
24397 thumb_pop (asm_out_file, mask);
24398
24399 /* Move the value(s) into the high registers. */
24400 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24401 {
24402 if (mask & (1 << regno))
24403 {
24404 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24405 regno);
24406
24407 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24408 if (live_regs_mask & (1 << next_hi_reg))
24409 break;
24410 }
24411 }
24412 }
24413 live_regs_mask &= ~0x0f00;
24414 }
24415
24416 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24417 live_regs_mask &= 0xff;
24418
24419 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24420 {
24421 /* Pop the return address into the PC. */
24422 if (had_to_push_lr)
24423 live_regs_mask |= 1 << PC_REGNUM;
24424
24425 /* Either no argument registers were pushed or a backtrace
24426 structure was created which includes an adjusted stack
24427 pointer, so just pop everything. */
24428 if (live_regs_mask)
24429 thumb_pop (asm_out_file, live_regs_mask);
24430
24431 /* We have either just popped the return address into the
24432 PC or it was kept in LR for the entire function.
24433 Note that thumb_pop has already called thumb_exit if the
24434 PC was in the list. */
24435 if (!had_to_push_lr)
24436 thumb_exit (asm_out_file, LR_REGNUM);
24437 }
24438 else
24439 {
24440 /* Pop everything but the return address. */
24441 if (live_regs_mask)
24442 thumb_pop (asm_out_file, live_regs_mask);
24443
24444 if (had_to_push_lr)
24445 {
24446 if (size > 12)
24447 {
24448 /* We have no free low regs, so save one. */
24449 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24450 LAST_ARG_REGNUM);
24451 }
24452
24453 /* Get the return address into a temporary register. */
24454 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24455
24456 if (size > 12)
24457 {
24458 /* Move the return address to lr. */
24459 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24460 LAST_ARG_REGNUM);
24461 /* Restore the low register. */
24462 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24463 IP_REGNUM);
24464 regno = LR_REGNUM;
24465 }
24466 else
24467 regno = LAST_ARG_REGNUM;
24468 }
24469 else
24470 regno = LR_REGNUM;
24471
24472 /* Remove the argument registers that were pushed onto the stack. */
24473 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24474 SP_REGNUM, SP_REGNUM,
24475 crtl->args.pretend_args_size);
24476
24477 thumb_exit (asm_out_file, regno);
24478 }
24479
24480 return "";
24481 }
24482
24483 /* Functions to save and restore machine-specific function data. */
24484 static struct machine_function *
24485 arm_init_machine_status (void)
24486 {
24487 struct machine_function *machine;
24488 machine = ggc_cleared_alloc<machine_function> ();
24489
24490 #if ARM_FT_UNKNOWN != 0
24491 machine->func_type = ARM_FT_UNKNOWN;
24492 #endif
24493 return machine;
24494 }
24495
24496 /* Return an RTX indicating where the return address to the
24497 calling function can be found. */
24498 rtx
24499 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24500 {
24501 if (count != 0)
24502 return NULL_RTX;
24503
24504 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24505 }
24506
24507 /* Do anything needed before RTL is emitted for each function. */
24508 void
24509 arm_init_expanders (void)
24510 {
24511 /* Arrange to initialize and mark the machine per-function status. */
24512 init_machine_status = arm_init_machine_status;
24513
24514 /* This is to stop the combine pass optimizing away the alignment
24515 adjustment of va_arg. */
24516 /* ??? It is claimed that this should not be necessary. */
24517 if (cfun)
24518 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24519 }
24520
24521 /* Check whether FUNC is compiled for a different mode (ARM vs. Thumb) than the current one. */
24522
24523 bool
24524 arm_change_mode_p (tree func)
24525 {
24526 if (TREE_CODE (func) != FUNCTION_DECL)
24527 return false;
24528
24529 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24530
24531 if (!callee_tree)
24532 callee_tree = target_option_default_node;
24533
24534 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24535 int flags = callee_opts->x_target_flags;
24536
24537 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24538 }
24539
24540 /* Like arm_compute_initial_elimination_offset. Simpler because there
24541 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
24542 to point at the base of the local variables after static stack
24543 space for a function has been allocated. */
24544
24545 HOST_WIDE_INT
24546 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24547 {
24548 arm_stack_offsets *offsets;
24549
24550 offsets = arm_get_frame_offsets ();
24551
24552 switch (from)
24553 {
24554 case ARG_POINTER_REGNUM:
24555 switch (to)
24556 {
24557 case STACK_POINTER_REGNUM:
24558 return offsets->outgoing_args - offsets->saved_args;
24559
24560 case FRAME_POINTER_REGNUM:
24561 return offsets->soft_frame - offsets->saved_args;
24562
24563 case ARM_HARD_FRAME_POINTER_REGNUM:
24564 return offsets->saved_regs - offsets->saved_args;
24565
24566 case THUMB_HARD_FRAME_POINTER_REGNUM:
24567 return offsets->locals_base - offsets->saved_args;
24568
24569 default:
24570 gcc_unreachable ();
24571 }
24572 break;
24573
24574 case FRAME_POINTER_REGNUM:
24575 switch (to)
24576 {
24577 case STACK_POINTER_REGNUM:
24578 return offsets->outgoing_args - offsets->soft_frame;
24579
24580 case ARM_HARD_FRAME_POINTER_REGNUM:
24581 return offsets->saved_regs - offsets->soft_frame;
24582
24583 case THUMB_HARD_FRAME_POINTER_REGNUM:
24584 return offsets->locals_base - offsets->soft_frame;
24585
24586 default:
24587 gcc_unreachable ();
24588 }
24589 break;
24590
24591 default:
24592 gcc_unreachable ();
24593 }
24594 }
24595
24596 /* Generate the function's prologue. */
24597
24598 void
24599 thumb1_expand_prologue (void)
24600 {
24601 rtx_insn *insn;
24602
24603 HOST_WIDE_INT amount;
24604 HOST_WIDE_INT size;
24605 arm_stack_offsets *offsets;
24606 unsigned long func_type;
24607 int regno;
24608 unsigned long live_regs_mask;
24609 unsigned long l_mask;
24610 unsigned high_regs_pushed = 0;
24611
24612 func_type = arm_current_func_type ();
24613
24614 /* Naked functions don't have prologues. */
24615 if (IS_NAKED (func_type))
24616 return;
24617
24618 if (IS_INTERRUPT (func_type))
24619 {
24620 error ("interrupt Service Routines cannot be coded in Thumb mode");
24621 return;
24622 }
24623
24624 if (is_called_in_ARM_mode (current_function_decl))
24625 emit_insn (gen_prologue_thumb1_interwork ());
24626
24627 offsets = arm_get_frame_offsets ();
24628 live_regs_mask = offsets->saved_regs_mask;
24629
24630 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24631 l_mask = live_regs_mask & 0x40ff;
24632 /* Then count how many other high registers will need to be pushed. */
24633 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24634
24635 if (crtl->args.pretend_args_size)
24636 {
24637 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24638
24639 if (cfun->machine->uses_anonymous_args)
24640 {
24641 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24642 unsigned long mask;
24643
24644 mask = 1ul << (LAST_ARG_REGNUM + 1);
24645 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24646
24647 insn = thumb1_emit_multi_reg_push (mask, 0);
24648 }
24649 else
24650 {
24651 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24652 stack_pointer_rtx, x));
24653 }
24654 RTX_FRAME_RELATED_P (insn) = 1;
24655 }
24656
24657 if (TARGET_BACKTRACE)
24658 {
24659 HOST_WIDE_INT offset = 0;
24660 unsigned work_register;
24661 rtx work_reg, x, arm_hfp_rtx;
24662
24663 /* We have been asked to create a stack backtrace structure.
24664 The code looks like this:
24665
24666 0 .align 2
24667 0 func:
24668 0 sub SP, #16 Reserve space for 4 registers.
24669 2 push {R7} Push low registers.
24670 4 add R7, SP, #20 Get the stack pointer before the push.
24671 6 str R7, [SP, #8] Store the stack pointer
24672 (before reserving the space).
24673 8 mov R7, PC Get hold of the start of this code + 12.
24674 10 str R7, [SP, #16] Store it.
24675 12 mov R7, FP Get hold of the current frame pointer.
24676 14 str R7, [SP, #4] Store it.
24677 16 mov R7, LR Get hold of the current return address.
24678 18 str R7, [SP, #12] Store it.
24679 20 add R7, SP, #16 Point at the start of the
24680 backtrace structure.
24681 22 mov FP, R7 Put this value into the frame pointer. */
24682
24683 work_register = thumb_find_work_register (live_regs_mask);
24684 work_reg = gen_rtx_REG (SImode, work_register);
24685 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24686
24687 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24688 stack_pointer_rtx, GEN_INT (-16)));
24689 RTX_FRAME_RELATED_P (insn) = 1;
24690
24691 if (l_mask)
24692 {
24693 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24694 RTX_FRAME_RELATED_P (insn) = 1;
24695
24696 offset = bit_count (l_mask) * UNITS_PER_WORD;
24697 }
24698
24699 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24700 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24701
24702 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24703 x = gen_frame_mem (SImode, x);
24704 emit_move_insn (x, work_reg);
24705
24706 /* Make sure that the instruction fetching the PC is in the right place
24707 to calculate "start of backtrace creation code + 12". */
24708 /* ??? The stores using the common WORK_REG ought to be enough to
24709 prevent the scheduler from doing anything weird. Failing that
24710 we could always move all of the following into an UNSPEC_VOLATILE. */
24711 if (l_mask)
24712 {
24713 x = gen_rtx_REG (SImode, PC_REGNUM);
24714 emit_move_insn (work_reg, x);
24715
24716 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24717 x = gen_frame_mem (SImode, x);
24718 emit_move_insn (x, work_reg);
24719
24720 emit_move_insn (work_reg, arm_hfp_rtx);
24721
24722 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24723 x = gen_frame_mem (SImode, x);
24724 emit_move_insn (x, work_reg);
24725 }
24726 else
24727 {
24728 emit_move_insn (work_reg, arm_hfp_rtx);
24729
24730 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24731 x = gen_frame_mem (SImode, x);
24732 emit_move_insn (x, work_reg);
24733
24734 x = gen_rtx_REG (SImode, PC_REGNUM);
24735 emit_move_insn (work_reg, x);
24736
24737 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24738 x = gen_frame_mem (SImode, x);
24739 emit_move_insn (x, work_reg);
24740 }
24741
24742 x = gen_rtx_REG (SImode, LR_REGNUM);
24743 emit_move_insn (work_reg, x);
24744
24745 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24746 x = gen_frame_mem (SImode, x);
24747 emit_move_insn (x, work_reg);
24748
24749 x = GEN_INT (offset + 12);
24750 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24751
24752 emit_move_insn (arm_hfp_rtx, work_reg);
24753 }
24754 /* Optimization: If we are not pushing any low registers but we are going
24755 to push some high registers then delay our first push. This will just
24756 be a push of LR and we can combine it with the push of the first high
24757 register. */
24758 else if ((l_mask & 0xff) != 0
24759 || (high_regs_pushed == 0 && l_mask))
24760 {
24761 unsigned long mask = l_mask;
24762 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24763 insn = thumb1_emit_multi_reg_push (mask, mask);
24764 RTX_FRAME_RELATED_P (insn) = 1;
24765 }
24766
24767 if (high_regs_pushed)
24768 {
24769 unsigned pushable_regs;
24770 unsigned next_hi_reg;
24771 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24772 : crtl->args.info.nregs;
24773 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24774
24775 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24776 if (live_regs_mask & (1 << next_hi_reg))
24777 break;
24778
24779 /* Here we need to mask out registers used for passing arguments
24780 even if they could be pushed. This avoids using them to stash the
24781 high registers, since such a stash may clobber live argument values. */
24782 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24783
24784 if (pushable_regs == 0)
24785 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24786
24787 while (high_regs_pushed > 0)
24788 {
24789 unsigned long real_regs_mask = 0;
24790
24791 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24792 {
24793 if (pushable_regs & (1 << regno))
24794 {
24795 emit_move_insn (gen_rtx_REG (SImode, regno),
24796 gen_rtx_REG (SImode, next_hi_reg));
24797
24798 high_regs_pushed --;
24799 real_regs_mask |= (1 << next_hi_reg);
24800
24801 if (high_regs_pushed)
24802 {
24803 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24804 next_hi_reg --)
24805 if (live_regs_mask & (1 << next_hi_reg))
24806 break;
24807 }
24808 else
24809 {
24810 pushable_regs &= ~((1 << regno) - 1);
24811 break;
24812 }
24813 }
24814 }
24815
24816 /* If we had to find a work register and we have not yet
24817 saved the LR then add it to the list of regs to push. */
24818 if (l_mask == (1 << LR_REGNUM))
24819 {
24820 pushable_regs |= l_mask;
24821 real_regs_mask |= l_mask;
24822 l_mask = 0;
24823 }
24824
24825 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24826 RTX_FRAME_RELATED_P (insn) = 1;
24827 }
24828 }
24829
24830 /* Load the pic register before setting the frame pointer,
24831 so we can use r7 as a temporary work register. */
24832 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24833 arm_load_pic_register (live_regs_mask);
24834
24835 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24836 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24837 stack_pointer_rtx);
24838
24839 size = offsets->outgoing_args - offsets->saved_args;
24840 if (flag_stack_usage_info)
24841 current_function_static_stack_size = size;
24842
24843 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24844 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24845 sorry ("-fstack-check=specific for Thumb-1");
24846
24847 amount = offsets->outgoing_args - offsets->saved_regs;
24848 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24849 if (amount)
24850 {
24851 if (amount < 512)
24852 {
24853 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24854 GEN_INT (- amount)));
24855 RTX_FRAME_RELATED_P (insn) = 1;
24856 }
24857 else
24858 {
24859 rtx reg, dwarf;
24860
24861 /* The stack decrement is too big for an immediate value in a single
24862 insn. In theory we could issue multiple subtracts, but after
24863 three of them it becomes more space efficient to place the full
24864 value in the constant pool and load into a register. (Also the
24865 ARM debugger really likes to see only one stack decrement per
24866 function). So instead we look for a scratch register into which
24867 we can load the decrement, and then we subtract this from the
24868 stack pointer. Unfortunately on the thumb the only available
24869 scratch registers are the argument registers, and we cannot use
24870 these as they may hold arguments to the function. Instead we
24871 attempt to locate a call preserved register which is used by this
24872 function. If we can find one, then we know that it will have
24873 been pushed at the start of the prologue and so we can corrupt
24874 it now. */
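/* For illustration only (hypothetical values): with amount == 1024 and
   r4 among the registers pushed above, the sequence emitted below is
   roughly
   ldr r4, =-1024
   add sp, sp, r4
   with a REG_FRAME_RELATED_EXPR note recording the equivalent
   sp = sp - 1024 adjustment for the unwinder. */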
24875 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24876 if (live_regs_mask & (1 << regno))
24877 break;
24878
24879 gcc_assert(regno <= LAST_LO_REGNUM);
24880
24881 reg = gen_rtx_REG (SImode, regno);
24882
24883 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24884
24885 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24886 stack_pointer_rtx, reg));
24887
24888 dwarf = gen_rtx_SET (stack_pointer_rtx,
24889 plus_constant (Pmode, stack_pointer_rtx,
24890 -amount));
24891 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24892 RTX_FRAME_RELATED_P (insn) = 1;
24893 }
24894 }
24895
24896 if (frame_pointer_needed)
24897 thumb_set_frame_pointer (offsets);
24898
24899 /* If we are profiling, make sure no instructions are scheduled before
24900 the call to mcount. Similarly if the user has requested no
24901 scheduling in the prolog. Similarly if we want non-call exceptions
24902 using the EABI unwinder, to prevent faulting instructions from being
24903 swapped with a stack adjustment. */
24904 if (crtl->profile || !TARGET_SCHED_PROLOG
24905 || (arm_except_unwind_info (&global_options) == UI_TARGET
24906 && cfun->can_throw_non_call_exceptions))
24907 emit_insn (gen_blockage ());
24908
24909 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24910 if (live_regs_mask & 0xff)
24911 cfun->machine->lr_save_eliminated = 0;
24912 }
24913
24914 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
24915 POP instruction can be generated. LR should be replaced by PC. All
24916 the checks required are already done by USE_RETURN_INSN (). Hence,
24917 all we really need to check here is whether a single register or
24918 multiple registers are to be popped for the return. */
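/* For illustration (not a literal transcript of the output): a function
   that only pushed LR returns with a single "pop {pc}", while a larger
   saved set such as {r4-r7, lr} goes through arm_emit_multi_reg_pop and
   becomes "pop {r4-r7, pc}". */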
24919 void
24920 thumb2_expand_return (bool simple_return)
24921 {
24922 int i, num_regs;
24923 unsigned long saved_regs_mask;
24924 arm_stack_offsets *offsets;
24925
24926 offsets = arm_get_frame_offsets ();
24927 saved_regs_mask = offsets->saved_regs_mask;
24928
24929 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24930 if (saved_regs_mask & (1 << i))
24931 num_regs++;
24932
24933 if (!simple_return && saved_regs_mask)
24934 {
24935 if (num_regs == 1)
24936 {
24937 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24938 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24939 rtx addr = gen_rtx_MEM (SImode,
24940 gen_rtx_POST_INC (SImode,
24941 stack_pointer_rtx));
24942 set_mem_alias_set (addr, get_frame_alias_set ());
24943 XVECEXP (par, 0, 0) = ret_rtx;
24944 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
24945 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24946 emit_jump_insn (par);
24947 }
24948 else
24949 {
24950 saved_regs_mask &= ~ (1 << LR_REGNUM);
24951 saved_regs_mask |= (1 << PC_REGNUM);
24952 arm_emit_multi_reg_pop (saved_regs_mask);
24953 }
24954 }
24955 else
24956 {
24957 emit_jump_insn (simple_return_rtx);
24958 }
24959 }
24960
24961 void
24962 thumb1_expand_epilogue (void)
24963 {
24964 HOST_WIDE_INT amount;
24965 arm_stack_offsets *offsets;
24966 int regno;
24967
24968 /* Naked functions don't have epilogues. */
24969 if (IS_NAKED (arm_current_func_type ()))
24970 return;
24971
24972 offsets = arm_get_frame_offsets ();
24973 amount = offsets->outgoing_args - offsets->saved_regs;
24974
24975 if (frame_pointer_needed)
24976 {
24977 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24978 amount = offsets->locals_base - offsets->saved_regs;
24979 }
24980 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24981
24982 gcc_assert (amount >= 0);
24983 if (amount)
24984 {
24985 emit_insn (gen_blockage ());
24986
24987 if (amount < 512)
24988 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24989 GEN_INT (amount)));
24990 else
24991 {
24992 /* r3 is always free in the epilogue. */
24993 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24994
24995 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24996 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24997 }
24998 }
24999
25000 /* Emit a USE (stack_pointer_rtx), so that
25001 the stack adjustment will not be deleted. */
25002 emit_insn (gen_force_register_use (stack_pointer_rtx));
25003
25004 if (crtl->profile || !TARGET_SCHED_PROLOG)
25005 emit_insn (gen_blockage ());
25006
25007 /* Emit a clobber for each register that will be restored in the epilogue,
25008 so that flow2 will get register lifetimes correct. */
25009 for (regno = 0; regno < 13; regno++)
25010 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25011 emit_clobber (gen_rtx_REG (SImode, regno));
25012
25013 if (! df_regs_ever_live_p (LR_REGNUM))
25014 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25015 }
25016
25017 /* Epilogue code for APCS frame. */
25018 static void
25019 arm_expand_epilogue_apcs_frame (bool really_return)
25020 {
25021 unsigned long func_type;
25022 unsigned long saved_regs_mask;
25023 int num_regs = 0;
25024 int i;
25025 int floats_from_frame = 0;
25026 arm_stack_offsets *offsets;
25027
25028 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25029 func_type = arm_current_func_type ();
25030
25031 /* Get frame offsets for ARM. */
25032 offsets = arm_get_frame_offsets ();
25033 saved_regs_mask = offsets->saved_regs_mask;
25034
25035 /* Find the offset of the floating-point save area in the frame. */
25036 floats_from_frame
25037 = (offsets->saved_args
25038 + arm_compute_static_chain_stack_bytes ()
25039 - offsets->frame);
25040
25041 /* Compute how many core registers are saved and how far away the floats are. */
25042 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25043 if (saved_regs_mask & (1 << i))
25044 {
25045 num_regs++;
25046 floats_from_frame += 4;
25047 }
25048
25049 if (TARGET_HARD_FLOAT && TARGET_VFP)
25050 {
25051 int start_reg;
25052 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25053
25054 /* The offset is from IP_REGNUM. */
25055 int saved_size = arm_get_vfp_saved_size ();
25056 if (saved_size > 0)
25057 {
25058 rtx_insn *insn;
25059 floats_from_frame += saved_size;
25060 insn = emit_insn (gen_addsi3 (ip_rtx,
25061 hard_frame_pointer_rtx,
25062 GEN_INT (-floats_from_frame)));
25063 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25064 ip_rtx, hard_frame_pointer_rtx);
25065 }
25066
25067 /* Generate VFP register multi-pop. */
25068 start_reg = FIRST_VFP_REGNUM;
25069
25070 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25071 /* Look for a case where a reg does not need restoring. */
25072 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25073 && (!df_regs_ever_live_p (i + 1)
25074 || call_used_regs[i + 1]))
25075 {
25076 if (start_reg != i)
25077 arm_emit_vfp_multi_reg_pop (start_reg,
25078 (i - start_reg) / 2,
25079 gen_rtx_REG (SImode,
25080 IP_REGNUM));
25081 start_reg = i + 2;
25082 }
25083
25084 /* Restore the remaining regs that we have discovered (or possibly
25085 even all of them, if the conditional in the for loop never
25086 fired). */
25087 if (start_reg != i)
25088 arm_emit_vfp_multi_reg_pop (start_reg,
25089 (i - start_reg) / 2,
25090 gen_rtx_REG (SImode, IP_REGNUM));
25091 }
25092
25093 if (TARGET_IWMMXT)
25094 {
25095 /* The frame pointer is guaranteed to be non-double-word aligned, as
25096 it is set to double-word-aligned old_stack_pointer - 4. */
25097 rtx_insn *insn;
25098 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25099
25100 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25101 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25102 {
25103 rtx addr = gen_frame_mem (V2SImode,
25104 plus_constant (Pmode, hard_frame_pointer_rtx,
25105 - lrm_count * 4));
25106 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25107 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25108 gen_rtx_REG (V2SImode, i),
25109 NULL_RTX);
25110 lrm_count += 2;
25111 }
25112 }
25113
25114 /* saved_regs_mask should contain IP which contains old stack pointer
25115 at the time of activation creation. Since SP and IP are adjacent registers,
25116 we can restore the value directly into SP. */
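/* Concretely: moving the mask bit from IP (r12) to SP (r13) keeps the
   same position in the LDM register list, since no other register lies
   between r12 and r13, so the stack slot holding the saved IP value is
   reloaded straight into SP. */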
25117 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25118 saved_regs_mask &= ~(1 << IP_REGNUM);
25119 saved_regs_mask |= (1 << SP_REGNUM);
25120
25121 /* There are two registers left in saved_regs_mask - LR and PC. We
25122 only need to restore LR (the return address), but to
25123 save time we can load it directly into PC, unless we need a
25124 special function exit sequence, or we are not really returning. */
25125 if (really_return
25126 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25127 && !crtl->calls_eh_return)
25128 /* Delete LR from the register mask, so that LR on
25129 the stack is loaded into the PC in the register mask. */
25130 saved_regs_mask &= ~(1 << LR_REGNUM);
25131 else
25132 saved_regs_mask &= ~(1 << PC_REGNUM);
25133
25134 num_regs = bit_count (saved_regs_mask);
25135 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25136 {
25137 rtx_insn *insn;
25138 emit_insn (gen_blockage ());
25139 /* Unwind the stack to just below the saved registers. */
25140 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25141 hard_frame_pointer_rtx,
25142 GEN_INT (- 4 * num_regs)));
25143
25144 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25145 stack_pointer_rtx, hard_frame_pointer_rtx);
25146 }
25147
25148 arm_emit_multi_reg_pop (saved_regs_mask);
25149
25150 if (IS_INTERRUPT (func_type))
25151 {
25152 /* Interrupt handlers will have pushed the
25153 IP onto the stack, so restore it now. */
25154 rtx_insn *insn;
25155 rtx addr = gen_rtx_MEM (SImode,
25156 gen_rtx_POST_INC (SImode,
25157 stack_pointer_rtx));
25158 set_mem_alias_set (addr, get_frame_alias_set ());
25159 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25160 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25161 gen_rtx_REG (SImode, IP_REGNUM),
25162 NULL_RTX);
25163 }
25164
25165 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25166 return;
25167
25168 if (crtl->calls_eh_return)
25169 emit_insn (gen_addsi3 (stack_pointer_rtx,
25170 stack_pointer_rtx,
25171 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25172
25173 if (IS_STACKALIGN (func_type))
25174 /* Restore the original stack pointer. Before prologue, the stack was
25175 realigned and the original stack pointer saved in r0. For details,
25176 see comment in arm_expand_prologue. */
25177 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25178
25179 emit_jump_insn (simple_return_rtx);
25180 }
25181
25182 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25183 function is not a sibcall. */
25184 void
25185 arm_expand_epilogue (bool really_return)
25186 {
25187 unsigned long func_type;
25188 unsigned long saved_regs_mask;
25189 int num_regs = 0;
25190 int i;
25191 int amount;
25192 arm_stack_offsets *offsets;
25193
25194 func_type = arm_current_func_type ();
25195
25196 /* Naked functions don't have an epilogue. Hence, generate a return pattern
25197 and let output_return_instruction take care of any instruction emission. */
25198 if (IS_NAKED (func_type)
25199 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25200 {
25201 if (really_return)
25202 emit_jump_insn (simple_return_rtx);
25203 return;
25204 }
25205
25206 /* If we are throwing an exception, then we really must be doing a
25207 return, so we can't tail-call. */
25208 gcc_assert (!crtl->calls_eh_return || really_return);
25209
25210 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25211 {
25212 arm_expand_epilogue_apcs_frame (really_return);
25213 return;
25214 }
25215
25216 /* Get frame offsets for ARM. */
25217 offsets = arm_get_frame_offsets ();
25218 saved_regs_mask = offsets->saved_regs_mask;
25219 num_regs = bit_count (saved_regs_mask);
25220
25221 if (frame_pointer_needed)
25222 {
25223 rtx_insn *insn;
25224 /* Restore stack pointer if necessary. */
25225 if (TARGET_ARM)
25226 {
25227 /* In ARM mode, frame pointer points to first saved register.
25228 Restore stack pointer to last saved register. */
25229 amount = offsets->frame - offsets->saved_regs;
25230
25231 /* Force out any pending memory operations that reference stacked data
25232 before stack de-allocation occurs. */
25233 emit_insn (gen_blockage ());
25234 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25235 hard_frame_pointer_rtx,
25236 GEN_INT (amount)));
25237 arm_add_cfa_adjust_cfa_note (insn, amount,
25238 stack_pointer_rtx,
25239 hard_frame_pointer_rtx);
25240
25241 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25242 deleted. */
25243 emit_insn (gen_force_register_use (stack_pointer_rtx));
25244 }
25245 else
25246 {
25247 /* In Thumb-2 mode, the frame pointer points to the last saved
25248 register. */
25249 amount = offsets->locals_base - offsets->saved_regs;
25250 if (amount)
25251 {
25252 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25253 hard_frame_pointer_rtx,
25254 GEN_INT (amount)));
25255 arm_add_cfa_adjust_cfa_note (insn, amount,
25256 hard_frame_pointer_rtx,
25257 hard_frame_pointer_rtx);
25258 }
25259
25260 /* Force out any pending memory operations that reference stacked data
25261 before stack de-allocation occurs. */
25262 emit_insn (gen_blockage ());
25263 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25264 hard_frame_pointer_rtx));
25265 arm_add_cfa_adjust_cfa_note (insn, 0,
25266 stack_pointer_rtx,
25267 hard_frame_pointer_rtx);
25268 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25269 deleted. */
25270 emit_insn (gen_force_register_use (stack_pointer_rtx));
25271 }
25272 }
25273 else
25274 {
25275 /* Pop off outgoing args and local frame to adjust stack pointer to
25276 last saved register. */
25277 amount = offsets->outgoing_args - offsets->saved_regs;
25278 if (amount)
25279 {
25280 rtx_insn *tmp;
25281 /* Force out any pending memory operations that reference stacked data
25282 before stack de-allocation occurs. */
25283 emit_insn (gen_blockage ());
25284 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25285 stack_pointer_rtx,
25286 GEN_INT (amount)));
25287 arm_add_cfa_adjust_cfa_note (tmp, amount,
25288 stack_pointer_rtx, stack_pointer_rtx);
25289 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25290 not deleted. */
25291 emit_insn (gen_force_register_use (stack_pointer_rtx));
25292 }
25293 }
25294
25295 if (TARGET_HARD_FLOAT && TARGET_VFP)
25296 {
25297 /* Generate VFP register multi-pop. */
25298 int end_reg = LAST_VFP_REGNUM + 1;
25299
25300 /* Scan the registers in reverse order. We need to match
25301 any groupings made in the prologue and generate matching
25302 vldm operations. The need to match groups is because,
25303 unlike pop, vldm can only do consecutive regs. */
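/* Hypothetical example of the grouping constraint: if the prologue saved
   d8-d9 and d11-d12 but not d10, the loop below emits two separate
   multi-pops, one per consecutive group, because vldm (unlike a core
   register pop) can only name a consecutive run of registers. */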
25304 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25305 /* Look for a case where a reg does not need restoring. */
25306 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25307 && (!df_regs_ever_live_p (i + 1)
25308 || call_used_regs[i + 1]))
25309 {
25310 /* Restore the regs discovered so far (from reg+2 to
25311 end_reg). */
25312 if (end_reg > i + 2)
25313 arm_emit_vfp_multi_reg_pop (i + 2,
25314 (end_reg - (i + 2)) / 2,
25315 stack_pointer_rtx);
25316 end_reg = i;
25317 }
25318
25319 /* Restore the remaining regs that we have discovered (or possibly
25320 even all of them, if the conditional in the for loop never
25321 fired). */
25322 if (end_reg > i + 2)
25323 arm_emit_vfp_multi_reg_pop (i + 2,
25324 (end_reg - (i + 2)) / 2,
25325 stack_pointer_rtx);
25326 }
25327
25328 if (TARGET_IWMMXT)
25329 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25330 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25331 {
25332 rtx_insn *insn;
25333 rtx addr = gen_rtx_MEM (V2SImode,
25334 gen_rtx_POST_INC (SImode,
25335 stack_pointer_rtx));
25336 set_mem_alias_set (addr, get_frame_alias_set ());
25337 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25338 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25339 gen_rtx_REG (V2SImode, i),
25340 NULL_RTX);
25341 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25342 stack_pointer_rtx, stack_pointer_rtx);
25343 }
25344
25345 if (saved_regs_mask)
25346 {
25347 rtx insn;
25348 bool return_in_pc = false;
25349
25350 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25351 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25352 && !IS_STACKALIGN (func_type)
25353 && really_return
25354 && crtl->args.pretend_args_size == 0
25355 && saved_regs_mask & (1 << LR_REGNUM)
25356 && !crtl->calls_eh_return)
25357 {
25358 saved_regs_mask &= ~(1 << LR_REGNUM);
25359 saved_regs_mask |= (1 << PC_REGNUM);
25360 return_in_pc = true;
25361 }
25362
25363 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25364 {
25365 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25366 if (saved_regs_mask & (1 << i))
25367 {
25368 rtx addr = gen_rtx_MEM (SImode,
25369 gen_rtx_POST_INC (SImode,
25370 stack_pointer_rtx));
25371 set_mem_alias_set (addr, get_frame_alias_set ());
25372
25373 if (i == PC_REGNUM)
25374 {
25375 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25376 XVECEXP (insn, 0, 0) = ret_rtx;
25377 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25378 addr);
25379 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25380 insn = emit_jump_insn (insn);
25381 }
25382 else
25383 {
25384 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25385 addr));
25386 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25387 gen_rtx_REG (SImode, i),
25388 NULL_RTX);
25389 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25390 stack_pointer_rtx,
25391 stack_pointer_rtx);
25392 }
25393 }
25394 }
25395 else
25396 {
25397 if (TARGET_LDRD
25398 && current_tune->prefer_ldrd_strd
25399 && !optimize_function_for_size_p (cfun))
25400 {
25401 if (TARGET_THUMB2)
25402 thumb2_emit_ldrd_pop (saved_regs_mask);
25403 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25404 arm_emit_ldrd_pop (saved_regs_mask);
25405 else
25406 arm_emit_multi_reg_pop (saved_regs_mask);
25407 }
25408 else
25409 arm_emit_multi_reg_pop (saved_regs_mask);
25410 }
25411
25412 if (return_in_pc)
25413 return;
25414 }
25415
25416 amount
25417 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25418 if (amount)
25419 {
25420 int i, j;
25421 rtx dwarf = NULL_RTX;
25422 rtx_insn *tmp =
25423 emit_insn (gen_addsi3 (stack_pointer_rtx,
25424 stack_pointer_rtx,
25425 GEN_INT (amount)));
25426
25427 RTX_FRAME_RELATED_P (tmp) = 1;
25428
25429 if (cfun->machine->uses_anonymous_args)
25430 {
25431 /* Restore pretend args. See arm_expand_prologue for how the
25432 pretend args are saved on the stack. */
25433 int num_regs = crtl->args.pretend_args_size / 4;
25434 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
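/* e.g. with 8 bytes of pretend args, num_regs == 2 and the mask becomes
   (0xf0 >> 2) & 0xf == 0xc, i.e. the r2 and r3 slots that the prologue
   pushed. */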
25435 for (j = 0, i = 0; j < num_regs; i++)
25436 if (saved_regs_mask & (1 << i))
25437 {
25438 rtx reg = gen_rtx_REG (SImode, i);
25439 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25440 j++;
25441 }
25442 REG_NOTES (tmp) = dwarf;
25443 }
25444 arm_add_cfa_adjust_cfa_note (tmp, amount,
25445 stack_pointer_rtx, stack_pointer_rtx);
25446 }
25447
25448 if (!really_return)
25449 return;
25450
25451 if (crtl->calls_eh_return)
25452 emit_insn (gen_addsi3 (stack_pointer_rtx,
25453 stack_pointer_rtx,
25454 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25455
25456 if (IS_STACKALIGN (func_type))
25457 /* Restore the original stack pointer. Before prologue, the stack was
25458 realigned and the original stack pointer saved in r0. For details,
25459 see comment in arm_expand_prologue. */
25460 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25461
25462 emit_jump_insn (simple_return_rtx);
25463 }
25464
25465 /* Implementation of insn prologue_thumb1_interwork. This is the first
25466 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25467
25468 const char *
25469 thumb1_output_interwork (void)
25470 {
25471 const char * name;
25472 FILE *f = asm_out_file;
25473
25474 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25475 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25476 == SYMBOL_REF);
25477 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25478
25479 /* Generate code sequence to switch us into Thumb mode. */
25480 /* The .code 32 directive has already been emitted by
25481 ASM_DECLARE_FUNCTION_NAME. */
25482 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25483 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25484
25485 /* Generate a label, so that the debugger will notice the
25486 change in instruction sets. This label is also used by
25487 the assembler to bypass the ARM code when this function
25488 is called from a Thumb encoded function elsewhere in the
25489 same file. Hence the definition of STUB_NAME here must
25490 agree with the definition in gas/config/tc-arm.c. */
25491
25492 #define STUB_NAME ".real_start_of"
25493
25494 fprintf (f, "\t.code\t16\n");
25495 #ifdef ARM_PE
25496 if (arm_dllexport_name_p (name))
25497 name = arm_strip_name_encoding (name);
25498 #endif
25499 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25500 fprintf (f, "\t.thumb_func\n");
25501 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25502
25503 return "";
25504 }
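/* Illustrative sketch only: for a function `foo' (and assuming an empty
   user label prefix) the sequence above comes out roughly as

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_offoo
	.thumb_func
   .real_start_offoo:

   so a caller in ARM mode lands on the ORR/BX pair and switches state,
   while Thumb callers in the same file can branch straight to the
   .real_start_of label and skip the ARM stub. */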
25505
25506 /* Handle the case of a double word load into a low register from
25507 a computed memory address. The computed address may involve a
25508 register which is overwritten by the load. */
25509 const char *
25510 thumb_load_double_from_address (rtx *operands)
25511 {
25512 rtx addr;
25513 rtx base;
25514 rtx offset;
25515 rtx arg1;
25516 rtx arg2;
25517
25518 gcc_assert (REG_P (operands[0]));
25519 gcc_assert (MEM_P (operands[1]));
25520
25521 /* Get the memory address. */
25522 addr = XEXP (operands[1], 0);
25523
25524 /* Work out how the memory address is computed. */
25525 switch (GET_CODE (addr))
25526 {
25527 case REG:
25528 operands[2] = adjust_address (operands[1], SImode, 4);
25529
25530 if (REGNO (operands[0]) == REGNO (addr))
25531 {
25532 output_asm_insn ("ldr\t%H0, %2", operands);
25533 output_asm_insn ("ldr\t%0, %1", operands);
25534 }
25535 else
25536 {
25537 output_asm_insn ("ldr\t%0, %1", operands);
25538 output_asm_insn ("ldr\t%H0, %2", operands);
25539 }
25540 break;
25541
25542 case CONST:
25543 /* Compute <address> + 4 for the high order load. */
25544 operands[2] = adjust_address (operands[1], SImode, 4);
25545
25546 output_asm_insn ("ldr\t%0, %1", operands);
25547 output_asm_insn ("ldr\t%H0, %2", operands);
25548 break;
25549
25550 case PLUS:
25551 arg1 = XEXP (addr, 0);
25552 arg2 = XEXP (addr, 1);
25553
25554 if (CONSTANT_P (arg1))
25555 base = arg2, offset = arg1;
25556 else
25557 base = arg1, offset = arg2;
25558
25559 gcc_assert (REG_P (base));
25560
25561 /* Catch the case of <address> = <reg> + <reg>. */
25562 if (REG_P (offset))
25563 {
25564 int reg_offset = REGNO (offset);
25565 int reg_base = REGNO (base);
25566 int reg_dest = REGNO (operands[0]);
25567
25568 /* Add the base and offset registers together into the
25569 higher destination register. */
25570 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25571 reg_dest + 1, reg_base, reg_offset);
25572
25573 /* Load the lower destination register from the address in
25574 the higher destination register. */
25575 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25576 reg_dest, reg_dest + 1);
25577
25578 /* Load the higher destination register from its own address
25579 plus 4. */
25580 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25581 reg_dest + 1, reg_dest + 1);
25582 }
25583 else
25584 {
25585 /* Compute <address> + 4 for the high order load. */
25586 operands[2] = adjust_address (operands[1], SImode, 4);
25587
25588 /* If the computed address is held in the low order register
25589 then load the high order register first, otherwise always
25590 load the low order register first. */
25591 if (REGNO (operands[0]) == REGNO (base))
25592 {
25593 output_asm_insn ("ldr\t%H0, %2", operands);
25594 output_asm_insn ("ldr\t%0, %1", operands);
25595 }
25596 else
25597 {
25598 output_asm_insn ("ldr\t%0, %1", operands);
25599 output_asm_insn ("ldr\t%H0, %2", operands);
25600 }
25601 }
25602 break;
25603
25604 case LABEL_REF:
25605 /* With no registers to worry about we can just load the value
25606 directly. */
25607 operands[2] = adjust_address (operands[1], SImode, 4);
25608
25609 output_asm_insn ("ldr\t%H0, %2", operands);
25610 output_asm_insn ("ldr\t%0, %1", operands);
25611 break;
25612
25613 default:
25614 gcc_unreachable ();
25615 }
25616
25617 return "";
25618 }
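/* A sketch of the register + register case above, assuming the destination
   pair is r0/r1 and the address is r3 + r4:

	add	r1, r3, r4	@ build the address in the high half
	ldr	r0, [r1, #0]	@ low word first; r1 still holds the address
	ldr	r1, [r1, #4]	@ high word last, overwriting the address

   Loading the high half last means the register holding the computed
   address can be clobbered without losing the second word. */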
25619
25620 const char *
25621 thumb_output_move_mem_multiple (int n, rtx *operands)
25622 {
25623 switch (n)
25624 {
25625 case 2:
25626 if (REGNO (operands[4]) > REGNO (operands[5]))
25627 std::swap (operands[4], operands[5]);
25628
25629 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25630 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25631 break;
25632
25633 case 3:
25634 if (REGNO (operands[4]) > REGNO (operands[5]))
25635 std::swap (operands[4], operands[5]);
25636 if (REGNO (operands[5]) > REGNO (operands[6]))
25637 std::swap (operands[5], operands[6]);
25638 if (REGNO (operands[4]) > REGNO (operands[5]))
25639 std::swap (operands[4], operands[5]);
25640
25641 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25642 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25643 break;
25644
25645 default:
25646 gcc_unreachable ();
25647 }
25648
25649 return "";
25650 }
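/* Worked example (illustrative): for n == 3 with operands[4..6] being
   r5, r3, r4, the three swaps above sort them into r3, r4, r5 so the
   register list is ascending, and the output is

	ldmia	r1!, {r3, r4, r5}
	stmia	r0!, {r3, r4, r5}

   with the pointers in operands[1] and operands[0] post-incremented. */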
25651
25652 /* Output a call-via instruction for thumb state. */
25653 const char *
25654 thumb_call_via_reg (rtx reg)
25655 {
25656 int regno = REGNO (reg);
25657 rtx *labelp;
25658
25659 gcc_assert (regno < LR_REGNUM);
25660
25661 /* If we are in the normal text section we can use a single instance
25662 per compilation unit. If we are doing function sections, then we need
25663 an entry per section, since we can't rely on reachability. */
25664 if (in_section == text_section)
25665 {
25666 thumb_call_reg_needed = 1;
25667
25668 if (thumb_call_via_label[regno] == NULL)
25669 thumb_call_via_label[regno] = gen_label_rtx ();
25670 labelp = thumb_call_via_label + regno;
25671 }
25672 else
25673 {
25674 if (cfun->machine->call_via[regno] == NULL)
25675 cfun->machine->call_via[regno] = gen_label_rtx ();
25676 labelp = cfun->machine->call_via + regno;
25677 }
25678
25679 output_asm_insn ("bl\t%a0", labelp);
25680 return "";
25681 }
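/* Rough shape of the output, assuming the call goes via r3 and the label
   allocated for it happens to be .L42 (the number is arbitrary):

	bl	.L42		@ emitted here, in place of an indirect call
	...
   .L42:			@ emitted later by arm_file_end
	bx	r3

   When we are in the normal text section, one such trampoline per
   register is shared by the whole compilation unit. */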
25682
25683 /* Routines for generating rtl. */
25684 void
25685 thumb_expand_movmemqi (rtx *operands)
25686 {
25687 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25688 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25689 HOST_WIDE_INT len = INTVAL (operands[2]);
25690 HOST_WIDE_INT offset = 0;
25691
25692 while (len >= 12)
25693 {
25694 emit_insn (gen_movmem12b (out, in, out, in));
25695 len -= 12;
25696 }
25697
25698 if (len >= 8)
25699 {
25700 emit_insn (gen_movmem8b (out, in, out, in));
25701 len -= 8;
25702 }
25703
25704 if (len >= 4)
25705 {
25706 rtx reg = gen_reg_rtx (SImode);
25707 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25708 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25709 len -= 4;
25710 offset += 4;
25711 }
25712
25713 if (len >= 2)
25714 {
25715 rtx reg = gen_reg_rtx (HImode);
25716 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25717 plus_constant (Pmode, in,
25718 offset))));
25719 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25720 offset)),
25721 reg));
25722 len -= 2;
25723 offset += 2;
25724 }
25725
25726 if (len)
25727 {
25728 rtx reg = gen_reg_rtx (QImode);
25729 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25730 plus_constant (Pmode, in,
25731 offset))));
25732 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25733 offset)),
25734 reg));
25735 }
25736 }
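/* Worked example (illustrative): for a 23-byte copy the loop above emits
   one 12-byte block move and one 8-byte block move, after which len == 3
   falls through the word test and is finished with one halfword and one
   byte move (12 + 8 + 2 + 1 = 23). The block-move patterns post-increment
   the pointers, which is why only the tail moves need explicit offsets. */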
25737
25738 void
25739 thumb_reload_out_hi (rtx *operands)
25740 {
25741 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25742 }
25743
25744 /* Handle reading a half-word from memory during reload. */
25745 void
25746 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25747 {
25748 gcc_unreachable ();
25749 }
25750
25751 /* Return the length of a function name prefix
25752 that starts with the character 'c'. */
25753 static int
25754 arm_get_strip_length (int c)
25755 {
25756 switch (c)
25757 {
25758 ARM_NAME_ENCODING_LENGTHS
25759 default: return 0;
25760 }
25761 }
25762
25763 /* Return a pointer to a function's name with any
25764 and all prefix encodings stripped from it. */
25765 const char *
25766 arm_strip_name_encoding (const char *name)
25767 {
25768 int skip;
25769
25770 while ((skip = arm_get_strip_length (* name)))
25771 name += skip;
25772
25773 return name;
25774 }
25775
25776 /* If there is a '*' anywhere in the name's prefix, then
25777 emit the stripped name verbatim, otherwise prepend an
25778 underscore if leading underscores are being used. */
25779 void
25780 arm_asm_output_labelref (FILE *stream, const char *name)
25781 {
25782 int skip;
25783 int verbatim = 0;
25784
25785 while ((skip = arm_get_strip_length (* name)))
25786 {
25787 verbatim |= (*name == '*');
25788 name += skip;
25789 }
25790
25791 if (verbatim)
25792 fputs (name, stream);
25793 else
25794 asm_fprintf (stream, "%U%s", name);
25795 }
25796
25797 /* This function is used to emit an EABI tag and its associated value.
25798 We emit the numerical value of the tag in case the assembler does not
25799 support textual tags (e.g. gas prior to 2.20). If requested, we include
25800 the tag name in a comment so that anyone reading the assembler output
25801 will know which tag is being set.
25802
25803 This function is not static because arm-c.c needs it too. */
25804
25805 void
25806 arm_emit_eabi_attribute (const char *name, int num, int val)
25807 {
25808 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25809 if (flag_verbose_asm || flag_debug_asm)
25810 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25811 asm_fprintf (asm_out_file, "\n");
25812 }
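/* Usage sketch: arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1)
   writes

	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal

   where the trailing comment only appears with -fverbose-asm or -dA. */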
25813
25814 /* This function is used to print CPU tuning information as comment
25815 in assembler file. Pointers are not printed for now. */
25816
25817 void
25818 arm_print_tune_info (void)
25819 {
25820 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25821 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25822 current_tune->constant_limit);
25823 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25824 current_tune->max_insns_skipped);
25825 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
25826 current_tune->prefetch.num_slots);
25827 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
25828 current_tune->prefetch.l1_cache_size);
25829 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25830 current_tune->prefetch.l1_cache_line_size);
25831 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25832 (int) current_tune->prefer_constant_pool);
25833 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25834 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25835 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25836 current_tune->branch_cost (false, false));
25837 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25838 current_tune->branch_cost (false, true));
25839 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25840 current_tune->branch_cost (true, false));
25841 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25842 current_tune->branch_cost (true, true));
25843 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25844 (int) current_tune->prefer_ldrd_strd);
25845 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25846 (int) current_tune->logical_op_non_short_circuit_thumb,
25847 (int) current_tune->logical_op_non_short_circuit_arm);
25848 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25849 (int) current_tune->prefer_neon_for_64bits);
25850 asm_fprintf (asm_out_file,
25851 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25852 (int) current_tune->disparage_flag_setting_t16_encodings);
25853 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25854 (int) current_tune->string_ops_prefer_neon);
25855 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25856 current_tune->max_insns_inline_memset);
25857 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
25858 current_tune->fusible_ops);
25859 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25860 (int) current_tune->sched_autopref);
25861 }
25862
25863 static void
25864 arm_file_start (void)
25865 {
25866 int val;
25867
25868 if (TARGET_BPABI)
25869 {
25870 const char *fpu_name;
25871 if (arm_selected_arch)
25872 {
25873 /* The armv7ve name does not take any "+extension" suffixes. */
25874 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25875 {
25876 /* Keep backward compatibility for assemblers
25877 which don't support armv7ve. */
25878 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25879 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25880 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25881 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25882 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25883 }
25884 else
25885 {
25886 const char* pos = strchr (arm_selected_arch->name, '+');
25887 if (pos)
25888 {
25889 char buf[15];
25890 gcc_assert (strlen (arm_selected_arch->name)
25891 <= sizeof (buf) / sizeof (*pos));
25892 strncpy (buf, arm_selected_arch->name,
25893 (pos - arm_selected_arch->name) * sizeof (*pos));
25894 buf[pos - arm_selected_arch->name] = '\0';
25895 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25896 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25897 }
25898 else
25899 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25900 }
25901 }
25902 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25903 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25904 else
25905 {
25906 const char* truncated_name
25907 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25908 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25909 }
25910
25911 if (print_tune_info)
25912 arm_print_tune_info ();
25913
25914 if (TARGET_SOFT_FLOAT)
25915 {
25916 fpu_name = "softvfp";
25917 }
25918 else
25919 {
25920 fpu_name = arm_fpu_desc->name;
25921 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25922 {
25923 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25924 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25925
25926 if (TARGET_HARD_FLOAT_ABI)
25927 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25928 }
25929 }
25930 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25931
25932 /* Some of these attributes only apply when the corresponding features
25933 are used. However we don't have any easy way of figuring this out.
25934 Conservatively record the setting that would have been used. */
25935
25936 if (flag_rounding_math)
25937 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25938
25939 if (!flag_unsafe_math_optimizations)
25940 {
25941 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25942 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25943 }
25944 if (flag_signaling_nans)
25945 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25946
25947 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25948 flag_finite_math_only ? 1 : 3);
25949
25950 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25951 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25952 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25953 flag_short_enums ? 1 : 2);
25954
25955 /* Tag_ABI_optimization_goals. */
25956 if (optimize_size)
25957 val = 4;
25958 else if (optimize >= 2)
25959 val = 2;
25960 else if (optimize)
25961 val = 1;
25962 else
25963 val = 6;
25964 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25965
25966 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25967 unaligned_access);
25968
25969 if (arm_fp16_format)
25970 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25971 (int) arm_fp16_format);
25972
25973 if (arm_lang_output_object_attributes_hook)
25974 arm_lang_output_object_attributes_hook();
25975 }
25976
25977 default_file_start ();
25978 }
25979
25980 static void
25981 arm_file_end (void)
25982 {
25983 int regno;
25984
25985 if (NEED_INDICATE_EXEC_STACK)
25986 /* Add .note.GNU-stack. */
25987 file_end_indicate_exec_stack ();
25988
25989 if (! thumb_call_reg_needed)
25990 return;
25991
25992 switch_to_section (text_section);
25993 asm_fprintf (asm_out_file, "\t.code 16\n");
25994 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25995
25996 for (regno = 0; regno < LR_REGNUM; regno++)
25997 {
25998 rtx label = thumb_call_via_label[regno];
25999
26000 if (label != 0)
26001 {
26002 targetm.asm_out.internal_label (asm_out_file, "L",
26003 CODE_LABEL_NUMBER (label));
26004 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26005 }
26006 }
26007 }
26008
26009 #ifndef ARM_PE
26010 /* Symbols in the text segment can be accessed without indirecting via the
26011 constant pool; it may take an extra binary operation, but this is still
26012 faster than indirecting via memory. Don't do this when not optimizing,
26013 since we won't be calculating al of the offsets necessary to do this
26014 simplification. */
26015
26016 static void
26017 arm_encode_section_info (tree decl, rtx rtl, int first)
26018 {
26019 if (optimize > 0 && TREE_CONSTANT (decl))
26020 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26021
26022 default_encode_section_info (decl, rtl, first);
26023 }
26024 #endif /* !ARM_PE */
26025
26026 static void
26027 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26028 {
26029 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26030 && !strcmp (prefix, "L"))
26031 {
26032 arm_ccfsm_state = 0;
26033 arm_target_insn = NULL;
26034 }
26035 default_internal_label (stream, prefix, labelno);
26036 }
26037
26038 /* Output code to add DELTA to the first argument, and then jump
26039 to FUNCTION. Used for C++ multiple inheritance. */
26040 static void
26041 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
26042 HOST_WIDE_INT delta,
26043 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
26044 tree function)
26045 {
26046 static int thunk_label = 0;
26047 char label[256];
26048 char labelpc[256];
26049 int mi_delta = delta;
26050 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26051 int shift = 0;
26052 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26053 ? 1 : 0);
26054 if (mi_delta < 0)
26055 mi_delta = - mi_delta;
26056
26057 final_start_function (emit_barrier (), file, 1);
26058
26059 if (TARGET_THUMB1)
26060 {
26061 int labelno = thunk_label++;
26062 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26063 /* Thunks are entered in ARM mode when available. */
26064 if (TARGET_THUMB1_ONLY)
26065 {
26066 /* push r3 so we can use it as a temporary. */
26067 /* TODO: Omit this save if r3 is not used. */
26068 fputs ("\tpush {r3}\n", file);
26069 fputs ("\tldr\tr3, ", file);
26070 }
26071 else
26072 {
26073 fputs ("\tldr\tr12, ", file);
26074 }
26075 assemble_name (file, label);
26076 fputc ('\n', file);
26077 if (flag_pic)
26078 {
26079 /* If we are generating PIC, the ldr instruction below loads
26080 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26081 the address of the add + 8, so we have:
26082
26083 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26084 = target + 1.
26085
26086 Note that we have "+ 1" because some versions of GNU ld
26087 don't set the low bit of the result for R_ARM_REL32
26088 relocations against thumb function symbols.
26089 On ARMv6M this is +4, not +8. */
26090 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26091 assemble_name (file, labelpc);
26092 fputs (":\n", file);
26093 if (TARGET_THUMB1_ONLY)
26094 {
26095 /* This is 2 insns after the start of the thunk, so we know it
26096 is 4-byte aligned. */
26097 fputs ("\tadd\tr3, pc, r3\n", file);
26098 fputs ("\tmov r12, r3\n", file);
26099 }
26100 else
26101 fputs ("\tadd\tr12, pc, r12\n", file);
26102 }
26103 else if (TARGET_THUMB1_ONLY)
26104 fputs ("\tmov r12, r3\n", file);
26105 }
26106 if (TARGET_THUMB1_ONLY)
26107 {
26108 if (mi_delta > 255)
26109 {
26110 fputs ("\tldr\tr3, ", file);
26111 assemble_name (file, label);
26112 fputs ("+4\n", file);
26113 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26114 mi_op, this_regno, this_regno);
26115 }
26116 else if (mi_delta != 0)
26117 {
26118 /* Thumb1 unified syntax requires s suffix in instruction name when
26119 one of the operands is immediate. */
26120 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26121 mi_op, this_regno, this_regno,
26122 mi_delta);
26123 }
26124 }
26125 else
26126 {
26127 /* TODO: Use movw/movt for large constants when available. */
26128 while (mi_delta != 0)
26129 {
26130 if ((mi_delta & (3 << shift)) == 0)
26131 shift += 2;
26132 else
26133 {
26134 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26135 mi_op, this_regno, this_regno,
26136 mi_delta & (0xff << shift));
26137 mi_delta &= ~(0xff << shift);
26138 shift += 8;
26139 }
26140 }
26141 }
26142 if (TARGET_THUMB1)
26143 {
26144 if (TARGET_THUMB1_ONLY)
26145 fputs ("\tpop\t{r3}\n", file);
26146
26147 fprintf (file, "\tbx\tr12\n");
26148 ASM_OUTPUT_ALIGN (file, 2);
26149 assemble_name (file, label);
26150 fputs (":\n", file);
26151 if (flag_pic)
26152 {
26153 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26154 rtx tem = XEXP (DECL_RTL (function), 0);
26155 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26156 pipeline offset is four rather than eight. Adjust the offset
26157 accordingly. */
26158 tem = plus_constant (GET_MODE (tem), tem,
26159 TARGET_THUMB1_ONLY ? -3 : -7);
26160 tem = gen_rtx_MINUS (GET_MODE (tem),
26161 tem,
26162 gen_rtx_SYMBOL_REF (Pmode,
26163 ggc_strdup (labelpc)));
26164 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26165 }
26166 else
26167 /* Output ".word .LTHUNKn". */
26168 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26169
26170 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26171 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26172 }
26173 else
26174 {
26175 fputs ("\tb\t", file);
26176 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26177 if (NEED_PLT_RELOC)
26178 fputs ("(PLT)", file);
26179 fputc ('\n', file);
26180 }
26181
26182 final_end_function ();
26183 }
26184
26185 int
26186 arm_emit_vector_const (FILE *file, rtx x)
26187 {
26188 int i;
26189 const char * pattern;
26190
26191 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26192
26193 switch (GET_MODE (x))
26194 {
26195 case V2SImode: pattern = "%08x"; break;
26196 case V4HImode: pattern = "%04x"; break;
26197 case V8QImode: pattern = "%02x"; break;
26198 default: gcc_unreachable ();
26199 }
26200
26201 fprintf (file, "0x");
26202 for (i = CONST_VECTOR_NUNITS (x); i--;)
26203 {
26204 rtx element;
26205
26206 element = CONST_VECTOR_ELT (x, i);
26207 fprintf (file, pattern, INTVAL (element));
26208 }
26209
26210 return 1;
26211 }
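/* Illustrative example: a V4HImode constant with elements {1, 2, 3, 4}
   (element 0 first) is printed highest element first with the "%04x"
   pattern, i.e. as 0x0004000300020001. */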
26212
26213 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26214 HFmode constant pool entries are actually loaded with ldr. */
26215 void
26216 arm_emit_fp16_const (rtx c)
26217 {
26218 long bits;
26219
26220 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26221 if (WORDS_BIG_ENDIAN)
26222 assemble_zeros (2);
26223 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26224 if (!WORDS_BIG_ENDIAN)
26225 assemble_zeros (2);
26226 }
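/* For example (sketch): the HFmode constant 1.0 has the half-precision
   bit pattern 0x3c00, so on a little-endian target this emits the 2-byte
   value 0x3c00 followed by two bytes of zero padding; with
   WORDS_BIG_ENDIAN the padding is emitted first instead. */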
26227
26228 const char *
26229 arm_output_load_gr (rtx *operands)
26230 {
26231 rtx reg;
26232 rtx offset;
26233 rtx wcgr;
26234 rtx sum;
26235
26236 if (!MEM_P (operands [1])
26237 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26238 || !REG_P (reg = XEXP (sum, 0))
26239 || !CONST_INT_P (offset = XEXP (sum, 1))
26240 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26241 return "wldrw%?\t%0, %1";
26242
26243 /* Fix up an out-of-range load of a GR register. */
26244 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26245 wcgr = operands[0];
26246 operands[0] = reg;
26247 output_asm_insn ("ldr%?\t%0, %1", operands);
26248
26249 operands[0] = wcgr;
26250 operands[1] = reg;
26251 output_asm_insn ("tmcr%?\t%0, %1", operands);
26252 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26253
26254 return "";
26255 }
26256
26257 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26258
26259 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26260 named arg and all anonymous args onto the stack.
26261 XXX I know the prologue shouldn't be pushing registers, but it is faster
26262 that way. */
26263
26264 static void
26265 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26266 machine_mode mode,
26267 tree type,
26268 int *pretend_size,
26269 int second_time ATTRIBUTE_UNUSED)
26270 {
26271 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26272 int nregs;
26273
26274 cfun->machine->uses_anonymous_args = 1;
26275 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26276 {
26277 nregs = pcum->aapcs_ncrn;
26278 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26279 nregs++;
26280 }
26281 else
26282 nregs = pcum->nregs;
26283
26284 if (nregs < NUM_ARG_REGS)
26285 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26286 }
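/* Worked example (illustrative): for `int f (int fmt, ...)' under AAPCS
   the single named argument uses r0, so nregs == 1 and *pretend_size
   becomes (4 - 1) * 4 = 12 bytes; the prologue then pushes r1-r3 so the
   anonymous arguments sit contiguously with any arguments already passed
   on the stack. */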
26287
26288 /* We can't rely on the caller doing the proper promotion when
26289 using APCS or ATPCS. */
26290
26291 static bool
26292 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26293 {
26294 return !TARGET_AAPCS_BASED;
26295 }
26296
26297 static machine_mode
26298 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26299 machine_mode mode,
26300 int *punsignedp ATTRIBUTE_UNUSED,
26301 const_tree fntype ATTRIBUTE_UNUSED,
26302 int for_return ATTRIBUTE_UNUSED)
26303 {
26304 if (GET_MODE_CLASS (mode) == MODE_INT
26305 && GET_MODE_SIZE (mode) < 4)
26306 return SImode;
26307
26308 return mode;
26309 }
26310
26311 /* AAPCS based ABIs use short enums by default. */
26312
26313 static bool
26314 arm_default_short_enums (void)
26315 {
26316 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26317 }
26318
26319
26320 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26321
26322 static bool
26323 arm_align_anon_bitfield (void)
26324 {
26325 return TARGET_AAPCS_BASED;
26326 }
26327
26328
26329 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26330
26331 static tree
26332 arm_cxx_guard_type (void)
26333 {
26334 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26335 }
26336
26337
26338 /* The EABI says test the least significant bit of a guard variable. */
26339
26340 static bool
26341 arm_cxx_guard_mask_bit (void)
26342 {
26343 return TARGET_AAPCS_BASED;
26344 }
26345
26346
26347 /* The EABI specifies that all array cookies are 8 bytes long. */
26348
26349 static tree
26350 arm_get_cookie_size (tree type)
26351 {
26352 tree size;
26353
26354 if (!TARGET_AAPCS_BASED)
26355 return default_cxx_get_cookie_size (type);
26356
26357 size = build_int_cst (sizetype, 8);
26358 return size;
26359 }
26360
26361
26362 /* The EABI says that array cookies should also contain the element size. */
26363
26364 static bool
26365 arm_cookie_has_size (void)
26366 {
26367 return TARGET_AAPCS_BASED;
26368 }
26369
26370
26371 /* The EABI says constructors and destructors should return a pointer to
26372 the object constructed/destroyed. */
26373
26374 static bool
26375 arm_cxx_cdtor_returns_this (void)
26376 {
26377 return TARGET_AAPCS_BASED;
26378 }
26379
26380 /* The EABI says that an inline function may never be the key
26381 method. */
26382
26383 static bool
26384 arm_cxx_key_method_may_be_inline (void)
26385 {
26386 return !TARGET_AAPCS_BASED;
26387 }
26388
26389 static void
26390 arm_cxx_determine_class_data_visibility (tree decl)
26391 {
26392 if (!TARGET_AAPCS_BASED
26393 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26394 return;
26395
26396 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26397 is exported. However, on systems without dynamic vague linkage,
26398 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26399 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26400 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26401 else
26402 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26403 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26404 }
26405
26406 static bool
26407 arm_cxx_class_data_always_comdat (void)
26408 {
26409 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26410 vague linkage if the class has no key function. */
26411 return !TARGET_AAPCS_BASED;
26412 }
26413
26414
26415 /* The EABI says __aeabi_atexit should be used to register static
26416 destructors. */
26417
26418 static bool
26419 arm_cxx_use_aeabi_atexit (void)
26420 {
26421 return TARGET_AAPCS_BASED;
26422 }
26423
26424
26425 void
26426 arm_set_return_address (rtx source, rtx scratch)
26427 {
26428 arm_stack_offsets *offsets;
26429 HOST_WIDE_INT delta;
26430 rtx addr;
26431 unsigned long saved_regs;
26432
26433 offsets = arm_get_frame_offsets ();
26434 saved_regs = offsets->saved_regs_mask;
26435
26436 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26437 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26438 else
26439 {
26440 if (frame_pointer_needed)
26441 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26442 else
26443 {
26444 /* LR will be the first saved register. */
26445 delta = offsets->outgoing_args - (offsets->frame + 4);
26446
26447
26448 if (delta >= 4096)
26449 {
26450 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26451 GEN_INT (delta & ~4095)));
26452 addr = scratch;
26453 delta &= 4095;
26454 }
26455 else
26456 addr = stack_pointer_rtx;
26457
26458 addr = plus_constant (Pmode, addr, delta);
26459 }
26460 /* The store needs to be marked as frame related in order to prevent
26461 DSE from deleting it as dead if it is based on fp. */
26462 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26463 RTX_FRAME_RELATED_P (insn) = 1;
26464 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26465 }
26466 }
26467
26468
26469 void
26470 thumb_set_return_address (rtx source, rtx scratch)
26471 {
26472 arm_stack_offsets *offsets;
26473 HOST_WIDE_INT delta;
26474 HOST_WIDE_INT limit;
26475 int reg;
26476 rtx addr;
26477 unsigned long mask;
26478
26479 emit_use (source);
26480
26481 offsets = arm_get_frame_offsets ();
26482 mask = offsets->saved_regs_mask;
26483 if (mask & (1 << LR_REGNUM))
26484 {
26485 limit = 1024;
26486 /* Find the saved regs. */
26487 if (frame_pointer_needed)
26488 {
26489 delta = offsets->soft_frame - offsets->saved_args;
26490 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26491 if (TARGET_THUMB1)
26492 limit = 128;
26493 }
26494 else
26495 {
26496 delta = offsets->outgoing_args - offsets->saved_args;
26497 reg = SP_REGNUM;
26498 }
26499 /* Allow for the stack frame. */
26500 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26501 delta -= 16;
26502 /* The link register is always the first saved register. */
26503 delta -= 4;
26504
26505 /* Construct the address. */
26506 addr = gen_rtx_REG (SImode, reg);
26507 if (delta > limit)
26508 {
26509 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26510 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26511 addr = scratch;
26512 }
26513 else
26514 addr = plus_constant (Pmode, addr, delta);
26515
26516 /* The store needs to be marked as frame related in order to prevent
26517 DSE from deleting it as dead if it is based on fp. */
26518 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26519 RTX_FRAME_RELATED_P (insn) = 1;
26520 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26521 }
26522 else
26523 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26524 }
26525
26526 /* Implements target hook vector_mode_supported_p. */
26527 bool
26528 arm_vector_mode_supported_p (machine_mode mode)
26529 {
26530 /* Neon also supports V2SImode, etc. listed in the clause below. */
26531 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26532 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26533 || mode == V2DImode || mode == V8HFmode))
26534 return true;
26535
26536 if ((TARGET_NEON || TARGET_IWMMXT)
26537 && ((mode == V2SImode)
26538 || (mode == V4HImode)
26539 || (mode == V8QImode)))
26540 return true;
26541
26542 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26543 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26544 || mode == V2HAmode))
26545 return true;
26546
26547 return false;
26548 }
26549
26550 /* Implements target hook array_mode_supported_p. */
26551
26552 static bool
26553 arm_array_mode_supported_p (machine_mode mode,
26554 unsigned HOST_WIDE_INT nelems)
26555 {
26556 if (TARGET_NEON
26557 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26558 && (nelems >= 2 && nelems <= 4))
26559 return true;
26560
26561 return false;
26562 }
26563
26564 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26565 registers when autovectorizing for Neon, at least until multiple vector
26566 widths are supported properly by the middle-end. */
26567
26568 static machine_mode
26569 arm_preferred_simd_mode (machine_mode mode)
26570 {
26571 if (TARGET_NEON)
26572 switch (mode)
26573 {
26574 case SFmode:
26575 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26576 case SImode:
26577 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26578 case HImode:
26579 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26580 case QImode:
26581 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26582 case DImode:
26583 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26584 return V2DImode;
26585 break;
26586
26587 default:;
26588 }
26589
26590 if (TARGET_REALLY_IWMMXT)
26591 switch (mode)
26592 {
26593 case SImode:
26594 return V2SImode;
26595 case HImode:
26596 return V4HImode;
26597 case QImode:
26598 return V8QImode;
26599
26600 default:;
26601 }
26602
26603 return word_mode;
26604 }
26605
26606 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26607
26608 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26609 using r0-r4 for function arguments, r7 for the stack frame and don't have
26610 enough left over to do doubleword arithmetic. For Thumb-2 all the
26611 potentially problematic instructions accept high registers so this is not
26612 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26613 that require many low registers. */
26614 static bool
26615 arm_class_likely_spilled_p (reg_class_t rclass)
26616 {
26617 if ((TARGET_THUMB1 && rclass == LO_REGS)
26618 || rclass == CC_REG)
26619 return true;
26620
26621 return false;
26622 }
26623
26624 /* Implements target hook small_register_classes_for_mode_p. */
26625 bool
26626 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26627 {
26628 return TARGET_THUMB1;
26629 }
26630
26631 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26632 ARM insns and therefore guarantee that the shift count is modulo 256.
26633 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26634 guarantee no particular behavior for out-of-range counts. */
26635
26636 static unsigned HOST_WIDE_INT
26637 arm_shift_truncation_mask (machine_mode mode)
26638 {
26639 return mode == SImode ? 255 : 0;
26640 }
26641
26642
26643 /* Map internal gcc register numbers to DWARF2 register numbers. */
26644
26645 unsigned int
26646 arm_dbx_register_number (unsigned int regno)
26647 {
26648 if (regno < 16)
26649 return regno;
26650
26651 if (IS_VFP_REGNUM (regno))
26652 {
26653 /* See comment in arm_dwarf_register_span. */
26654 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26655 return 64 + regno - FIRST_VFP_REGNUM;
26656 else
26657 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26658 }
26659
26660 if (IS_IWMMXT_GR_REGNUM (regno))
26661 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26662
26663 if (IS_IWMMXT_REGNUM (regno))
26664 return 112 + regno - FIRST_IWMMXT_REGNUM;
26665
26666 return DWARF_FRAME_REGISTERS;
26667 }
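/* Mapping sketch: core registers keep their own numbers; s0
   (FIRST_VFP_REGNUM) maps to the legacy DWARF number 64, while a register
   such as d16, which has no single-precision alias, maps into the 256-287
   range (256 + 16 = 272). IWMMXT registers use the 104+ and 112+ ranges,
   and anything else falls back to DWARF_FRAME_REGISTERS. */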
26668
26669 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26670 GCC models them as 64 32-bit registers, so we need to describe this to
26671 the DWARF generation code. Other registers can use the default. */
26672 static rtx
26673 arm_dwarf_register_span (rtx rtl)
26674 {
26675 machine_mode mode;
26676 unsigned regno;
26677 rtx parts[16];
26678 int nregs;
26679 int i;
26680
26681 regno = REGNO (rtl);
26682 if (!IS_VFP_REGNUM (regno))
26683 return NULL_RTX;
26684
26685 /* XXX FIXME: The EABI defines two VFP register ranges:
26686 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26687 256-287: D0-D31
26688 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26689 corresponding D register. Until GDB supports this, we shall use the
26690 legacy encodings. We also use these encodings for D0-D15 for
26691 compatibility with older debuggers. */
26692 mode = GET_MODE (rtl);
26693 if (GET_MODE_SIZE (mode) < 8)
26694 return NULL_RTX;
26695
26696 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26697 {
26698 nregs = GET_MODE_SIZE (mode) / 4;
26699 for (i = 0; i < nregs; i += 2)
26700 if (TARGET_BIG_END)
26701 {
26702 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26703 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26704 }
26705 else
26706 {
26707 parts[i] = gen_rtx_REG (SImode, regno + i);
26708 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26709 }
26710 }
26711 else
26712 {
26713 nregs = GET_MODE_SIZE (mode) / 8;
26714 for (i = 0; i < nregs; i++)
26715 parts[i] = gen_rtx_REG (DImode, regno + i);
26716 }
26717
26718 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26719 }
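/* Example of the span above (sketch): a DFmode value living in d5 is
   described as a PARALLEL of its two SImode halves (the s10/s11 aliases,
   with the order swapped when TARGET_BIG_END), whereas a value in
   d16-d31, which has no single-precision aliases, is described as one
   DImode piece per register. */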
26720
26721 #if ARM_UNWIND_INFO
26722 /* Emit unwind directives for a store-multiple instruction or stack pointer
26723 push during alignment.
26724 These should only ever be generated by the function prologue code, so
26725 expect them to have a particular form.
26726 The store-multiple instruction sometimes pushes pc as the last register,
26727 although it should not be tracked into unwind information, or for -Os
26728 sometimes pushes some dummy registers before the first register that needs
26729 to be tracked in unwind information; such dummy registers are there just
26730 to avoid separate stack adjustment, and will not be restored in the
26731 epilogue. */
26732
26733 static void
26734 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26735 {
26736 int i;
26737 HOST_WIDE_INT offset;
26738 HOST_WIDE_INT nregs;
26739 int reg_size;
26740 unsigned reg;
26741 unsigned lastreg;
26742 unsigned padfirst = 0, padlast = 0;
26743 rtx e;
26744
26745 e = XVECEXP (p, 0, 0);
26746 gcc_assert (GET_CODE (e) == SET);
26747
26748 /* First insn will adjust the stack pointer. */
26749 gcc_assert (GET_CODE (e) == SET
26750 && REG_P (SET_DEST (e))
26751 && REGNO (SET_DEST (e)) == SP_REGNUM
26752 && GET_CODE (SET_SRC (e)) == PLUS);
26753
26754 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26755 nregs = XVECLEN (p, 0) - 1;
26756 gcc_assert (nregs);
26757
26758 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26759 if (reg < 16)
26760 {
26761 /* For -Os dummy registers can be pushed at the beginning to
26762 avoid separate stack pointer adjustment. */
26763 e = XVECEXP (p, 0, 1);
26764 e = XEXP (SET_DEST (e), 0);
26765 if (GET_CODE (e) == PLUS)
26766 padfirst = INTVAL (XEXP (e, 1));
26767 gcc_assert (padfirst == 0 || optimize_size);
26768 /* The function prologue may also push pc, but not annotate it as it is
26769 never restored. We turn this into a stack pointer adjustment. */
26770 e = XVECEXP (p, 0, nregs);
26771 e = XEXP (SET_DEST (e), 0);
26772 if (GET_CODE (e) == PLUS)
26773 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26774 else
26775 padlast = offset - 4;
26776 gcc_assert (padlast == 0 || padlast == 4);
26777 if (padlast == 4)
26778 fprintf (asm_out_file, "\t.pad #4\n");
26779 reg_size = 4;
26780 fprintf (asm_out_file, "\t.save {");
26781 }
26782 else if (IS_VFP_REGNUM (reg))
26783 {
26784 reg_size = 8;
26785 fprintf (asm_out_file, "\t.vsave {");
26786 }
26787 else
26788 /* Unknown register type. */
26789 gcc_unreachable ();
26790
26791 /* If the stack increment doesn't match the size of the saved registers,
26792 something has gone horribly wrong. */
26793 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26794
26795 offset = padfirst;
26796 lastreg = 0;
26797 /* The remaining insns will describe the stores. */
26798 for (i = 1; i <= nregs; i++)
26799 {
26800 /* Expect (set (mem <addr>) (reg)).
26801 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26802 e = XVECEXP (p, 0, i);
26803 gcc_assert (GET_CODE (e) == SET
26804 && MEM_P (SET_DEST (e))
26805 && REG_P (SET_SRC (e)));
26806
26807 reg = REGNO (SET_SRC (e));
26808 gcc_assert (reg >= lastreg);
26809
26810 if (i != 1)
26811 fprintf (asm_out_file, ", ");
26812 /* We can't use %r for vfp because we need to use the
26813 double precision register names. */
26814 if (IS_VFP_REGNUM (reg))
26815 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26816 else
26817 asm_fprintf (asm_out_file, "%r", reg);
26818
26819 if (flag_checking)
26820 {
26821 /* Check that the addresses are consecutive. */
26822 e = XEXP (SET_DEST (e), 0);
26823 if (GET_CODE (e) == PLUS)
26824 gcc_assert (REG_P (XEXP (e, 0))
26825 && REGNO (XEXP (e, 0)) == SP_REGNUM
26826 && CONST_INT_P (XEXP (e, 1))
26827 && offset == INTVAL (XEXP (e, 1)));
26828 else
26829 gcc_assert (i == 1
26830 && REG_P (e)
26831 && REGNO (e) == SP_REGNUM);
26832 offset += reg_size;
26833 }
26834 }
26835 fprintf (asm_out_file, "}\n");
26836 if (padfirst)
26837 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26838 }
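/* For illustration: a prologue `push {r4, r5, lr}' (a 12-byte store
   multiple) comes out as

	.save {r4, r5, lr}

   while a `vpush {d8}' becomes `.vsave {d8}'. A pushed pc used purely as
   padding turns into a separate `.pad #4' directive, and dummy low
   registers pushed for -Os produce a trailing `.pad'. */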
26839
26840 /* Emit unwind directives for a SET. */
26841
26842 static void
26843 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26844 {
26845 rtx e0;
26846 rtx e1;
26847 unsigned reg;
26848
26849 e0 = XEXP (p, 0);
26850 e1 = XEXP (p, 1);
26851 switch (GET_CODE (e0))
26852 {
26853 case MEM:
26854 /* Pushing a single register. */
26855 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26856 || !REG_P (XEXP (XEXP (e0, 0), 0))
26857 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26858 abort ();
26859
26860 asm_fprintf (asm_out_file, "\t.save ");
26861 if (IS_VFP_REGNUM (REGNO (e1)))
26862 asm_fprintf(asm_out_file, "{d%d}\n",
26863 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26864 else
26865 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26866 break;
26867
26868 case REG:
26869 if (REGNO (e0) == SP_REGNUM)
26870 {
26871 /* A stack increment. */
26872 if (GET_CODE (e1) != PLUS
26873 || !REG_P (XEXP (e1, 0))
26874 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26875 || !CONST_INT_P (XEXP (e1, 1)))
26876 abort ();
26877
26878 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26879 -INTVAL (XEXP (e1, 1)));
26880 }
26881 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26882 {
26883 HOST_WIDE_INT offset;
26884
26885 if (GET_CODE (e1) == PLUS)
26886 {
26887 if (!REG_P (XEXP (e1, 0))
26888 || !CONST_INT_P (XEXP (e1, 1)))
26889 abort ();
26890 reg = REGNO (XEXP (e1, 0));
26891 offset = INTVAL (XEXP (e1, 1));
26892 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26893 HARD_FRAME_POINTER_REGNUM, reg,
26894 offset);
26895 }
26896 else if (REG_P (e1))
26897 {
26898 reg = REGNO (e1);
26899 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26900 HARD_FRAME_POINTER_REGNUM, reg);
26901 }
26902 else
26903 abort ();
26904 }
26905 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26906 {
26907 /* Move from sp to reg. */
26908 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26909 }
26910 else if (GET_CODE (e1) == PLUS
26911 && REG_P (XEXP (e1, 0))
26912 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26913 && CONST_INT_P (XEXP (e1, 1)))
26914 {
26915 /* Set reg to offset from sp. */
26916 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26917 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26918 }
26919 else
26920 abort ();
26921 break;
26922
26923 default:
26924 abort ();
26925 }
26926 }
26927
26928
26929 /* Emit unwind directives for the given insn. */
26930
26931 static void
26932 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26933 {
26934 rtx note, pat;
26935 bool handled_one = false;
26936
26937 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26938 return;
26939
26940 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26941 && (TREE_NOTHROW (current_function_decl)
26942 || crtl->all_throwers_are_sibcalls))
26943 return;
26944
26945 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26946 return;
26947
26948 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26949 {
26950 switch (REG_NOTE_KIND (note))
26951 {
26952 case REG_FRAME_RELATED_EXPR:
26953 pat = XEXP (note, 0);
26954 goto found;
26955
26956 case REG_CFA_REGISTER:
26957 pat = XEXP (note, 0);
26958 if (pat == NULL)
26959 {
26960 pat = PATTERN (insn);
26961 if (GET_CODE (pat) == PARALLEL)
26962 pat = XVECEXP (pat, 0, 0);
26963 }
26964
26965 /* Only emitted for IS_STACKALIGN re-alignment. */
26966 {
26967 rtx dest, src;
26968 unsigned reg;
26969
26970 src = SET_SRC (pat);
26971 dest = SET_DEST (pat);
26972
26973 gcc_assert (src == stack_pointer_rtx);
26974 reg = REGNO (dest);
26975 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26976 reg + 0x90, reg);
26977 }
26978 handled_one = true;
26979 break;
26980
26981 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
26982 to get correct dwarf information for shrink-wrap. We should not
26983 emit unwind information for it because these are used either for
26984 pretend arguments or notes to adjust sp and restore registers from
26985 stack. */
26986 case REG_CFA_DEF_CFA:
26987 case REG_CFA_ADJUST_CFA:
26988 case REG_CFA_RESTORE:
26989 return;
26990
26991 case REG_CFA_EXPRESSION:
26992 case REG_CFA_OFFSET:
26993 /* ??? Only handling here what we actually emit. */
26994 gcc_unreachable ();
26995
26996 default:
26997 break;
26998 }
26999 }
27000 if (handled_one)
27001 return;
27002 pat = PATTERN (insn);
27003 found:
27004
27005 switch (GET_CODE (pat))
27006 {
27007 case SET:
27008 arm_unwind_emit_set (asm_out_file, pat);
27009 break;
27010
27011 case SEQUENCE:
27012 /* Store multiple. */
27013 arm_unwind_emit_sequence (asm_out_file, pat);
27014 break;
27015
27016 default:
27017 abort();
27018 }
27019 }
27020
27021
27022 /* Output a reference from a function exception table to the type_info
27023 object X. The EABI specifies that the symbol should be relocated by
27024 an R_ARM_TARGET2 relocation. */
27025
27026 static bool
27027 arm_output_ttype (rtx x)
27028 {
27029 fputs ("\t.word\t", asm_out_file);
27030 output_addr_const (asm_out_file, x);
27031 /* Use special relocations for symbol references. */
27032 if (!CONST_INT_P (x))
27033 fputs ("(TARGET2)", asm_out_file);
27034 fputc ('\n', asm_out_file);
27035
27036 return TRUE;
27037 }
27038
27039 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27040
27041 static void
27042 arm_asm_emit_except_personality (rtx personality)
27043 {
27044 fputs ("\t.personality\t", asm_out_file);
27045 output_addr_const (asm_out_file, personality);
27046 fputc ('\n', asm_out_file);
27047 }
27048
27049 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27050
27051 static void
27052 arm_asm_init_sections (void)
27053 {
27054 exception_section = get_unnamed_section (0, output_section_asm_op,
27055 "\t.handlerdata");
27056 }
27057 #endif /* ARM_UNWIND_INFO */
27058
27059 /* Output unwind directives for the start/end of a function. */
27060
27061 void
27062 arm_output_fn_unwind (FILE * f, bool prologue)
27063 {
27064 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27065 return;
27066
27067 if (prologue)
27068 fputs ("\t.fnstart\n", f);
27069 else
27070 {
27071 /* If this function will never be unwound, then mark it as such.
27072 The same condition is used in arm_unwind_emit to suppress
27073 the frame annotations. */
27074 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27075 && (TREE_NOTHROW (current_function_decl)
27076 || crtl->all_throwers_are_sibcalls))
27077 fputs("\t.cantunwind\n", f);
27078
27079 fputs ("\t.fnend\n", f);
27080 }
27081 }
27082
27083 static bool
27084 arm_emit_tls_decoration (FILE *fp, rtx x)
27085 {
27086 enum tls_reloc reloc;
27087 rtx val;
27088
27089 val = XVECEXP (x, 0, 0);
27090 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27091
27092 output_addr_const (fp, val);
27093
27094 switch (reloc)
27095 {
27096 case TLS_GD32:
27097 fputs ("(tlsgd)", fp);
27098 break;
27099 case TLS_LDM32:
27100 fputs ("(tlsldm)", fp);
27101 break;
27102 case TLS_LDO32:
27103 fputs ("(tlsldo)", fp);
27104 break;
27105 case TLS_IE32:
27106 fputs ("(gottpoff)", fp);
27107 break;
27108 case TLS_LE32:
27109 fputs ("(tpoff)", fp);
27110 break;
27111 case TLS_DESCSEQ:
27112 fputs ("(tlsdesc)", fp);
27113 break;
27114 default:
27115 gcc_unreachable ();
27116 }
27117
27118 switch (reloc)
27119 {
27120 case TLS_GD32:
27121 case TLS_LDM32:
27122 case TLS_IE32:
27123 case TLS_DESCSEQ:
27124 fputs (" + (. - ", fp);
27125 output_addr_const (fp, XVECEXP (x, 0, 2));
27126 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27127 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27128 output_addr_const (fp, XVECEXP (x, 0, 3));
27129 fputc (')', fp);
27130 break;
27131 default:
27132 break;
27133 }
27134
27135 return TRUE;
27136 }
27137
27138 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27139
27140 static void
27141 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27142 {
27143 gcc_assert (size == 4);
27144 fputs ("\t.word\t", file);
27145 output_addr_const (file, x);
27146 fputs ("(tlsldo)", file);
27147 }
27148
27149 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27150
27151 static bool
27152 arm_output_addr_const_extra (FILE *fp, rtx x)
27153 {
27154 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27155 return arm_emit_tls_decoration (fp, x);
27156 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27157 {
27158 char label[256];
27159 int labelno = INTVAL (XVECEXP (x, 0, 0));
27160
27161 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27162 assemble_name_raw (fp, label);
27163
27164 return TRUE;
27165 }
27166 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27167 {
27168 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27169 if (GOT_PCREL)
27170 fputs ("+.", fp);
27171 fputs ("-(", fp);
27172 output_addr_const (fp, XVECEXP (x, 0, 0));
27173 fputc (')', fp);
27174 return TRUE;
27175 }
27176 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27177 {
27178 output_addr_const (fp, XVECEXP (x, 0, 0));
27179 if (GOT_PCREL)
27180 fputs ("+.", fp);
27181 fputs ("-(", fp);
27182 output_addr_const (fp, XVECEXP (x, 0, 1));
27183 fputc (')', fp);
27184 return TRUE;
27185 }
27186 else if (GET_CODE (x) == CONST_VECTOR)
27187 return arm_emit_vector_const (fp, x);
27188
27189 return FALSE;
27190 }
27191
27192 /* Output assembly for a shift instruction.
27193 SET_FLAGS determines how the instruction modifies the condition codes.
27194 0 - Do not set condition codes.
27195 1 - Set condition codes.
27196 2 - Use smallest instruction. */
27197 const char *
27198 arm_output_shift(rtx * operands, int set_flags)
27199 {
27200 char pattern[100];
27201 static const char flag_chars[3] = {'?', '.', '!'};
27202 const char *shift;
27203 HOST_WIDE_INT val;
27204 char c;
27205
27206 c = flag_chars[set_flags];
27207 shift = shift_op(operands[3], &val);
27208 if (shift)
27209 {
27210 if (val != -1)
27211 operands[2] = GEN_INT(val);
27212 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27213 }
27214 else
27215 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27216
27217 output_asm_insn (pattern, operands);
27218 return "";
27219 }
27220
27221 /* Output assembly for a WMMX immediate shift instruction. */
27222 const char *
27223 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27224 {
27225 int shift = INTVAL (operands[2]);
27226 char templ[50];
27227 machine_mode opmode = GET_MODE (operands[0]);
27228
27229 gcc_assert (shift >= 0);
27230
27231 /* Handle a shift count that is out of range for the mode: > 63 for the
27232 D qualifier, > 31 for the W qualifier or > 15 for the H qualifier. */
27233 if (((opmode == V4HImode) && (shift > 15))
27234 || ((opmode == V2SImode) && (shift > 31))
27235 || ((opmode == DImode) && (shift > 63)))
27236 {
27237 if (wror_or_wsra)
27238 {
27239 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27240 output_asm_insn (templ, operands);
27241 if (opmode == DImode)
27242 {
27243 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27244 output_asm_insn (templ, operands);
27245 }
27246 }
27247 else
27248 {
27249 /* The destination register will contain all zeros. */
27250 sprintf (templ, "wzero\t%%0");
27251 output_asm_insn (templ, operands);
27252 }
27253 return "";
27254 }
27255
27256 if ((opmode == DImode) && (shift > 32))
27257 {
27258 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27259 output_asm_insn (templ, operands);
27260 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27261 output_asm_insn (templ, operands);
27262 }
27263 else
27264 {
27265 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27266 output_asm_insn (templ, operands);
27267 }
27268 return "";
27269 }
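/* Worked example (illustrative): a DImode shift by 40 cannot be encoded
   in one immediate, so it is emitted as a shift by #32 followed by a
   shift by #8 on the same destination. Counts beyond the element width
   either collapse to shifts by #32 (for wror/wsra) or to a `wzero' of
   the destination, since the result would be all zeros anyway. */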
27270
27271 /* Output assembly for a WMMX tinsr instruction. */
27272 const char *
27273 arm_output_iwmmxt_tinsr (rtx *operands)
27274 {
27275 int mask = INTVAL (operands[3]);
27276 int i;
27277 char templ[50];
27278 int units = mode_nunits[GET_MODE (operands[0])];
27279 gcc_assert ((mask & (mask - 1)) == 0);
27280 for (i = 0; i < units; ++i)
27281 {
27282 if ((mask & 0x01) == 1)
27283 {
27284 break;
27285 }
27286 mask >>= 1;
27287 }
27288 gcc_assert (i < units);
27289 {
27290 switch (GET_MODE (operands[0]))
27291 {
27292 case V8QImode:
27293 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27294 break;
27295 case V4HImode:
27296 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27297 break;
27298 case V2SImode:
27299 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27300 break;
27301 default:
27302 gcc_unreachable ();
27303 break;
27304 }
27305 output_asm_insn (templ, operands);
27306 }
27307 return "";
27308 }
27309
27310 /* Output a Thumb-1 casesi dispatch sequence. */
27311 const char *
27312 thumb1_output_casesi (rtx *operands)
27313 {
27314 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27315
27316 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27317
27318 switch (GET_MODE(diff_vec))
27319 {
27320 case QImode:
27321 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27322 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27323 case HImode:
27324 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27325 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27326 case SImode:
27327 return "bl\t%___gnu_thumb1_case_si";
27328 default:
27329 gcc_unreachable ();
27330 }
27331 }
27332
27333 /* Output a Thumb-2 casesi instruction. */
27334 const char *
27335 thumb2_output_casesi (rtx *operands)
27336 {
27337 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27338
27339 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27340
27341 output_asm_insn ("cmp\t%0, %1", operands);
27342 output_asm_insn ("bhi\t%l3", operands);
27343 switch (GET_MODE(diff_vec))
27344 {
27345 case QImode:
27346 return "tbb\t[%|pc, %0]";
27347 case HImode:
27348 return "tbh\t[%|pc, %0, lsl #1]";
27349 case SImode:
27350 if (flag_pic)
27351 {
27352 output_asm_insn ("adr\t%4, %l2", operands);
27353 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27354 output_asm_insn ("add\t%4, %4, %5", operands);
27355 return "bx\t%4";
27356 }
27357 else
27358 {
27359 output_asm_insn ("adr\t%4, %l2", operands);
27360 return "ldr\t%|pc, [%4, %0, lsl #2]";
27361 }
27362 default:
27363 gcc_unreachable ();
27364 }
27365 }
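/* Sketch of the emitted sequence for a QImode dispatch table, assuming
   the index is in r0, the bound in r1 and the default label is .Ldef:

	cmp	r0, r1
	bhi	.Ldef
	tbb	[pc, r0]

   HImode tables use `tbh [pc, r0, lsl #1]', and SImode tables fall back
   to an adr/ldr sequence (plus an add and bx when generating PIC). */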
27366
27367 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27368 per-core tuning structs. */
27369 static int
27370 arm_issue_rate (void)
27371 {
27372 return current_tune->issue_rate;
27373 }
27374
27375 /* Return how many instructions the scheduler should look ahead to choose
27376 the best one. */
27377 static int
27378 arm_first_cycle_multipass_dfa_lookahead (void)
27379 {
27380 int issue_rate = arm_issue_rate ();
27381
27382 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27383 }
27384
27385 /* Enable modeling of L2 auto-prefetcher. */
27386 static int
27387 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27388 {
27389 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27390 }
27391
27392 const char *
27393 arm_mangle_type (const_tree type)
27394 {
27395 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27396 has to be mangled as if it is in the "std" namespace. */
27397 if (TARGET_AAPCS_BASED
27398 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27399 return "St9__va_list";
27400
27401 /* Half-precision float. */
27402 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27403 return "Dh";
27404
27405 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27406 builtin type. */
27407 if (TYPE_NAME (type) != NULL)
27408 return arm_mangle_builtin_type (type);
27409
27410 /* Use the default mangling. */
27411 return NULL;
27412 }
27413
27414 /* Order of allocation of core registers for Thumb: this allocation is
27415 written over the corresponding initial entries of the array
27416 initialized with REG_ALLOC_ORDER. We allocate all low registers
27417 first. Saving and restoring a low register is usually cheaper than
27418 using a call-clobbered high register. */
27419
27420 static const int thumb_core_reg_alloc_order[] =
27421 {
27422 3, 2, 1, 0, 4, 5, 6, 7,
27423 14, 12, 8, 9, 10, 11
27424 };
27425
27426 /* Adjust register allocation order when compiling for Thumb. */
27427
27428 void
27429 arm_order_regs_for_local_alloc (void)
27430 {
27431 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27432 memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27433 if (TARGET_THUMB)
27434 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27435 sizeof (thumb_core_reg_alloc_order));
27436 }
27437
27438 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27439
27440 bool
27441 arm_frame_pointer_required (void)
27442 {
27443 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27444 return true;
27445
27446 /* If the function receives nonlocal gotos, it needs to save the frame
27447 pointer in the nonlocal_goto_save_area object. */
27448 if (cfun->has_nonlocal_label)
27449 return true;
27450
27451 /* The frame pointer is required for non-leaf APCS frames. */
27452 if (TARGET_ARM && TARGET_APCS_FRAME && !leaf_function_p ())
27453 return true;
27454
27455 /* If we are probing the stack in the prologue, we will have a faulting
27456 instruction prior to the stack adjustment and this requires a frame
27457 pointer if we want to catch the exception using the EABI unwinder. */
27458 if (!IS_INTERRUPT (arm_current_func_type ())
27459 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27460 && arm_except_unwind_info (&global_options) == UI_TARGET
27461 && cfun->can_throw_non_call_exceptions)
27462 {
27463 HOST_WIDE_INT size = get_frame_size ();
27464
27465 /* That's irrelevant if there is no stack adjustment. */
27466 if (size <= 0)
27467 return false;
27468
27469 /* That's relevant only if there is a stack probe. */
27470 if (crtl->is_leaf && !cfun->calls_alloca)
27471 {
27472 /* We don't have the final size of the frame so adjust. */
27473 size += 32 * UNITS_PER_WORD;
27474 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27475 return true;
27476 }
27477 else
27478 return true;
27479 }
27480
27481 return false;
27482 }
27483
27484 /* Only Thumb-1 lacks support for conditional execution, so return true if
27485 the target is not Thumb-1. */
27486 static bool
27487 arm_have_conditional_execution (void)
27488 {
27489 return !TARGET_THUMB1;
27490 }
27491
27492 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27493 static HOST_WIDE_INT
27494 arm_vector_alignment (const_tree type)
27495 {
27496 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27497
27498 if (TARGET_AAPCS_BASED)
27499 align = MIN (align, 64);
27500
27501 return align;
27502 }
27503
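/* Return the set of vector sizes, in bytes, that the auto-vectorizer may
   try: both 16 (quad-word) and 8 (double-word) NEON vectors.  When
   vectorization has been restricted to double-word registers, return 0 so
   that (per the hook's documented semantics, as I understand them) only the
   preferred SIMD mode is tried.  */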
27504 static unsigned int
27505 arm_autovectorize_vector_sizes (void)
27506 {
27507 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27508 }
27509
27510 static bool
27511 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27512 {
27513 /* Vectors which aren't in packed structures will not be less aligned than
27514 the natural alignment of their element type, so this is safe. */
27515 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27516 return !is_packed;
27517
27518 return default_builtin_vector_alignment_reachable (type, is_packed);
27519 }
27520
27521 static bool
27522 arm_builtin_support_vector_misalignment (machine_mode mode,
27523 const_tree type, int misalignment,
27524 bool is_packed)
27525 {
27526 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27527 {
27528 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27529
27530 if (is_packed)
27531 return align == 1;
27532
27533 /* If the misalignment is unknown, we should be able to handle the access
27534 so long as it is not to a member of a packed data structure. */
27535 if (misalignment == -1)
27536 return true;
27537
27538 /* Return true if the misalignment is a multiple of the natural alignment
27539 of the vector's element type. This is probably always going to be
27540 true in practice, since we've already established that this isn't a
27541 packed access. */
27542 return ((misalignment % align) == 0);
27543 }
27544
27545 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27546 is_packed);
27547 }
27548
27549 static void
27550 arm_conditional_register_usage (void)
27551 {
27552 int regno;
27553
27554 if (TARGET_THUMB1 && optimize_size)
27555 {
27556 /* When optimizing for size on Thumb-1, it's better not
27557 to use the HI regs, because of the overhead of
27558 stacking them. */
27559 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27560 fixed_regs[regno] = call_used_regs[regno] = 1;
27561 }
27562
27563 /* The link register can be clobbered by any branch insn,
27564 but we have no way to track that at present, so mark
27565 it as unavailable. */
27566 if (TARGET_THUMB1)
27567 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27568
27569 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27570 {
27571 /* VFPv3 registers are disabled when earlier VFP
27572 versions are selected due to the definition of
27573 LAST_VFP_REGNUM. */
27574 for (regno = FIRST_VFP_REGNUM;
27575 regno <= LAST_VFP_REGNUM; ++ regno)
27576 {
27577 fixed_regs[regno] = 0;
27578 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27579 || regno >= FIRST_VFP_REGNUM + 32;
27580 }
27581 }
27582
27583 if (TARGET_REALLY_IWMMXT)
27584 {
27585 regno = FIRST_IWMMXT_GR_REGNUM;
27586 /* The 2002/10/09 revision of the XScale ABI has wCG0
27587 and wCG1 as call-preserved registers. The 2002/11/21
27588 revision changed this so that all wCG registers are
27589 scratch registers. */
27590 for (regno = FIRST_IWMMXT_GR_REGNUM;
27591 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27592 fixed_regs[regno] = 0;
27593 /* The XScale ABI has wR0 - wR9 as scratch registers,
27594 the rest as call-preserved registers. */
27595 for (regno = FIRST_IWMMXT_REGNUM;
27596 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27597 {
27598 fixed_regs[regno] = 0;
27599 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27600 }
27601 }
27602
27603 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27604 {
27605 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27606 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27607 }
27608 else if (TARGET_APCS_STACK)
27609 {
27610 fixed_regs[10] = 1;
27611 call_used_regs[10] = 1;
27612 }
27613 /* -mcaller-super-interworking reserves r11 for calls to
27614 _interwork_r11_call_via_rN(). Making the register global
27615 is an easy way of ensuring that it remains valid for all
27616 calls. */
27617 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27618 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27619 {
27620 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27621 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27622 if (TARGET_CALLER_INTERWORKING)
27623 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27624 }
27625 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27626 }
27627
27628 static reg_class_t
27629 arm_preferred_rename_class (reg_class_t rclass)
27630 {
27631 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27632 using GENERAL_REGS.  Preferring LO_REGS during the register rename
27633 pass can therefore reduce code size. */
27634 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27635 return LO_REGS;
27636 else
27637 return NO_REGS;
27638 }
27639
27640 /* Compute the attribute "length" of insn "*push_multi".
27641 So this function MUST be kept in sync with that insn pattern. */
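/* For example (illustrative register choices): in Thumb-2, a push of only
   low registers and LR such as "push {r4, r5, lr}" can use the 16-bit
   encoding (length 2), while pushing a high register other than LR, as in
   "push {r4, r8}", needs the 32-bit encoding (length 4).  */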
27642 int
27643 arm_attr_length_push_multi (rtx parallel_op, rtx first_op)
27644 {
27645 int i, regno, hi_reg;
27646 int num_saves = XVECLEN (parallel_op, 0);
27647
27648 /* ARM mode. */
27649 if (TARGET_ARM)
27650 return 4;
27651 /* Thumb1 mode. */
27652 if (TARGET_THUMB1)
27653 return 2;
27654
27655 /* Thumb2 mode. */
27656 regno = REGNO (first_op);
27657 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27658 for (i = 1; i < num_saves && !hi_reg; i++)
27659 {
27660 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27661 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27662 }
27663
27664 if (!hi_reg)
27665 return 2;
27666 return 4;
27667 }
27668
27669 /* Compute the number of instructions emitted by output_move_double. */
27670 int
27671 arm_count_output_move_double_insns (rtx *operands)
27672 {
27673 int count;
27674 rtx ops[2];
27675 /* output_move_double may modify the operands array, so call it
27676 here on a copy of the array. */
27677 ops[0] = operands[0];
27678 ops[1] = operands[1];
27679 output_move_double (ops, false, &count);
27680 return count;
27681 }
27682
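/* If OPERAND is a CONST_DOUBLE whose exact reciprocal is a power of two
   that fits in 32 bits, return the log2 of that reciprocal, i.e. the
   number of fractional bits; otherwise return 0.  For example, 0.125 has
   the exact reciprocal 8.0, so 3 is returned.  */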
27683 int
27684 vfp3_const_double_for_fract_bits (rtx operand)
27685 {
27686 REAL_VALUE_TYPE r0;
27687
27688 if (!CONST_DOUBLE_P (operand))
27689 return 0;
27690
27691 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
27692 if (exact_real_inverse (DFmode, &r0)
27693 && !REAL_VALUE_NEGATIVE (r0))
27694 {
27695 if (exact_real_truncate (DFmode, &r0))
27696 {
27697 HOST_WIDE_INT value = real_to_integer (&r0);
27698 value = value & 0xffffffff;
27699 if ((value != 0) && ( (value & (value - 1)) == 0))
27700 return int_log2 (value);
27701 }
27702 }
27703 return 0;
27704 }
27705
27706 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
27707 log2 is in [1, 32], return that log2. Otherwise return -1.
27708 This is used in the patterns for vcvt.s32.f32 floating-point to
27709 fixed-point conversions. */
27710
27711 int
27712 vfp3_const_double_for_bits (rtx x)
27713 {
27714 const REAL_VALUE_TYPE *r;
27715
27716 if (!CONST_DOUBLE_P (x))
27717 return -1;
27718
27719 r = CONST_DOUBLE_REAL_VALUE (x);
27720
27721 if (REAL_VALUE_NEGATIVE (*r)
27722 || REAL_VALUE_ISNAN (*r)
27723 || REAL_VALUE_ISINF (*r)
27724 || !real_isinteger (r, SFmode))
27725 return -1;
27726
27727 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
27728
27729 /* The exact_log2 above will have returned -1 if this is
27730 not an exact log2. */
27731 if (!IN_RANGE (hwint, 1, 32))
27732 return -1;
27733
27734 return hwint;
27735 }
27736
27737 \f
27738 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27739
27740 static void
27741 arm_pre_atomic_barrier (enum memmodel model)
27742 {
27743 if (need_atomic_barrier_p (model, true))
27744 emit_insn (gen_memory_barrier ());
27745 }
27746
27747 static void
27748 arm_post_atomic_barrier (enum memmodel model)
27749 {
27750 if (need_atomic_barrier_p (model, false))
27751 emit_insn (gen_memory_barrier ());
27752 }
27753
27754 /* Emit the load-exclusive and store-exclusive instructions.
27755 Use acquire and release versions if necessary. */
27756
27757 static void
27758 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27759 {
27760 rtx (*gen) (rtx, rtx);
27761
27762 if (acq)
27763 {
27764 switch (mode)
27765 {
27766 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27767 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27768 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27769 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27770 default:
27771 gcc_unreachable ();
27772 }
27773 }
27774 else
27775 {
27776 switch (mode)
27777 {
27778 case QImode: gen = gen_arm_load_exclusiveqi; break;
27779 case HImode: gen = gen_arm_load_exclusivehi; break;
27780 case SImode: gen = gen_arm_load_exclusivesi; break;
27781 case DImode: gen = gen_arm_load_exclusivedi; break;
27782 default:
27783 gcc_unreachable ();
27784 }
27785 }
27786
27787 emit_insn (gen (rval, mem));
27788 }
27789
27790 static void
27791 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27792 rtx mem, bool rel)
27793 {
27794 rtx (*gen) (rtx, rtx, rtx);
27795
27796 if (rel)
27797 {
27798 switch (mode)
27799 {
27800 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27801 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27802 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27803 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27804 default:
27805 gcc_unreachable ();
27806 }
27807 }
27808 else
27809 {
27810 switch (mode)
27811 {
27812 case QImode: gen = gen_arm_store_exclusiveqi; break;
27813 case HImode: gen = gen_arm_store_exclusivehi; break;
27814 case SImode: gen = gen_arm_store_exclusivesi; break;
27815 case DImode: gen = gen_arm_store_exclusivedi; break;
27816 default:
27817 gcc_unreachable ();
27818 }
27819 }
27820
27821 emit_insn (gen (bval, rval, mem));
27822 }
27823
27824 /* Emit INSN as a jump and mark it as unlikely to be taken. */
27825
27826 static void
27827 emit_unlikely_jump (rtx insn)
27828 {
27829 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27830
27831 insn = emit_jump_insn (insn);
27832 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27833 }
27834
27835 /* Expand a compare and swap pattern. */
27836
27837 void
27838 arm_expand_compare_and_swap (rtx operands[])
27839 {
27840 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27841 machine_mode mode;
27842 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27843
27844 bval = operands[0];
27845 rval = operands[1];
27846 mem = operands[2];
27847 oldval = operands[3];
27848 newval = operands[4];
27849 is_weak = operands[5];
27850 mod_s = operands[6];
27851 mod_f = operands[7];
27852 mode = GET_MODE (mem);
27853
27854 /* Normally the succ memory model must be stronger than fail, but in the
27855 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27856 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27857
27858 if (TARGET_HAVE_LDACQ
27859 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
27860 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
27861 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27862
27863 switch (mode)
27864 {
27865 case QImode:
27866 case HImode:
27867 /* For narrow modes, we're going to perform the comparison in SImode,
27868 so do the zero-extension now. */
27869 rval = gen_reg_rtx (SImode);
27870 oldval = convert_modes (SImode, mode, oldval, true);
27871 /* FALLTHRU */
27872
27873 case SImode:
27874 /* Force the value into a register if needed. We waited until after
27875 the zero-extension above to do this properly. */
27876 if (!arm_add_operand (oldval, SImode))
27877 oldval = force_reg (SImode, oldval);
27878 break;
27879
27880 case DImode:
27881 if (!cmpdi_operand (oldval, mode))
27882 oldval = force_reg (mode, oldval);
27883 break;
27884
27885 default:
27886 gcc_unreachable ();
27887 }
27888
27889 switch (mode)
27890 {
27891 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27892 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27893 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27894 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27895 default:
27896 gcc_unreachable ();
27897 }
27898
27899 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27900
27901 if (mode == QImode || mode == HImode)
27902 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27903
27904 /* In all cases, we arrange for success to be signaled by Z set.
27905 This arrangement allows for the boolean result to be used directly
27906 in a subsequent branch, post optimization. */
27907 x = gen_rtx_REG (CCmode, CC_REGNUM);
27908 x = gen_rtx_EQ (SImode, x, const0_rtx);
27909 emit_insn (gen_rtx_SET (bval, x));
27910 }
27911
27912 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27913 another memory store between the load-exclusive and store-exclusive can
27914 reset the monitor from Exclusive to Open state. This means we must wait
27915 until after reload to split the pattern, lest we get a register spill in
27916 the middle of the atomic sequence. */
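/* A minimal sketch (register and label choices are illustrative only) of
   the strong SImode sequence this splits into, with barriers and memory
   models ignored:

       .Lretry:
           ldrex   r0, [r2]        @ rval = *mem
           cmp     r0, r3          @ compare against oldval
           bne     .Ldone          @ mismatch: fail
           strex   ip, r4, [r2]    @ try to store newval
           cmp     ip, #0
           bne     .Lretry         @ exclusive store failed: retry
       .Ldone:  */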
27917
27918 void
27919 arm_split_compare_and_swap (rtx operands[])
27920 {
27921 rtx rval, mem, oldval, newval, scratch;
27922 machine_mode mode;
27923 enum memmodel mod_s, mod_f;
27924 bool is_weak;
27925 rtx_code_label *label1, *label2;
27926 rtx x, cond;
27927
27928 rval = operands[0];
27929 mem = operands[1];
27930 oldval = operands[2];
27931 newval = operands[3];
27932 is_weak = (operands[4] != const0_rtx);
27933 mod_s = memmodel_from_int (INTVAL (operands[5]));
27934 mod_f = memmodel_from_int (INTVAL (operands[6]));
27935 scratch = operands[7];
27936 mode = GET_MODE (mem);
27937
27938 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
27939
27940 bool use_acquire = TARGET_HAVE_LDACQ
27941 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27942 || is_mm_release (mod_s));
27943
27944 bool use_release = TARGET_HAVE_LDACQ
27945 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27946 || is_mm_acquire (mod_s));
27947
27948 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
27949 a full barrier is emitted after the store-release. */
27950 if (is_armv8_sync)
27951 use_acquire = false;
27952
27953 /* Checks whether a barrier is needed and emits one accordingly. */
27954 if (!(use_acquire || use_release))
27955 arm_pre_atomic_barrier (mod_s);
27956
27957 label1 = NULL;
27958 if (!is_weak)
27959 {
27960 label1 = gen_label_rtx ();
27961 emit_label (label1);
27962 }
27963 label2 = gen_label_rtx ();
27964
27965 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27966
27967 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27968 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27969 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27970 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27971 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27972
27973 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27974
27975 /* Weak or strong, we want EQ to be true for success, so that we
27976 match the flags that we got from the compare above. */
27977 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27978 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27979 emit_insn (gen_rtx_SET (cond, x));
27980
27981 if (!is_weak)
27982 {
27983 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27984 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27985 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27986 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27987 }
27988
27989 if (!is_mm_relaxed (mod_f))
27990 emit_label (label2);
27991
27992 /* Checks whether a barrier is needed and emits one accordingly. */
27993 if (is_armv8_sync
27994 || !(use_acquire || use_release))
27995 arm_post_atomic_barrier (mod_s);
27996
27997 if (is_mm_relaxed (mod_f))
27998 emit_label (label2);
27999 }
28000
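/* Split an atomic read-modify-write operation (CODE applied to MEM and
   VALUE) into a load-exclusive/store-exclusive retry loop, honouring the
   memory model in MODEL_RTX.  A minimal sketch for an SImode fetch-and-add,
   with illustrative register choices and barriers omitted:

       .Lloop:
           ldrex   r0, [r2]        @ old_out = *mem
           add     r1, r0, r3      @ new_out = old_out + value
           strex   ip, r1, [r2]    @ attempt the store
           cmp     ip, #0
           bne     .Lloop          @ retry if the exclusive store failed  */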
28001 void
28002 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28003 rtx value, rtx model_rtx, rtx cond)
28004 {
28005 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28006 machine_mode mode = GET_MODE (mem);
28007 machine_mode wmode = (mode == DImode ? DImode : SImode);
28008 rtx_code_label *label;
28009 rtx x;
28010
28011 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28012
28013 bool use_acquire = TARGET_HAVE_LDACQ
28014 && !(is_mm_relaxed (model) || is_mm_consume (model)
28015 || is_mm_release (model));
28016
28017 bool use_release = TARGET_HAVE_LDACQ
28018 && !(is_mm_relaxed (model) || is_mm_consume (model)
28019 || is_mm_acquire (model));
28020
28021 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28022 a full barrier is emitted after the store-release. */
28023 if (is_armv8_sync)
28024 use_acquire = false;
28025
28026 /* Checks whether a barrier is needed and emits one accordingly. */
28027 if (!(use_acquire || use_release))
28028 arm_pre_atomic_barrier (model);
28029
28030 label = gen_label_rtx ();
28031 emit_label (label);
28032
28033 if (new_out)
28034 new_out = gen_lowpart (wmode, new_out);
28035 if (old_out)
28036 old_out = gen_lowpart (wmode, old_out);
28037 else
28038 old_out = new_out;
28039 value = simplify_gen_subreg (wmode, value, mode, 0);
28040
28041 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28042
28043 switch (code)
28044 {
28045 case SET:
28046 new_out = value;
28047 break;
28048
28049 case NOT:
28050 x = gen_rtx_AND (wmode, old_out, value);
28051 emit_insn (gen_rtx_SET (new_out, x));
28052 x = gen_rtx_NOT (wmode, new_out);
28053 emit_insn (gen_rtx_SET (new_out, x));
28054 break;
28055
28056 case MINUS:
28057 if (CONST_INT_P (value))
28058 {
28059 value = GEN_INT (-INTVAL (value));
28060 code = PLUS;
28061 }
28062 /* FALLTHRU */
28063
28064 case PLUS:
28065 if (mode == DImode)
28066 {
28067 /* DImode plus/minus need to clobber flags. */
28068 /* The adddi3 and subdi3 patterns are incorrectly written so that
28069 they require matching operands, even when we could easily support
28070 three operands. Thankfully, this can be fixed up post-splitting,
28071 as the individual add+adc patterns do accept three operands and
28072 post-reload cprop can make these moves go away. */
28073 emit_move_insn (new_out, old_out);
28074 if (code == PLUS)
28075 x = gen_adddi3 (new_out, new_out, value);
28076 else
28077 x = gen_subdi3 (new_out, new_out, value);
28078 emit_insn (x);
28079 break;
28080 }
28081 /* FALLTHRU */
28082
28083 default:
28084 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28085 emit_insn (gen_rtx_SET (new_out, x));
28086 break;
28087 }
28088
28089 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28090 use_release);
28091
28092 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28093 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28094
28095 /* Checks whether a barrier is needed and emits one accordingly. */
28096 if (is_armv8_sync
28097 || !(use_acquire || use_release))
28098 arm_post_atomic_barrier (model);
28099 }
28100 \f
28101 #define MAX_VECT_LEN 16
28102
28103 struct expand_vec_perm_d
28104 {
28105 rtx target, op0, op1;
28106 unsigned char perm[MAX_VECT_LEN];
28107 machine_mode vmode;
28108 unsigned char nelt;
28109 bool one_vector_p;
28110 bool testing_p;
28111 };
28112
28113 /* Generate a variable permutation. */
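/* For the single-vector V8QImode case this amounts to one VTBL with a
   one-register list, roughly "vtbl.8 dD, {dN}, dM" (illustrative operands);
   the two-vector cases first combine the inputs into a register pair.  */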
28114
28115 static void
28116 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28117 {
28118 machine_mode vmode = GET_MODE (target);
28119 bool one_vector_p = rtx_equal_p (op0, op1);
28120
28121 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28122 gcc_checking_assert (GET_MODE (op0) == vmode);
28123 gcc_checking_assert (GET_MODE (op1) == vmode);
28124 gcc_checking_assert (GET_MODE (sel) == vmode);
28125 gcc_checking_assert (TARGET_NEON);
28126
28127 if (one_vector_p)
28128 {
28129 if (vmode == V8QImode)
28130 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28131 else
28132 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28133 }
28134 else
28135 {
28136 rtx pair;
28137
28138 if (vmode == V8QImode)
28139 {
28140 pair = gen_reg_rtx (V16QImode);
28141 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28142 pair = gen_lowpart (TImode, pair);
28143 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28144 }
28145 else
28146 {
28147 pair = gen_reg_rtx (OImode);
28148 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28149 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28150 }
28151 }
28152 }
28153
28154 void
28155 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28156 {
28157 machine_mode vmode = GET_MODE (target);
28158 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28159 bool one_vector_p = rtx_equal_p (op0, op1);
28160 rtx rmask[MAX_VECT_LEN], mask;
28161
28162 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28163 numbering of elements for big-endian, we must reverse the order. */
28164 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28165
28166 /* The VTBL instruction does not use a modulo index, so we must take care
28167 of that ourselves. */
28168 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28169 for (i = 0; i < nelt; ++i)
28170 rmask[i] = mask;
28171 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28172 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28173
28174 arm_expand_vec_perm_1 (target, op0, op1, sel);
28175 }
28176
28177 /* Generate or test for an insn that supports a constant permutation. */
28178
28179 /* Recognize patterns for the VUZP insns. */
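/* For example, with two V8QI operands the even-element selector
   { 0, 2, 4, 6, 8, 10, 12, 14 } (and its odd counterpart starting at 1)
   is matched here and emitted as a single VUZP.  */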
28180
28181 static bool
28182 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28183 {
28184 unsigned int i, odd, mask, nelt = d->nelt;
28185 rtx out0, out1, in0, in1;
28186 rtx (*gen)(rtx, rtx, rtx, rtx);
28187
28188 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28189 return false;
28190
28191 /* Note that these are little-endian tests. Adjust for big-endian later. */
28192 if (d->perm[0] == 0)
28193 odd = 0;
28194 else if (d->perm[0] == 1)
28195 odd = 1;
28196 else
28197 return false;
28198 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28199
28200 for (i = 0; i < nelt; i++)
28201 {
28202 unsigned elt = (i * 2 + odd) & mask;
28203 if (d->perm[i] != elt)
28204 return false;
28205 }
28206
28207 /* Success! */
28208 if (d->testing_p)
28209 return true;
28210
28211 switch (d->vmode)
28212 {
28213 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28214 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28215 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28216 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28217 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28218 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28219 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28220 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28221 default:
28222 gcc_unreachable ();
28223 }
28224
28225 in0 = d->op0;
28226 in1 = d->op1;
28227 if (BYTES_BIG_ENDIAN)
28228 {
28229 std::swap (in0, in1);
28230 odd = !odd;
28231 }
28232
28233 out0 = d->target;
28234 out1 = gen_reg_rtx (d->vmode);
28235 if (odd)
28236 std::swap (out0, out1);
28237
28238 emit_insn (gen (out0, in0, in1, out1));
28239 return true;
28240 }
28241
28242 /* Recognize patterns for the VZIP insns. */
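/* For example, with two V8QI operands the low-half interleave selector
   { 0, 8, 1, 9, 2, 10, 3, 11 } is matched here and emitted as a VZIP;
   the high-half variant starts at nelt / 2.  */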
28243
28244 static bool
28245 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28246 {
28247 unsigned int i, high, mask, nelt = d->nelt;
28248 rtx out0, out1, in0, in1;
28249 rtx (*gen)(rtx, rtx, rtx, rtx);
28250
28251 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28252 return false;
28253
28254 /* Note that these are little-endian tests. Adjust for big-endian later. */
28255 high = nelt / 2;
28256 if (d->perm[0] == high)
28257 ;
28258 else if (d->perm[0] == 0)
28259 high = 0;
28260 else
28261 return false;
28262 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28263
28264 for (i = 0; i < nelt / 2; i++)
28265 {
28266 unsigned elt = (i + high) & mask;
28267 if (d->perm[i * 2] != elt)
28268 return false;
28269 elt = (elt + nelt) & mask;
28270 if (d->perm[i * 2 + 1] != elt)
28271 return false;
28272 }
28273
28274 /* Success! */
28275 if (d->testing_p)
28276 return true;
28277
28278 switch (d->vmode)
28279 {
28280 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28281 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28282 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28283 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28284 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
28285 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
28286 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28287 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28288 default:
28289 gcc_unreachable ();
28290 }
28291
28292 in0 = d->op0;
28293 in1 = d->op1;
28294 if (BYTES_BIG_ENDIAN)
28295 {
28296 std::swap (in0, in1);
28297 high = !high;
28298 }
28299
28300 out0 = d->target;
28301 out1 = gen_reg_rtx (d->vmode);
28302 if (high)
28303 std::swap (out0, out1);
28304
28305 emit_insn (gen (out0, in0, in1, out1));
28306 return true;
28307 }
28308
28309 /* Recognize patterns for the VREV insns. */
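/* For example, the single-vector V4HI selector { 1, 0, 3, 2 }, which swaps
   adjacent halfwords, is matched with diff == 1 and emitted as VREV32.16.  */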
28310
28311 static bool
28312 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28313 {
28314 unsigned int i, j, diff, nelt = d->nelt;
28315 rtx (*gen)(rtx, rtx);
28316
28317 if (!d->one_vector_p)
28318 return false;
28319
28320 diff = d->perm[0];
28321 switch (diff)
28322 {
28323 case 7:
28324 switch (d->vmode)
28325 {
28326 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28327 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28328 default:
28329 return false;
28330 }
28331 break;
28332 case 3:
28333 switch (d->vmode)
28334 {
28335 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28336 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28337 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28338 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28339 default:
28340 return false;
28341 }
28342 break;
28343 case 1:
28344 switch (d->vmode)
28345 {
28346 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28347 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28348 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28349 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28350 case V4SImode: gen = gen_neon_vrev64v4si; break;
28351 case V2SImode: gen = gen_neon_vrev64v2si; break;
28352 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28353 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28354 default:
28355 return false;
28356 }
28357 break;
28358 default:
28359 return false;
28360 }
28361
28362 for (i = 0; i < nelt ; i += diff + 1)
28363 for (j = 0; j <= diff; j += 1)
28364 {
28365 /* This is guaranteed to be true, as the value of diff
28366 is 7, 3 or 1 and we should have enough elements in the
28367 queue to generate this.  A vector mask with any other
28368 value of diff implies that something has gone wrong
28369 by the time we get here. */
28370 gcc_assert (i + j < nelt);
28371 if (d->perm[i + j] != i + diff - j)
28372 return false;
28373 }
28374
28375 /* Success! */
28376 if (d->testing_p)
28377 return true;
28378
28379 emit_insn (gen (d->target, d->op0));
28380 return true;
28381 }
28382
28383 /* Recognize patterns for the VTRN insns. */
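/* For example, with two V4SI operands the even-lane transpose selector
   { 0, 4, 2, 6 } (odd variant { 1, 5, 3, 7 }) is matched here and emitted
   as a single VTRN.  */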
28384
28385 static bool
28386 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28387 {
28388 unsigned int i, odd, mask, nelt = d->nelt;
28389 rtx out0, out1, in0, in1;
28390 rtx (*gen)(rtx, rtx, rtx, rtx);
28391
28392 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28393 return false;
28394
28395 /* Note that these are little-endian tests. Adjust for big-endian later. */
28396 if (d->perm[0] == 0)
28397 odd = 0;
28398 else if (d->perm[0] == 1)
28399 odd = 1;
28400 else
28401 return false;
28402 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28403
28404 for (i = 0; i < nelt; i += 2)
28405 {
28406 if (d->perm[i] != i + odd)
28407 return false;
28408 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28409 return false;
28410 }
28411
28412 /* Success! */
28413 if (d->testing_p)
28414 return true;
28415
28416 switch (d->vmode)
28417 {
28418 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28419 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28420 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28421 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28422 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28423 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28424 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28425 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28426 default:
28427 gcc_unreachable ();
28428 }
28429
28430 in0 = d->op0;
28431 in1 = d->op1;
28432 if (BYTES_BIG_ENDIAN)
28433 {
28434 std::swap (in0, in1);
28435 odd = !odd;
28436 }
28437
28438 out0 = d->target;
28439 out1 = gen_reg_rtx (d->vmode);
28440 if (odd)
28441 std::swap (out0, out1);
28442
28443 emit_insn (gen (out0, in0, in1, out1));
28444 return true;
28445 }
28446
28447 /* Recognize patterns for the VEXT insns. */
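/* For example, with two V8QI operands the selector { 3, 4, 5, 6, 7, 8, 9, 10 }
   extracts a contiguous window starting at element 3 and is emitted as,
   roughly, "vext.8 dD, dN, dM, #3" (illustrative operands).  */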
28448
28449 static bool
28450 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28451 {
28452 unsigned int i, nelt = d->nelt;
28453 rtx (*gen) (rtx, rtx, rtx, rtx);
28454 rtx offset;
28455
28456 unsigned int location;
28457
28458 unsigned int next = d->perm[0] + 1;
28459
28460 /* TODO: Handle GCC's numbering of elements for big-endian. */
28461 if (BYTES_BIG_ENDIAN)
28462 return false;
28463
28464 /* Check if the extracted indexes are increasing by one. */
28465 for (i = 1; i < nelt; next++, i++)
28466 {
28467 /* If we hit the most significant element of the 2nd vector in
28468 the previous iteration, no need to test further. */
28469 if (next == 2 * nelt)
28470 return false;
28471
28472 /* If we are operating on only one vector: it could be a
28473 rotation. If there are only two elements of size < 64, let
28474 arm_evpc_neon_vrev catch it. */
28475 if (d->one_vector_p && (next == nelt))
28476 {
28477 if ((nelt == 2) && (d->vmode != V2DImode))
28478 return false;
28479 else
28480 next = 0;
28481 }
28482
28483 if (d->perm[i] != next)
28484 return false;
28485 }
28486
28487 location = d->perm[0];
28488
28489 switch (d->vmode)
28490 {
28491 case V16QImode: gen = gen_neon_vextv16qi; break;
28492 case V8QImode: gen = gen_neon_vextv8qi; break;
28493 case V4HImode: gen = gen_neon_vextv4hi; break;
28494 case V8HImode: gen = gen_neon_vextv8hi; break;
28495 case V2SImode: gen = gen_neon_vextv2si; break;
28496 case V4SImode: gen = gen_neon_vextv4si; break;
28497 case V2SFmode: gen = gen_neon_vextv2sf; break;
28498 case V4SFmode: gen = gen_neon_vextv4sf; break;
28499 case V2DImode: gen = gen_neon_vextv2di; break;
28500 default:
28501 return false;
28502 }
28503
28504 /* Success! */
28505 if (d->testing_p)
28506 return true;
28507
28508 offset = GEN_INT (location);
28509 emit_insn (gen (d->target, d->op0, d->op1, offset));
28510 return true;
28511 }
28512
28513 /* The NEON VTBL instruction is a fully variable permutation that's even
28514 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28515 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28516 can do slightly better by expanding this as a constant where we don't
28517 have to apply a mask. */
28518
28519 static bool
28520 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28521 {
28522 rtx rperm[MAX_VECT_LEN], sel;
28523 machine_mode vmode = d->vmode;
28524 unsigned int i, nelt = d->nelt;
28525
28526 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28527 numbering of elements for big-endian, we must reverse the order. */
28528 if (BYTES_BIG_ENDIAN)
28529 return false;
28530
28531 if (d->testing_p)
28532 return true;
28533
28534 /* Generic code will try constant permutation twice: once with the
28535 original mode and again with the elements lowered to QImode.
28536 So wait and don't do the selector expansion ourselves. */
28537 if (vmode != V8QImode && vmode != V16QImode)
28538 return false;
28539
28540 for (i = 0; i < nelt; ++i)
28541 rperm[i] = GEN_INT (d->perm[i]);
28542 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28543 sel = force_reg (vmode, sel);
28544
28545 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28546 return true;
28547 }
28548
28549 static bool
28550 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28551 {
28552 /* Check if the input mask matches vext before reordering the
28553 operands. */
28554 if (TARGET_NEON)
28555 if (arm_evpc_neon_vext (d))
28556 return true;
28557
28558 /* The pattern matching functions above are written to look for a small
28559 number to begin the sequence (0, 1, N/2). If we begin with an index
28560 from the second operand, we can swap the operands. */
28561 if (d->perm[0] >= d->nelt)
28562 {
28563 unsigned i, nelt = d->nelt;
28564
28565 for (i = 0; i < nelt; ++i)
28566 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28567
28568 std::swap (d->op0, d->op1);
28569 }
28570
28571 if (TARGET_NEON)
28572 {
28573 if (arm_evpc_neon_vuzp (d))
28574 return true;
28575 if (arm_evpc_neon_vzip (d))
28576 return true;
28577 if (arm_evpc_neon_vrev (d))
28578 return true;
28579 if (arm_evpc_neon_vtrn (d))
28580 return true;
28581 return arm_evpc_neon_vtbl (d);
28582 }
28583 return false;
28584 }
28585
28586 /* Expand a vec_perm_const pattern. */
28587
28588 bool
28589 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28590 {
28591 struct expand_vec_perm_d d;
28592 int i, nelt, which;
28593
28594 d.target = target;
28595 d.op0 = op0;
28596 d.op1 = op1;
28597
28598 d.vmode = GET_MODE (target);
28599 gcc_assert (VECTOR_MODE_P (d.vmode));
28600 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28601 d.testing_p = false;
28602
28603 for (i = which = 0; i < nelt; ++i)
28604 {
28605 rtx e = XVECEXP (sel, 0, i);
28606 int ei = INTVAL (e) & (2 * nelt - 1);
28607 which |= (ei < nelt ? 1 : 2);
28608 d.perm[i] = ei;
28609 }
28610
28611 switch (which)
28612 {
28613 default:
28614 gcc_unreachable();
28615
28616 case 3:
28617 d.one_vector_p = false;
28618 if (!rtx_equal_p (op0, op1))
28619 break;
28620
28621 /* The elements of PERM do not suggest that only the first operand
28622 is used, but both operands are identical. Allow easier matching
28623 of the permutation by folding the permutation into the single
28624 input vector. */
28625 /* FALLTHRU */
28626 case 2:
28627 for (i = 0; i < nelt; ++i)
28628 d.perm[i] &= nelt - 1;
28629 d.op0 = op1;
28630 d.one_vector_p = true;
28631 break;
28632
28633 case 1:
28634 d.op1 = op0;
28635 d.one_vector_p = true;
28636 break;
28637 }
28638
28639 return arm_expand_vec_perm_const_1 (&d);
28640 }
28641
28642 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28643
28644 static bool
28645 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28646 const unsigned char *sel)
28647 {
28648 struct expand_vec_perm_d d;
28649 unsigned int i, nelt, which;
28650 bool ret;
28651
28652 d.vmode = vmode;
28653 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28654 d.testing_p = true;
28655 memcpy (d.perm, sel, nelt);
28656
28657 /* Categorize the set of elements in the selector. */
28658 for (i = which = 0; i < nelt; ++i)
28659 {
28660 unsigned char e = d.perm[i];
28661 gcc_assert (e < 2 * nelt);
28662 which |= (e < nelt ? 1 : 2);
28663 }
28664
28665 /* For all elements from second vector, fold the elements to first. */
28666 if (which == 2)
28667 for (i = 0; i < nelt; ++i)
28668 d.perm[i] -= nelt;
28669
28670 /* Check whether the mask can be applied to the vector type. */
28671 d.one_vector_p = (which != 3);
28672
28673 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28674 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28675 if (!d.one_vector_p)
28676 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28677
28678 start_sequence ();
28679 ret = arm_expand_vec_perm_const_1 (&d);
28680 end_sequence ();
28681
28682 return ret;
28683 }
28684
28685 bool
28686 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28687 {
28688 /* If we are soft float and either have LDRD or the mode is no wider
28689 than a word, then all auto-increment forms are OK. */
28690 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28691 return true;
28692
28693 switch (code)
28694 {
28695 /* Post increment and Pre Decrement are supported for all
28696 instruction forms except for vector forms. */
28697 case ARM_POST_INC:
28698 case ARM_PRE_DEC:
28699 if (VECTOR_MODE_P (mode))
28700 {
28701 if (code != ARM_PRE_DEC)
28702 return true;
28703 else
28704 return false;
28705 }
28706
28707 return true;
28708
28709 case ARM_POST_DEC:
28710 case ARM_PRE_INC:
28711 /* Without LDRD and mode size greater than
28712 word size, there is no point in auto-incrementing
28713 because ldm and stm will not have these forms. */
28714 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28715 return false;
28716
28717 /* Vector and floating point modes do not support
28718 these auto increment forms. */
28719 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28720 return false;
28721
28722 return true;
28723
28724 default:
28725 return false;
28726
28727 }
28728
28729 return false;
28730 }
28731
28732 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
28733 on ARM, since we know that shifts by negative amounts are no-ops.
28734 Additionally, the default expansion code is not available or suitable
28735 for post-reload insn splits (this can occur when the register allocator
28736 chooses not to do a shift in NEON).
28737
28738 This function is used in both initial expand and post-reload splits, and
28739 handles all kinds of 64-bit shifts.
28740
28741 Input requirements:
28742 - It is safe for the input and output to be the same register, but
28743 early-clobber rules apply for the shift amount and scratch registers.
28744 - Shift by register requires both scratch registers. In all other cases
28745 the scratch registers may be NULL.
28746 - Ashiftrt by a register also clobbers the CC register. */
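/* For instance (a sketch only, with OUT in r0/r1 and IN in r2/r3, low word
   first), a constant logical right shift of a DImode value by 8 becomes:

       lsr     r0, r2, #8
       orr     r0, r0, r3, lsl #24
       lsr     r1, r3, #8                                                  */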
28747 void
28748 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28749 rtx amount, rtx scratch1, rtx scratch2)
28750 {
28751 rtx out_high = gen_highpart (SImode, out);
28752 rtx out_low = gen_lowpart (SImode, out);
28753 rtx in_high = gen_highpart (SImode, in);
28754 rtx in_low = gen_lowpart (SImode, in);
28755
28756 /* Terminology:
28757 in = the register pair containing the input value.
28758 out = the destination register pair.
28759 up = the high- or low-part of each pair.
28760 down = the opposite part to "up".
28761 In a shift, we can consider bits to shift from "up"-stream to
28762 "down"-stream, so in a left-shift "up" is the low-part and "down"
28763 is the high-part of each register pair. */
28764
28765 rtx out_up = code == ASHIFT ? out_low : out_high;
28766 rtx out_down = code == ASHIFT ? out_high : out_low;
28767 rtx in_up = code == ASHIFT ? in_low : in_high;
28768 rtx in_down = code == ASHIFT ? in_high : in_low;
28769
28770 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28771 gcc_assert (out
28772 && (REG_P (out) || GET_CODE (out) == SUBREG)
28773 && GET_MODE (out) == DImode);
28774 gcc_assert (in
28775 && (REG_P (in) || GET_CODE (in) == SUBREG)
28776 && GET_MODE (in) == DImode);
28777 gcc_assert (amount
28778 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28779 && GET_MODE (amount) == SImode)
28780 || CONST_INT_P (amount)));
28781 gcc_assert (scratch1 == NULL
28782 || (GET_CODE (scratch1) == SCRATCH)
28783 || (GET_MODE (scratch1) == SImode
28784 && REG_P (scratch1)));
28785 gcc_assert (scratch2 == NULL
28786 || (GET_CODE (scratch2) == SCRATCH)
28787 || (GET_MODE (scratch2) == SImode
28788 && REG_P (scratch2)));
28789 gcc_assert (!REG_P (out) || !REG_P (amount)
28790 || !HARD_REGISTER_P (out)
28791 || (REGNO (out) != REGNO (amount)
28792 && REGNO (out) + 1 != REGNO (amount)));
28793
28794 /* Macros to make following code more readable. */
28795 #define SUB_32(DEST,SRC) \
28796 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28797 #define RSB_32(DEST,SRC) \
28798 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28799 #define SUB_S_32(DEST,SRC) \
28800 gen_addsi3_compare0 ((DEST), (SRC), \
28801 GEN_INT (-32))
28802 #define SET(DEST,SRC) \
28803 gen_rtx_SET ((DEST), (SRC))
28804 #define SHIFT(CODE,SRC,AMOUNT) \
28805 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28806 #define LSHIFT(CODE,SRC,AMOUNT) \
28807 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28808 SImode, (SRC), (AMOUNT))
28809 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28810 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28811 SImode, (SRC), (AMOUNT))
28812 #define ORR(A,B) \
28813 gen_rtx_IOR (SImode, (A), (B))
28814 #define BRANCH(COND,LABEL) \
28815 gen_arm_cond_branch ((LABEL), \
28816 gen_rtx_ ## COND (CCmode, cc_reg, \
28817 const0_rtx), \
28818 cc_reg)
28819
28820 /* Shifts by register and shifts by constant are handled separately. */
28821 if (CONST_INT_P (amount))
28822 {
28823 /* We have a shift-by-constant. */
28824
28825 /* First, handle out-of-range shift amounts.
28826 In both cases we try to match the result an ARM instruction in a
28827 shift-by-register would give. This helps reduce execution
28828 differences between optimization levels, but it won't stop other
28829 parts of the compiler doing different things.  This is "undefined
28830 behaviour", in any case. */
28831 if (INTVAL (amount) <= 0)
28832 emit_insn (gen_movdi (out, in));
28833 else if (INTVAL (amount) >= 64)
28834 {
28835 if (code == ASHIFTRT)
28836 {
28837 rtx const31_rtx = GEN_INT (31);
28838 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28839 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28840 }
28841 else
28842 emit_insn (gen_movdi (out, const0_rtx));
28843 }
28844
28845 /* Now handle valid shifts. */
28846 else if (INTVAL (amount) < 32)
28847 {
28848 /* Shifts by a constant less than 32. */
28849 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28850
28851 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28852 emit_insn (SET (out_down,
28853 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28854 out_down)));
28855 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28856 }
28857 else
28858 {
28859 /* Shifts by a constant greater than 31. */
28860 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28861
28862 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28863 if (code == ASHIFTRT)
28864 emit_insn (gen_ashrsi3 (out_up, in_up,
28865 GEN_INT (31)));
28866 else
28867 emit_insn (SET (out_up, const0_rtx));
28868 }
28869 }
28870 else
28871 {
28872 /* We have a shift-by-register. */
28873 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28874
28875 /* This alternative requires the scratch registers. */
28876 gcc_assert (scratch1 && REG_P (scratch1));
28877 gcc_assert (scratch2 && REG_P (scratch2));
28878
28879 /* We will need the values "amount-32" and "32-amount" later.
28880 Swapping them around now allows the later code to be more general. */
28881 switch (code)
28882 {
28883 case ASHIFT:
28884 emit_insn (SUB_32 (scratch1, amount));
28885 emit_insn (RSB_32 (scratch2, amount));
28886 break;
28887 case ASHIFTRT:
28888 emit_insn (RSB_32 (scratch1, amount));
28889 /* Also set CC = amount > 32. */
28890 emit_insn (SUB_S_32 (scratch2, amount));
28891 break;
28892 case LSHIFTRT:
28893 emit_insn (RSB_32 (scratch1, amount));
28894 emit_insn (SUB_32 (scratch2, amount));
28895 break;
28896 default:
28897 gcc_unreachable ();
28898 }
28899
28900 /* Emit code like this:
28901
28902 arithmetic-left:
28903 out_down = in_down << amount;
28904 out_down = (in_up << (amount - 32)) | out_down;
28905 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28906 out_up = in_up << amount;
28907
28908 arithmetic-right:
28909 out_down = in_down >> amount;
28910 out_down = (in_up << (32 - amount)) | out_down;
28911 if (amount < 32)
28912 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28913 out_up = in_up << amount;
28914
28915 logical-right:
28916 out_down = in_down >> amount;
28917 out_down = (in_up << (32 - amount)) | out_down;
28918 if (amount < 32)
28919 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28920 out_up = in_up << amount;
28921
28922 The ARM and Thumb2 variants are the same but implemented slightly
28923 differently. If this were only called during expand we could just
28924 use the Thumb2 case and let combine do the right thing, but this
28925 can also be called from post-reload splitters. */
28926
28927 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28928
28929 if (!TARGET_THUMB2)
28930 {
28931 /* Emit code for ARM mode. */
28932 emit_insn (SET (out_down,
28933 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28934 if (code == ASHIFTRT)
28935 {
28936 rtx_code_label *done_label = gen_label_rtx ();
28937 emit_jump_insn (BRANCH (LT, done_label));
28938 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28939 out_down)));
28940 emit_label (done_label);
28941 }
28942 else
28943 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28944 out_down)));
28945 }
28946 else
28947 {
28948 /* Emit code for Thumb2 mode.
28949 Thumb2 can't do shift and or in one insn. */
28950 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28951 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28952
28953 if (code == ASHIFTRT)
28954 {
28955 rtx_code_label *done_label = gen_label_rtx ();
28956 emit_jump_insn (BRANCH (LT, done_label));
28957 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28958 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28959 emit_label (done_label);
28960 }
28961 else
28962 {
28963 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28964 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28965 }
28966 }
28967
28968 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28969 }
28970
28971 #undef SUB_32
28972 #undef RSB_32
28973 #undef SUB_S_32
28974 #undef SET
28975 #undef SHIFT
28976 #undef LSHIFT
28977 #undef REV_LSHIFT
28978 #undef ORR
28979 #undef BRANCH
28980 }
28981
28982 /* Returns true if the pattern is a valid symbolic address, which is either a
28983 symbol_ref or (symbol_ref + addend).
28984
28985 According to the ARM ELF ABI, the initial addend of REL-type relocations
28986 used with MOVW and MOVT instructions is formed by interpreting the 16-bit
28987 literal field of the instruction as a 16-bit signed value in the range
28988 -32768 <= A < 32768. */
28989
28990 bool
28991 arm_valid_symbolic_address_p (rtx addr)
28992 {
28993 rtx xop0, xop1 = NULL_RTX;
28994 rtx tmp = addr;
28995
28996 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
28997 return true;
28998
28999 /* (const (plus: symbol_ref const_int)) */
29000 if (GET_CODE (addr) == CONST)
29001 tmp = XEXP (addr, 0);
29002
29003 if (GET_CODE (tmp) == PLUS)
29004 {
29005 xop0 = XEXP (tmp, 0);
29006 xop1 = XEXP (tmp, 1);
29007
29008 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29009 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29010 }
29011
29012 return false;
29013 }
29014
29015 /* Returns true if *COMPARISON is a valid comparison operation, and puts
29016 the operands into a form that is valid for it. */
29017 bool
29018 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29019 {
29020 enum rtx_code code = GET_CODE (*comparison);
29021 int code_int;
29022 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29023 ? GET_MODE (*op2) : GET_MODE (*op1);
29024
29025 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29026
29027 if (code == UNEQ || code == LTGT)
29028 return false;
29029
29030 code_int = (int)code;
29031 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29032 PUT_CODE (*comparison, (enum rtx_code)code_int);
29033
29034 switch (mode)
29035 {
29036 case SImode:
29037 if (!arm_add_operand (*op1, mode))
29038 *op1 = force_reg (mode, *op1);
29039 if (!arm_add_operand (*op2, mode))
29040 *op2 = force_reg (mode, *op2);
29041 return true;
29042
29043 case DImode:
29044 if (!cmpdi_operand (*op1, mode))
29045 *op1 = force_reg (mode, *op1);
29046 if (!cmpdi_operand (*op2, mode))
29047 *op2 = force_reg (mode, *op2);
29048 return true;
29049
29050 case SFmode:
29051 case DFmode:
29052 if (!arm_float_compare_operand (*op1, mode))
29053 *op1 = force_reg (mode, *op1);
29054 if (!arm_float_compare_operand (*op2, mode))
29055 *op2 = force_reg (mode, *op2);
29056 return true;
29057 default:
29058 break;
29059 }
29060
29061 return false;
29062
29063 }
29064
29065 /* Maximum number of instructions to set block of memory. */
29066 static int
29067 arm_block_set_max_insns (void)
29068 {
29069 if (optimize_function_for_size_p (cfun))
29070 return 4;
29071 else
29072 return current_tune->max_insns_inline_memset;
29073 }
29074
29075 /* Return TRUE if it's profitable to set block of memory for
29076 non-vectorized case. VAL is the value to set the memory
29077 with. LENGTH is the number of bytes to set. ALIGN is the
29078 alignment of the destination memory in bytes. UNALIGNED_P
29079 is TRUE if we can only set the memory with instructions
29080 meeting alignment requirements. USE_STRD_P is TRUE if we
29081 can use strd to set the memory. */
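/* As a worked example (assuming the usual aligned, non-STRD path), setting
   15 bytes costs (15 >> 2) + leftover[15 & 3] = 3 + 2 stores on top of the
   constant-load cost, reduced by one when unaligned access lets the final
   STRH/STRB pair merge into a single STR.  */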
29082 static bool
29083 arm_block_set_non_vect_profit_p (rtx val,
29084 unsigned HOST_WIDE_INT length,
29085 unsigned HOST_WIDE_INT align,
29086 bool unaligned_p, bool use_strd_p)
29087 {
29088 int num = 0;
29089 /* For a leftover of 0-7 bytes, we can set the memory block using
29090 strb/strh/str with the minimum number of instructions. */
29091 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29092
29093 if (unaligned_p)
29094 {
29095 num = arm_const_inline_cost (SET, val);
29096 num += length / align + length % align;
29097 }
29098 else if (use_strd_p)
29099 {
29100 num = arm_const_double_inline_cost (val);
29101 num += (length >> 3) + leftover[length & 7];
29102 }
29103 else
29104 {
29105 num = arm_const_inline_cost (SET, val);
29106 num += (length >> 2) + leftover[length & 3];
29107 }
29108
29109 /* We may be able to combine the last STRH/STRB pair into a single STR
29110 by shifting one byte back. */
29111 if (unaligned_access && length > 3 && (length & 3) == 3)
29112 num--;
29113
29114 return (num <= arm_block_set_max_insns ());
29115 }
29116
29117 /* Return TRUE if it's profitable to set block of memory for
29118 vectorized case. LENGTH is the number of bytes to set.
29119 ALIGN is the alignment of destination memory in bytes.
29120 MODE is the vector mode used to set the memory. */
29121 static bool
29122 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29123 unsigned HOST_WIDE_INT align,
29124 machine_mode mode)
29125 {
29126 int num;
29127 bool unaligned_p = ((align & 3) != 0);
29128 unsigned int nelt = GET_MODE_NUNITS (mode);
29129
29130 /* Instruction loading constant value. */
29131 num = 1;
29132 /* Instructions storing the memory. */
29133 num += (length + nelt - 1) / nelt;
29134 /* Instructions adjusting the address expression.  We only need to
29135 adjust the address expression if it's 4-byte aligned and the leftover
29136 bytes can only be stored by a misaligned store instruction. */
29137 if (!unaligned_p && (length & 3) != 0)
29138 num++;
29139
29140 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29141 if (!unaligned_p && mode == V16QImode)
29142 num--;
29143
29144 return (num <= arm_block_set_max_insns ());
29145 }
29146
29147 /* Set a block of memory using vectorization instructions for the
29148 unaligned case. We fill the first LENGTH bytes of the memory
29149 area starting from DSTBASE with byte constant VALUE. ALIGN is
29150 the alignment requirement of memory. Return TRUE if succeeded. */
29151 static bool
29152 arm_block_set_unaligned_vect (rtx dstbase,
29153 unsigned HOST_WIDE_INT length,
29154 unsigned HOST_WIDE_INT value,
29155 unsigned HOST_WIDE_INT align)
29156 {
29157 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29158 rtx dst, mem;
29159 rtx val_elt, val_vec, reg;
29160 rtx rval[MAX_VECT_LEN];
29161 rtx (*gen_func) (rtx, rtx);
29162 machine_mode mode;
29163 unsigned HOST_WIDE_INT v = value;
29164
29165 gcc_assert ((align & 0x3) != 0);
29166 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29167 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29168 if (length >= nelt_v16)
29169 {
29170 mode = V16QImode;
29171 gen_func = gen_movmisalignv16qi;
29172 }
29173 else
29174 {
29175 mode = V8QImode;
29176 gen_func = gen_movmisalignv8qi;
29177 }
29178 nelt_mode = GET_MODE_NUNITS (mode);
29179 gcc_assert (length >= nelt_mode);
29180 /* Skip if it isn't profitable. */
29181 if (!arm_block_set_vect_profit_p (length, align, mode))
29182 return false;
29183
29184 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29185 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29186
29187 v = sext_hwi (v, BITS_PER_WORD);
29188 val_elt = GEN_INT (v);
29189 for (j = 0; j < nelt_mode; j++)
29190 rval[j] = val_elt;
29191
29192 reg = gen_reg_rtx (mode);
29193 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29194 /* Emit instruction loading the constant value. */
29195 emit_move_insn (reg, val_vec);
29196
29197 /* Handle nelt_mode bytes in a vector. */
29198 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29199 {
29200 emit_insn ((*gen_func) (mem, reg));
29201 if (i + 2 * nelt_mode <= length)
29202 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29203 }
29204
29205 /* If at least nelt_v8 bytes are left over, we must be in
29206 V16QI mode. */
29207 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29208
29209 /* Handle (8, 16) bytes leftover. */
29210 if (i + nelt_v8 < length)
29211 {
29212 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29213 /* We are shifting bytes back, set the alignment accordingly. */
29214 if ((length & 1) != 0 && align >= 2)
29215 set_mem_align (mem, BITS_PER_UNIT);
29216
29217 emit_insn (gen_movmisalignv16qi (mem, reg));
29218 }
29219 /* Handle (0, 8] bytes leftover. */
29220 else if (i < length && i + nelt_v8 >= length)
29221 {
29222 if (mode == V16QImode)
29223 {
29224 reg = gen_lowpart (V8QImode, reg);
29225 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
29226 }
29227 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29228 + (nelt_mode - nelt_v8))));
29229 /* We are shifting bytes back, set the alignment accordingly. */
29230 if ((length & 1) != 0 && align >= 2)
29231 set_mem_align (mem, BITS_PER_UNIT);
29232
29233 emit_insn (gen_movmisalignv8qi (mem, reg));
29234 }
29235
29236 return true;
29237 }
29238
29239 /* Set a block of memory using vectorization instructions for the
29240 aligned case. We fill the first LENGTH bytes of the memory area
29241 starting from DSTBASE with byte constant VALUE. ALIGN is the
29242 alignment requirement of memory. Return TRUE if succeeded. */
29243 static bool
29244 arm_block_set_aligned_vect (rtx dstbase,
29245 unsigned HOST_WIDE_INT length,
29246 unsigned HOST_WIDE_INT value,
29247 unsigned HOST_WIDE_INT align)
29248 {
29249 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29250 rtx dst, addr, mem;
29251 rtx val_elt, val_vec, reg;
29252 rtx rval[MAX_VECT_LEN];
29253 machine_mode mode;
29254 unsigned HOST_WIDE_INT v = value;
29255
29256 gcc_assert ((align & 0x3) == 0);
29257 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29258 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29259 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29260 mode = V16QImode;
29261 else
29262 mode = V8QImode;
29263
29264 nelt_mode = GET_MODE_NUNITS (mode);
29265 gcc_assert (length >= nelt_mode);
29266 /* Skip if it isn't profitable. */
29267 if (!arm_block_set_vect_profit_p (length, align, mode))
29268 return false;
29269
29270 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29271
29272 v = sext_hwi (v, BITS_PER_WORD);
29273 val_elt = GEN_INT (v);
29274 for (j = 0; j < nelt_mode; j++)
29275 rval[j] = val_elt;
29276
29277 reg = gen_reg_rtx (mode);
29278 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29279 /* Emit instruction loading the constant value. */
29280 emit_move_insn (reg, val_vec);
29281
29282 i = 0;
29283 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29284 if (mode == V16QImode)
29285 {
29286 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29287 emit_insn (gen_movmisalignv16qi (mem, reg));
29288 i += nelt_mode;
29289 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29290 if (i + nelt_v8 < length && i + nelt_v16 > length)
29291 {
29292 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29293 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29294 /* We are shifting bytes back, set the alignment accordingly. */
29295 if ((length & 0x3) == 0)
29296 set_mem_align (mem, BITS_PER_UNIT * 4);
29297 else if ((length & 0x1) == 0)
29298 set_mem_align (mem, BITS_PER_UNIT * 2);
29299 else
29300 set_mem_align (mem, BITS_PER_UNIT);
29301
29302 emit_insn (gen_movmisalignv16qi (mem, reg));
29303 return true;
29304 }
29305 /* Fall through for bytes leftover. */
29306 mode = V8QImode;
29307 nelt_mode = GET_MODE_NUNITS (mode);
29308 reg = gen_lowpart (V8QImode, reg);
29309 }
29310
29311 /* Handle 8 bytes in a vector. */
29312 for (; (i + nelt_mode <= length); i += nelt_mode)
29313 {
29314 addr = plus_constant (Pmode, dst, i);
29315 mem = adjust_automodify_address (dstbase, mode, addr, i);
29316 emit_move_insn (mem, reg);
29317 }
29318
29319 /* Handle single word leftover by shifting 4 bytes back. We can
29320 use aligned access for this case. */
29321 if (i + UNITS_PER_WORD == length)
29322 {
29323 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29324 mem = adjust_automodify_address (dstbase, mode,
29325 addr, i - UNITS_PER_WORD);
29326 /* We are shifting 4 bytes back, set the alignment accordingly. */
29327 if (align > UNITS_PER_WORD)
29328 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29329
29330 emit_move_insn (mem, reg);
29331 }
29332 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29333 We have to use unaligned access for this case. */
29334 else if (i < length)
29335 {
29336 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29337 mem = adjust_automodify_address (dstbase, mode, dst, 0);
29338 /* We are shifting bytes back, set the alignment accordingly. */
29339 if ((length & 1) == 0)
29340 set_mem_align (mem, BITS_PER_UNIT * 2);
29341 else
29342 set_mem_align (mem, BITS_PER_UNIT);
29343
29344 emit_insn (gen_movmisalignv8qi (mem, reg));
29345 }
29346
29347 return true;
29348 }
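/* Illustrative walk-through of arm_block_set_aligned_vect (assuming
   unaligned_access, little-endian and a passing profitability check): with
   LENGTH == 27 and ALIGN == 4 we use V16QImode, store bytes 0-15, then,
   since the 11 leftover bytes fall in (8, 16), advance DST by 27 - 16 == 11
   and store bytes 11-26 with a second vst1.  With LENGTH == 20 we instead
   fall through to V8QImode and hit the single-word-leftover case, storing
   8 bytes at offset 12 so the final word is written without unaligned
   accesses.  */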
29349
29350 /* Set a block of memory using plain strh/strb instructions, using
29351 only the instructions allowed by ALIGN on the processor. We fill
29352 the first LENGTH bytes of the memory area starting from DSTBASE
29353 with byte constant VALUE. ALIGN is the alignment requirement
29354 of memory. Return TRUE if succeeded. */
29355 static bool
29356 arm_block_set_unaligned_non_vect (rtx dstbase,
29357 unsigned HOST_WIDE_INT length,
29358 unsigned HOST_WIDE_INT value,
29359 unsigned HOST_WIDE_INT align)
29360 {
29361 unsigned int i;
29362 rtx dst, addr, mem;
29363 rtx val_exp, val_reg, reg;
29364 machine_mode mode;
29365 HOST_WIDE_INT v = value;
29366
29367 gcc_assert (align == 1 || align == 2);
29368
29369 if (align == 2)
29370 v |= (value << BITS_PER_UNIT);
29371
29372 v = sext_hwi (v, BITS_PER_WORD);
29373 val_exp = GEN_INT (v);
29374 /* Skip if it isn't profitable. */
29375 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29376 align, true, false))
29377 return false;
29378
29379 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29380 mode = (align == 2 ? HImode : QImode);
29381 val_reg = force_reg (SImode, val_exp);
29382 reg = gen_lowpart (mode, val_reg);
29383
29384 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29385 {
29386 addr = plus_constant (Pmode, dst, i);
29387 mem = adjust_automodify_address (dstbase, mode, addr, i);
29388 emit_move_insn (mem, reg);
29389 }
29390
29391 /* Handle single byte leftover. */
29392 if (i + 1 == length)
29393 {
29394 reg = gen_lowpart (QImode, val_reg);
29395 addr = plus_constant (Pmode, dst, i);
29396 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29397 emit_move_insn (mem, reg);
29398 i++;
29399 }
29400
29401 gcc_assert (i == length);
29402 return true;
29403 }
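/* Illustrative walk-through of arm_block_set_unaligned_non_vect (assuming
   the profitability check passes): with LENGTH == 5, ALIGN == 2 and
   VALUE == 0xAB we build v == 0xABAB, emit two HImode (strh) stores at
   offsets 0 and 2, then finish with a QImode (strb) store of 0xAB at
   offset 4.  */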
29404
29405 /* Set a block of memory using plain strd/str/strh/strb instructions,
29406 to permit unaligned copies on processors that support unaligned
29407 semantics for those instructions. We fill the first LENGTH bytes of
29408 the memory area starting from DSTBASE with byte constant VALUE. ALIGN
29409 is the alignment requirement of memory. Return TRUE if succeeded. */
29410 static bool
29411 arm_block_set_aligned_non_vect (rtx dstbase,
29412 unsigned HOST_WIDE_INT length,
29413 unsigned HOST_WIDE_INT value,
29414 unsigned HOST_WIDE_INT align)
29415 {
29416 unsigned int i;
29417 rtx dst, addr, mem;
29418 rtx val_exp, val_reg, reg;
29419 unsigned HOST_WIDE_INT v;
29420 bool use_strd_p;
29421
29422 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29423 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29424
29425 v = (value | (value << 8) | (value << 16) | (value << 24));
29426 if (length < UNITS_PER_WORD)
29427 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29428
29429 if (use_strd_p)
29430 v |= (v << BITS_PER_WORD);
29431 else
29432 v = sext_hwi (v, BITS_PER_WORD);
29433
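  /* For example, VALUE == 0xAB gives v == 0xABABABAB (masked down to
     0xABABAB when LENGTH == 3); with strd the pattern is widened to
     0xABABABABABABABAB, otherwise it is sign-extended to a full
     HOST_WIDE_INT.  */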
29434 val_exp = GEN_INT (v);
29435 /* Skip if it isn't profitable. */
29436 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29437 align, false, use_strd_p))
29438 {
29439 if (!use_strd_p)
29440 return false;
29441
29442 /* Try without strd. */
29443 v = (v >> BITS_PER_WORD);
29444 v = sext_hwi (v, BITS_PER_WORD);
29445 val_exp = GEN_INT (v);
29446 use_strd_p = false;
29447 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29448 align, false, use_strd_p))
29449 return false;
29450 }
29451
29452 i = 0;
29453 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29454 /* Handle double words using strd if possible. */
29455 if (use_strd_p)
29456 {
29457 val_reg = force_reg (DImode, val_exp);
29458 reg = val_reg;
29459 for (; (i + 8 <= length); i += 8)
29460 {
29461 addr = plus_constant (Pmode, dst, i);
29462 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29463 emit_move_insn (mem, reg);
29464 }
29465 }
29466 else
29467 val_reg = force_reg (SImode, val_exp);
29468
29469 /* Handle words. */
29470 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29471 for (; (i + 4 <= length); i += 4)
29472 {
29473 addr = plus_constant (Pmode, dst, i);
29474 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29475 if ((align & 3) == 0)
29476 emit_move_insn (mem, reg);
29477 else
29478 emit_insn (gen_unaligned_storesi (mem, reg));
29479 }
29480
29481 /* Merge last pair of STRH and STRB into a STR if possible. */
29482 if (unaligned_access && i > 0 && (i + 3) == length)
29483 {
29484 addr = plus_constant (Pmode, dst, i - 1);
29485 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29486 /* We are shifting one byte back, set the alignment accordingly. */
29487 if ((align & 1) == 0)
29488 set_mem_align (mem, BITS_PER_UNIT);
29489
29490 /* Most likely this is an unaligned access, and we can't tell at
29491 compilation time. */
29492 emit_insn (gen_unaligned_storesi (mem, reg));
29493 return true;
29494 }
29495
29496 /* Handle half word leftover. */
29497 if (i + 2 <= length)
29498 {
29499 reg = gen_lowpart (HImode, val_reg);
29500 addr = plus_constant (Pmode, dst, i);
29501 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29502 if ((align & 1) == 0)
29503 emit_move_insn (mem, reg);
29504 else
29505 emit_insn (gen_unaligned_storehi (mem, reg));
29506
29507 i += 2;
29508 }
29509
29510 /* Handle single byte leftover. */
29511 if (i + 1 == length)
29512 {
29513 reg = gen_lowpart (QImode, val_reg);
29514 addr = plus_constant (Pmode, dst, i);
29515 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29516 emit_move_insn (mem, reg);
29517 }
29518
29519 return true;
29520 }
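/* Illustrative walk-through of arm_block_set_aligned_non_vect (assuming the
   profitability check passes): with LENGTH == 7, ALIGN == 4, VALUE == 0xAB
   and no strd preference, one aligned str covers bytes 0-3; since
   unaligned_access holds and i + 3 == LENGTH, the trailing strh/strb pair
   is merged into a single unaligned str at offset 3 covering bytes 3-6.
   Without unaligned_access the tail is instead an aligned strh at offset 4
   followed by an strb at offset 6.  */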
29521
29522 /* Set a block of memory using vectorization instructions for both
29523 aligned and unaligned cases. We fill the first LENGTH bytes of
29524 the memory area starting from DSTBASE with byte constant VALUE.
29525 ALIGN is the alignment requirement of memory. */
29526 static bool
29527 arm_block_set_vect (rtx dstbase,
29528 unsigned HOST_WIDE_INT length,
29529 unsigned HOST_WIDE_INT value,
29530 unsigned HOST_WIDE_INT align)
29531 {
29532 /* Check whether we need to use unaligned store instruction. */
29533 if (((align & 3) != 0 || (length & 3) != 0)
29534 /* Check whether unaligned store instruction is available. */
29535 && (!unaligned_access || BYTES_BIG_ENDIAN))
29536 return false;
29537
29538 if ((align & 3) == 0)
29539 return arm_block_set_aligned_vect (dstbase, length, value, align);
29540 else
29541 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29542 }
29543
29544 /* Expand a string store (setmem) operation. First we try to do it
29545 using vectorization instructions, then fall back to ARM unaligned
29546 access and double-word stores if profitable. OPERANDS[0] is the
29547 destination, OPERANDS[1] is the number of bytes, OPERANDS[2] is the
29548 value to initialize the memory with, OPERANDS[3] is the known
29549 alignment of the destination. */
29550 bool
29551 arm_gen_setmem (rtx *operands)
29552 {
29553 rtx dstbase = operands[0];
29554 unsigned HOST_WIDE_INT length;
29555 unsigned HOST_WIDE_INT value;
29556 unsigned HOST_WIDE_INT align;
29557
29558 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29559 return false;
29560
29561 length = UINTVAL (operands[1]);
29562 if (length > 64)
29563 return false;
29564
29565 value = (UINTVAL (operands[2]) & 0xFF);
29566 align = UINTVAL (operands[3]);
29567 if (TARGET_NEON && length >= 8
29568 && current_tune->string_ops_prefer_neon
29569 && arm_block_set_vect (dstbase, length, value, align))
29570 return true;
29571
29572 if (!unaligned_access && (align & 3) != 0)
29573 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29574
29575 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29576 }
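/* Illustrative example of what reaches arm_gen_setmem (variable name
   hypothetical): a call such as

     memset (buf, 0xab, 20);

   with BUF known to be 4-byte aligned arrives with OPERANDS[1] == 20,
   OPERANDS[2] == 0xab and OPERANDS[3] == 4; with NEON enabled and
   string_ops_prefer_neon set it is expanded by arm_block_set_aligned_vect,
   otherwise by the non-vector paths above.  */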
29577
29578
29579 static bool
29580 arm_macro_fusion_p (void)
29581 {
29582 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
29583 }
29584
29585
29586 static bool
29587 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29588 {
29589 rtx set_dest;
29590 rtx prev_set = single_set (prev);
29591 rtx curr_set = single_set (curr);
29592
29593 if (!prev_set
29594 || !curr_set)
29595 return false;
29596
29597 if (any_condjump_p (curr))
29598 return false;
29599
29600 if (!arm_macro_fusion_p ())
29601 return false;
29602
29603 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT)
29604 {
29605 /* We are trying to fuse
29606 movw imm / movt imm
29607 instructions as a group that gets scheduled together. */
29608
29609 set_dest = SET_DEST (curr_set);
29610
29611 if (GET_MODE (set_dest) != SImode)
29612 return false;
29613
29614 /* We are trying to match:
29615 prev (movw) == (set (reg r0) (const_int imm16))
29616 curr (movt) == (set (zero_extract (reg r0)
29617 (const_int 16)
29618 (const_int 16))
29619 (const_int imm16_1))
29620 or
29621 prev (movw) == (set (reg r1)
29622 (high (symbol_ref ("SYM"))))
29623 curr (movt) == (set (reg r0)
29624 (lo_sum (reg r1)
29625 (symbol_ref ("SYM")))) */
29626 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29627 {
29628 if (CONST_INT_P (SET_SRC (curr_set))
29629 && CONST_INT_P (SET_SRC (prev_set))
29630 && REG_P (XEXP (set_dest, 0))
29631 && REG_P (SET_DEST (prev_set))
29632 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29633 return true;
29634 }
29635 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29636 && REG_P (SET_DEST (curr_set))
29637 && REG_P (SET_DEST (prev_set))
29638 && GET_CODE (SET_SRC (prev_set)) == HIGH
29639 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29640 return true;
29641 }
29642 return false;
29643 }
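/* Illustrative assembly for the pair targeted by FUSE_MOVW_MOVT (symbol
   name hypothetical):

     movw    r0, #:lower16:sym
     movt    r0, #:upper16:sym

   Keeping the two instructions adjacent lets cores that fuse them handle
   the full 32-bit immediate load as one operation.  */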
29644
29645 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29646
29647 static unsigned HOST_WIDE_INT
29648 arm_asan_shadow_offset (void)
29649 {
29650 return (unsigned HOST_WIDE_INT) 1 << 29;
29651 }
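/* Illustrative note: with the default ASan shadow scale of 3, the shadow
   byte for address A is therefore located at (A >> 3) + 0x20000000.  */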
29652
29653
29654 /* This is a temporary fix for PR60655. Ideally we need
29655 to handle most of these cases in the generic part but
29656 currently we reject minus (..) (sym_ref). We try to
29657 ameliorate the case with minus (sym_ref1) (sym_ref2)
29658 where they are in the same section. */
29659
29660 static bool
29661 arm_const_not_ok_for_debug_p (rtx p)
29662 {
29663 tree decl_op0 = NULL;
29664 tree decl_op1 = NULL;
29665
29666 if (GET_CODE (p) == MINUS)
29667 {
29668 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29669 {
29670 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29671 if (decl_op1
29672 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29673 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29674 {
29675 if ((TREE_CODE (decl_op1) == VAR_DECL
29676 || TREE_CODE (decl_op1) == CONST_DECL)
29677 && (TREE_CODE (decl_op0) == VAR_DECL
29678 || TREE_CODE (decl_op0) == CONST_DECL))
29679 return (get_variable_section (decl_op1, false)
29680 != get_variable_section (decl_op0, false));
29681
29682 if (TREE_CODE (decl_op1) == LABEL_DECL
29683 && TREE_CODE (decl_op0) == LABEL_DECL)
29684 return (DECL_CONTEXT (decl_op1)
29685 != DECL_CONTEXT (decl_op0));
29686 }
29687
29688 return true;
29689 }
29690 }
29691
29692 return false;
29693 }
29694
29695 /* Return TRUE if X is a reference to a value in a constant pool. */
29696 extern bool
29697 arm_is_constant_pool_ref (rtx x)
29698 {
29699 return (MEM_P (x)
29700 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29701 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29702 }
29703
29704 /* Remember the last target of arm_set_current_function. */
29705 static GTY(()) tree arm_previous_fndecl;
29706
29707 /* Invalidate arm_previous_fndecl. */
29708 void
29709 arm_reset_previous_fndecl (void)
29710 {
29711 arm_previous_fndecl = NULL_TREE;
29712 }
29713
29714 /* Establish appropriate back-end context for processing the function
29715 FNDECL. The argument might be NULL to indicate processing at top
29716 level, outside of any function scope. */
29717 static void
29718 arm_set_current_function (tree fndecl)
29719 {
29720 if (!fndecl || fndecl == arm_previous_fndecl)
29721 return;
29722
29723 tree old_tree = (arm_previous_fndecl
29724 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
29725 : NULL_TREE);
29726
29727 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
29728
29729 arm_previous_fndecl = fndecl;
29730 if (old_tree == new_tree)
29731 return;
29732
29733 if (new_tree && new_tree != target_option_default_node)
29734 {
29735 cl_target_option_restore (&global_options,
29736 TREE_TARGET_OPTION (new_tree));
29737
29738 if (TREE_TARGET_GLOBALS (new_tree))
29739 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
29740 else
29741 TREE_TARGET_GLOBALS (new_tree)
29742 = save_target_globals_default_opts ();
29743 }
29744
29745 else if (old_tree && old_tree != target_option_default_node)
29746 {
29747 new_tree = target_option_current_node;
29748
29749 cl_target_option_restore (&global_options,
29750 TREE_TARGET_OPTION (new_tree));
29751 if (TREE_TARGET_GLOBALS (new_tree))
29752 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
29753 else if (new_tree == target_option_default_node)
29754 restore_target_globals (&default_target_globals);
29755 else
29756 TREE_TARGET_GLOBALS (new_tree)
29757 = save_target_globals_default_opts ();
29758 }
29759
29760 arm_option_params_internal ();
29761 }
29762
29763 /* Implement TARGET_OPTION_PRINT. */
29764
29765 static void
29766 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
29767 {
29768 int flags = ptr->x_target_flags;
29769
29770 fprintf (file, "%*sselected arch %s\n", indent, "",
29771 TARGET_THUMB2_P (flags) ? "thumb2" :
29772 TARGET_THUMB_P (flags) ? "thumb1" :
29773 "arm");
29774 }
29775
29776 /* Hook to determine if one function can safely inline another. */
29777
29778 static bool
29779 arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED)
29780 {
29781 /* Override the default hook: it is always OK to inline between
29782 different modes. Functions with mode-specific instructions, e.g. using
29783 asm, must be explicitly protected with noinline. */
29784 return true;
29785 }
29786
29787 /* Hook to fix a function's alignment when it is affected by the target attribute. */
29788
29789 static void
29790 arm_relayout_function (tree fndecl)
29791 {
29792 if (DECL_USER_ALIGN (fndecl))
29793 return;
29794
29795 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
29796
29797 if (!callee_tree)
29798 callee_tree = target_option_default_node;
29799
29800 DECL_ALIGN (fndecl) =
29801 FUNCTION_BOUNDARY_P (TREE_TARGET_OPTION (callee_tree)->x_target_flags);
29802 }
29803
29804 /* Inner function to process attribute((target(...))): take an argument
29805 and set the current options from that argument. If we have a list,
29806 recursively go over the list. */
29807
29808 static bool
29809 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
29810 {
29811 if (TREE_CODE (args) == TREE_LIST)
29812 {
29813 bool ret = true;
29814 for (; args; args = TREE_CHAIN (args))
29815 if (TREE_VALUE (args)
29816 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
29817 ret = false;
29818 return ret;
29819 }
29820
29821 else if (TREE_CODE (args) != STRING_CST)
29822 {
29823 error ("attribute %<target%> argument not a string");
29824 return false;
29825 }
29826
29827 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
29828 while (argstr && *argstr != '\0')
29829 {
29830 while (ISSPACE (*argstr))
29831 argstr++;
29832
29833 if (!strcmp (argstr, "thumb"))
29834 {
29835 opts->x_target_flags |= MASK_THUMB;
29836 arm_option_check_internal (opts);
29837 return true;
29838 }
29839
29840 if (!strcmp (argstr, "arm"))
29841 {
29842 opts->x_target_flags &= ~MASK_THUMB;
29843 arm_option_check_internal (opts);
29844 return true;
29845 }
29846
29847 warning (0, "attribute(target(\"%s\")) is unknown", argstr);
29848 return false;
29849 }
29850
29851 return false;
29852 }
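/* Illustrative (hypothetical) uses of the strings accepted above:

     __attribute__ ((target ("thumb"))) int f (int x) { return x + 1; }
     __attribute__ ((target ("arm")))   int g (int x) { return x - 1; }

   The first sets MASK_THUMB for the function, the second clears it; any
   other string is diagnosed by the warning above and rejected.  */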
29853
29854 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
29855
29856 tree
29857 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
29858 struct gcc_options *opts_set)
29859 {
29860 if (!arm_valid_target_attribute_rec (args, opts))
29861 return NULL_TREE;
29862
29863 /* Do any overrides, such as global options arch=xxx. */
29864 arm_option_override_internal (opts, opts_set);
29865
29866 return build_target_option_node (opts);
29867 }
29868
29869 static void
29870 add_attribute (const char * mode, tree *attributes)
29871 {
29872 size_t len = strlen (mode);
29873 tree value = build_string (len, mode);
29874
29875 TREE_TYPE (value) = build_array_type (char_type_node,
29876 build_index_type (size_int (len)));
29877
29878 *attributes = tree_cons (get_identifier ("target"),
29879 build_tree_list (NULL_TREE, value),
29880 *attributes);
29881 }
29882
29883 /* For testing. Insert thumb and arm modes alternately on functions. */
29884
29885 static void
29886 arm_insert_attributes (tree fndecl, tree * attributes)
29887 {
29888 const char *mode;
29889
29890 if (! TARGET_FLIP_THUMB)
29891 return;
29892
29893 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
29894 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
29895 return;
29896
29897 /* Nested definitions must inherit mode. */
29898 if (current_function_decl)
29899 {
29900 mode = TARGET_THUMB ? "thumb" : "arm";
29901 add_attribute (mode, attributes);
29902 return;
29903 }
29904
29905 /* If there is already a setting don't change it. */
29906 if (lookup_attribute ("target", *attributes) != NULL)
29907 return;
29908
29909 mode = thumb_flipper ? "thumb" : "arm";
29910 add_attribute (mode, attributes);
29911
29912 thumb_flipper = !thumb_flipper;
29913 }
29914
29915 /* Hook to validate attribute((target("string"))). */
29916
29917 static bool
29918 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
29919 tree args, int ARG_UNUSED (flags))
29920 {
29921 bool ret = true;
29922 struct gcc_options func_options;
29923 tree cur_tree, new_optimize;
29924 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
29925
29926 /* Get the optimization options of the current function. */
29927 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
29928
29929 /* If the function changed the optimization levels as well as setting target
29930 options, start with the optimizations specified. */
29931 if (!func_optimize)
29932 func_optimize = optimization_default_node;
29933
29934 /* Init func_options. */
29935 memset (&func_options, 0, sizeof (func_options));
29936 init_options_struct (&func_options, NULL);
29937 lang_hooks.init_options_struct (&func_options);
29938
29939 /* Initialize func_options to the defaults. */
29940 cl_optimization_restore (&func_options,
29941 TREE_OPTIMIZATION (func_optimize));
29942
29943 cl_target_option_restore (&func_options,
29944 TREE_TARGET_OPTION (target_option_default_node));
29945
29946 /* Set func_options flags with new target mode. */
29947 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
29948 &global_options_set);
29949
29950 if (cur_tree == NULL_TREE)
29951 ret = false;
29952
29953 new_optimize = build_optimization_node (&func_options);
29954
29955 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
29956
29957 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
29958
29959 return ret;
29960 }
29961
29962 void
29963 arm_declare_function_name (FILE *stream, const char *name, tree decl)
29964 {
29965
29966 fprintf (stream, "\t.syntax unified\n");
29967
29968 if (TARGET_THUMB)
29969 {
29970 if (is_called_in_ARM_mode (decl)
29971 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
29972 && cfun->is_thunk))
29973 fprintf (stream, "\t.code 32\n");
29974 else if (TARGET_THUMB1)
29975 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
29976 else
29977 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
29978 }
29979 else
29980 fprintf (stream, "\t.arm\n");
29981
29982 if (TARGET_POKE_FUNCTION_NAME)
29983 arm_poke_function_name (stream, (const char *) name);
29984 }
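/* Illustrative directives emitted for a Thumb-2 function (label elided):

     .syntax unified
     .thumb
     .thumb_func

   A Thumb-1 function gets ".code 16" with ".thumb_func", and an ARM-mode
   function gets ".arm" instead.  */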
29985
29986 /* If MEM is in the form of [base+offset], extract the two parts of
29987 the address into BASE and OFFSET and return TRUE; otherwise return
29988 FALSE after clearing BASE and OFFSET. */
29989
29990 static bool
29991 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29992 {
29993 rtx addr;
29994
29995 gcc_assert (MEM_P (mem));
29996
29997 addr = XEXP (mem, 0);
29998
29999 /* Strip off const from addresses like (const (addr)). */
30000 if (GET_CODE (addr) == CONST)
30001 addr = XEXP (addr, 0);
30002
30003 if (GET_CODE (addr) == REG)
30004 {
30005 *base = addr;
30006 *offset = const0_rtx;
30007 return true;
30008 }
30009
30010 if (GET_CODE (addr) == PLUS
30011 && GET_CODE (XEXP (addr, 0)) == REG
30012 && CONST_INT_P (XEXP (addr, 1)))
30013 {
30014 *base = XEXP (addr, 0);
30015 *offset = XEXP (addr, 1);
30016 return true;
30017 }
30018
30019 *base = NULL_RTX;
30020 *offset = NULL_RTX;
30021
30022 return false;
30023 }
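/* For example, (mem (reg r1)) yields BASE == r1 and OFFSET == 0, while
   (mem (plus (reg r1) (const_int 8))) yields BASE == r1 and OFFSET == 8;
   auto-increment and register-indexed addresses are rejected.  */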
30024
30025 /* If INSN is a load or store whose address is in the form of
30026 [base+offset], extract the two parts into BASE and OFFSET. IS_LOAD
30027 is set to TRUE if it's a load. Return TRUE if INSN is such an
30028 instruction, otherwise return FALSE. */
30029
30030 static bool
30031 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30032 {
30033 rtx x, dest, src;
30034
30035 gcc_assert (INSN_P (insn));
30036 x = PATTERN (insn);
30037 if (GET_CODE (x) != SET)
30038 return false;
30039
30040 src = SET_SRC (x);
30041 dest = SET_DEST (x);
30042 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30043 {
30044 *is_load = false;
30045 extract_base_offset_in_addr (dest, base, offset);
30046 }
30047 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30048 {
30049 *is_load = true;
30050 extract_base_offset_in_addr (src, base, offset);
30051 }
30052 else
30053 return false;
30054
30055 return (*base != NULL_RTX && *offset != NULL_RTX);
30056 }
30057
30058 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30059
30060 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30061 and PRI are only calculated for these instructions. For other instructions,
30062 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
30063 of instruction fusion can be supported by returning different priorities.
30064
30065 It's important that irrelevant instructions get the largest FUSION_PRI. */
30066
30067 static void
30068 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30069 int *fusion_pri, int *pri)
30070 {
30071 int tmp, off_val;
30072 bool is_load;
30073 rtx base, offset;
30074
30075 gcc_assert (INSN_P (insn));
30076
30077 tmp = max_pri - 1;
30078 if (!fusion_load_store (insn, &base, &offset, &is_load))
30079 {
30080 *pri = tmp;
30081 *fusion_pri = tmp;
30082 return;
30083 }
30084
30085 /* Load goes first. */
30086 if (is_load)
30087 *fusion_pri = tmp - 1;
30088 else
30089 *fusion_pri = tmp - 2;
30090
30091 tmp /= 2;
30092
30093 /* INSN with smaller base register goes first. */
30094 tmp -= ((REGNO (base) & 0xff) << 20);
30095
30096 /* INSN with smaller offset goes first. */
30097 off_val = (int)(INTVAL (offset));
30098 if (off_val >= 0)
30099 tmp -= (off_val & 0xfffff);
30100 else
30101 tmp += ((- off_val) & 0xfffff);
30102
30103 *pri = tmp;
30104 return;
30105 }
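/* Illustrative example of the priorities computed above: for the pair
   ldr r1, [r0] and ldr r2, [r0, #4], both loads get
   FUSION_PRI == max_pri - 2, while the zero-offset access gets the larger
   PRI, so the scheduler keeps the pair adjacent with the lower offset
   first.  */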
30106
30107 #include "gt-arm.h"