1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "alias.h"
29 #include "symtab.h"
30 #include "tree.h"
31 #include "fold-const.h"
32 #include "stringpool.h"
33 #include "stor-layout.h"
34 #include "calls.h"
35 #include "varasm.h"
36 #include "obstack.h"
37 #include "regs.h"
38 #include "hard-reg-set.h"
39 #include "insn-config.h"
40 #include "conditions.h"
41 #include "output.h"
42 #include "insn-attr.h"
43 #include "flags.h"
44 #include "reload.h"
45 #include "function.h"
46 #include "expmed.h"
47 #include "dojump.h"
48 #include "explow.h"
49 #include "emit-rtl.h"
50 #include "stmt.h"
51 #include "expr.h"
52 #include "insn-codes.h"
53 #include "optabs.h"
54 #include "diagnostic-core.h"
55 #include "recog.h"
56 #include "predict.h"
57 #include "dominance.h"
58 #include "cfg.h"
59 #include "cfgrtl.h"
60 #include "cfganal.h"
61 #include "lcm.h"
62 #include "cfgbuild.h"
63 #include "cfgcleanup.h"
64 #include "basic-block.h"
65 #include "cgraph.h"
66 #include "except.h"
67 #include "tm_p.h"
68 #include "target.h"
69 #include "sched-int.h"
70 #include "debug.h"
71 #include "langhooks.h"
72 #include "df.h"
73 #include "intl.h"
74 #include "libfuncs.h"
75 #include "params.h"
76 #include "opts.h"
77 #include "dumpfile.h"
78 #include "gimple-expr.h"
79 #include "target-globals.h"
80 #include "builtins.h"
81 #include "tm-constrs.h"
82 #include "rtl-iter.h"
83 #include "sched-int.h"
84 #include "tree.h"
85
86 /* This file should be included last. */
87 #include "target-def.h"
88
89 /* Forward definitions of types. */
90 typedef struct minipool_node Mnode;
91 typedef struct minipool_fixup Mfix;
92
93 void (*arm_lang_output_object_attributes_hook)(void);
94
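/* A sequence of up to four immediate values, filled in by
   optimal_immediate_sequence below. */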
95 struct four_ints
96 {
97 int i[4];
98 };
99
100 /* Forward function declarations. */
101 static bool arm_const_not_ok_for_debug_p (rtx);
102 static bool arm_needs_doubleword_align (machine_mode, const_tree);
103 static int arm_compute_static_chain_stack_bytes (void);
104 static arm_stack_offsets *arm_get_frame_offsets (void);
105 static void arm_add_gc_roots (void);
106 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
107 HOST_WIDE_INT, rtx, rtx, int, int);
108 static unsigned bit_count (unsigned long);
109 static int arm_address_register_rtx_p (rtx, int);
110 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
111 static bool is_called_in_ARM_mode (tree);
112 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
113 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
114 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
115 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
116 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
117 inline static int thumb1_index_register_rtx_p (rtx, int);
118 static int thumb_far_jump_used_p (void);
119 static bool thumb_force_lr_save (void);
120 static unsigned arm_size_return_regs (void);
121 static bool arm_assemble_integer (rtx, unsigned int, int);
122 static void arm_print_operand (FILE *, rtx, int);
123 static void arm_print_operand_address (FILE *, rtx);
124 static bool arm_print_operand_punct_valid_p (unsigned char code);
125 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
126 static arm_cc get_arm_condition_code (rtx);
127 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
128 static const char *output_multi_immediate (rtx *, const char *, const char *,
129 int, HOST_WIDE_INT);
130 static const char *shift_op (rtx, HOST_WIDE_INT *);
131 static struct machine_function *arm_init_machine_status (void);
132 static void thumb_exit (FILE *, int);
133 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
134 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
135 static Mnode *add_minipool_forward_ref (Mfix *);
136 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
137 static Mnode *add_minipool_backward_ref (Mfix *);
138 static void assign_minipool_offsets (Mfix *);
139 static void arm_print_value (FILE *, rtx);
140 static void dump_minipool (rtx_insn *);
141 static int arm_barrier_cost (rtx_insn *);
142 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
143 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
144 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
145 machine_mode, rtx);
146 static void arm_reorg (void);
147 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
148 static unsigned long arm_compute_save_reg0_reg12_mask (void);
149 static unsigned long arm_compute_save_reg_mask (void);
150 static unsigned long arm_isr_value (tree);
151 static unsigned long arm_compute_func_type (void);
152 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
153 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
154 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
155 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
156 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
157 #endif
158 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
159 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
160 static int arm_comp_type_attributes (const_tree, const_tree);
161 static void arm_set_default_type_attributes (tree);
162 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
163 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
164 static int optimal_immediate_sequence (enum rtx_code code,
165 unsigned HOST_WIDE_INT val,
166 struct four_ints *return_sequence);
167 static int optimal_immediate_sequence_1 (enum rtx_code code,
168 unsigned HOST_WIDE_INT val,
169 struct four_ints *return_sequence,
170 int i);
171 static int arm_get_strip_length (int);
172 static bool arm_function_ok_for_sibcall (tree, tree);
173 static machine_mode arm_promote_function_mode (const_tree,
174 machine_mode, int *,
175 const_tree, int);
176 static bool arm_return_in_memory (const_tree, const_tree);
177 static rtx arm_function_value (const_tree, const_tree, bool);
178 static rtx arm_libcall_value_1 (machine_mode);
179 static rtx arm_libcall_value (machine_mode, const_rtx);
180 static bool arm_function_value_regno_p (const unsigned int);
181 static void arm_internal_label (FILE *, const char *, unsigned long);
182 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
183 tree);
184 static bool arm_have_conditional_execution (void);
185 static bool arm_cannot_force_const_mem (machine_mode, rtx);
186 static bool arm_legitimate_constant_p (machine_mode, rtx);
187 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
188 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
189 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
192 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
193 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
194 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
195 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
196 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
197 static void emit_constant_insn (rtx cond, rtx pattern);
198 static rtx_insn *emit_set_insn (rtx, rtx);
199 static rtx emit_multi_reg_push (unsigned long, unsigned long);
200 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
201 tree, bool);
202 static rtx arm_function_arg (cumulative_args_t, machine_mode,
203 const_tree, bool);
204 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
205 const_tree, bool);
206 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
207 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
208 const_tree);
209 static rtx aapcs_libcall_value (machine_mode);
210 static int aapcs_select_return_coproc (const_tree, const_tree);
211
212 #ifdef OBJECT_FORMAT_ELF
213 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
214 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
215 #endif
216 #ifndef ARM_PE
217 static void arm_encode_section_info (tree, rtx, int);
218 #endif
219
220 static void arm_file_end (void);
221 static void arm_file_start (void);
222 static void arm_insert_attributes (tree, tree *);
223
224 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
225 tree, int *, int);
226 static bool arm_pass_by_reference (cumulative_args_t,
227 machine_mode, const_tree, bool);
228 static bool arm_promote_prototypes (const_tree);
229 static bool arm_default_short_enums (void);
230 static bool arm_align_anon_bitfield (void);
231 static bool arm_return_in_msb (const_tree);
232 static bool arm_must_pass_in_stack (machine_mode, const_tree);
233 static bool arm_return_in_memory (const_tree, const_tree);
234 #if ARM_UNWIND_INFO
235 static void arm_unwind_emit (FILE *, rtx_insn *);
236 static bool arm_output_ttype (rtx);
237 static void arm_asm_emit_except_personality (rtx);
238 static void arm_asm_init_sections (void);
239 #endif
240 static rtx arm_dwarf_register_span (rtx);
241
242 static tree arm_cxx_guard_type (void);
243 static bool arm_cxx_guard_mask_bit (void);
244 static tree arm_get_cookie_size (tree);
245 static bool arm_cookie_has_size (void);
246 static bool arm_cxx_cdtor_returns_this (void);
247 static bool arm_cxx_key_method_may_be_inline (void);
248 static void arm_cxx_determine_class_data_visibility (tree);
249 static bool arm_cxx_class_data_always_comdat (void);
250 static bool arm_cxx_use_aeabi_atexit (void);
251 static void arm_init_libfuncs (void);
252 static tree arm_build_builtin_va_list (void);
253 static void arm_expand_builtin_va_start (tree, rtx);
254 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
255 static void arm_option_override (void);
256 static void arm_set_current_function (tree);
257 static bool arm_can_inline_p (tree, tree);
258 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
259 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
260 static bool arm_macro_fusion_p (void);
261 static bool arm_cannot_copy_insn_p (rtx_insn *);
262 static int arm_issue_rate (void);
263 static int arm_first_cycle_multipass_dfa_lookahead (void);
264 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
265 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
266 static bool arm_output_addr_const_extra (FILE *, rtx);
267 static bool arm_allocate_stack_slots_for_args (void);
268 static bool arm_warn_func_return (tree);
269 static const char *arm_invalid_parameter_type (const_tree t);
270 static const char *arm_invalid_return_type (const_tree t);
271 static tree arm_promoted_type (const_tree t);
272 static tree arm_convert_to_type (tree type, tree expr);
273 static bool arm_scalar_mode_supported_p (machine_mode);
274 static bool arm_frame_pointer_required (void);
275 static bool arm_can_eliminate (const int, const int);
276 static void arm_asm_trampoline_template (FILE *);
277 static void arm_trampoline_init (rtx, tree, rtx);
278 static rtx arm_trampoline_adjust_address (rtx);
279 static rtx arm_pic_static_addr (rtx orig, rtx reg);
280 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
281 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
282 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
283 static bool arm_array_mode_supported_p (machine_mode,
284 unsigned HOST_WIDE_INT);
285 static machine_mode arm_preferred_simd_mode (machine_mode);
286 static bool arm_class_likely_spilled_p (reg_class_t);
287 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
288 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
289 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
290 const_tree type,
291 int misalignment,
292 bool is_packed);
293 static void arm_conditional_register_usage (void);
294 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
295 static unsigned int arm_autovectorize_vector_sizes (void);
296 static int arm_default_branch_cost (bool, bool);
297 static int arm_cortex_a5_branch_cost (bool, bool);
298 static int arm_cortex_m_branch_cost (bool, bool);
299 static int arm_cortex_m7_branch_cost (bool, bool);
300
301 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
302 const unsigned char *sel);
303
304 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
305
306 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
307 tree vectype,
308 int misalign ATTRIBUTE_UNUSED);
309 static unsigned arm_add_stmt_cost (void *data, int count,
310 enum vect_cost_for_stmt kind,
311 struct _stmt_vec_info *stmt_info,
312 int misalign,
313 enum vect_cost_model_location where);
314
315 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
316 bool op0_preserve_value);
317 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
318
319 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
320 \f
321 /* Table of machine attributes. */
322 static const struct attribute_spec arm_attribute_table[] =
323 {
324 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
325 affects_type_identity } */
326 /* Function calls made to this symbol must be done indirectly, because
327 it may lie outside the 26-bit addressing range of a normal function
328 call. */
329 { "long_call", 0, 0, false, true, true, NULL, false },
330 /* Whereas these functions are always known to reside within the 26-bit
331 addressing range. */
332 { "short_call", 0, 0, false, true, true, NULL, false },
333 /* Specify the procedure call conventions for a function. */
334 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
335 false },
336 /* Interrupt Service Routines have special prologue and epilogue requirements. */
337 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
338 false },
339 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
340 false },
341 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
342 false },
343 #ifdef ARM_PE
344 /* ARM/PE has three new attributes:
345 interfacearm - ?
346 dllexport - for exporting a function/variable that will live in a dll
347 dllimport - for importing a function/variable from a dll
348
349 Microsoft allows multiple declspecs in one __declspec, separating
350 them with spaces. We do NOT support this. Instead, use __declspec
351 multiple times.
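For example, write __declspec(dllexport) __declspec(naked) rather
than __declspec(dllexport naked).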
352 */
353 { "dllimport", 0, 0, true, false, false, NULL, false },
354 { "dllexport", 0, 0, true, false, false, NULL, false },
355 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
356 false },
357 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
358 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
360 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
361 false },
362 #endif
363 { NULL, 0, 0, false, false, false, NULL, false }
364 };
365 \f
366 /* Initialize the GCC target structure. */
367 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
368 #undef TARGET_MERGE_DECL_ATTRIBUTES
369 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
370 #endif
371
372 #undef TARGET_LEGITIMIZE_ADDRESS
373 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
374
375 #undef TARGET_LRA_P
376 #define TARGET_LRA_P hook_bool_void_true
377
378 #undef TARGET_ATTRIBUTE_TABLE
379 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
380
381 #undef TARGET_INSERT_ATTRIBUTES
382 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
383
384 #undef TARGET_ASM_FILE_START
385 #define TARGET_ASM_FILE_START arm_file_start
386 #undef TARGET_ASM_FILE_END
387 #define TARGET_ASM_FILE_END arm_file_end
388
389 #undef TARGET_ASM_ALIGNED_SI_OP
390 #define TARGET_ASM_ALIGNED_SI_OP NULL
391 #undef TARGET_ASM_INTEGER
392 #define TARGET_ASM_INTEGER arm_assemble_integer
393
394 #undef TARGET_PRINT_OPERAND
395 #define TARGET_PRINT_OPERAND arm_print_operand
396 #undef TARGET_PRINT_OPERAND_ADDRESS
397 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
398 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
399 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
400
401 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
402 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
403
404 #undef TARGET_ASM_FUNCTION_PROLOGUE
405 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
406
407 #undef TARGET_ASM_FUNCTION_EPILOGUE
408 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
409
410 #undef TARGET_CAN_INLINE_P
411 #define TARGET_CAN_INLINE_P arm_can_inline_p
412
413 #undef TARGET_OPTION_OVERRIDE
414 #define TARGET_OPTION_OVERRIDE arm_option_override
415
416 #undef TARGET_COMP_TYPE_ATTRIBUTES
417 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
418
419 #undef TARGET_SCHED_MACRO_FUSION_P
420 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
421
422 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
423 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
424
425 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
426 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
427
428 #undef TARGET_SCHED_ADJUST_COST
429 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
430
431 #undef TARGET_SET_CURRENT_FUNCTION
432 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
433
434 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
435 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
436
437 #undef TARGET_SCHED_REORDER
438 #define TARGET_SCHED_REORDER arm_sched_reorder
439
440 #undef TARGET_REGISTER_MOVE_COST
441 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
442
443 #undef TARGET_MEMORY_MOVE_COST
444 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
445
446 #undef TARGET_ENCODE_SECTION_INFO
447 #ifdef ARM_PE
448 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
449 #else
450 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
451 #endif
452
453 #undef TARGET_STRIP_NAME_ENCODING
454 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
455
456 #undef TARGET_ASM_INTERNAL_LABEL
457 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
458
459 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
460 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
461
462 #undef TARGET_FUNCTION_VALUE
463 #define TARGET_FUNCTION_VALUE arm_function_value
464
465 #undef TARGET_LIBCALL_VALUE
466 #define TARGET_LIBCALL_VALUE arm_libcall_value
467
468 #undef TARGET_FUNCTION_VALUE_REGNO_P
469 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
470
471 #undef TARGET_ASM_OUTPUT_MI_THUNK
472 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
473 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
474 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
475
476 #undef TARGET_RTX_COSTS
477 #define TARGET_RTX_COSTS arm_rtx_costs
478 #undef TARGET_ADDRESS_COST
479 #define TARGET_ADDRESS_COST arm_address_cost
480
481 #undef TARGET_SHIFT_TRUNCATION_MASK
482 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
483 #undef TARGET_VECTOR_MODE_SUPPORTED_P
484 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
485 #undef TARGET_ARRAY_MODE_SUPPORTED_P
486 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
487 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
488 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
489 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
490 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
491 arm_autovectorize_vector_sizes
492
493 #undef TARGET_MACHINE_DEPENDENT_REORG
494 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
495
496 #undef TARGET_INIT_BUILTINS
497 #define TARGET_INIT_BUILTINS arm_init_builtins
498 #undef TARGET_EXPAND_BUILTIN
499 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
500 #undef TARGET_BUILTIN_DECL
501 #define TARGET_BUILTIN_DECL arm_builtin_decl
502
503 #undef TARGET_INIT_LIBFUNCS
504 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
505
506 #undef TARGET_PROMOTE_FUNCTION_MODE
507 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
508 #undef TARGET_PROMOTE_PROTOTYPES
509 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
510 #undef TARGET_PASS_BY_REFERENCE
511 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
512 #undef TARGET_ARG_PARTIAL_BYTES
513 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
514 #undef TARGET_FUNCTION_ARG
515 #define TARGET_FUNCTION_ARG arm_function_arg
516 #undef TARGET_FUNCTION_ARG_ADVANCE
517 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
518 #undef TARGET_FUNCTION_ARG_BOUNDARY
519 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
520
521 #undef TARGET_SETUP_INCOMING_VARARGS
522 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
523
524 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
525 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
526
527 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
528 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
529 #undef TARGET_TRAMPOLINE_INIT
530 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
531 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
532 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
533
534 #undef TARGET_WARN_FUNC_RETURN
535 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
536
537 #undef TARGET_DEFAULT_SHORT_ENUMS
538 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
539
540 #undef TARGET_ALIGN_ANON_BITFIELD
541 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
542
543 #undef TARGET_NARROW_VOLATILE_BITFIELD
544 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
545
546 #undef TARGET_CXX_GUARD_TYPE
547 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
548
549 #undef TARGET_CXX_GUARD_MASK_BIT
550 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
551
552 #undef TARGET_CXX_GET_COOKIE_SIZE
553 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
554
555 #undef TARGET_CXX_COOKIE_HAS_SIZE
556 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
557
558 #undef TARGET_CXX_CDTOR_RETURNS_THIS
559 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
560
561 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
562 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
563
564 #undef TARGET_CXX_USE_AEABI_ATEXIT
565 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
566
567 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
568 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
569 arm_cxx_determine_class_data_visibility
570
571 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
572 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
573
574 #undef TARGET_RETURN_IN_MSB
575 #define TARGET_RETURN_IN_MSB arm_return_in_msb
576
577 #undef TARGET_RETURN_IN_MEMORY
578 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
579
580 #undef TARGET_MUST_PASS_IN_STACK
581 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
582
583 #if ARM_UNWIND_INFO
584 #undef TARGET_ASM_UNWIND_EMIT
585 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
586
587 /* EABI unwinding tables use a different format for the typeinfo tables. */
588 #undef TARGET_ASM_TTYPE
589 #define TARGET_ASM_TTYPE arm_output_ttype
590
591 #undef TARGET_ARM_EABI_UNWINDER
592 #define TARGET_ARM_EABI_UNWINDER true
593
594 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
595 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
596
597 #undef TARGET_ASM_INIT_SECTIONS
598 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
599 #endif /* ARM_UNWIND_INFO */
600
601 #undef TARGET_DWARF_REGISTER_SPAN
602 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
603
604 #undef TARGET_CANNOT_COPY_INSN_P
605 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
606
607 #ifdef HAVE_AS_TLS
608 #undef TARGET_HAVE_TLS
609 #define TARGET_HAVE_TLS true
610 #endif
611
612 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
613 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
614
615 #undef TARGET_LEGITIMATE_CONSTANT_P
616 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
617
618 #undef TARGET_CANNOT_FORCE_CONST_MEM
619 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
620
621 #undef TARGET_MAX_ANCHOR_OFFSET
622 #define TARGET_MAX_ANCHOR_OFFSET 4095
623
624 /* The minimum is set such that the total size of the block
625 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
626 divisible by eight, ensuring natural spacing of anchors. */
627 #undef TARGET_MIN_ANCHOR_OFFSET
628 #define TARGET_MIN_ANCHOR_OFFSET -4088
629
630 #undef TARGET_SCHED_ISSUE_RATE
631 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
632
633 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
634 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
635 arm_first_cycle_multipass_dfa_lookahead
636
637 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
638 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
639 arm_first_cycle_multipass_dfa_lookahead_guard
640
641 #undef TARGET_MANGLE_TYPE
642 #define TARGET_MANGLE_TYPE arm_mangle_type
643
644 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
645 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
646
647 #undef TARGET_BUILD_BUILTIN_VA_LIST
648 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
649 #undef TARGET_EXPAND_BUILTIN_VA_START
650 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
651 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
652 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
653
654 #ifdef HAVE_AS_TLS
655 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
656 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
657 #endif
658
659 #undef TARGET_LEGITIMATE_ADDRESS_P
660 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
661
662 #undef TARGET_PREFERRED_RELOAD_CLASS
663 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
664
665 #undef TARGET_INVALID_PARAMETER_TYPE
666 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
667
668 #undef TARGET_INVALID_RETURN_TYPE
669 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
670
671 #undef TARGET_PROMOTED_TYPE
672 #define TARGET_PROMOTED_TYPE arm_promoted_type
673
674 #undef TARGET_CONVERT_TO_TYPE
675 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
676
677 #undef TARGET_SCALAR_MODE_SUPPORTED_P
678 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
679
680 #undef TARGET_FRAME_POINTER_REQUIRED
681 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
682
683 #undef TARGET_CAN_ELIMINATE
684 #define TARGET_CAN_ELIMINATE arm_can_eliminate
685
686 #undef TARGET_CONDITIONAL_REGISTER_USAGE
687 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
688
689 #undef TARGET_CLASS_LIKELY_SPILLED_P
690 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
691
692 #undef TARGET_VECTORIZE_BUILTINS
693 #define TARGET_VECTORIZE_BUILTINS
694
695 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
696 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
697 arm_builtin_vectorized_function
698
699 #undef TARGET_VECTOR_ALIGNMENT
700 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
701
702 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
703 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
704 arm_vector_alignment_reachable
705
706 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
707 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
708 arm_builtin_support_vector_misalignment
709
710 #undef TARGET_PREFERRED_RENAME_CLASS
711 #define TARGET_PREFERRED_RENAME_CLASS \
712 arm_preferred_rename_class
713
714 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
715 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
716 arm_vectorize_vec_perm_const_ok
717
718 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
719 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
720 arm_builtin_vectorization_cost
721 #undef TARGET_VECTORIZE_ADD_STMT_COST
722 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
723
724 #undef TARGET_CANONICALIZE_COMPARISON
725 #define TARGET_CANONICALIZE_COMPARISON \
726 arm_canonicalize_comparison
727
728 #undef TARGET_ASAN_SHADOW_OFFSET
729 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
730
731 #undef MAX_INSN_PER_IT_BLOCK
732 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
733
734 #undef TARGET_CAN_USE_DOLOOP_P
735 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
736
737 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
738 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
739
740 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
741 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
742
743 #undef TARGET_SCHED_FUSION_PRIORITY
744 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
745
746 struct gcc_target targetm = TARGET_INITIALIZER;
747 \f
748 /* Obstack for minipool constant handling. */
749 static struct obstack minipool_obstack;
750 static char * minipool_startobj;
751
752 /* The maximum number of insns skipped which
753 will be conditionalised if possible. */
754 static int max_insns_skipped = 5;
755
756 extern FILE * asm_out_file;
757
758 /* True if we are currently building a constant table. */
759 int making_const_table;
760
761 /* The processor for which instructions should be scheduled. */
762 enum processor_type arm_tune = arm_none;
763
764 /* The current tuning set. */
765 const struct tune_params *current_tune;
766
767 /* Which floating point hardware to schedule for. */
768 int arm_fpu_attr;
769
770 /* Which floating point hardware to use. */
771 const struct arm_fpu_desc *arm_fpu_desc;
772
773 /* Used for Thumb call_via trampolines. */
774 rtx thumb_call_via_label[14];
775 static int thumb_call_reg_needed;
776
777 /* The bits in this mask specify which
778 instructions we are allowed to generate. */
779 unsigned long insn_flags = 0;
780
781 /* The bits in this mask specify which instruction scheduling options should
782 be used. */
783 unsigned long tune_flags = 0;
784
785 /* The highest ARM architecture version supported by the
786 target. */
787 enum base_architecture arm_base_arch = BASE_ARCH_0;
788
789 /* The following are used in the arm.md file as equivalents to bits
790 in the above two flag variables. */
791
792 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
793 int arm_arch3m = 0;
794
795 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
796 int arm_arch4 = 0;
797
798 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
799 int arm_arch4t = 0;
800
801 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
802 int arm_arch5 = 0;
803
804 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
805 int arm_arch5e = 0;
806
807 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
808 int arm_arch6 = 0;
809
810 /* Nonzero if this chip supports the ARM 6K extensions. */
811 int arm_arch6k = 0;
812
813 /* Nonzero if instructions present in ARMv6-M can be used. */
814 int arm_arch6m = 0;
815
816 /* Nonzero if this chip supports the ARM 7 extensions. */
817 int arm_arch7 = 0;
818
819 /* Nonzero if instructions not present in the 'M' profile can be used. */
820 int arm_arch_notm = 0;
821
822 /* Nonzero if instructions present in ARMv7E-M can be used. */
823 int arm_arch7em = 0;
824
825 /* Nonzero if instructions present in ARMv8 can be used. */
826 int arm_arch8 = 0;
827
828 /* Nonzero if this chip can benefit from load scheduling. */
829 int arm_ld_sched = 0;
830
831 /* Nonzero if this chip is a StrongARM. */
832 int arm_tune_strongarm = 0;
833
834 /* Nonzero if this chip supports Intel Wireless MMX technology. */
835 int arm_arch_iwmmxt = 0;
836
837 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
838 int arm_arch_iwmmxt2 = 0;
839
840 /* Nonzero if this chip is an XScale. */
841 int arm_arch_xscale = 0;
842
843 /* Nonzero if tuning for XScale. */
844 int arm_tune_xscale = 0;
845
846 /* Nonzero if we want to tune for stores that access the write-buffer.
847 This typically means an ARM6 or ARM7 with MMU or MPU. */
848 int arm_tune_wbuf = 0;
849
850 /* Nonzero if tuning for Cortex-A9. */
851 int arm_tune_cortex_a9 = 0;
852
853 /* Nonzero if we should define __THUMB_INTERWORK__ in the
854 preprocessor.
855 XXX This is a bit of a hack, it's intended to help work around
856 problems in GLD which doesn't understand that armv5t code is
857 interworking clean. */
858 int arm_cpp_interwork = 0;
859
860 /* Nonzero if chip supports Thumb 2. */
861 int arm_arch_thumb2;
862
863 /* Nonzero if chip supports integer division, in ARM and Thumb state respectively. */
864 int arm_arch_arm_hwdiv;
865 int arm_arch_thumb_hwdiv;
866
867 /* Nonzero if chip disallows volatile memory access in IT block. */
868 int arm_arch_no_volatile_ce;
869
870 /* Nonzero if we should use Neon to handle 64-bit operations rather
871 than core registers. */
872 int prefer_neon_for_64bits = 0;
873
874 /* Nonzero if we shouldn't use literal pools. */
875 bool arm_disable_literal_pool = false;
876
877 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
878 we must report the mode of the memory reference from
879 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
880 machine_mode output_memory_reference_mode;
881
882 /* The register number to be used for the PIC offset register. */
883 unsigned arm_pic_register = INVALID_REGNUM;
884
885 enum arm_pcs arm_pcs_default;
886
887 /* For an explanation of these variables, see final_prescan_insn below. */
888 int arm_ccfsm_state;
889 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
890 enum arm_cond_code arm_current_cc;
891
892 rtx arm_target_insn;
893 int arm_target_label;
894 /* The number of conditionally executed insns, including the current insn. */
895 int arm_condexec_count = 0;
896 /* A bitmask specifying the patterns for the IT block.
897 Zero means do not output an IT block before this insn. */
898 int arm_condexec_mask = 0;
899 /* The number of bits used in arm_condexec_mask. */
900 int arm_condexec_masklen = 0;
901
902 /* Nonzero if chip supports the ARMv8 CRC instructions. */
903 int arm_arch_crc = 0;
904
905 /* Nonzero if the core has a very small, high-latency multiply unit. */
906 int arm_m_profile_small_mul = 0;
907
908 /* The condition codes of the ARM, and the inverse function. */
909 static const char * const arm_condition_codes[] =
910 {
911 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
912 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
913 };
914
915 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
916 int arm_regs_in_sequence[] =
917 {
918 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
919 };
920
921 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
922 #define streq(string1, string2) (strcmp (string1, string2) == 0)
923
924 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
925 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
926 | (1 << PIC_OFFSET_TABLE_REGNUM)))
927 \f
928 /* Initialization code. */
929
930 struct processors
931 {
932 const char *const name;
933 enum processor_type core;
934 const char *arch;
935 enum base_architecture base_arch;
936 const unsigned long flags;
937 const struct tune_params *const tune;
938 };
939
940
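/* Prefetch tuning entries: the number of prefetch slots, the L1 cache
   size and the L1 cache line size; -1 means the value is not known or
   prefetching is not beneficial. */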
941 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
942 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
943 { \
944 num_slots, \
945 l1_size, \
946 l1_line_size \
947 }
948
949 /* arm generic vectorizer costs. */
950 static const
951 struct cpu_vec_costs arm_default_vec_cost = {
952 1, /* scalar_stmt_cost. */
953 1, /* scalar_load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 1, /* vec_unalign_load_cost. */
960 1, /* vec_unalign_store_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
964 };
965
966 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h. */
967 #include "aarch-cost-tables.h"
968
969
970
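/* Per-core tables of extra instruction costs, referenced from the
   insn_extra_cost field of the tune_params structures. */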
971 const struct cpu_cost_table cortexa9_extra_costs =
972 {
973 /* ALU */
974 {
975 0, /* arith. */
976 0, /* logical. */
977 0, /* shift. */
978 COSTS_N_INSNS (1), /* shift_reg. */
979 COSTS_N_INSNS (1), /* arith_shift. */
980 COSTS_N_INSNS (2), /* arith_shift_reg. */
981 0, /* log_shift. */
982 COSTS_N_INSNS (1), /* log_shift_reg. */
983 COSTS_N_INSNS (1), /* extend. */
984 COSTS_N_INSNS (2), /* extend_arith. */
985 COSTS_N_INSNS (1), /* bfi. */
986 COSTS_N_INSNS (1), /* bfx. */
987 0, /* clz. */
988 0, /* rev. */
989 0, /* non_exec. */
990 true /* non_exec_costs_exec. */
991 },
992 {
993 /* MULT SImode */
994 {
995 COSTS_N_INSNS (3), /* simple. */
996 COSTS_N_INSNS (3), /* flag_setting. */
997 COSTS_N_INSNS (2), /* extend. */
998 COSTS_N_INSNS (3), /* add. */
999 COSTS_N_INSNS (2), /* extend_add. */
1000 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1001 },
1002 /* MULT DImode */
1003 {
1004 0, /* simple (N/A). */
1005 0, /* flag_setting (N/A). */
1006 COSTS_N_INSNS (4), /* extend. */
1007 0, /* add (N/A). */
1008 COSTS_N_INSNS (4), /* extend_add. */
1009 0 /* idiv (N/A). */
1010 }
1011 },
1012 /* LD/ST */
1013 {
1014 COSTS_N_INSNS (2), /* load. */
1015 COSTS_N_INSNS (2), /* load_sign_extend. */
1016 COSTS_N_INSNS (2), /* ldrd. */
1017 COSTS_N_INSNS (2), /* ldm_1st. */
1018 1, /* ldm_regs_per_insn_1st. */
1019 2, /* ldm_regs_per_insn_subsequent. */
1020 COSTS_N_INSNS (5), /* loadf. */
1021 COSTS_N_INSNS (5), /* loadd. */
1022 COSTS_N_INSNS (1), /* load_unaligned. */
1023 COSTS_N_INSNS (2), /* store. */
1024 COSTS_N_INSNS (2), /* strd. */
1025 COSTS_N_INSNS (2), /* stm_1st. */
1026 1, /* stm_regs_per_insn_1st. */
1027 2, /* stm_regs_per_insn_subsequent. */
1028 COSTS_N_INSNS (1), /* storef. */
1029 COSTS_N_INSNS (1), /* stored. */
1030 COSTS_N_INSNS (1), /* store_unaligned. */
1031 COSTS_N_INSNS (1), /* loadv. */
1032 COSTS_N_INSNS (1) /* storev. */
1033 },
1034 {
1035 /* FP SFmode */
1036 {
1037 COSTS_N_INSNS (14), /* div. */
1038 COSTS_N_INSNS (4), /* mult. */
1039 COSTS_N_INSNS (7), /* mult_addsub. */
1040 COSTS_N_INSNS (30), /* fma. */
1041 COSTS_N_INSNS (3), /* addsub. */
1042 COSTS_N_INSNS (1), /* fpconst. */
1043 COSTS_N_INSNS (1), /* neg. */
1044 COSTS_N_INSNS (3), /* compare. */
1045 COSTS_N_INSNS (3), /* widen. */
1046 COSTS_N_INSNS (3), /* narrow. */
1047 COSTS_N_INSNS (3), /* toint. */
1048 COSTS_N_INSNS (3), /* fromint. */
1049 COSTS_N_INSNS (3) /* roundint. */
1050 },
1051 /* FP DFmode */
1052 {
1053 COSTS_N_INSNS (24), /* div. */
1054 COSTS_N_INSNS (5), /* mult. */
1055 COSTS_N_INSNS (8), /* mult_addsub. */
1056 COSTS_N_INSNS (30), /* fma. */
1057 COSTS_N_INSNS (3), /* addsub. */
1058 COSTS_N_INSNS (1), /* fpconst. */
1059 COSTS_N_INSNS (1), /* neg. */
1060 COSTS_N_INSNS (3), /* compare. */
1061 COSTS_N_INSNS (3), /* widen. */
1062 COSTS_N_INSNS (3), /* narrow. */
1063 COSTS_N_INSNS (3), /* toint. */
1064 COSTS_N_INSNS (3), /* fromint. */
1065 COSTS_N_INSNS (3) /* roundint. */
1066 }
1067 },
1068 /* Vector */
1069 {
1070 COSTS_N_INSNS (1) /* alu. */
1071 }
1072 };
1073
1074 const struct cpu_cost_table cortexa8_extra_costs =
1075 {
1076 /* ALU */
1077 {
1078 0, /* arith. */
1079 0, /* logical. */
1080 COSTS_N_INSNS (1), /* shift. */
1081 0, /* shift_reg. */
1082 COSTS_N_INSNS (1), /* arith_shift. */
1083 0, /* arith_shift_reg. */
1084 COSTS_N_INSNS (1), /* log_shift. */
1085 0, /* log_shift_reg. */
1086 0, /* extend. */
1087 0, /* extend_arith. */
1088 0, /* bfi. */
1089 0, /* bfx. */
1090 0, /* clz. */
1091 0, /* rev. */
1092 0, /* non_exec. */
1093 true /* non_exec_costs_exec. */
1094 },
1095 {
1096 /* MULT SImode */
1097 {
1098 COSTS_N_INSNS (1), /* simple. */
1099 COSTS_N_INSNS (1), /* flag_setting. */
1100 COSTS_N_INSNS (1), /* extend. */
1101 COSTS_N_INSNS (1), /* add. */
1102 COSTS_N_INSNS (1), /* extend_add. */
1103 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1104 },
1105 /* MULT DImode */
1106 {
1107 0, /* simple (N/A). */
1108 0, /* flag_setting (N/A). */
1109 COSTS_N_INSNS (2), /* extend. */
1110 0, /* add (N/A). */
1111 COSTS_N_INSNS (2), /* extend_add. */
1112 0 /* idiv (N/A). */
1113 }
1114 },
1115 /* LD/ST */
1116 {
1117 COSTS_N_INSNS (1), /* load. */
1118 COSTS_N_INSNS (1), /* load_sign_extend. */
1119 COSTS_N_INSNS (1), /* ldrd. */
1120 COSTS_N_INSNS (1), /* ldm_1st. */
1121 1, /* ldm_regs_per_insn_1st. */
1122 2, /* ldm_regs_per_insn_subsequent. */
1123 COSTS_N_INSNS (1), /* loadf. */
1124 COSTS_N_INSNS (1), /* loadd. */
1125 COSTS_N_INSNS (1), /* load_unaligned. */
1126 COSTS_N_INSNS (1), /* store. */
1127 COSTS_N_INSNS (1), /* strd. */
1128 COSTS_N_INSNS (1), /* stm_1st. */
1129 1, /* stm_regs_per_insn_1st. */
1130 2, /* stm_regs_per_insn_subsequent. */
1131 COSTS_N_INSNS (1), /* storef. */
1132 COSTS_N_INSNS (1), /* stored. */
1133 COSTS_N_INSNS (1), /* store_unaligned. */
1134 COSTS_N_INSNS (1), /* loadv. */
1135 COSTS_N_INSNS (1) /* storev. */
1136 },
1137 {
1138 /* FP SFmode */
1139 {
1140 COSTS_N_INSNS (36), /* div. */
1141 COSTS_N_INSNS (11), /* mult. */
1142 COSTS_N_INSNS (20), /* mult_addsub. */
1143 COSTS_N_INSNS (30), /* fma. */
1144 COSTS_N_INSNS (9), /* addsub. */
1145 COSTS_N_INSNS (3), /* fpconst. */
1146 COSTS_N_INSNS (3), /* neg. */
1147 COSTS_N_INSNS (6), /* compare. */
1148 COSTS_N_INSNS (4), /* widen. */
1149 COSTS_N_INSNS (4), /* narrow. */
1150 COSTS_N_INSNS (8), /* toint. */
1151 COSTS_N_INSNS (8), /* fromint. */
1152 COSTS_N_INSNS (8) /* roundint. */
1153 },
1154 /* FP DFmode */
1155 {
1156 COSTS_N_INSNS (64), /* div. */
1157 COSTS_N_INSNS (16), /* mult. */
1158 COSTS_N_INSNS (25), /* mult_addsub. */
1159 COSTS_N_INSNS (30), /* fma. */
1160 COSTS_N_INSNS (9), /* addsub. */
1161 COSTS_N_INSNS (3), /* fpconst. */
1162 COSTS_N_INSNS (3), /* neg. */
1163 COSTS_N_INSNS (6), /* compare. */
1164 COSTS_N_INSNS (6), /* widen. */
1165 COSTS_N_INSNS (6), /* narrow. */
1166 COSTS_N_INSNS (8), /* toint. */
1167 COSTS_N_INSNS (8), /* fromint. */
1168 COSTS_N_INSNS (8) /* roundint. */
1169 }
1170 },
1171 /* Vector */
1172 {
1173 COSTS_N_INSNS (1) /* alu. */
1174 }
1175 };
1176
1177 const struct cpu_cost_table cortexa5_extra_costs =
1178 {
1179 /* ALU */
1180 {
1181 0, /* arith. */
1182 0, /* logical. */
1183 COSTS_N_INSNS (1), /* shift. */
1184 COSTS_N_INSNS (1), /* shift_reg. */
1185 COSTS_N_INSNS (1), /* arith_shift. */
1186 COSTS_N_INSNS (1), /* arith_shift_reg. */
1187 COSTS_N_INSNS (1), /* log_shift. */
1188 COSTS_N_INSNS (1), /* log_shift_reg. */
1189 COSTS_N_INSNS (1), /* extend. */
1190 COSTS_N_INSNS (1), /* extend_arith. */
1191 COSTS_N_INSNS (1), /* bfi. */
1192 COSTS_N_INSNS (1), /* bfx. */
1193 COSTS_N_INSNS (1), /* clz. */
1194 COSTS_N_INSNS (1), /* rev. */
1195 0, /* non_exec. */
1196 true /* non_exec_costs_exec. */
1197 },
1198
1199 {
1200 /* MULT SImode */
1201 {
1202 0, /* simple. */
1203 COSTS_N_INSNS (1), /* flag_setting. */
1204 COSTS_N_INSNS (1), /* extend. */
1205 COSTS_N_INSNS (1), /* add. */
1206 COSTS_N_INSNS (1), /* extend_add. */
1207 COSTS_N_INSNS (7) /* idiv. */
1208 },
1209 /* MULT DImode */
1210 {
1211 0, /* simple (N/A). */
1212 0, /* flag_setting (N/A). */
1213 COSTS_N_INSNS (1), /* extend. */
1214 0, /* add. */
1215 COSTS_N_INSNS (2), /* extend_add. */
1216 0 /* idiv (N/A). */
1217 }
1218 },
1219 /* LD/ST */
1220 {
1221 COSTS_N_INSNS (1), /* load. */
1222 COSTS_N_INSNS (1), /* load_sign_extend. */
1223 COSTS_N_INSNS (6), /* ldrd. */
1224 COSTS_N_INSNS (1), /* ldm_1st. */
1225 1, /* ldm_regs_per_insn_1st. */
1226 2, /* ldm_regs_per_insn_subsequent. */
1227 COSTS_N_INSNS (2), /* loadf. */
1228 COSTS_N_INSNS (4), /* loadd. */
1229 COSTS_N_INSNS (1), /* load_unaligned. */
1230 COSTS_N_INSNS (1), /* store. */
1231 COSTS_N_INSNS (3), /* strd. */
1232 COSTS_N_INSNS (1), /* stm_1st. */
1233 1, /* stm_regs_per_insn_1st. */
1234 2, /* stm_regs_per_insn_subsequent. */
1235 COSTS_N_INSNS (2), /* storef. */
1236 COSTS_N_INSNS (2), /* stored. */
1237 COSTS_N_INSNS (1), /* store_unaligned. */
1238 COSTS_N_INSNS (1), /* loadv. */
1239 COSTS_N_INSNS (1) /* storev. */
1240 },
1241 {
1242 /* FP SFmode */
1243 {
1244 COSTS_N_INSNS (15), /* div. */
1245 COSTS_N_INSNS (3), /* mult. */
1246 COSTS_N_INSNS (7), /* mult_addsub. */
1247 COSTS_N_INSNS (7), /* fma. */
1248 COSTS_N_INSNS (3), /* addsub. */
1249 COSTS_N_INSNS (3), /* fpconst. */
1250 COSTS_N_INSNS (3), /* neg. */
1251 COSTS_N_INSNS (3), /* compare. */
1252 COSTS_N_INSNS (3), /* widen. */
1253 COSTS_N_INSNS (3), /* narrow. */
1254 COSTS_N_INSNS (3), /* toint. */
1255 COSTS_N_INSNS (3), /* fromint. */
1256 COSTS_N_INSNS (3) /* roundint. */
1257 },
1258 /* FP DFmode */
1259 {
1260 COSTS_N_INSNS (30), /* div. */
1261 COSTS_N_INSNS (6), /* mult. */
1262 COSTS_N_INSNS (10), /* mult_addsub. */
1263 COSTS_N_INSNS (7), /* fma. */
1264 COSTS_N_INSNS (3), /* addsub. */
1265 COSTS_N_INSNS (3), /* fpconst. */
1266 COSTS_N_INSNS (3), /* neg. */
1267 COSTS_N_INSNS (3), /* compare. */
1268 COSTS_N_INSNS (3), /* widen. */
1269 COSTS_N_INSNS (3), /* narrow. */
1270 COSTS_N_INSNS (3), /* toint. */
1271 COSTS_N_INSNS (3), /* fromint. */
1272 COSTS_N_INSNS (3) /* roundint. */
1273 }
1274 },
1275 /* Vector */
1276 {
1277 COSTS_N_INSNS (1) /* alu. */
1278 }
1279 };
1280
1281
1282 const struct cpu_cost_table cortexa7_extra_costs =
1283 {
1284 /* ALU */
1285 {
1286 0, /* arith. */
1287 0, /* logical. */
1288 COSTS_N_INSNS (1), /* shift. */
1289 COSTS_N_INSNS (1), /* shift_reg. */
1290 COSTS_N_INSNS (1), /* arith_shift. */
1291 COSTS_N_INSNS (1), /* arith_shift_reg. */
1292 COSTS_N_INSNS (1), /* log_shift. */
1293 COSTS_N_INSNS (1), /* log_shift_reg. */
1294 COSTS_N_INSNS (1), /* extend. */
1295 COSTS_N_INSNS (1), /* extend_arith. */
1296 COSTS_N_INSNS (1), /* bfi. */
1297 COSTS_N_INSNS (1), /* bfx. */
1298 COSTS_N_INSNS (1), /* clz. */
1299 COSTS_N_INSNS (1), /* rev. */
1300 0, /* non_exec. */
1301 true /* non_exec_costs_exec. */
1302 },
1303
1304 {
1305 /* MULT SImode */
1306 {
1307 0, /* simple. */
1308 COSTS_N_INSNS (1), /* flag_setting. */
1309 COSTS_N_INSNS (1), /* extend. */
1310 COSTS_N_INSNS (1), /* add. */
1311 COSTS_N_INSNS (1), /* extend_add. */
1312 COSTS_N_INSNS (7) /* idiv. */
1313 },
1314 /* MULT DImode */
1315 {
1316 0, /* simple (N/A). */
1317 0, /* flag_setting (N/A). */
1318 COSTS_N_INSNS (1), /* extend. */
1319 0, /* add. */
1320 COSTS_N_INSNS (2), /* extend_add. */
1321 0 /* idiv (N/A). */
1322 }
1323 },
1324 /* LD/ST */
1325 {
1326 COSTS_N_INSNS (1), /* load. */
1327 COSTS_N_INSNS (1), /* load_sign_extend. */
1328 COSTS_N_INSNS (3), /* ldrd. */
1329 COSTS_N_INSNS (1), /* ldm_1st. */
1330 1, /* ldm_regs_per_insn_1st. */
1331 2, /* ldm_regs_per_insn_subsequent. */
1332 COSTS_N_INSNS (2), /* loadf. */
1333 COSTS_N_INSNS (2), /* loadd. */
1334 COSTS_N_INSNS (1), /* load_unaligned. */
1335 COSTS_N_INSNS (1), /* store. */
1336 COSTS_N_INSNS (3), /* strd. */
1337 COSTS_N_INSNS (1), /* stm_1st. */
1338 1, /* stm_regs_per_insn_1st. */
1339 2, /* stm_regs_per_insn_subsequent. */
1340 COSTS_N_INSNS (2), /* storef. */
1341 COSTS_N_INSNS (2), /* stored. */
1342 COSTS_N_INSNS (1), /* store_unaligned. */
1343 COSTS_N_INSNS (1), /* loadv. */
1344 COSTS_N_INSNS (1) /* storev. */
1345 },
1346 {
1347 /* FP SFmode */
1348 {
1349 COSTS_N_INSNS (15), /* div. */
1350 COSTS_N_INSNS (3), /* mult. */
1351 COSTS_N_INSNS (7), /* mult_addsub. */
1352 COSTS_N_INSNS (7), /* fma. */
1353 COSTS_N_INSNS (3), /* addsub. */
1354 COSTS_N_INSNS (3), /* fpconst. */
1355 COSTS_N_INSNS (3), /* neg. */
1356 COSTS_N_INSNS (3), /* compare. */
1357 COSTS_N_INSNS (3), /* widen. */
1358 COSTS_N_INSNS (3), /* narrow. */
1359 COSTS_N_INSNS (3), /* toint. */
1360 COSTS_N_INSNS (3), /* fromint. */
1361 COSTS_N_INSNS (3) /* roundint. */
1362 },
1363 /* FP DFmode */
1364 {
1365 COSTS_N_INSNS (30), /* div. */
1366 COSTS_N_INSNS (6), /* mult. */
1367 COSTS_N_INSNS (10), /* mult_addsub. */
1368 COSTS_N_INSNS (7), /* fma. */
1369 COSTS_N_INSNS (3), /* addsub. */
1370 COSTS_N_INSNS (3), /* fpconst. */
1371 COSTS_N_INSNS (3), /* neg. */
1372 COSTS_N_INSNS (3), /* compare. */
1373 COSTS_N_INSNS (3), /* widen. */
1374 COSTS_N_INSNS (3), /* narrow. */
1375 COSTS_N_INSNS (3), /* toint. */
1376 COSTS_N_INSNS (3), /* fromint. */
1377 COSTS_N_INSNS (3) /* roundint. */
1378 }
1379 },
1380 /* Vector */
1381 {
1382 COSTS_N_INSNS (1) /* alu. */
1383 }
1384 };
1385
1386 const struct cpu_cost_table cortexa12_extra_costs =
1387 {
1388 /* ALU */
1389 {
1390 0, /* arith. */
1391 0, /* logical. */
1392 0, /* shift. */
1393 COSTS_N_INSNS (1), /* shift_reg. */
1394 COSTS_N_INSNS (1), /* arith_shift. */
1395 COSTS_N_INSNS (1), /* arith_shift_reg. */
1396 COSTS_N_INSNS (1), /* log_shift. */
1397 COSTS_N_INSNS (1), /* log_shift_reg. */
1398 0, /* extend. */
1399 COSTS_N_INSNS (1), /* extend_arith. */
1400 0, /* bfi. */
1401 COSTS_N_INSNS (1), /* bfx. */
1402 COSTS_N_INSNS (1), /* clz. */
1403 COSTS_N_INSNS (1), /* rev. */
1404 0, /* non_exec. */
1405 true /* non_exec_costs_exec. */
1406 },
1407 /* MULT SImode */
1408 {
1409 {
1410 COSTS_N_INSNS (2), /* simple. */
1411 COSTS_N_INSNS (3), /* flag_setting. */
1412 COSTS_N_INSNS (2), /* extend. */
1413 COSTS_N_INSNS (3), /* add. */
1414 COSTS_N_INSNS (2), /* extend_add. */
1415 COSTS_N_INSNS (18) /* idiv. */
1416 },
1417 /* MULT DImode */
1418 {
1419 0, /* simple (N/A). */
1420 0, /* flag_setting (N/A). */
1421 COSTS_N_INSNS (3), /* extend. */
1422 0, /* add (N/A). */
1423 COSTS_N_INSNS (3), /* extend_add. */
1424 0 /* idiv (N/A). */
1425 }
1426 },
1427 /* LD/ST */
1428 {
1429 COSTS_N_INSNS (3), /* load. */
1430 COSTS_N_INSNS (3), /* load_sign_extend. */
1431 COSTS_N_INSNS (3), /* ldrd. */
1432 COSTS_N_INSNS (3), /* ldm_1st. */
1433 1, /* ldm_regs_per_insn_1st. */
1434 2, /* ldm_regs_per_insn_subsequent. */
1435 COSTS_N_INSNS (3), /* loadf. */
1436 COSTS_N_INSNS (3), /* loadd. */
1437 0, /* load_unaligned. */
1438 0, /* store. */
1439 0, /* strd. */
1440 0, /* stm_1st. */
1441 1, /* stm_regs_per_insn_1st. */
1442 2, /* stm_regs_per_insn_subsequent. */
1443 COSTS_N_INSNS (2), /* storef. */
1444 COSTS_N_INSNS (2), /* stored. */
1445 0, /* store_unaligned. */
1446 COSTS_N_INSNS (1), /* loadv. */
1447 COSTS_N_INSNS (1) /* storev. */
1448 },
1449 {
1450 /* FP SFmode */
1451 {
1452 COSTS_N_INSNS (17), /* div. */
1453 COSTS_N_INSNS (4), /* mult. */
1454 COSTS_N_INSNS (8), /* mult_addsub. */
1455 COSTS_N_INSNS (8), /* fma. */
1456 COSTS_N_INSNS (4), /* addsub. */
1457 COSTS_N_INSNS (2), /* fpconst. */
1458 COSTS_N_INSNS (2), /* neg. */
1459 COSTS_N_INSNS (2), /* compare. */
1460 COSTS_N_INSNS (4), /* widen. */
1461 COSTS_N_INSNS (4), /* narrow. */
1462 COSTS_N_INSNS (4), /* toint. */
1463 COSTS_N_INSNS (4), /* fromint. */
1464 COSTS_N_INSNS (4) /* roundint. */
1465 },
1466 /* FP DFmode */
1467 {
1468 COSTS_N_INSNS (31), /* div. */
1469 COSTS_N_INSNS (4), /* mult. */
1470 COSTS_N_INSNS (8), /* mult_addsub. */
1471 COSTS_N_INSNS (8), /* fma. */
1472 COSTS_N_INSNS (4), /* addsub. */
1473 COSTS_N_INSNS (2), /* fpconst. */
1474 COSTS_N_INSNS (2), /* neg. */
1475 COSTS_N_INSNS (2), /* compare. */
1476 COSTS_N_INSNS (4), /* widen. */
1477 COSTS_N_INSNS (4), /* narrow. */
1478 COSTS_N_INSNS (4), /* toint. */
1479 COSTS_N_INSNS (4), /* fromint. */
1480 COSTS_N_INSNS (4) /* roundint. */
1481 }
1482 },
1483 /* Vector */
1484 {
1485 COSTS_N_INSNS (1) /* alu. */
1486 }
1487 };
1488
1489 const struct cpu_cost_table cortexa15_extra_costs =
1490 {
1491 /* ALU */
1492 {
1493 0, /* arith. */
1494 0, /* logical. */
1495 0, /* shift. */
1496 0, /* shift_reg. */
1497 COSTS_N_INSNS (1), /* arith_shift. */
1498 COSTS_N_INSNS (1), /* arith_shift_reg. */
1499 COSTS_N_INSNS (1), /* log_shift. */
1500 COSTS_N_INSNS (1), /* log_shift_reg. */
1501 0, /* extend. */
1502 COSTS_N_INSNS (1), /* extend_arith. */
1503 COSTS_N_INSNS (1), /* bfi. */
1504 0, /* bfx. */
1505 0, /* clz. */
1506 0, /* rev. */
1507 0, /* non_exec. */
1508 true /* non_exec_costs_exec. */
1509 },
1510 /* MULT SImode */
1511 {
1512 {
1513 COSTS_N_INSNS (2), /* simple. */
1514 COSTS_N_INSNS (3), /* flag_setting. */
1515 COSTS_N_INSNS (2), /* extend. */
1516 COSTS_N_INSNS (2), /* add. */
1517 COSTS_N_INSNS (2), /* extend_add. */
1518 COSTS_N_INSNS (18) /* idiv. */
1519 },
1520 /* MULT DImode */
1521 {
1522 0, /* simple (N/A). */
1523 0, /* flag_setting (N/A). */
1524 COSTS_N_INSNS (3), /* extend. */
1525 0, /* add (N/A). */
1526 COSTS_N_INSNS (3), /* extend_add. */
1527 0 /* idiv (N/A). */
1528 }
1529 },
1530 /* LD/ST */
1531 {
1532 COSTS_N_INSNS (3), /* load. */
1533 COSTS_N_INSNS (3), /* load_sign_extend. */
1534 COSTS_N_INSNS (3), /* ldrd. */
1535 COSTS_N_INSNS (4), /* ldm_1st. */
1536 1, /* ldm_regs_per_insn_1st. */
1537 2, /* ldm_regs_per_insn_subsequent. */
1538 COSTS_N_INSNS (4), /* loadf. */
1539 COSTS_N_INSNS (4), /* loadd. */
1540 0, /* load_unaligned. */
1541 0, /* store. */
1542 0, /* strd. */
1543 COSTS_N_INSNS (1), /* stm_1st. */
1544 1, /* stm_regs_per_insn_1st. */
1545 2, /* stm_regs_per_insn_subsequent. */
1546 0, /* storef. */
1547 0, /* stored. */
1548 0, /* store_unaligned. */
1549 COSTS_N_INSNS (1), /* loadv. */
1550 COSTS_N_INSNS (1) /* storev. */
1551 },
1552 {
1553 /* FP SFmode */
1554 {
1555 COSTS_N_INSNS (17), /* div. */
1556 COSTS_N_INSNS (4), /* mult. */
1557 COSTS_N_INSNS (8), /* mult_addsub. */
1558 COSTS_N_INSNS (8), /* fma. */
1559 COSTS_N_INSNS (4), /* addsub. */
1560 COSTS_N_INSNS (2), /* fpconst. */
1561 COSTS_N_INSNS (2), /* neg. */
1562 COSTS_N_INSNS (5), /* compare. */
1563 COSTS_N_INSNS (4), /* widen. */
1564 COSTS_N_INSNS (4), /* narrow. */
1565 COSTS_N_INSNS (4), /* toint. */
1566 COSTS_N_INSNS (4), /* fromint. */
1567 COSTS_N_INSNS (4) /* roundint. */
1568 },
1569 /* FP DFmode */
1570 {
1571 COSTS_N_INSNS (31), /* div. */
1572 COSTS_N_INSNS (4), /* mult. */
1573 COSTS_N_INSNS (8), /* mult_addsub. */
1574 COSTS_N_INSNS (8), /* fma. */
1575 COSTS_N_INSNS (4), /* addsub. */
1576 COSTS_N_INSNS (2), /* fpconst. */
1577 COSTS_N_INSNS (2), /* neg. */
1578 COSTS_N_INSNS (2), /* compare. */
1579 COSTS_N_INSNS (4), /* widen. */
1580 COSTS_N_INSNS (4), /* narrow. */
1581 COSTS_N_INSNS (4), /* toint. */
1582 COSTS_N_INSNS (4), /* fromint. */
1583 COSTS_N_INSNS (4) /* roundint. */
1584 }
1585 },
1586 /* Vector */
1587 {
1588 COSTS_N_INSNS (1) /* alu. */
1589 }
1590 };
1591
1592 const struct cpu_cost_table v7m_extra_costs =
1593 {
1594 /* ALU */
1595 {
1596 0, /* arith. */
1597 0, /* logical. */
1598 0, /* shift. */
1599 0, /* shift_reg. */
1600 0, /* arith_shift. */
1601 COSTS_N_INSNS (1), /* arith_shift_reg. */
1602 0, /* log_shift. */
1603 COSTS_N_INSNS (1), /* log_shift_reg. */
1604 0, /* extend. */
1605 COSTS_N_INSNS (1), /* extend_arith. */
1606 0, /* bfi. */
1607 0, /* bfx. */
1608 0, /* clz. */
1609 0, /* rev. */
1610 COSTS_N_INSNS (1), /* non_exec. */
1611 false /* non_exec_costs_exec. */
1612 },
1613 {
1614 /* MULT SImode */
1615 {
1616 COSTS_N_INSNS (1), /* simple. */
1617 COSTS_N_INSNS (1), /* flag_setting. */
1618 COSTS_N_INSNS (2), /* extend. */
1619 COSTS_N_INSNS (1), /* add. */
1620 COSTS_N_INSNS (3), /* extend_add. */
1621 COSTS_N_INSNS (8) /* idiv. */
1622 },
1623 /* MULT DImode */
1624 {
1625 0, /* simple (N/A). */
1626 0, /* flag_setting (N/A). */
1627 COSTS_N_INSNS (2), /* extend. */
1628 0, /* add (N/A). */
1629 COSTS_N_INSNS (3), /* extend_add. */
1630 0 /* idiv (N/A). */
1631 }
1632 },
1633 /* LD/ST */
1634 {
1635 COSTS_N_INSNS (2), /* load. */
1636 0, /* load_sign_extend. */
1637 COSTS_N_INSNS (3), /* ldrd. */
1638 COSTS_N_INSNS (2), /* ldm_1st. */
1639 1, /* ldm_regs_per_insn_1st. */
1640 1, /* ldm_regs_per_insn_subsequent. */
1641 COSTS_N_INSNS (2), /* loadf. */
1642 COSTS_N_INSNS (3), /* loadd. */
1643 COSTS_N_INSNS (1), /* load_unaligned. */
1644 COSTS_N_INSNS (2), /* store. */
1645 COSTS_N_INSNS (3), /* strd. */
1646 COSTS_N_INSNS (2), /* stm_1st. */
1647 1, /* stm_regs_per_insn_1st. */
1648 1, /* stm_regs_per_insn_subsequent. */
1649 COSTS_N_INSNS (2), /* storef. */
1650 COSTS_N_INSNS (3), /* stored. */
1651 COSTS_N_INSNS (1), /* store_unaligned. */
1652 COSTS_N_INSNS (1), /* loadv. */
1653 COSTS_N_INSNS (1) /* storev. */
1654 },
1655 {
1656 /* FP SFmode */
1657 {
1658 COSTS_N_INSNS (7), /* div. */
1659 COSTS_N_INSNS (2), /* mult. */
1660 COSTS_N_INSNS (5), /* mult_addsub. */
1661 COSTS_N_INSNS (3), /* fma. */
1662 COSTS_N_INSNS (1), /* addsub. */
1663 0, /* fpconst. */
1664 0, /* neg. */
1665 0, /* compare. */
1666 0, /* widen. */
1667 0, /* narrow. */
1668 0, /* toint. */
1669 0, /* fromint. */
1670 0 /* roundint. */
1671 },
1672 /* FP DFmode */
1673 {
1674 COSTS_N_INSNS (15), /* div. */
1675 COSTS_N_INSNS (5), /* mult. */
1676 COSTS_N_INSNS (7), /* mult_addsub. */
1677 COSTS_N_INSNS (7), /* fma. */
1678 COSTS_N_INSNS (3), /* addsub. */
1679 0, /* fpconst. */
1680 0, /* neg. */
1681 0, /* compare. */
1682 0, /* widen. */
1683 0, /* narrow. */
1684 0, /* toint. */
1685 0, /* fromint. */
1686 0 /* roundint. */
1687 }
1688 },
1689 /* Vector */
1690 {
1691 COSTS_N_INSNS (1) /* alu. */
1692 }
1693 };
1694
1695 const struct tune_params arm_slowmul_tune =
1696 {
1697 arm_slowmul_rtx_costs,
1698 NULL, /* Insn extra costs. */
1699 NULL, /* Sched adj cost. */
1700 arm_default_branch_cost,
1701 &arm_default_vec_cost,
1702 3, /* Constant limit. */
1703 5, /* Max cond insns. */
1704 8, /* Memset max inline. */
1705 1, /* Issue rate. */
1706 ARM_PREFETCH_NOT_BENEFICIAL,
1707 tune_params::PREF_CONST_POOL_TRUE,
1708 tune_params::PREF_LDRD_FALSE,
1709 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1710 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1711 tune_params::DISPARAGE_FLAGS_NEITHER,
1712 tune_params::PREF_NEON_64_FALSE,
1713 tune_params::PREF_NEON_STRINGOPS_FALSE,
1714 tune_params::FUSE_NOTHING,
1715 tune_params::SCHED_AUTOPREF_OFF
1716 };
1717
1718 const struct tune_params arm_fastmul_tune =
1719 {
1720 arm_fastmul_rtx_costs,
1721 NULL, /* Insn extra costs. */
1722 NULL, /* Sched adj cost. */
1723 arm_default_branch_cost,
1724 &arm_default_vec_cost,
1725 1, /* Constant limit. */
1726 5, /* Max cond insns. */
1727 8, /* Memset max inline. */
1728 1, /* Issue rate. */
1729 ARM_PREFETCH_NOT_BENEFICIAL,
1730 tune_params::PREF_CONST_POOL_TRUE,
1731 tune_params::PREF_LDRD_FALSE,
1732 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1733 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1734 tune_params::DISPARAGE_FLAGS_NEITHER,
1735 tune_params::PREF_NEON_64_FALSE,
1736 tune_params::PREF_NEON_STRINGOPS_FALSE,
1737 tune_params::FUSE_NOTHING,
1738 tune_params::SCHED_AUTOPREF_OFF
1739 };
1740
1741 /* StrongARM has early execution of branches, so a sequence that is worth
1742 skipping is shorter. Set max_insns_skipped to a lower value. */
1743
1744 const struct tune_params arm_strongarm_tune =
1745 {
1746 arm_fastmul_rtx_costs,
1747 NULL, /* Insn extra costs. */
1748 NULL, /* Sched adj cost. */
1749 arm_default_branch_cost,
1750 &arm_default_vec_cost,
1751 1, /* Constant limit. */
1752 3, /* Max cond insns. */
1753 8, /* Memset max inline. */
1754 1, /* Issue rate. */
1755 ARM_PREFETCH_NOT_BENEFICIAL,
1756 tune_params::PREF_CONST_POOL_TRUE,
1757 tune_params::PREF_LDRD_FALSE,
1758 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1759 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1760 tune_params::DISPARAGE_FLAGS_NEITHER,
1761 tune_params::PREF_NEON_64_FALSE,
1762 tune_params::PREF_NEON_STRINGOPS_FALSE,
1763 tune_params::FUSE_NOTHING,
1764 tune_params::SCHED_AUTOPREF_OFF
1765 };
1766
1767 const struct tune_params arm_xscale_tune =
1768 {
1769 arm_xscale_rtx_costs,
1770 NULL, /* Insn extra costs. */
1771 xscale_sched_adjust_cost,
1772 arm_default_branch_cost,
1773 &arm_default_vec_cost,
1774 2, /* Constant limit. */
1775 3, /* Max cond insns. */
1776 8, /* Memset max inline. */
1777 1, /* Issue rate. */
1778 ARM_PREFETCH_NOT_BENEFICIAL,
1779 tune_params::PREF_CONST_POOL_TRUE,
1780 tune_params::PREF_LDRD_FALSE,
1781 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1782 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1783 tune_params::DISPARAGE_FLAGS_NEITHER,
1784 tune_params::PREF_NEON_64_FALSE,
1785 tune_params::PREF_NEON_STRINGOPS_FALSE,
1786 tune_params::FUSE_NOTHING,
1787 tune_params::SCHED_AUTOPREF_OFF
1788 };
1789
1790 const struct tune_params arm_9e_tune =
1791 {
1792 arm_9e_rtx_costs,
1793 NULL, /* Insn extra costs. */
1794 NULL, /* Sched adj cost. */
1795 arm_default_branch_cost,
1796 &arm_default_vec_cost,
1797 1, /* Constant limit. */
1798 5, /* Max cond insns. */
1799 8, /* Memset max inline. */
1800 1, /* Issue rate. */
1801 ARM_PREFETCH_NOT_BENEFICIAL,
1802 tune_params::PREF_CONST_POOL_TRUE,
1803 tune_params::PREF_LDRD_FALSE,
1804 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1805 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1806 tune_params::DISPARAGE_FLAGS_NEITHER,
1807 tune_params::PREF_NEON_64_FALSE,
1808 tune_params::PREF_NEON_STRINGOPS_FALSE,
1809 tune_params::FUSE_NOTHING,
1810 tune_params::SCHED_AUTOPREF_OFF
1811 };
1812
1813 const struct tune_params arm_marvell_pj4_tune =
1814 {
1815 arm_9e_rtx_costs,
1816 NULL, /* Insn extra costs. */
1817 NULL, /* Sched adj cost. */
1818 arm_default_branch_cost,
1819 &arm_default_vec_cost,
1820 1, /* Constant limit. */
1821 5, /* Max cond insns. */
1822 8, /* Memset max inline. */
1823 2, /* Issue rate. */
1824 ARM_PREFETCH_NOT_BENEFICIAL,
1825 tune_params::PREF_CONST_POOL_TRUE,
1826 tune_params::PREF_LDRD_FALSE,
1827 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1828 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1829 tune_params::DISPARAGE_FLAGS_NEITHER,
1830 tune_params::PREF_NEON_64_FALSE,
1831 tune_params::PREF_NEON_STRINGOPS_FALSE,
1832 tune_params::FUSE_NOTHING,
1833 tune_params::SCHED_AUTOPREF_OFF
1834 };
1835
1836 const struct tune_params arm_v6t2_tune =
1837 {
1838 arm_9e_rtx_costs,
1839 NULL, /* Insn extra costs. */
1840 NULL, /* Sched adj cost. */
1841 arm_default_branch_cost,
1842 &arm_default_vec_cost,
1843 1, /* Constant limit. */
1844 5, /* Max cond insns. */
1845 8, /* Memset max inline. */
1846 1, /* Issue rate. */
1847 ARM_PREFETCH_NOT_BENEFICIAL,
1848 tune_params::PREF_CONST_POOL_FALSE,
1849 tune_params::PREF_LDRD_FALSE,
1850 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1851 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1852 tune_params::DISPARAGE_FLAGS_NEITHER,
1853 tune_params::PREF_NEON_64_FALSE,
1854 tune_params::PREF_NEON_STRINGOPS_FALSE,
1855 tune_params::FUSE_NOTHING,
1856 tune_params::SCHED_AUTOPREF_OFF
1857 };
1858
1859
1860 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1861 const struct tune_params arm_cortex_tune =
1862 {
1863 arm_9e_rtx_costs,
1864 &generic_extra_costs,
1865 NULL, /* Sched adj cost. */
1866 arm_default_branch_cost,
1867 &arm_default_vec_cost,
1868 1, /* Constant limit. */
1869 5, /* Max cond insns. */
1870 8, /* Memset max inline. */
1871 2, /* Issue rate. */
1872 ARM_PREFETCH_NOT_BENEFICIAL,
1873 tune_params::PREF_CONST_POOL_FALSE,
1874 tune_params::PREF_LDRD_FALSE,
1875 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1876 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1877 tune_params::DISPARAGE_FLAGS_NEITHER,
1878 tune_params::PREF_NEON_64_FALSE,
1879 tune_params::PREF_NEON_STRINGOPS_FALSE,
1880 tune_params::FUSE_NOTHING,
1881 tune_params::SCHED_AUTOPREF_OFF
1882 };
1883
1884 const struct tune_params arm_cortex_a8_tune =
1885 {
1886 arm_9e_rtx_costs,
1887 &cortexa8_extra_costs,
1888 NULL, /* Sched adj cost. */
1889 arm_default_branch_cost,
1890 &arm_default_vec_cost,
1891 1, /* Constant limit. */
1892 5, /* Max cond insns. */
1893 8, /* Memset max inline. */
1894 2, /* Issue rate. */
1895 ARM_PREFETCH_NOT_BENEFICIAL,
1896 tune_params::PREF_CONST_POOL_FALSE,
1897 tune_params::PREF_LDRD_FALSE,
1898 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1899 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1900 tune_params::DISPARAGE_FLAGS_NEITHER,
1901 tune_params::PREF_NEON_64_FALSE,
1902 tune_params::PREF_NEON_STRINGOPS_TRUE,
1903 tune_params::FUSE_NOTHING,
1904 tune_params::SCHED_AUTOPREF_OFF
1905 };
1906
1907 const struct tune_params arm_cortex_a7_tune =
1908 {
1909 arm_9e_rtx_costs,
1910 &cortexa7_extra_costs,
1911 NULL, /* Sched adj cost. */
1912 arm_default_branch_cost,
1913 &arm_default_vec_cost,
1914 1, /* Constant limit. */
1915 5, /* Max cond insns. */
1916 8, /* Memset max inline. */
1917 2, /* Issue rate. */
1918 ARM_PREFETCH_NOT_BENEFICIAL,
1919 tune_params::PREF_CONST_POOL_FALSE,
1920 tune_params::PREF_LDRD_FALSE,
1921 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1922 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1923 tune_params::DISPARAGE_FLAGS_NEITHER,
1924 tune_params::PREF_NEON_64_FALSE,
1925 tune_params::PREF_NEON_STRINGOPS_TRUE,
1926 tune_params::FUSE_NOTHING,
1927 tune_params::SCHED_AUTOPREF_OFF
1928 };
1929
1930 const struct tune_params arm_cortex_a15_tune =
1931 {
1932 arm_9e_rtx_costs,
1933 &cortexa15_extra_costs,
1934 NULL, /* Sched adj cost. */
1935 arm_default_branch_cost,
1936 &arm_default_vec_cost,
1937 1, /* Constant limit. */
1938 2, /* Max cond insns. */
1939 8, /* Memset max inline. */
1940 3, /* Issue rate. */
1941 ARM_PREFETCH_NOT_BENEFICIAL,
1942 tune_params::PREF_CONST_POOL_FALSE,
1943 tune_params::PREF_LDRD_TRUE,
1944 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1945 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1946 tune_params::DISPARAGE_FLAGS_ALL,
1947 tune_params::PREF_NEON_64_FALSE,
1948 tune_params::PREF_NEON_STRINGOPS_TRUE,
1949 tune_params::FUSE_NOTHING,
1950 tune_params::SCHED_AUTOPREF_FULL
1951 };
1952
1953 const struct tune_params arm_cortex_a53_tune =
1954 {
1955 arm_9e_rtx_costs,
1956 &cortexa53_extra_costs,
1957 NULL, /* Sched adj cost. */
1958 arm_default_branch_cost,
1959 &arm_default_vec_cost,
1960 1, /* Constant limit. */
1961 5, /* Max cond insns. */
1962 8, /* Memset max inline. */
1963 2, /* Issue rate. */
1964 ARM_PREFETCH_NOT_BENEFICIAL,
1965 tune_params::PREF_CONST_POOL_FALSE,
1966 tune_params::PREF_LDRD_FALSE,
1967 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1968 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1969 tune_params::DISPARAGE_FLAGS_NEITHER,
1970 tune_params::PREF_NEON_64_FALSE,
1971 tune_params::PREF_NEON_STRINGOPS_TRUE,
1972 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1973 tune_params::SCHED_AUTOPREF_OFF
1974 };
1975
1976 const struct tune_params arm_cortex_a57_tune =
1977 {
1978 arm_9e_rtx_costs,
1979 &cortexa57_extra_costs,
1980 NULL, /* Sched adj cost. */
1981 arm_default_branch_cost,
1982 &arm_default_vec_cost,
1983 1, /* Constant limit. */
1984 2, /* Max cond insns. */
1985 8, /* Memset max inline. */
1986 3, /* Issue rate. */
1987 ARM_PREFETCH_NOT_BENEFICIAL,
1988 tune_params::PREF_CONST_POOL_FALSE,
1989 tune_params::PREF_LDRD_TRUE,
1990 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1991 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1992 tune_params::DISPARAGE_FLAGS_ALL,
1993 tune_params::PREF_NEON_64_FALSE,
1994 tune_params::PREF_NEON_STRINGOPS_TRUE,
1995 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1996 tune_params::SCHED_AUTOPREF_FULL
1997 };
1998
1999 const struct tune_params arm_xgene1_tune =
2000 {
2001 arm_9e_rtx_costs,
2002 &xgene1_extra_costs,
2003 NULL, /* Sched adj cost. */
2004 arm_default_branch_cost,
2005 &arm_default_vec_cost,
2006 1, /* Constant limit. */
2007 2, /* Max cond insns. */
2008 32, /* Memset max inline. */
2009 4, /* Issue rate. */
2010 ARM_PREFETCH_NOT_BENEFICIAL,
2011 tune_params::PREF_CONST_POOL_FALSE,
2012 tune_params::PREF_LDRD_TRUE,
2013 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2014 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2015 tune_params::DISPARAGE_FLAGS_ALL,
2016 tune_params::PREF_NEON_64_FALSE,
2017 tune_params::PREF_NEON_STRINGOPS_FALSE,
2018 tune_params::FUSE_NOTHING,
2019 tune_params::SCHED_AUTOPREF_OFF
2020 };
2021
2022 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2023 less appealing. Set max_insns_skipped to a low value. */
2024
2025 const struct tune_params arm_cortex_a5_tune =
2026 {
2027 arm_9e_rtx_costs,
2028 &cortexa5_extra_costs,
2029 NULL, /* Sched adj cost. */
2030 arm_cortex_a5_branch_cost,
2031 &arm_default_vec_cost,
2032 1, /* Constant limit. */
2033 1, /* Max cond insns. */
2034 8, /* Memset max inline. */
2035 2, /* Issue rate. */
2036 ARM_PREFETCH_NOT_BENEFICIAL,
2037 tune_params::PREF_CONST_POOL_FALSE,
2038 tune_params::PREF_LDRD_FALSE,
2039 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2041 tune_params::DISPARAGE_FLAGS_NEITHER,
2042 tune_params::PREF_NEON_64_FALSE,
2043 tune_params::PREF_NEON_STRINGOPS_TRUE,
2044 tune_params::FUSE_NOTHING,
2045 tune_params::SCHED_AUTOPREF_OFF
2046 };
2047
2048 const struct tune_params arm_cortex_a9_tune =
2049 {
2050 arm_9e_rtx_costs,
2051 &cortexa9_extra_costs,
2052 cortex_a9_sched_adjust_cost,
2053 arm_default_branch_cost,
2054 &arm_default_vec_cost,
2055 1, /* Constant limit. */
2056 5, /* Max cond insns. */
2057 8, /* Memset max inline. */
2058 2, /* Issue rate. */
2059 ARM_PREFETCH_BENEFICIAL(4,32,32),
2060 tune_params::PREF_CONST_POOL_FALSE,
2061 tune_params::PREF_LDRD_FALSE,
2062 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2064 tune_params::DISPARAGE_FLAGS_NEITHER,
2065 tune_params::PREF_NEON_64_FALSE,
2066 tune_params::PREF_NEON_STRINGOPS_FALSE,
2067 tune_params::FUSE_NOTHING,
2068 tune_params::SCHED_AUTOPREF_OFF
2069 };
2070
2071 const struct tune_params arm_cortex_a12_tune =
2072 {
2073 arm_9e_rtx_costs,
2074 &cortexa12_extra_costs,
2075 NULL, /* Sched adj cost. */
2076 arm_default_branch_cost,
2077 &arm_default_vec_cost, /* Vectorizer costs. */
2078 1, /* Constant limit. */
2079 2, /* Max cond insns. */
2080 8, /* Memset max inline. */
2081 2, /* Issue rate. */
2082 ARM_PREFETCH_NOT_BENEFICIAL,
2083 tune_params::PREF_CONST_POOL_FALSE,
2084 tune_params::PREF_LDRD_TRUE,
2085 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2087 tune_params::DISPARAGE_FLAGS_ALL,
2088 tune_params::PREF_NEON_64_FALSE,
2089 tune_params::PREF_NEON_STRINGOPS_TRUE,
2090 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2091 tune_params::SCHED_AUTOPREF_OFF
2092 };
2093
2094 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2095 single cycle to execute, so a MOVW/MOVT pair costs two cycles. An LDR from the
2096 constant pool also takes two cycles, but mildly increases pipelining
2097 opportunity (consecutive loads/stores can be pipelined together, saving one
2098 cycle), and may also improve icache utilisation. Hence we prefer the constant
2099 pool for such processors. */
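/* For illustration (a sketch, not taken from generated code): materialising
   the 32-bit constant 0x12345678 costs two cycles either way on such a core:

       movw    r0, #0x5678      @ 1 cycle
       movt    r0, #0x1234      @ 1 cycle

   versus

       ldr     r0, .Lpool       @ 2 cycles, but may pipeline with an
                                @ adjacent load/store
       ...
   .Lpool:
       .word   0x12345678

   hence PREF_CONST_POOL_TRUE in the table below.  */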
2100
2101 const struct tune_params arm_v7m_tune =
2102 {
2103 arm_9e_rtx_costs,
2104 &v7m_extra_costs,
2105 NULL, /* Sched adj cost. */
2106 arm_cortex_m_branch_cost,
2107 &arm_default_vec_cost,
2108 1, /* Constant limit. */
2109 2, /* Max cond insns. */
2110 8, /* Memset max inline. */
2111 1, /* Issue rate. */
2112 ARM_PREFETCH_NOT_BENEFICIAL,
2113 tune_params::PREF_CONST_POOL_TRUE,
2114 tune_params::PREF_LDRD_FALSE,
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2117 tune_params::DISPARAGE_FLAGS_NEITHER,
2118 tune_params::PREF_NEON_64_FALSE,
2119 tune_params::PREF_NEON_STRINGOPS_FALSE,
2120 tune_params::FUSE_NOTHING,
2121 tune_params::SCHED_AUTOPREF_OFF
2122 };
2123
2124 /* Cortex-M7 tuning. */
2125
2126 const struct tune_params arm_cortex_m7_tune =
2127 {
2128 arm_9e_rtx_costs,
2129 &v7m_extra_costs,
2130 NULL, /* Sched adj cost. */
2131 arm_cortex_m7_branch_cost,
2132 &arm_default_vec_cost,
2133 0, /* Constant limit. */
2134 1, /* Max cond insns. */
2135 8, /* Memset max inline. */
2136 2, /* Issue rate. */
2137 ARM_PREFETCH_NOT_BENEFICIAL,
2138 tune_params::PREF_CONST_POOL_TRUE,
2139 tune_params::PREF_LDRD_FALSE,
2140 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2141 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2142 tune_params::DISPARAGE_FLAGS_NEITHER,
2143 tune_params::PREF_NEON_64_FALSE,
2144 tune_params::PREF_NEON_STRINGOPS_FALSE,
2145 tune_params::FUSE_NOTHING,
2146 tune_params::SCHED_AUTOPREF_OFF
2147 };
2148
2149 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2150 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2151 const struct tune_params arm_v6m_tune =
2152 {
2153 arm_9e_rtx_costs,
2154 NULL, /* Insn extra costs. */
2155 NULL, /* Sched adj cost. */
2156 arm_default_branch_cost,
2157 &arm_default_vec_cost, /* Vectorizer costs. */
2158 1, /* Constant limit. */
2159 5, /* Max cond insns. */
2160 8, /* Memset max inline. */
2161 1, /* Issue rate. */
2162 ARM_PREFETCH_NOT_BENEFICIAL,
2163 tune_params::PREF_CONST_POOL_FALSE,
2164 tune_params::PREF_LDRD_FALSE,
2165 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2166 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2167 tune_params::DISPARAGE_FLAGS_NEITHER,
2168 tune_params::PREF_NEON_64_FALSE,
2169 tune_params::PREF_NEON_STRINGOPS_FALSE,
2170 tune_params::FUSE_NOTHING,
2171 tune_params::SCHED_AUTOPREF_OFF
2172 };
2173
2174 const struct tune_params arm_fa726te_tune =
2175 {
2176 arm_9e_rtx_costs,
2177 NULL, /* Insn extra costs. */
2178 fa726te_sched_adjust_cost,
2179 arm_default_branch_cost,
2180 &arm_default_vec_cost,
2181 1, /* Constant limit. */
2182 5, /* Max cond insns. */
2183 8, /* Memset max inline. */
2184 2, /* Issue rate. */
2185 ARM_PREFETCH_NOT_BENEFICIAL,
2186 tune_params::PREF_CONST_POOL_TRUE,
2187 tune_params::PREF_LDRD_FALSE,
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2189 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2190 tune_params::DISPARAGE_FLAGS_NEITHER,
2191 tune_params::PREF_NEON_64_FALSE,
2192 tune_params::PREF_NEON_STRINGOPS_FALSE,
2193 tune_params::FUSE_NOTHING,
2194 tune_params::SCHED_AUTOPREF_OFF
2195 };
2196
2197
2198 /* Not all of these give usefully different compilation alternatives,
2199 but there is no simple way of generalizing them. */
2200 static const struct processors all_cores[] =
2201 {
2202 /* ARM Cores */
2203 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2204 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2205 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2206 #include "arm-cores.def"
2207 #undef ARM_CORE
2208 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2209 };
2210
2211 static const struct processors all_architectures[] =
2212 {
2213 /* ARM Architectures */
2214 /* We don't specify tuning costs here, as they will be figured
2215 out from the core. */
2216
2217 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2218 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2219 #include "arm-arches.def"
2220 #undef ARM_ARCH
2221 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2222 };
2223
2224
2225 /* These are populated as command-line arguments are processed, or remain
2226 NULL if not specified. */
2227 static const struct processors *arm_selected_arch;
2228 static const struct processors *arm_selected_cpu;
2229 static const struct processors *arm_selected_tune;
2230
2231 /* The name of the preprocessor macro to define for this architecture. */
2232
2233 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2234
2235 /* Available values for -mfpu=. */
2236
2237 static const struct arm_fpu_desc all_fpus[] =
2238 {
2239 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2240 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2241 #include "arm-fpus.def"
2242 #undef ARM_FPU
2243 };
2244
2245
2246 /* Supported TLS relocations. */
2247
2248 enum tls_reloc {
2249 TLS_GD32,
2250 TLS_LDM32,
2251 TLS_LDO32,
2252 TLS_IE32,
2253 TLS_LE32,
2254 TLS_DESCSEQ /* GNU scheme */
2255 };
2256
2257 /* The maximum number of insns to be used when loading a constant. */
2258 inline static int
2259 arm_constant_limit (bool size_p)
2260 {
2261 return size_p ? 1 : current_tune->constant_limit;
2262 }
2263
2264 /* Emit an insn that's a simple single-set. Both the operands must be known
2265 to be valid. */
2266 inline static rtx_insn *
2267 emit_set_insn (rtx x, rtx y)
2268 {
2269 return emit_insn (gen_rtx_SET (x, y));
2270 }
2271
2272 /* Return the number of bits set in VALUE. */
2273 static unsigned
2274 bit_count (unsigned long value)
2275 {
2276 unsigned long count = 0;
2277
2278 while (value)
2279 {
2280 count++;
2281 value &= value - 1; /* Clear the least-significant set bit. */
2282 }
2283
2284 return count;
2285 }
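/* Worked example: bit_count (0x2c) returns 3.  The loop sees
   0x2c (101100b) -> 0x28 (101000b) -> 0x20 (100000b) -> 0,
   clearing one set bit per iteration (Kernighan's trick).  */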
2286
2287 typedef struct
2288 {
2289 machine_mode mode;
2290 const char *name;
2291 } arm_fixed_mode_set;
2292
2293 /* A small helper for setting fixed-point library libfuncs. */
2294
2295 static void
2296 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2297 const char *funcname, const char *modename,
2298 int num_suffix)
2299 {
2300 char buffer[50];
2301
2302 if (num_suffix == 0)
2303 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2304 else
2305 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2306
2307 set_optab_libfunc (optable, mode, buffer);
2308 }
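/* For example, the call
     arm_set_fixed_optab_libfunc (add_optab, QQmode, "add", "qq", 3);
   issued from the fixed-point loop further below registers "__gnu_addqq3"
   as the library function for QQmode addition.  */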
2309
2310 static void
2311 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2312 machine_mode from, const char *funcname,
2313 const char *toname, const char *fromname)
2314 {
2315 char buffer[50];
2316 const char *maybe_suffix_2 = "";
2317
2318 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2319 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2320 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2321 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2322 maybe_suffix_2 = "2";
2323
2324 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2325 maybe_suffix_2);
2326
2327 set_conv_libfunc (optable, to, from, buffer);
2328 }
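/* For example, a QQmode -> HQmode conversion registers "__gnu_fractqqhq2"
   (both are signed fractional modes, so the "2" suffix applies), whereas an
   SQmode -> SFmode conversion registers "__gnu_fractsqsf" (SFmode is not a
   fixed-point mode, so the suffix is omitted).  */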
2329
2330 /* Set up library functions unique to ARM. */
2331
2332 static void
2333 arm_init_libfuncs (void)
2334 {
2335 /* For Linux, we have access to kernel support for atomic operations. */
2336 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2337 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2338
2339 /* There are no special library functions unless we are using the
2340 ARM BPABI. */
2341 if (!TARGET_BPABI)
2342 return;
2343
2344 /* The functions below are described in Section 4 of the "Run-Time
2345 ABI for the ARM architecture", Version 1.0. */
2346
2347 /* Double-precision floating-point arithmetic. Table 2. */
2348 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2349 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2350 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2351 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2352 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
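/* As a result, on a soft-float configuration an expression such as
   "double c = a + b;" is lowered to a call to __aeabi_dadd instead of the
   generic __adddf3 (illustrative example).  */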
2353
2354 /* Double-precision comparisons. Table 3. */
2355 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2356 set_optab_libfunc (ne_optab, DFmode, NULL);
2357 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2358 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2359 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2360 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2361 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2362
2363 /* Single-precision floating-point arithmetic. Table 4. */
2364 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2365 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2366 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2367 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2368 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2369
2370 /* Single-precision comparisons. Table 5. */
2371 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2372 set_optab_libfunc (ne_optab, SFmode, NULL);
2373 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2374 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2375 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2376 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2377 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2378
2379 /* Floating-point to integer conversions. Table 6. */
2380 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2381 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2382 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2383 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2384 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2385 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2386 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2387 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2388
2389 /* Conversions between floating types. Table 7. */
2390 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2391 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2392
2393 /* Integer to floating-point conversions. Table 8. */
2394 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2395 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2396 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2397 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2398 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2399 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2400 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2401 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2402
2403 /* Long long. Table 9. */
2404 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2405 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2406 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2407 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2408 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2409 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2410 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2411 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2412
2413 /* Integer (32/32->32) division. \S 4.3.1. */
2414 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2415 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2416
2417 /* The divmod functions are designed so that they can be used for
2418 plain division, even though they return both the quotient and the
2419 remainder. The quotient is returned in the usual location (i.e.,
2420 r0 for SImode, {r0, r1} for DImode), just as would be expected
2421 for an ordinary division routine. Because the AAPCS calling
2422 conventions specify that all of { r0, r1, r2, r3 } are
2423 call-clobbered registers, there is no need to tell the compiler
2424 explicitly that those registers are clobbered by these
2425 routines. */
2426 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2427 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
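/* Under the run-time ABI these routines return the quotient in r0 (or
   {r0, r1} for DImode) and the remainder in r1 (or {r2, r3}).  So, as an
   illustrative sketch, "r = a % b" on a core without hardware divide becomes
   a call to __aeabi_idivmod followed by a move of r1 into the result.  */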
2428
2429 /* For SImode division the ABI provides div-without-mod routines,
2430 which are faster. */
2431 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2432 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2433
2434 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2435 divmod libcalls instead. */
2436 set_optab_libfunc (smod_optab, DImode, NULL);
2437 set_optab_libfunc (umod_optab, DImode, NULL);
2438 set_optab_libfunc (smod_optab, SImode, NULL);
2439 set_optab_libfunc (umod_optab, SImode, NULL);
2440
2441 /* Half-precision float operations. The compiler handles all operations
2442 with NULL libfuncs by converting to SFmode. */
2443 switch (arm_fp16_format)
2444 {
2445 case ARM_FP16_FORMAT_IEEE:
2446 case ARM_FP16_FORMAT_ALTERNATIVE:
2447
2448 /* Conversions. */
2449 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2450 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2451 ? "__gnu_f2h_ieee"
2452 : "__gnu_f2h_alternative"));
2453 set_conv_libfunc (sext_optab, SFmode, HFmode,
2454 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2455 ? "__gnu_h2f_ieee"
2456 : "__gnu_h2f_alternative"));
2457
2458 /* Arithmetic. */
2459 set_optab_libfunc (add_optab, HFmode, NULL);
2460 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2461 set_optab_libfunc (smul_optab, HFmode, NULL);
2462 set_optab_libfunc (neg_optab, HFmode, NULL);
2463 set_optab_libfunc (sub_optab, HFmode, NULL);
2464
2465 /* Comparisons. */
2466 set_optab_libfunc (eq_optab, HFmode, NULL);
2467 set_optab_libfunc (ne_optab, HFmode, NULL);
2468 set_optab_libfunc (lt_optab, HFmode, NULL);
2469 set_optab_libfunc (le_optab, HFmode, NULL);
2470 set_optab_libfunc (ge_optab, HFmode, NULL);
2471 set_optab_libfunc (gt_optab, HFmode, NULL);
2472 set_optab_libfunc (unord_optab, HFmode, NULL);
2473 break;
2474
2475 default:
2476 break;
2477 }
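/* Consequently an HFmode addition such as "__fp16 c = a + b;" is performed
   by widening both operands with __gnu_h2f_ieee (or the alternative-format
   variant), adding in SFmode and narrowing the result with __gnu_f2h_ieee
   (an illustrative sketch; the exact sequence is chosen by the middle end).  */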
2478
2479 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2480 {
2481 const arm_fixed_mode_set fixed_arith_modes[] =
2482 {
2483 { QQmode, "qq" },
2484 { UQQmode, "uqq" },
2485 { HQmode, "hq" },
2486 { UHQmode, "uhq" },
2487 { SQmode, "sq" },
2488 { USQmode, "usq" },
2489 { DQmode, "dq" },
2490 { UDQmode, "udq" },
2491 { TQmode, "tq" },
2492 { UTQmode, "utq" },
2493 { HAmode, "ha" },
2494 { UHAmode, "uha" },
2495 { SAmode, "sa" },
2496 { USAmode, "usa" },
2497 { DAmode, "da" },
2498 { UDAmode, "uda" },
2499 { TAmode, "ta" },
2500 { UTAmode, "uta" }
2501 };
2502 const arm_fixed_mode_set fixed_conv_modes[] =
2503 {
2504 { QQmode, "qq" },
2505 { UQQmode, "uqq" },
2506 { HQmode, "hq" },
2507 { UHQmode, "uhq" },
2508 { SQmode, "sq" },
2509 { USQmode, "usq" },
2510 { DQmode, "dq" },
2511 { UDQmode, "udq" },
2512 { TQmode, "tq" },
2513 { UTQmode, "utq" },
2514 { HAmode, "ha" },
2515 { UHAmode, "uha" },
2516 { SAmode, "sa" },
2517 { USAmode, "usa" },
2518 { DAmode, "da" },
2519 { UDAmode, "uda" },
2520 { TAmode, "ta" },
2521 { UTAmode, "uta" },
2522 { QImode, "qi" },
2523 { HImode, "hi" },
2524 { SImode, "si" },
2525 { DImode, "di" },
2526 { TImode, "ti" },
2527 { SFmode, "sf" },
2528 { DFmode, "df" }
2529 };
2530 unsigned int i, j;
2531
2532 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2533 {
2534 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2535 "add", fixed_arith_modes[i].name, 3);
2536 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2537 "ssadd", fixed_arith_modes[i].name, 3);
2538 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2539 "usadd", fixed_arith_modes[i].name, 3);
2540 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2541 "sub", fixed_arith_modes[i].name, 3);
2542 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2543 "sssub", fixed_arith_modes[i].name, 3);
2544 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2545 "ussub", fixed_arith_modes[i].name, 3);
2546 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2547 "mul", fixed_arith_modes[i].name, 3);
2548 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2549 "ssmul", fixed_arith_modes[i].name, 3);
2550 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2551 "usmul", fixed_arith_modes[i].name, 3);
2552 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2553 "div", fixed_arith_modes[i].name, 3);
2554 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2555 "udiv", fixed_arith_modes[i].name, 3);
2556 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2557 "ssdiv", fixed_arith_modes[i].name, 3);
2558 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2559 "usdiv", fixed_arith_modes[i].name, 3);
2560 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2561 "neg", fixed_arith_modes[i].name, 2);
2562 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2563 "ssneg", fixed_arith_modes[i].name, 2);
2564 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2565 "usneg", fixed_arith_modes[i].name, 2);
2566 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2567 "ashl", fixed_arith_modes[i].name, 3);
2568 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2569 "ashr", fixed_arith_modes[i].name, 3);
2570 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2571 "lshr", fixed_arith_modes[i].name, 3);
2572 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2573 "ssashl", fixed_arith_modes[i].name, 3);
2574 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2575 "usashl", fixed_arith_modes[i].name, 3);
2576 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2577 "cmp", fixed_arith_modes[i].name, 2);
2578 }
2579
2580 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2581 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2582 {
2583 if (i == j
2584 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2585 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2586 continue;
2587
2588 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2589 fixed_conv_modes[j].mode, "fract",
2590 fixed_conv_modes[i].name,
2591 fixed_conv_modes[j].name);
2592 arm_set_fixed_conv_libfunc (satfract_optab,
2593 fixed_conv_modes[i].mode,
2594 fixed_conv_modes[j].mode, "satfract",
2595 fixed_conv_modes[i].name,
2596 fixed_conv_modes[j].name);
2597 arm_set_fixed_conv_libfunc (fractuns_optab,
2598 fixed_conv_modes[i].mode,
2599 fixed_conv_modes[j].mode, "fractuns",
2600 fixed_conv_modes[i].name,
2601 fixed_conv_modes[j].name);
2602 arm_set_fixed_conv_libfunc (satfractuns_optab,
2603 fixed_conv_modes[i].mode,
2604 fixed_conv_modes[j].mode, "satfractuns",
2605 fixed_conv_modes[i].name,
2606 fixed_conv_modes[j].name);
2607 }
2608 }
2609
2610 if (TARGET_AAPCS_BASED)
2611 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2612 }
2613
2614 /* On AAPCS systems, this is the "struct __va_list". */
2615 static GTY(()) tree va_list_type;
2616
2617 /* Return the type to use as __builtin_va_list. */
2618 static tree
2619 arm_build_builtin_va_list (void)
2620 {
2621 tree va_list_name;
2622 tree ap_field;
2623
2624 if (!TARGET_AAPCS_BASED)
2625 return std_build_builtin_va_list ();
2626
2627 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2628 defined as:
2629
2630 struct __va_list
2631 {
2632 void *__ap;
2633 };
2634
2635 The C Library ABI further reinforces this definition in \S
2636 4.1.
2637
2638 We must follow this definition exactly. The structure tag
2639 name is visible in C++ mangled names, and thus forms a part
2640 of the ABI. The field name may be used by people who
2641 #include <stdarg.h>. */
2642 /* Create the type. */
2643 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2644 /* Give it the required name. */
2645 va_list_name = build_decl (BUILTINS_LOCATION,
2646 TYPE_DECL,
2647 get_identifier ("__va_list"),
2648 va_list_type);
2649 DECL_ARTIFICIAL (va_list_name) = 1;
2650 TYPE_NAME (va_list_type) = va_list_name;
2651 TYPE_STUB_DECL (va_list_type) = va_list_name;
2652 /* Create the __ap field. */
2653 ap_field = build_decl (BUILTINS_LOCATION,
2654 FIELD_DECL,
2655 get_identifier ("__ap"),
2656 ptr_type_node);
2657 DECL_ARTIFICIAL (ap_field) = 1;
2658 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2659 TYPE_FIELDS (va_list_type) = ap_field;
2660 /* Compute its layout. */
2661 layout_type (va_list_type);
2662
2663 return va_list_type;
2664 }
2665
2666 /* Return an expression of type "void *" pointing to the next
2667 available argument in a variable-argument list. VALIST is the
2668 user-level va_list object, of type __builtin_va_list. */
2669 static tree
2670 arm_extract_valist_ptr (tree valist)
2671 {
2672 if (TREE_TYPE (valist) == error_mark_node)
2673 return error_mark_node;
2674
2675 /* On an AAPCS target, the pointer is stored within "struct
2676 va_list". */
2677 if (TARGET_AAPCS_BASED)
2678 {
2679 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2680 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2681 valist, ap_field, NULL_TREE);
2682 }
2683
2684 return valist;
2685 }
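/* In effect, for a user-level "va_list ap" on an AAPCS target the expression
   built here is equivalent to "ap.__ap", so the generic std_* va_start and
   va_arg helpers below only ever see a plain "void *" pointer.  */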
2686
2687 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2688 static void
2689 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2690 {
2691 valist = arm_extract_valist_ptr (valist);
2692 std_expand_builtin_va_start (valist, nextarg);
2693 }
2694
2695 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2696 static tree
2697 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2698 gimple_seq *post_p)
2699 {
2700 valist = arm_extract_valist_ptr (valist);
2701 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2702 }
2703
2704 /* Check any incompatible options that the user has specified. */
2705 static void
2706 arm_option_check_internal (struct gcc_options *opts)
2707 {
2708 int flags = opts->x_target_flags;
2709
2710 /* Make sure that the processor choice does not conflict with any of the
2711 other command line choices. */
2712 if (TARGET_ARM_P (flags) && !(insn_flags & FL_NOTM))
2713 error ("target CPU does not support ARM mode");
2714
2715 /* TARGET_BACKTRACE calls leaf_function_p, which would crash if called
2716 from here, where no function is currently being compiled. */
2717 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2718 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2719
2720 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2721 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2722
2723 /* If this target is normally configured to use APCS frames, warn if they
2724 are turned off and debugging is turned on. */
2725 if (TARGET_ARM_P (flags)
2726 && write_symbols != NO_DEBUG
2727 && !TARGET_APCS_FRAME
2728 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2729 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2730
2731 /* iWMMXt unsupported under Thumb mode. */
2732 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2733 error ("iWMMXt unsupported under Thumb mode");
2734
2735 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2736 error ("can not use -mtp=cp15 with 16-bit Thumb");
2737
2738 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2739 {
2740 error ("RTP PIC is incompatible with Thumb");
2741 flag_pic = 0;
2742 }
2743
2744 /* We only support -mslow-flash-data on armv7-m targets. */
2745 if (target_slow_flash_data
2746 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2747 || (TARGET_THUMB1_P (flags) || flag_pic || TARGET_NEON)))
2748 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2749 }
2750
2751 /* Set params depending on attributes and optimization options. */
2752 static void
2753 arm_option_params_internal (struct gcc_options *opts)
2754 {
2755 int flags = opts->x_target_flags;
2756
2757 /* If we are not using the default (ARM mode) section anchor offset
2758 ranges, then set the correct ranges now. */
2759 if (TARGET_THUMB1_P (flags))
2760 {
2761 /* Thumb-1 LDR instructions cannot have negative offsets.
2762 Permissible positive offset ranges are 5-bit (for byte loads),
2763 6-bit (for halfword loads), or 7-bit (for word loads).
2764 Empirical results suggest a 7-bit anchor range gives the best
2765 overall code size. */
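/* Concretely (an illustrative reading of the Thumb-1 encodings): the 5-bit
   load immediate reaches byte offsets 0-31 for LDRB, 0-62 for LDRH
   (scaled by 2) and 0-124 for LDR (scaled by 4), hence the 127-byte cap
   below.  */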
2766 targetm.min_anchor_offset = 0;
2767 targetm.max_anchor_offset = 127;
2768 }
2769 else if (TARGET_THUMB2_P (flags))
2770 {
2771 /* The minimum is set such that the total size of the block
2772 for a particular anchor is 248 + 1 + 4095 = 4344 bytes, which is
2773 divisible by eight, ensuring natural spacing of anchors. */
2774 targetm.min_anchor_offset = -248;
2775 targetm.max_anchor_offset = 4095;
2776 }
2777 else
2778 {
2779 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2780 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2781 }
2782
2783 if (optimize_size)
2784 {
2785 /* If optimizing for size, bump the number of instructions that we
2786 are prepared to conditionally execute (even on a StrongARM). */
2787 max_insns_skipped = 6;
2788
2789 /* For THUMB2, we limit the conditional sequence to one IT block. */
2790 if (TARGET_THUMB2_P (flags))
2791 max_insns_skipped = opts->x_arm_restrict_it ? 1 : 4;
2792 }
2793 else
2794 /* When -mrestrict-it is in use, tone down the if-conversion. */
2795 max_insns_skipped
2796 = (TARGET_THUMB2_P (opts->x_target_flags) && opts->x_arm_restrict_it)
2797 ? 1 : current_tune->max_insns_skipped;
2798 }
2799
2800 /* True if -mflip-thumb should next add an attribute for the default
2801 mode, false if it should next add an attribute for the opposite mode. */
2802 static GTY(()) bool thumb_flipper;
2803
2804 /* Options after initial target override. */
2805 static GTY(()) tree init_optimize;
2806
2807 /* Reset options between modes that the user has specified. */
2808 static void
2809 arm_option_override_internal (struct gcc_options *opts,
2810 struct gcc_options *opts_set)
2811 {
2812 if (TARGET_THUMB_P (opts->x_target_flags) && !(insn_flags & FL_THUMB))
2813 {
2814 warning (0, "target CPU does not support THUMB instructions");
2815 opts->x_target_flags &= ~MASK_THUMB;
2816 }
2817
2818 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2819 {
2820 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2821 opts->x_target_flags &= ~MASK_APCS_FRAME;
2822 }
2823
2824 /* Callee super interworking implies thumb interworking. Adding
2825 this to the flags here simplifies the logic elsewhere. */
2826 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2827 opts->x_target_flags |= MASK_INTERWORK;
2828
2829 /* Need to remember the initial values so that combinations of options like
2830 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2831 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2832
2833 if (! opts_set->x_arm_restrict_it)
2834 opts->x_arm_restrict_it = arm_arch8;
2835
2836 if (!TARGET_THUMB2_P (opts->x_target_flags))
2837 opts->x_arm_restrict_it = 0;
2838
2839 /* Don't warn since it's on by default in -O2. */
2840 if (TARGET_THUMB1_P (opts->x_target_flags))
2841 opts->x_flag_schedule_insns = 0;
2842 else
2843 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
2844
2845 /* Disable shrink-wrap when optimizing function for size, since it tends to
2846 generate additional returns. */
2847 if (optimize_function_for_size_p (cfun)
2848 && TARGET_THUMB2_P (opts->x_target_flags))
2849 opts->x_flag_shrink_wrap = false;
2850 else
2851 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
2852
2853 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2854 - epilogue_insns - does not accurately model the corresponding insns
2855 emitted in the asm file. In particular, see the comment in thumb_exit
2856 'Find out how many of the (return) argument registers we can corrupt'.
2857 As a consequence, the epilogue may clobber registers without fipa-ra
2858 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2859 TODO: Accurately model clobbers for epilogue_insns and reenable
2860 fipa-ra. */
2861 if (TARGET_THUMB1_P (opts->x_target_flags))
2862 opts->x_flag_ipa_ra = 0;
2863 else
2864 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
2865
2866 /* Thumb2 inline assembly code should always use unified syntax.
2867 This will apply to ARM and Thumb1 eventually. */
2868 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
2869 }
2870
2871 /* Fix up any incompatible options that the user has specified. */
2872 static void
2873 arm_option_override (void)
2874 {
2875 arm_selected_arch = NULL;
2876 arm_selected_cpu = NULL;
2877 arm_selected_tune = NULL;
2878
2879 if (global_options_set.x_arm_arch_option)
2880 arm_selected_arch = &all_architectures[arm_arch_option];
2881
2882 if (global_options_set.x_arm_cpu_option)
2883 {
2884 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2885 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2886 }
2887
2888 if (global_options_set.x_arm_tune_option)
2889 arm_selected_tune = &all_cores[(int) arm_tune_option];
2890
2891 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2892 SUBTARGET_OVERRIDE_OPTIONS;
2893 #endif
2894
2895 if (arm_selected_arch)
2896 {
2897 if (arm_selected_cpu)
2898 {
2899 /* Check for conflict between mcpu and march. */
2900 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2901 {
2902 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2903 arm_selected_cpu->name, arm_selected_arch->name);
2904 /* -march wins for code generation.
2905 -mcpu wins for default tuning. */
2906 if (!arm_selected_tune)
2907 arm_selected_tune = arm_selected_cpu;
2908
2909 arm_selected_cpu = arm_selected_arch;
2910 }
2911 else
2912 /* -mcpu wins. */
2913 arm_selected_arch = NULL;
2914 }
2915 else
2916 /* Pick a CPU based on the architecture. */
2917 arm_selected_cpu = arm_selected_arch;
2918 }
2919
2920 /* If the user did not specify a processor, choose one for them. */
2921 if (!arm_selected_cpu)
2922 {
2923 const struct processors * sel;
2924 unsigned int sought;
2925
2926 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2927 if (!arm_selected_cpu->name)
2928 {
2929 #ifdef SUBTARGET_CPU_DEFAULT
2930 /* Use the subtarget default CPU if none was specified by
2931 configure. */
2932 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2933 #endif
2934 /* Default to ARM6. */
2935 if (!arm_selected_cpu->name)
2936 arm_selected_cpu = &all_cores[arm6];
2937 }
2938
2939 sel = arm_selected_cpu;
2940 insn_flags = sel->flags;
2941
2942 /* Now check to see if the user has specified some command line
2943 switches that require certain abilities from the CPU. */
2944 sought = 0;
2945
2946 if (TARGET_INTERWORK || TARGET_THUMB)
2947 {
2948 sought |= (FL_THUMB | FL_MODE32);
2949
2950 /* There are no ARM processors that support both APCS-26 and
2951 interworking. Therefore we force FL_MODE26 to be removed
2952 from insn_flags here (if it was set), so that the search
2953 below will always be able to find a compatible processor. */
2954 insn_flags &= ~FL_MODE26;
2955 }
2956
2957 if (sought != 0 && ((sought & insn_flags) != sought))
2958 {
2959 /* Try to locate a CPU type that supports all of the abilities
2960 of the default CPU, plus the extra abilities requested by
2961 the user. */
2962 for (sel = all_cores; sel->name != NULL; sel++)
2963 if ((sel->flags & sought) == (sought | insn_flags))
2964 break;
2965
2966 if (sel->name == NULL)
2967 {
2968 unsigned current_bit_count = 0;
2969 const struct processors * best_fit = NULL;
2970
2971 /* Ideally we would like to issue an error message here
2972 saying that it was not possible to find a CPU compatible
2973 with the default CPU, but which also supports the command
2974 line options specified by the programmer, and so they
2975 ought to use the -mcpu=<name> command line option to
2976 override the default CPU type.
2977
2978 If we cannot find a cpu that has both the
2979 characteristics of the default cpu and the given
2980 command line options we scan the array again looking
2981 for a best match. */
2982 for (sel = all_cores; sel->name != NULL; sel++)
2983 if ((sel->flags & sought) == sought)
2984 {
2985 unsigned count;
2986
2987 count = bit_count (sel->flags & insn_flags);
2988
2989 if (count >= current_bit_count)
2990 {
2991 best_fit = sel;
2992 current_bit_count = count;
2993 }
2994 }
2995
2996 gcc_assert (best_fit);
2997 sel = best_fit;
2998 }
2999
3000 arm_selected_cpu = sel;
3001 }
3002 }
3003
3004 gcc_assert (arm_selected_cpu);
3005 /* The selected cpu may be an architecture, so look up tuning by core ID. */
3006 if (!arm_selected_tune)
3007 arm_selected_tune = &all_cores[arm_selected_cpu->core];
3008
3009 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
3010 insn_flags = arm_selected_cpu->flags;
3011 arm_base_arch = arm_selected_cpu->base_arch;
3012
3013 arm_tune = arm_selected_tune->core;
3014 tune_flags = arm_selected_tune->flags;
3015 current_tune = arm_selected_tune->tune;
3016
3017 /* TBD: Dwarf info for apcs frame is not handled yet. */
3018 if (TARGET_APCS_FRAME)
3019 flag_shrink_wrap = false;
3020
3021 /* BPABI targets use linker tricks to allow interworking on cores
3022 without thumb support. */
3023 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
3024 {
3025 warning (0, "target CPU does not support interworking" );
3026 target_flags &= ~MASK_INTERWORK;
3027 }
3028
3029 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3030 {
3031 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3032 target_flags |= MASK_APCS_FRAME;
3033 }
3034
3035 if (TARGET_POKE_FUNCTION_NAME)
3036 target_flags |= MASK_APCS_FRAME;
3037
3038 if (TARGET_APCS_REENT && flag_pic)
3039 error ("-fpic and -mapcs-reent are incompatible");
3040
3041 if (TARGET_APCS_REENT)
3042 warning (0, "APCS reentrant code not supported. Ignored");
3043
3044 if (TARGET_APCS_FLOAT)
3045 warning (0, "passing floating point arguments in fp regs not yet supported");
3046
3047 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3048 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
3049 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
3050 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
3051 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
3052 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
3053 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
3054 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
3055 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
3056 arm_arch6m = arm_arch6 && !arm_arch_notm;
3057 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
3058 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
3059 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
3060 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
3061 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
3062
3063 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
3064 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
3065 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
3066 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
3067 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
3068 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
3069 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
3070 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
3071 arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
3072 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
3073 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
3074 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
3075
3076 /* V5 code we generate is completely interworking capable, so we turn off
3077 TARGET_INTERWORK here to avoid many tests later on. */
3078
3079 /* XXX However, we must pass the right pre-processor defines to CPP
3080 or GLD can get confused. This is a hack. */
3081 if (TARGET_INTERWORK)
3082 arm_cpp_interwork = 1;
3083
3084 if (arm_arch5)
3085 target_flags &= ~MASK_INTERWORK;
3086
3087 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3088 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3089
3090 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3091 error ("iwmmxt abi requires an iwmmxt capable cpu");
3092
3093 if (!global_options_set.x_arm_fpu_index)
3094 {
3095 const char *target_fpu_name;
3096 bool ok;
3097
3098 #ifdef FPUTYPE_DEFAULT
3099 target_fpu_name = FPUTYPE_DEFAULT;
3100 #else
3101 target_fpu_name = "vfp";
3102 #endif
3103
3104 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
3105 CL_TARGET);
3106 gcc_assert (ok);
3107 }
3108
3109 arm_fpu_desc = &all_fpus[arm_fpu_index];
3110
3111 switch (arm_fpu_desc->model)
3112 {
3113 case ARM_FP_MODEL_VFP:
3114 arm_fpu_attr = FPU_VFP;
3115 break;
3116
3117 default:
3118 gcc_unreachable ();
3119 }
3120
3121 if (TARGET_AAPCS_BASED)
3122 {
3123 if (TARGET_CALLER_INTERWORKING)
3124 error ("AAPCS does not support -mcaller-super-interworking");
3125 else
3126 if (TARGET_CALLEE_INTERWORKING)
3127 error ("AAPCS does not support -mcallee-super-interworking");
3128 }
3129
3130 /* iWMMXt and NEON are incompatible. */
3131 if (TARGET_IWMMXT && TARGET_NEON)
3132 error ("iWMMXt and NEON are incompatible");
3133
3134 /* __fp16 support currently assumes the core has ldrh. */
3135 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3136 sorry ("__fp16 and no ldrh");
3137
3138 /* If soft-float is specified then don't use FPU. */
3139 if (TARGET_SOFT_FLOAT)
3140 arm_fpu_attr = FPU_NONE;
3141
3142 if (TARGET_AAPCS_BASED)
3143 {
3144 if (arm_abi == ARM_ABI_IWMMXT)
3145 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3146 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3147 && TARGET_HARD_FLOAT
3148 && TARGET_VFP)
3149 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3150 else
3151 arm_pcs_default = ARM_PCS_AAPCS;
3152 }
3153 else
3154 {
3155 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
3156 sorry ("-mfloat-abi=hard and VFP");
3157
3158 if (arm_abi == ARM_ABI_APCS)
3159 arm_pcs_default = ARM_PCS_APCS;
3160 else
3161 arm_pcs_default = ARM_PCS_ATPCS;
3162 }
3163
3164 /* For arm2/3 there is no need to do any scheduling if we are doing
3165 software floating-point. */
3166 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
3167 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3168
3169 /* Use the cp15 method if it is available. */
3170 if (target_thread_pointer == TP_AUTO)
3171 {
3172 if (arm_arch6k && !TARGET_THUMB1)
3173 target_thread_pointer = TP_CP15;
3174 else
3175 target_thread_pointer = TP_SOFT;
3176 }
3177
3178 /* Override the default structure alignment for AAPCS ABI. */
3179 if (!global_options_set.x_arm_structure_size_boundary)
3180 {
3181 if (TARGET_AAPCS_BASED)
3182 arm_structure_size_boundary = 8;
3183 }
3184 else
3185 {
3186 if (arm_structure_size_boundary != 8
3187 && arm_structure_size_boundary != 32
3188 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3189 {
3190 if (ARM_DOUBLEWORD_ALIGN)
3191 warning (0,
3192 "structure size boundary can only be set to 8, 32 or 64");
3193 else
3194 warning (0, "structure size boundary can only be set to 8 or 32");
3195 arm_structure_size_boundary
3196 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3197 }
3198 }
3199
3200 /* If stack checking is disabled, we can use r10 as the PIC register,
3201 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3202 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3203 {
3204 if (TARGET_VXWORKS_RTP)
3205 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3206 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3207 }
3208
3209 if (flag_pic && TARGET_VXWORKS_RTP)
3210 arm_pic_register = 9;
3211
3212 if (arm_pic_register_string != NULL)
3213 {
3214 int pic_register = decode_reg_name (arm_pic_register_string);
3215
3216 if (!flag_pic)
3217 warning (0, "-mpic-register= is useless without -fpic");
3218
3219 /* Prevent the user from choosing an obviously stupid PIC register. */
3220 else if (pic_register < 0 || call_used_regs[pic_register]
3221 || pic_register == HARD_FRAME_POINTER_REGNUM
3222 || pic_register == STACK_POINTER_REGNUM
3223 || pic_register >= PC_REGNUM
3224 || (TARGET_VXWORKS_RTP
3225 && (unsigned int) pic_register != arm_pic_register))
3226 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3227 else
3228 arm_pic_register = pic_register;
3229 }
3230
3231 if (TARGET_VXWORKS_RTP
3232 && !global_options_set.x_arm_pic_data_is_text_relative)
3233 arm_pic_data_is_text_relative = 0;
3234
3235 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3236 if (fix_cm3_ldrd == 2)
3237 {
3238 if (arm_selected_cpu->core == cortexm3)
3239 fix_cm3_ldrd = 1;
3240 else
3241 fix_cm3_ldrd = 0;
3242 }
3243
3244 /* Enable -munaligned-access by default for
3245 - all ARMv6 architecture-based processors
3246 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3247 - ARMv8 architecture-based processors.
3248
3249 Disable -munaligned-access by default for
3250 - all pre-ARMv6 architecture-based processors
3251 - ARMv6-M architecture-based processors. */
3252
3253 if (unaligned_access == 2)
3254 {
3255 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3256 unaligned_access = 1;
3257 else
3258 unaligned_access = 0;
3259 }
3260 else if (unaligned_access == 1
3261 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3262 {
3263 warning (0, "target CPU does not support unaligned accesses");
3264 unaligned_access = 0;
3265 }
3266
3267 /* Hot/Cold partitioning is not currently supported, since we can't
3268 handle literal pool placement in that case. */
3269 if (flag_reorder_blocks_and_partition)
3270 {
3271 inform (input_location,
3272 "-freorder-blocks-and-partition not supported on this architecture");
3273 flag_reorder_blocks_and_partition = 0;
3274 flag_reorder_blocks = 1;
3275 }
3276
3277 if (flag_pic)
3278 /* Hoisting PIC address calculations more aggressively provides a small,
3279 but measurable, size reduction for PIC code. Therefore, we decrease
3280 the bar for unrestricted expression hoisting to the cost of PIC address
3281 calculation, which is 2 instructions. */
3282 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3283 global_options.x_param_values,
3284 global_options_set.x_param_values);
3285
3286 /* ARM EABI defaults to strict volatile bitfields. */
3287 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3288 && abi_version_at_least(2))
3289 flag_strict_volatile_bitfields = 1;
3290
3291 /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3292 where we have deemed it beneficial (signified by setting
3293 prefetch.num_slots to 1 or more). */
3294 if (flag_prefetch_loop_arrays < 0
3295 && HAVE_prefetch
3296 && optimize >= 3
3297 && current_tune->prefetch.num_slots > 0)
3298 flag_prefetch_loop_arrays = 1;
3299
3300 /* Set up parameters to be used in prefetching algorithm. Do not
3301 override the defaults unless we are tuning for a core we have
3302 researched values for. */
3303 if (current_tune->prefetch.num_slots > 0)
3304 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3305 current_tune->prefetch.num_slots,
3306 global_options.x_param_values,
3307 global_options_set.x_param_values);
3308 if (current_tune->prefetch.l1_cache_line_size >= 0)
3309 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3310 current_tune->prefetch.l1_cache_line_size,
3311 global_options.x_param_values,
3312 global_options_set.x_param_values);
3313 if (current_tune->prefetch.l1_cache_size >= 0)
3314 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3315 current_tune->prefetch.l1_cache_size,
3316 global_options.x_param_values,
3317 global_options_set.x_param_values);
3318
3319 /* Prefer Neon over core registers for performing 64-bit
3320 operations. */
3321 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3322 if (use_neon_for_64bits == 1)
3323 prefer_neon_for_64bits = true;
3324
3325 /* Use the alternative scheduling-pressure algorithm by default. */
3326 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3327 global_options.x_param_values,
3328 global_options_set.x_param_values);
3329
3330 /* Look through ready list and all of queue for instructions
3331 relevant for L2 auto-prefetcher. */
3332 int param_sched_autopref_queue_depth;
3333
3334 switch (current_tune->sched_autopref)
3335 {
3336 case tune_params::SCHED_AUTOPREF_OFF:
3337 param_sched_autopref_queue_depth = -1;
3338 break;
3339
3340 case tune_params::SCHED_AUTOPREF_RANK:
3341 param_sched_autopref_queue_depth = 0;
3342 break;
3343
3344 case tune_params::SCHED_AUTOPREF_FULL:
3345 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3346 break;
3347
3348 default:
3349 gcc_unreachable ();
3350 }
3351
3352 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3353 param_sched_autopref_queue_depth,
3354 global_options.x_param_values,
3355 global_options_set.x_param_values);
3356
3357 /* Currently, for slow flash data, we just disable literal pools. */
3358 if (target_slow_flash_data)
3359 arm_disable_literal_pool = true;
3360
3361 /* Disable scheduling fusion by default unless the target is an ARMv7
3362 processor that prefers ldrd/strd. */
3363 if (flag_schedule_fusion == 2
3364 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3365 flag_schedule_fusion = 0;
3366
3367 /* Need to remember the initial options before they are overridden. */
3368 init_optimize = build_optimization_node (&global_options);
3369
3370 arm_option_override_internal (&global_options, &global_options_set);
3371 arm_option_check_internal (&global_options);
3372 arm_option_params_internal (&global_options);
3373
3374 /* Register global variables with the garbage collector. */
3375 arm_add_gc_roots ();
3376
3377 /* Save the initial options in case the user does function specific
3378 options. */
3379 target_option_default_node = target_option_current_node
3380 = build_target_option_node (&global_options);
3381
3382 /* Init initial mode for testing. */
3383 thumb_flipper = TARGET_THUMB;
3384 }
3385
3386 static void
3387 arm_add_gc_roots (void)
3388 {
3389 gcc_obstack_init(&minipool_obstack);
3390 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3391 }
3392 \f
3393 /* A table of known ARM exception types.
3394 For use with the interrupt function attribute. */
3395
3396 typedef struct
3397 {
3398 const char *const arg;
3399 const unsigned long return_value;
3400 }
3401 isr_attribute_arg;
3402
3403 static const isr_attribute_arg isr_attribute_args [] =
3404 {
3405 { "IRQ", ARM_FT_ISR },
3406 { "irq", ARM_FT_ISR },
3407 { "FIQ", ARM_FT_FIQ },
3408 { "fiq", ARM_FT_FIQ },
3409 { "ABORT", ARM_FT_ISR },
3410 { "abort", ARM_FT_ISR },
3411 { "ABORT", ARM_FT_ISR },
3412 { "abort", ARM_FT_ISR },
3413 { "UNDEF", ARM_FT_EXCEPTION },
3414 { "undef", ARM_FT_EXCEPTION },
3415 { "SWI", ARM_FT_EXCEPTION },
3416 { "swi", ARM_FT_EXCEPTION },
3417 { NULL, ARM_FT_NORMAL }
3418 };
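/* Illustrative example (the handler name here is invented): a user
   selects one of the entries above through the "interrupt" (or "isr")
   function attribute, e.g.

     void __attribute__ ((interrupt ("IRQ"))) my_irq_handler (void);

   which arm_isr_value below maps to ARM_FT_ISR.  Giving the attribute
   no argument also defaults to ARM_FT_ISR.  */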
3419
3420 /* Returns the (interrupt) function type of the current
3421 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3422
3423 static unsigned long
3424 arm_isr_value (tree argument)
3425 {
3426 const isr_attribute_arg * ptr;
3427 const char * arg;
3428
3429 if (!arm_arch_notm)
3430 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3431
3432 /* No argument - default to IRQ. */
3433 if (argument == NULL_TREE)
3434 return ARM_FT_ISR;
3435
3436 /* Get the value of the argument. */
3437 if (TREE_VALUE (argument) == NULL_TREE
3438 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3439 return ARM_FT_UNKNOWN;
3440
3441 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3442
3443 /* Check it against the list of known arguments. */
3444 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3445 if (streq (arg, ptr->arg))
3446 return ptr->return_value;
3447
3448 /* An unrecognized interrupt type. */
3449 return ARM_FT_UNKNOWN;
3450 }
3451
3452 /* Computes the type of the current function. */
3453
3454 static unsigned long
3455 arm_compute_func_type (void)
3456 {
3457 unsigned long type = ARM_FT_UNKNOWN;
3458 tree a;
3459 tree attr;
3460
3461 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3462
3463 /* Decide if the current function is volatile. Such functions
3464 never return, and many memory cycles can be saved by not storing
3465 register values that will never be needed again. This optimization
3466 was added to speed up context switching in a kernel application. */
3467 if (optimize > 0
3468 && (TREE_NOTHROW (current_function_decl)
3469 || !(flag_unwind_tables
3470 || (flag_exceptions
3471 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3472 && TREE_THIS_VOLATILE (current_function_decl))
3473 type |= ARM_FT_VOLATILE;
3474
3475 if (cfun->static_chain_decl != NULL)
3476 type |= ARM_FT_NESTED;
3477
3478 attr = DECL_ATTRIBUTES (current_function_decl);
3479
3480 a = lookup_attribute ("naked", attr);
3481 if (a != NULL_TREE)
3482 type |= ARM_FT_NAKED;
3483
3484 a = lookup_attribute ("isr", attr);
3485 if (a == NULL_TREE)
3486 a = lookup_attribute ("interrupt", attr);
3487
3488 if (a == NULL_TREE)
3489 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3490 else
3491 type |= arm_isr_value (TREE_VALUE (a));
3492
3493 return type;
3494 }
3495
3496 /* Returns the type of the current function. */
3497
3498 unsigned long
3499 arm_current_func_type (void)
3500 {
3501 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3502 cfun->machine->func_type = arm_compute_func_type ();
3503
3504 return cfun->machine->func_type;
3505 }
3506
3507 bool
3508 arm_allocate_stack_slots_for_args (void)
3509 {
3510 /* Naked functions should not allocate stack slots for arguments. */
3511 return !IS_NAKED (arm_current_func_type ());
3512 }
3513
3514 static bool
3515 arm_warn_func_return (tree decl)
3516 {
3517 /* Naked functions are implemented entirely in assembly, including the
3518 return sequence, so suppress warnings about this. */
3519 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3520 }
3521
3522 \f
3523 /* Output assembler code for a block containing the constant parts
3524 of a trampoline, leaving space for the variable parts.
3525
3526 On the ARM, (if r8 is the static chain regnum, and remembering that
3527 referencing pc adds an offset of 8) the trampoline looks like:
3528 ldr r8, [pc, #0]
3529 ldr pc, [pc]
3530 .word static chain value
3531 .word function's address
3532 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
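/* Illustrative layout of the resulting 16-byte ARM-mode trampoline
   (a sketch, assuming r8 really is STATIC_CHAIN_REGNUM as above):

     offset 0:  ldr r8, [pc, #0]   ; fetches the word at offset 8
     offset 4:  ldr pc, [pc]       ; fetches the word at offset 12
     offset 8:  .word <static chain value>
     offset 12: .word <function address>

   The template below emits the two instructions followed by two zero
   words; arm_trampoline_init later fills in offsets 8 and 12 (12 and
   16 for the Thumb-1 variant).  */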
3533
3534 static void
3535 arm_asm_trampoline_template (FILE *f)
3536 {
3537 if (TARGET_UNIFIED_ASM)
3538 fprintf (f, "\t.syntax unified\n");
3539 else
3540 fprintf (f, "\t.syntax divided\n");
3541
3542 if (TARGET_ARM)
3543 {
3544 fprintf (f, "\t.arm\n");
3545 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3546 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3547 }
3548 else if (TARGET_THUMB2)
3549 {
3550 fprintf (f, "\t.thumb\n");
3551 /* The Thumb-2 trampoline is similar to the ARM implementation.
3552 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3553 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3554 STATIC_CHAIN_REGNUM, PC_REGNUM);
3555 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3556 }
3557 else
3558 {
3559 ASM_OUTPUT_ALIGN (f, 2);
3560 fprintf (f, "\t.code\t16\n");
3561 fprintf (f, ".Ltrampoline_start:\n");
3562 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3563 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3564 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3565 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3566 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3567 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3568 }
3569 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3570 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3571 }
3572
3573 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3574
3575 static void
3576 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3577 {
3578 rtx fnaddr, mem, a_tramp;
3579
3580 emit_block_move (m_tramp, assemble_trampoline_template (),
3581 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3582
3583 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3584 emit_move_insn (mem, chain_value);
3585
3586 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3587 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3588 emit_move_insn (mem, fnaddr);
3589
3590 a_tramp = XEXP (m_tramp, 0);
3591 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3592 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3593 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3594 }
3595
3596 /* Thumb trampolines should be entered in thumb mode, so set
3597 the bottom bit of the address. */
3598
3599 static rtx
3600 arm_trampoline_adjust_address (rtx addr)
3601 {
3602 if (TARGET_THUMB)
3603 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3604 NULL, 0, OPTAB_LIB_WIDEN);
3605 return addr;
3606 }
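/* For example (illustrative): if the trampoline lives at 0x20001000,
   the address adjusted above becomes 0x20001001 on Thumb targets, so
   an indirect bx/blx through it enters the stub in Thumb state.  */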
3607 \f
3608 /* Return 1 if it is possible to return using a single instruction.
3609 If SIBLING is non-null, this is a test for a return before a sibling
3610 call. SIBLING is the call insn, so we can examine its register usage. */
3611
3612 int
3613 use_return_insn (int iscond, rtx sibling)
3614 {
3615 int regno;
3616 unsigned int func_type;
3617 unsigned long saved_int_regs;
3618 unsigned HOST_WIDE_INT stack_adjust;
3619 arm_stack_offsets *offsets;
3620
3621 /* Never use a return instruction before reload has run. */
3622 if (!reload_completed)
3623 return 0;
3624
3625 func_type = arm_current_func_type ();
3626
3627 /* Naked, volatile and stack alignment functions need special
3628 consideration. */
3629 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3630 return 0;
3631
3632 /* So do interrupt functions that use the frame pointer and Thumb
3633 interrupt functions. */
3634 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3635 return 0;
3636
3637 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3638 && !optimize_function_for_size_p (cfun))
3639 return 0;
3640
3641 offsets = arm_get_frame_offsets ();
3642 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3643
3644 /* As do variadic functions. */
3645 if (crtl->args.pretend_args_size
3646 || cfun->machine->uses_anonymous_args
3647 /* Or if the function calls __builtin_eh_return () */
3648 || crtl->calls_eh_return
3649 /* Or if the function calls alloca */
3650 || cfun->calls_alloca
3651 /* Or if there is a stack adjustment. However, if the stack pointer
3652 is saved on the stack, we can use a pre-incrementing stack load. */
3653 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3654 && stack_adjust == 4)))
3655 return 0;
3656
3657 saved_int_regs = offsets->saved_regs_mask;
3658
3659 /* Unfortunately, the insn
3660
3661 ldmib sp, {..., sp, ...}
3662
3663 triggers a bug on most SA-110 based devices, such that the stack
3664 pointer won't be correctly restored if the instruction takes a
3665 page fault. We work around this problem by popping r3 along with
3666 the other registers, since that is never slower than executing
3667 another instruction.
3668
3669 We test for !arm_arch5 here, because code for any architecture
3670 less than this could potentially be run on one of the buggy
3671 chips. */
3672 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3673 {
3674 /* Validate that r3 is a call-clobbered register (always true in
3675 the default abi) ... */
3676 if (!call_used_regs[3])
3677 return 0;
3678
3679 /* ... that it isn't being used for a return value ... */
3680 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3681 return 0;
3682
3683 /* ... or for a tail-call argument ... */
3684 if (sibling)
3685 {
3686 gcc_assert (CALL_P (sibling));
3687
3688 if (find_regno_fusage (sibling, USE, 3))
3689 return 0;
3690 }
3691
3692 /* ... and that there are no call-saved registers in r0-r2
3693 (always true in the default ABI). */
3694 if (saved_int_regs & 0x7)
3695 return 0;
3696 }
3697
3698 /* Can't be done if interworking with Thumb, and any registers have been
3699 stacked. */
3700 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3701 return 0;
3702
3703 /* On StrongARM, conditional returns are expensive if they aren't
3704 taken and multiple registers have been stacked. */
3705 if (iscond && arm_tune_strongarm)
3706 {
3707 /* Conditional return when just the LR is stored is a simple
3708 conditional-load instruction, that's not expensive. */
3709 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3710 return 0;
3711
3712 if (flag_pic
3713 && arm_pic_register != INVALID_REGNUM
3714 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3715 return 0;
3716 }
3717
3718 /* If there are saved registers but the LR isn't saved, then we need
3719 two instructions for the return. */
3720 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3721 return 0;
3722
3723 /* Can't be done if any of the VFP regs are pushed,
3724 since this also requires an insn. */
3725 if (TARGET_HARD_FLOAT && TARGET_VFP)
3726 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3727 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3728 return 0;
3729
3730 if (TARGET_REALLY_IWMMXT)
3731 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3732 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3733 return 0;
3734
3735 return 1;
3736 }
3737
3738 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3739 shrink-wrapping if possible. This is the case if we need to emit a
3740 prologue, which we can test by looking at the offsets. */
3741 bool
3742 use_simple_return_p (void)
3743 {
3744 arm_stack_offsets *offsets;
3745
3746 offsets = arm_get_frame_offsets ();
3747 return offsets->outgoing_args != 0;
3748 }
3749
3750 /* Return TRUE if int I is a valid immediate ARM constant. */
3751
3752 int
3753 const_ok_for_arm (HOST_WIDE_INT i)
3754 {
3755 int lowbit;
3756
3757 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3758 be all zero, or all one. */
3759 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3760 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3761 != ((~(unsigned HOST_WIDE_INT) 0)
3762 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3763 return FALSE;
3764
3765 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3766
3767 /* Fast return for 0 and small values. We must do this for zero, since
3768 the code below can't handle that one case. */
3769 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3770 return TRUE;
3771
3772 /* Get the number of trailing zeros. */
3773 lowbit = ffs((int) i) - 1;
3774
3775 /* Only even shifts are allowed in ARM mode so round down to the
3776 nearest even number. */
3777 if (TARGET_ARM)
3778 lowbit &= ~1;
3779
3780 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3781 return TRUE;
3782
3783 if (TARGET_ARM)
3784 {
3785 /* Allow rotated constants in ARM mode. */
3786 if (lowbit <= 4
3787 && ((i & ~0xc000003f) == 0
3788 || (i & ~0xf000000f) == 0
3789 || (i & ~0xfc000003) == 0))
3790 return TRUE;
3791 }
3792 else
3793 {
3794 HOST_WIDE_INT v;
3795
3796 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3797 v = i & 0xff;
3798 v |= v << 16;
3799 if (i == v || i == (v | (v << 8)))
3800 return TRUE;
3801
3802 /* Allow repeated pattern 0xXY00XY00. */
3803 v = i & 0xff00;
3804 v |= v << 16;
3805 if (i == v)
3806 return TRUE;
3807 }
3808
3809 return FALSE;
3810 }
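/* Worked examples for const_ok_for_arm (illustrative, not exhaustive):
   0x000000ff and 0x00ff0000 are valid everywhere (an 8-bit value at an
   even rotation); 0xf000000f is valid only in ARM mode (0xff rotated
   right by 4); 0x0001fe00 is valid only in Thumb-2 mode (an 8-bit
   value at an odd shift); 0x00ff00ff is valid only in Thumb-2 mode
   (a replicated byte pattern).  */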
3811
3812 /* Return true if I is a valid constant for the operation CODE. */
3813 int
3814 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3815 {
3816 if (const_ok_for_arm (i))
3817 return 1;
3818
3819 switch (code)
3820 {
3821 case SET:
3822 /* See if we can use movw. */
3823 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3824 return 1;
3825 else
3826 /* Otherwise, try mvn. */
3827 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3828
3829 case PLUS:
3830 /* See if we can use addw or subw. */
3831 if (TARGET_THUMB2
3832 && ((i & 0xfffff000) == 0
3833 || ((-i) & 0xfffff000) == 0))
3834 return 1;
3835 /* else fall through. */
3836
3837 case COMPARE:
3838 case EQ:
3839 case NE:
3840 case GT:
3841 case LE:
3842 case LT:
3843 case GE:
3844 case GEU:
3845 case LTU:
3846 case GTU:
3847 case LEU:
3848 case UNORDERED:
3849 case ORDERED:
3850 case UNEQ:
3851 case UNGE:
3852 case UNLT:
3853 case UNGT:
3854 case UNLE:
3855 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3856
3857 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3858 case XOR:
3859 return 0;
3860
3861 case IOR:
3862 if (TARGET_THUMB2)
3863 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3864 return 0;
3865
3866 case AND:
3867 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3868
3869 default:
3870 gcc_unreachable ();
3871 }
3872 }
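/* Illustrative examples for const_ok_for_op: (SET, 0xffffff00) is
   accepted because ~0xffffff00 == 0xff is a valid immediate, so the
   value can be loaded with a single mvn; on cores with Thumb-2,
   (SET, 0x1234) is accepted because movw takes any 16-bit constant,
   and (PLUS, 0xabc) is accepted because addw/subw take a 12-bit
   immediate.  */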
3873
3874 /* Return true if I is a valid DImode constant for the operation CODE. */
3875 int
3876 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3877 {
3878 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3879 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3880 rtx hi = GEN_INT (hi_val);
3881 rtx lo = GEN_INT (lo_val);
3882
3883 if (TARGET_THUMB1)
3884 return 0;
3885
3886 switch (code)
3887 {
3888 case AND:
3889 case IOR:
3890 case XOR:
3891 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3892 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3893 case PLUS:
3894 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3895
3896 default:
3897 return 0;
3898 }
3899 }
3900
3901 /* Emit a sequence of insns to handle a large constant.
3902 CODE is the code of the operation required, it can be any of SET, PLUS,
3903 IOR, AND, XOR, MINUS;
3904 MODE is the mode in which the operation is being performed;
3905 VAL is the integer to operate on;
3906 SOURCE is the other operand (a register, or a null-pointer for SET);
3907 SUBTARGETS means it is safe to create scratch registers if that will
3908 either produce a simpler sequence, or we will want to cse the values.
3909 Return value is the number of insns emitted. */
3910
3911 /* ??? Tweak this for thumb2. */
3912 int
3913 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3914 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3915 {
3916 rtx cond;
3917
3918 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3919 cond = COND_EXEC_TEST (PATTERN (insn));
3920 else
3921 cond = NULL_RTX;
3922
3923 if (subtargets || code == SET
3924 || (REG_P (target) && REG_P (source)
3925 && REGNO (target) != REGNO (source)))
3926 {
3927 /* After arm_reorg has been called, we can't fix up expensive
3928 constants by pushing them into memory so we must synthesize
3929 them in-line, regardless of the cost. This is only likely to
3930 be more costly on chips that have load delay slots and we are
3931 compiling without running the scheduler (so no splitting
3932 occurred before the final instruction emission).
3933
3934 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3935 */
3936 if (!cfun->machine->after_arm_reorg
3937 && !cond
3938 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3939 1, 0)
3940 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3941 + (code != SET))))
3942 {
3943 if (code == SET)
3944 {
3945 /* Currently SET is the only monadic value for CODE; all
3946 the rest are dyadic. */
3947 if (TARGET_USE_MOVT)
3948 arm_emit_movpair (target, GEN_INT (val));
3949 else
3950 emit_set_insn (target, GEN_INT (val));
3951
3952 return 1;
3953 }
3954 else
3955 {
3956 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3957
3958 if (TARGET_USE_MOVT)
3959 arm_emit_movpair (temp, GEN_INT (val));
3960 else
3961 emit_set_insn (temp, GEN_INT (val));
3962
3963 /* For MINUS, the value is subtracted from, since we never
3964 have subtraction of a constant. */
3965 if (code == MINUS)
3966 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3967 else
3968 emit_set_insn (target,
3969 gen_rtx_fmt_ee (code, mode, source, temp));
3970 return 2;
3971 }
3972 }
3973 }
3974
3975 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3976 1);
3977 }
3978
3979 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3980 ARM/Thumb-2 immediates and add up to VAL.
3981 The function return value gives the number of insns required. */
3982 static int
3983 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3984 struct four_ints *return_sequence)
3985 {
3986 int best_consecutive_zeros = 0;
3987 int i;
3988 int best_start = 0;
3989 int insns1, insns2;
3990 struct four_ints tmp_sequence;
3991
3992 /* If we aren't targeting ARM, the best place to start is always at
3993 the bottom, otherwise look more closely. */
3994 if (TARGET_ARM)
3995 {
3996 for (i = 0; i < 32; i += 2)
3997 {
3998 int consecutive_zeros = 0;
3999
4000 if (!(val & (3 << i)))
4001 {
4002 while ((i < 32) && !(val & (3 << i)))
4003 {
4004 consecutive_zeros += 2;
4005 i += 2;
4006 }
4007 if (consecutive_zeros > best_consecutive_zeros)
4008 {
4009 best_consecutive_zeros = consecutive_zeros;
4010 best_start = i - consecutive_zeros;
4011 }
4012 i -= 2;
4013 }
4014 }
4015 }
4016
4017 /* So long as it won't require any more insns to do so, it's
4018 desirable to emit a small constant (in bits 0...9) in the last
4019 insn. This way there is more chance that it can be combined with
4020 a later addressing insn to form a pre-indexed load or store
4021 operation. Consider:
4022
4023 *((volatile int *)0xe0000100) = 1;
4024 *((volatile int *)0xe0000110) = 2;
4025
4026 We want this to wind up as:
4027
4028 mov rA, #0xe0000000
4029 mov rB, #1
4030 str rB, [rA, #0x100]
4031 mov rB, #2
4032 str rB, [rA, #0x110]
4033
4034 rather than having to synthesize both large constants from scratch.
4035
4036 Therefore, we calculate how many insns would be required to emit
4037 the constant starting from `best_start', and also starting from
4038 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4039 yield a shorter sequence, we may as well use zero. */
4040 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4041 if (best_start != 0
4042 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
4043 {
4044 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4045 if (insns2 <= insns1)
4046 {
4047 *return_sequence = tmp_sequence;
4048 insns1 = insns2;
4049 }
4050 }
4051
4052 return insns1;
4053 }
4054
4055 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4056 static int
4057 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4058 struct four_ints *return_sequence, int i)
4059 {
4060 int remainder = val & 0xffffffff;
4061 int insns = 0;
4062
4063 /* Try and find a way of doing the job in either two or three
4064 instructions.
4065
4066 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4067 location. We start at position I. This may be the MSB, or
4068 optimal_immediate_sequence may have positioned it at the largest block
4069 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4070 wrapping around to the top of the word when we drop off the bottom.
4071 In the worst case this code should produce no more than four insns.
4072
4073 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4074 constants, shifted to any arbitrary location. We should always start
4075 at the MSB. */
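/* Worked example (illustrative): with code == SET, val == 0x00ffff00
   and I == 0 in ARM mode, the loop below produces the two rotated
   immediates 0x00ff0000 and 0x0000ff00, which arm_gen_constant then
   emits as a mov followed by an add of the second immediate.  */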
4076 do
4077 {
4078 int end;
4079 unsigned int b1, b2, b3, b4;
4080 unsigned HOST_WIDE_INT result;
4081 int loc;
4082
4083 gcc_assert (insns < 4);
4084
4085 if (i <= 0)
4086 i += 32;
4087
4088 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4089 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4090 {
4091 loc = i;
4092 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4093 /* We can use addw/subw for the last 12 bits. */
4094 result = remainder;
4095 else
4096 {
4097 /* Use an 8-bit shifted/rotated immediate. */
4098 end = i - 8;
4099 if (end < 0)
4100 end += 32;
4101 result = remainder & ((0x0ff << end)
4102 | ((i < end) ? (0xff >> (32 - end))
4103 : 0));
4104 i -= 8;
4105 }
4106 }
4107 else
4108 {
4109 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4110 arbitrary shifts. */
4111 i -= TARGET_ARM ? 2 : 1;
4112 continue;
4113 }
4114
4115 /* Next, see if we can do a better job with a thumb2 replicated
4116 constant.
4117
4118 We do it this way around to catch the cases like 0x01F001E0 where
4119 two 8-bit immediates would work, but a replicated constant would
4120 make it worse.
4121
4122 TODO: 16-bit constants that don't clear all the bits, but still win.
4123 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4124 if (TARGET_THUMB2)
4125 {
4126 b1 = (remainder & 0xff000000) >> 24;
4127 b2 = (remainder & 0x00ff0000) >> 16;
4128 b3 = (remainder & 0x0000ff00) >> 8;
4129 b4 = remainder & 0xff;
4130
4131 if (loc > 24)
4132 {
4133 /* The 8-bit immediate already found clears b1 (and maybe b2),
4134 but must leave b3 and b4 alone. */
4135
4136 /* First try to find a 32-bit replicated constant that clears
4137 almost everything. We can assume that we can't do it in one,
4138 or else we wouldn't be here. */
4139 unsigned int tmp = b1 & b2 & b3 & b4;
4140 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4141 + (tmp << 24);
4142 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4143 + (tmp == b3) + (tmp == b4);
4144 if (tmp
4145 && (matching_bytes >= 3
4146 || (matching_bytes == 2
4147 && const_ok_for_op (remainder & ~tmp2, code))))
4148 {
4149 /* At least 3 of the bytes match, and the fourth has at
4150 least as many bits set, or two of the bytes match
4151 and it will only require one more insn to finish. */
4152 result = tmp2;
4153 i = tmp != b1 ? 32
4154 : tmp != b2 ? 24
4155 : tmp != b3 ? 16
4156 : 8;
4157 }
4158
4159 /* Second, try to find a 16-bit replicated constant that can
4160 leave three of the bytes clear. If b2 or b4 is already
4161 zero, then we can. If the 8-bit from above would not
4162 clear b2 anyway, then we still win. */
4163 else if (b1 == b3 && (!b2 || !b4
4164 || (remainder & 0x00ff0000 & ~result)))
4165 {
4166 result = remainder & 0xff00ff00;
4167 i = 24;
4168 }
4169 }
4170 else if (loc > 16)
4171 {
4172 /* The 8-bit immediate already found clears b2 (and maybe b3)
4173 and we don't get here unless b1 is already clear, but it will
4174 leave b4 unchanged. */
4175
4176 /* If we can clear b2 and b4 at once, then we win, since the
4177 8-bits couldn't possibly reach that far. */
4178 if (b2 == b4)
4179 {
4180 result = remainder & 0x00ff00ff;
4181 i = 16;
4182 }
4183 }
4184 }
4185
4186 return_sequence->i[insns++] = result;
4187 remainder &= ~result;
4188
4189 if (code == SET || code == MINUS)
4190 code = PLUS;
4191 }
4192 while (remainder);
4193
4194 return insns;
4195 }
4196
4197 /* Emit an instruction with the indicated PATTERN. If COND is
4198 non-NULL, conditionalize the execution of the instruction on COND
4199 being true. */
4200
4201 static void
4202 emit_constant_insn (rtx cond, rtx pattern)
4203 {
4204 if (cond)
4205 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4206 emit_insn (pattern);
4207 }
4208
4209 /* As above, but extra parameter GENERATE which, if clear, suppresses
4210 RTL generation. */
4211
4212 static int
4213 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4214 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
4215 int generate)
4216 {
4217 int can_invert = 0;
4218 int can_negate = 0;
4219 int final_invert = 0;
4220 int i;
4221 int set_sign_bit_copies = 0;
4222 int clear_sign_bit_copies = 0;
4223 int clear_zero_bit_copies = 0;
4224 int set_zero_bit_copies = 0;
4225 int insns = 0, neg_insns, inv_insns;
4226 unsigned HOST_WIDE_INT temp1, temp2;
4227 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4228 struct four_ints *immediates;
4229 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4230
4231 /* Find out which operations are safe for a given CODE. Also do a quick
4232 check for degenerate cases; these can occur when DImode operations
4233 are split. */
4234 switch (code)
4235 {
4236 case SET:
4237 can_invert = 1;
4238 break;
4239
4240 case PLUS:
4241 can_negate = 1;
4242 break;
4243
4244 case IOR:
4245 if (remainder == 0xffffffff)
4246 {
4247 if (generate)
4248 emit_constant_insn (cond,
4249 gen_rtx_SET (target,
4250 GEN_INT (ARM_SIGN_EXTEND (val))));
4251 return 1;
4252 }
4253
4254 if (remainder == 0)
4255 {
4256 if (reload_completed && rtx_equal_p (target, source))
4257 return 0;
4258
4259 if (generate)
4260 emit_constant_insn (cond, gen_rtx_SET (target, source));
4261 return 1;
4262 }
4263 break;
4264
4265 case AND:
4266 if (remainder == 0)
4267 {
4268 if (generate)
4269 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4270 return 1;
4271 }
4272 if (remainder == 0xffffffff)
4273 {
4274 if (reload_completed && rtx_equal_p (target, source))
4275 return 0;
4276 if (generate)
4277 emit_constant_insn (cond, gen_rtx_SET (target, source));
4278 return 1;
4279 }
4280 can_invert = 1;
4281 break;
4282
4283 case XOR:
4284 if (remainder == 0)
4285 {
4286 if (reload_completed && rtx_equal_p (target, source))
4287 return 0;
4288 if (generate)
4289 emit_constant_insn (cond, gen_rtx_SET (target, source));
4290 return 1;
4291 }
4292
4293 if (remainder == 0xffffffff)
4294 {
4295 if (generate)
4296 emit_constant_insn (cond,
4297 gen_rtx_SET (target,
4298 gen_rtx_NOT (mode, source)));
4299 return 1;
4300 }
4301 final_invert = 1;
4302 break;
4303
4304 case MINUS:
4305 /* We treat MINUS as (val - source), since (source - val) is always
4306 passed as (source + (-val)). */
4307 if (remainder == 0)
4308 {
4309 if (generate)
4310 emit_constant_insn (cond,
4311 gen_rtx_SET (target,
4312 gen_rtx_NEG (mode, source)));
4313 return 1;
4314 }
4315 if (const_ok_for_arm (val))
4316 {
4317 if (generate)
4318 emit_constant_insn (cond,
4319 gen_rtx_SET (target,
4320 gen_rtx_MINUS (mode, GEN_INT (val),
4321 source)));
4322 return 1;
4323 }
4324
4325 break;
4326
4327 default:
4328 gcc_unreachable ();
4329 }
4330
4331 /* If we can do it in one insn get out quickly. */
4332 if (const_ok_for_op (val, code))
4333 {
4334 if (generate)
4335 emit_constant_insn (cond,
4336 gen_rtx_SET (target,
4337 (source
4338 ? gen_rtx_fmt_ee (code, mode, source,
4339 GEN_INT (val))
4340 : GEN_INT (val))));
4341 return 1;
4342 }
4343
4344 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4345 insn. */
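/* For example (illustrative): remainder == 0xffff gives i == 16 and a
   single uxth; on Thumb-2, remainder == 0x3ff gives i == 10 and a
   single ubfx extracting the low 10 bits.  */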
4346 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4347 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4348 {
4349 if (generate)
4350 {
4351 if (mode == SImode && i == 16)
4352 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4353 smaller insn. */
4354 emit_constant_insn (cond,
4355 gen_zero_extendhisi2
4356 (target, gen_lowpart (HImode, source)));
4357 else
4358 /* The extzv pattern only supports SImode, but we can coerce the
4359 operands into that mode. */
4360 emit_constant_insn (cond,
4361 gen_extzv_t2 (gen_lowpart (SImode, target),
4362 gen_lowpart (SImode, source),
4363 GEN_INT (i), const0_rtx));
4364 }
4365
4366 return 1;
4367 }
4368
4369 /* Calculate a few attributes that may be useful for specific
4370 optimizations. */
4371 /* Count number of leading zeros. */
4372 for (i = 31; i >= 0; i--)
4373 {
4374 if ((remainder & (1 << i)) == 0)
4375 clear_sign_bit_copies++;
4376 else
4377 break;
4378 }
4379
4380 /* Count number of leading 1's. */
4381 for (i = 31; i >= 0; i--)
4382 {
4383 if ((remainder & (1 << i)) != 0)
4384 set_sign_bit_copies++;
4385 else
4386 break;
4387 }
4388
4389 /* Count number of trailing zero's. */
4390 for (i = 0; i <= 31; i++)
4391 {
4392 if ((remainder & (1 << i)) == 0)
4393 clear_zero_bit_copies++;
4394 else
4395 break;
4396 }
4397
4398 /* Count number of trailing 1's. */
4399 for (i = 0; i <= 31; i++)
4400 {
4401 if ((remainder & (1 << i)) != 0)
4402 set_zero_bit_copies++;
4403 else
4404 break;
4405 }
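/* For example (illustrative), remainder == 0xfff00000 gives
   clear_sign_bit_copies == 0, set_sign_bit_copies == 12,
   clear_zero_bit_copies == 20 and set_zero_bit_copies == 0; the
   set_sign_bit_copies > 8 transformation in the IOR case below then
   applies to such masks.  */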
4406
4407 switch (code)
4408 {
4409 case SET:
4410 /* See if we can do this by sign_extending a constant that is known
4411 to be negative. This is a good way of doing it, since the shift
4412 may well merge into a subsequent insn. */
4413 if (set_sign_bit_copies > 1)
4414 {
4415 if (const_ok_for_arm
4416 (temp1 = ARM_SIGN_EXTEND (remainder
4417 << (set_sign_bit_copies - 1))))
4418 {
4419 if (generate)
4420 {
4421 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4422 emit_constant_insn (cond,
4423 gen_rtx_SET (new_src, GEN_INT (temp1)));
4424 emit_constant_insn (cond,
4425 gen_ashrsi3 (target, new_src,
4426 GEN_INT (set_sign_bit_copies - 1)));
4427 }
4428 return 2;
4429 }
4430 /* For an inverted constant, we will need to set the low bits;
4431 these will be shifted out of harm's way. */
4432 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4433 if (const_ok_for_arm (~temp1))
4434 {
4435 if (generate)
4436 {
4437 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4438 emit_constant_insn (cond,
4439 gen_rtx_SET (new_src, GEN_INT (temp1)));
4440 emit_constant_insn (cond,
4441 gen_ashrsi3 (target, new_src,
4442 GEN_INT (set_sign_bit_copies - 1)));
4443 }
4444 return 2;
4445 }
4446 }
4447
4448 /* See if we can calculate the value as the difference between two
4449 valid immediates. */
4450 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4451 {
4452 int topshift = clear_sign_bit_copies & ~1;
4453
4454 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4455 & (0xff000000 >> topshift));
4456
4457 /* If temp1 is zero, then that means the 9 most significant
4458 bits of remainder were 1 and we've caused it to overflow.
4459 When topshift is 0 we don't need to do anything since we
4460 can borrow from 'bit 32'. */
4461 if (temp1 == 0 && topshift != 0)
4462 temp1 = 0x80000000 >> (topshift - 1);
4463
4464 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4465
4466 if (const_ok_for_arm (temp2))
4467 {
4468 if (generate)
4469 {
4470 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4471 emit_constant_insn (cond,
4472 gen_rtx_SET (new_src, GEN_INT (temp1)));
4473 emit_constant_insn (cond,
4474 gen_addsi3 (target, new_src,
4475 GEN_INT (-temp2)));
4476 }
4477
4478 return 2;
4479 }
4480 }
4481
4482 /* See if we can generate this by setting the bottom (or the top)
4483 16 bits, and then shifting these into the other half of the
4484 word. We only look for the simplest cases, to do more would cost
4485 too much. Be careful, however, not to generate this when the
4486 alternative would take fewer insns. */
4487 if (val & 0xffff0000)
4488 {
4489 temp1 = remainder & 0xffff0000;
4490 temp2 = remainder & 0x0000ffff;
4491
4492 /* Overlaps outside this range are best done using other methods. */
4493 for (i = 9; i < 24; i++)
4494 {
4495 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4496 && !const_ok_for_arm (temp2))
4497 {
4498 rtx new_src = (subtargets
4499 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4500 : target);
4501 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4502 source, subtargets, generate);
4503 source = new_src;
4504 if (generate)
4505 emit_constant_insn
4506 (cond,
4507 gen_rtx_SET
4508 (target,
4509 gen_rtx_IOR (mode,
4510 gen_rtx_ASHIFT (mode, source,
4511 GEN_INT (i)),
4512 source)));
4513 return insns + 1;
4514 }
4515 }
4516
4517 /* Don't duplicate cases already considered. */
4518 for (i = 17; i < 24; i++)
4519 {
4520 if (((temp1 | (temp1 >> i)) == remainder)
4521 && !const_ok_for_arm (temp1))
4522 {
4523 rtx new_src = (subtargets
4524 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4525 : target);
4526 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4527 source, subtargets, generate);
4528 source = new_src;
4529 if (generate)
4530 emit_constant_insn
4531 (cond,
4532 gen_rtx_SET (target,
4533 gen_rtx_IOR
4534 (mode,
4535 gen_rtx_LSHIFTRT (mode, source,
4536 GEN_INT (i)),
4537 source)));
4538 return insns + 1;
4539 }
4540 }
4541 }
4542 break;
4543
4544 case IOR:
4545 case XOR:
4546 /* If we have IOR or XOR, and the constant can be loaded in a
4547 single instruction, and we can find a temporary to put it in,
4548 then this can be done in two instructions instead of 3-4. */
4549 if (subtargets
4550 /* TARGET can't be NULL if SUBTARGETS is 0 */
4551 || (reload_completed && !reg_mentioned_p (target, source)))
4552 {
4553 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4554 {
4555 if (generate)
4556 {
4557 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4558
4559 emit_constant_insn (cond,
4560 gen_rtx_SET (sub, GEN_INT (val)));
4561 emit_constant_insn (cond,
4562 gen_rtx_SET (target,
4563 gen_rtx_fmt_ee (code, mode,
4564 source, sub)));
4565 }
4566 return 2;
4567 }
4568 }
4569
4570 if (code == XOR)
4571 break;
4572
4573 /* Convert
4574 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4575 followed by 0s, e.g. 0xfff00000) to
4576 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4577
4578 This can be done in 2 instructions by using shifts with mov or mvn.
4579 E.g. for
4580 x = x | 0xfff00000;
4581 we generate:
4582 mvn r0, r0, asl #12
4583 mvn r0, r0, lsr #12 */
4584 if (set_sign_bit_copies > 8
4585 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4586 {
4587 if (generate)
4588 {
4589 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4590 rtx shift = GEN_INT (set_sign_bit_copies);
4591
4592 emit_constant_insn
4593 (cond,
4594 gen_rtx_SET (sub,
4595 gen_rtx_NOT (mode,
4596 gen_rtx_ASHIFT (mode,
4597 source,
4598 shift))));
4599 emit_constant_insn
4600 (cond,
4601 gen_rtx_SET (target,
4602 gen_rtx_NOT (mode,
4603 gen_rtx_LSHIFTRT (mode, sub,
4604 shift))));
4605 }
4606 return 2;
4607 }
4608
4609 /* Convert
4610 x = y | constant (which has set_zero_bit_copies trailing ones)
4611 to
4612 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4613
4614 E.g. for r0 = r0 | 0xfff we generate:
4615 mvn r0, r0, lsr #12
4616 mvn r0, r0, asl #12
4617
4618 */
4619 if (set_zero_bit_copies > 8
4620 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4621 {
4622 if (generate)
4623 {
4624 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4625 rtx shift = GEN_INT (set_zero_bit_copies);
4626
4627 emit_constant_insn
4628 (cond,
4629 gen_rtx_SET (sub,
4630 gen_rtx_NOT (mode,
4631 gen_rtx_LSHIFTRT (mode,
4632 source,
4633 shift))));
4634 emit_constant_insn
4635 (cond,
4636 gen_rtx_SET (target,
4637 gen_rtx_NOT (mode,
4638 gen_rtx_ASHIFT (mode, sub,
4639 shift))));
4640 }
4641 return 2;
4642 }
4643
4644 /* This will never be reached for Thumb2 because orn is a valid
4645 instruction. This is for Thumb1 and the ARM 32 bit cases.
4646
4647 x = y | constant (such that ~constant is a valid constant)
4648 Transform this to
4649 x = ~(~y & ~constant).
4650 */
4651 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4652 {
4653 if (generate)
4654 {
4655 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4656 emit_constant_insn (cond,
4657 gen_rtx_SET (sub,
4658 gen_rtx_NOT (mode, source)));
4659 source = sub;
4660 if (subtargets)
4661 sub = gen_reg_rtx (mode);
4662 emit_constant_insn (cond,
4663 gen_rtx_SET (sub,
4664 gen_rtx_AND (mode, source,
4665 GEN_INT (temp1))));
4666 emit_constant_insn (cond,
4667 gen_rtx_SET (target,
4668 gen_rtx_NOT (mode, sub)));
4669 }
4670 return 3;
4671 }
4672 break;
4673
4674 case AND:
4675 /* See if two shifts will do 2 or more insn's worth of work. */
4676 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4677 {
4678 HOST_WIDE_INT shift_mask = ((0xffffffff
4679 << (32 - clear_sign_bit_copies))
4680 & 0xffffffff);
4681
4682 if ((remainder | shift_mask) != 0xffffffff)
4683 {
4684 HOST_WIDE_INT new_val
4685 = ARM_SIGN_EXTEND (remainder | shift_mask);
4686
4687 if (generate)
4688 {
4689 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4690 insns = arm_gen_constant (AND, SImode, cond, new_val,
4691 new_src, source, subtargets, 1);
4692 source = new_src;
4693 }
4694 else
4695 {
4696 rtx targ = subtargets ? NULL_RTX : target;
4697 insns = arm_gen_constant (AND, mode, cond, new_val,
4698 targ, source, subtargets, 0);
4699 }
4700 }
4701
4702 if (generate)
4703 {
4704 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4705 rtx shift = GEN_INT (clear_sign_bit_copies);
4706
4707 emit_insn (gen_ashlsi3 (new_src, source, shift));
4708 emit_insn (gen_lshrsi3 (target, new_src, shift));
4709 }
4710
4711 return insns + 2;
4712 }
4713
4714 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4715 {
4716 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4717
4718 if ((remainder | shift_mask) != 0xffffffff)
4719 {
4720 HOST_WIDE_INT new_val
4721 = ARM_SIGN_EXTEND (remainder | shift_mask);
4722 if (generate)
4723 {
4724 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4725
4726 insns = arm_gen_constant (AND, mode, cond, new_val,
4727 new_src, source, subtargets, 1);
4728 source = new_src;
4729 }
4730 else
4731 {
4732 rtx targ = subtargets ? NULL_RTX : target;
4733
4734 insns = arm_gen_constant (AND, mode, cond, new_val,
4735 targ, source, subtargets, 0);
4736 }
4737 }
4738
4739 if (generate)
4740 {
4741 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4742 rtx shift = GEN_INT (clear_zero_bit_copies);
4743
4744 emit_insn (gen_lshrsi3 (new_src, source, shift));
4745 emit_insn (gen_ashlsi3 (target, new_src, shift));
4746 }
4747
4748 return insns + 2;
4749 }
4750
4751 break;
4752
4753 default:
4754 break;
4755 }
4756
4757 /* Calculate what the instruction sequences would be if we generated it
4758 normally, negated, or inverted. */
4759 if (code == AND)
4760 /* AND cannot be split into multiple insns, so invert and use BIC. */
4761 insns = 99;
4762 else
4763 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4764
4765 if (can_negate)
4766 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4767 &neg_immediates);
4768 else
4769 neg_insns = 99;
4770
4771 if (can_invert || final_invert)
4772 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4773 &inv_immediates);
4774 else
4775 inv_insns = 99;
4776
4777 immediates = &pos_immediates;
4778
4779 /* Is the negated immediate sequence more efficient? */
4780 if (neg_insns < insns && neg_insns <= inv_insns)
4781 {
4782 insns = neg_insns;
4783 immediates = &neg_immediates;
4784 }
4785 else
4786 can_negate = 0;
4787
4788 /* Is the inverted immediate sequence more efficient?
4789 We must allow for an extra NOT instruction for XOR operations, although
4790 there is some chance that the final 'mvn' will get optimized later. */
4791 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4792 {
4793 insns = inv_insns;
4794 immediates = &inv_immediates;
4795 }
4796 else
4797 {
4798 can_invert = 0;
4799 final_invert = 0;
4800 }
4801
4802 /* Now output the chosen sequence as instructions. */
4803 if (generate)
4804 {
4805 for (i = 0; i < insns; i++)
4806 {
4807 rtx new_src, temp1_rtx;
4808
4809 temp1 = immediates->i[i];
4810
4811 if (code == SET || code == MINUS)
4812 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4813 else if ((final_invert || i < (insns - 1)) && subtargets)
4814 new_src = gen_reg_rtx (mode);
4815 else
4816 new_src = target;
4817
4818 if (can_invert)
4819 temp1 = ~temp1;
4820 else if (can_negate)
4821 temp1 = -temp1;
4822
4823 temp1 = trunc_int_for_mode (temp1, mode);
4824 temp1_rtx = GEN_INT (temp1);
4825
4826 if (code == SET)
4827 ;
4828 else if (code == MINUS)
4829 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4830 else
4831 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4832
4833 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4834 source = new_src;
4835
4836 if (code == SET)
4837 {
4838 can_negate = can_invert;
4839 can_invert = 0;
4840 code = PLUS;
4841 }
4842 else if (code == MINUS)
4843 code = PLUS;
4844 }
4845 }
4846
4847 if (final_invert)
4848 {
4849 if (generate)
4850 emit_constant_insn (cond, gen_rtx_SET (target,
4851 gen_rtx_NOT (mode, source)));
4852 insns++;
4853 }
4854
4855 return insns;
4856 }
4857
4858 /* Canonicalize a comparison so that we are more likely to recognize it.
4859 This can be done for a few constant compares, where we can make the
4860 immediate value easier to load. */
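/* For example (illustrative): 0xffff is not a valid ARM immediate but
   0x10000 is, so a comparison (GT reg 0xffff) is rewritten below as
   (GE reg 0x10000), avoiding an extra constant-loading insn.  */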
4861
4862 static void
4863 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4864 bool op0_preserve_value)
4865 {
4866 machine_mode mode;
4867 unsigned HOST_WIDE_INT i, maxval;
4868
4869 mode = GET_MODE (*op0);
4870 if (mode == VOIDmode)
4871 mode = GET_MODE (*op1);
4872
4873 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4874
4875 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4876 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4877 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4878 for GTU/LEU in Thumb mode. */
4879 if (mode == DImode)
4880 {
4881
4882 if (*code == GT || *code == LE
4883 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4884 {
4885 /* Missing comparison. First try to use an available
4886 comparison. */
4887 if (CONST_INT_P (*op1))
4888 {
4889 i = INTVAL (*op1);
4890 switch (*code)
4891 {
4892 case GT:
4893 case LE:
4894 if (i != maxval
4895 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4896 {
4897 *op1 = GEN_INT (i + 1);
4898 *code = *code == GT ? GE : LT;
4899 return;
4900 }
4901 break;
4902 case GTU:
4903 case LEU:
4904 if (i != ~((unsigned HOST_WIDE_INT) 0)
4905 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4906 {
4907 *op1 = GEN_INT (i + 1);
4908 *code = *code == GTU ? GEU : LTU;
4909 return;
4910 }
4911 break;
4912 default:
4913 gcc_unreachable ();
4914 }
4915 }
4916
4917 /* If that did not work, reverse the condition. */
4918 if (!op0_preserve_value)
4919 {
4920 std::swap (*op0, *op1);
4921 *code = (int)swap_condition ((enum rtx_code)*code);
4922 }
4923 }
4924 return;
4925 }
4926
4927 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4928 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4929 to facilitate possible combining with a cmp into 'ands'. */
4930 if (mode == SImode
4931 && GET_CODE (*op0) == ZERO_EXTEND
4932 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4933 && GET_MODE (XEXP (*op0, 0)) == QImode
4934 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4935 && subreg_lowpart_p (XEXP (*op0, 0))
4936 && *op1 == const0_rtx)
4937 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4938 GEN_INT (255));
4939
4940 /* Comparisons smaller than DImode. Only adjust comparisons against
4941 an out-of-range constant. */
4942 if (!CONST_INT_P (*op1)
4943 || const_ok_for_arm (INTVAL (*op1))
4944 || const_ok_for_arm (- INTVAL (*op1)))
4945 return;
4946
4947 i = INTVAL (*op1);
4948
4949 switch (*code)
4950 {
4951 case EQ:
4952 case NE:
4953 return;
4954
4955 case GT:
4956 case LE:
4957 if (i != maxval
4958 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4959 {
4960 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4961 *code = *code == GT ? GE : LT;
4962 return;
4963 }
4964 break;
4965
4966 case GE:
4967 case LT:
4968 if (i != ~maxval
4969 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4970 {
4971 *op1 = GEN_INT (i - 1);
4972 *code = *code == GE ? GT : LE;
4973 return;
4974 }
4975 break;
4976
4977 case GTU:
4978 case LEU:
4979 if (i != ~((unsigned HOST_WIDE_INT) 0)
4980 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4981 {
4982 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4983 *code = *code == GTU ? GEU : LTU;
4984 return;
4985 }
4986 break;
4987
4988 case GEU:
4989 case LTU:
4990 if (i != 0
4991 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4992 {
4993 *op1 = GEN_INT (i - 1);
4994 *code = *code == GEU ? GTU : LEU;
4995 return;
4996 }
4997 break;
4998
4999 default:
5000 gcc_unreachable ();
5001 }
5002 }
5003
5004
5005 /* Define how to find the value returned by a function. */
5006
5007 static rtx
5008 arm_function_value(const_tree type, const_tree func,
5009 bool outgoing ATTRIBUTE_UNUSED)
5010 {
5011 machine_mode mode;
5012 int unsignedp ATTRIBUTE_UNUSED;
5013 rtx r ATTRIBUTE_UNUSED;
5014
5015 mode = TYPE_MODE (type);
5016
5017 if (TARGET_AAPCS_BASED)
5018 return aapcs_allocate_return_reg (mode, type, func);
5019
5020 /* Promote integer types. */
5021 if (INTEGRAL_TYPE_P (type))
5022 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5023
5024 /* Promote small structs returned in a register to full-word size
5025 for big-endian AAPCS. */
5026 if (arm_return_in_msb (type))
5027 {
5028 HOST_WIDE_INT size = int_size_in_bytes (type);
5029 if (size % UNITS_PER_WORD != 0)
5030 {
5031 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5032 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5033 }
5034 }
5035
5036 return arm_libcall_value_1 (mode);
5037 }
5038
5039 /* libcall hashtable helpers. */
5040
5041 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5042 {
5043 static inline hashval_t hash (const rtx_def *);
5044 static inline bool equal (const rtx_def *, const rtx_def *);
5045 static inline void remove (rtx_def *);
5046 };
5047
5048 inline bool
5049 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5050 {
5051 return rtx_equal_p (p1, p2);
5052 }
5053
5054 inline hashval_t
5055 libcall_hasher::hash (const rtx_def *p1)
5056 {
5057 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5058 }
5059
5060 typedef hash_table<libcall_hasher> libcall_table_type;
5061
5062 static void
5063 add_libcall (libcall_table_type *htab, rtx libcall)
5064 {
5065 *htab->find_slot (libcall, INSERT) = libcall;
5066 }
5067
5068 static bool
5069 arm_libcall_uses_aapcs_base (const_rtx libcall)
5070 {
5071 static bool init_done = false;
5072 static libcall_table_type *libcall_htab = NULL;
5073
5074 if (!init_done)
5075 {
5076 init_done = true;
5077
5078 libcall_htab = new libcall_table_type (31);
5079 add_libcall (libcall_htab,
5080 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5081 add_libcall (libcall_htab,
5082 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5083 add_libcall (libcall_htab,
5084 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5085 add_libcall (libcall_htab,
5086 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5087
5088 add_libcall (libcall_htab,
5089 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5090 add_libcall (libcall_htab,
5091 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5092 add_libcall (libcall_htab,
5093 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5094 add_libcall (libcall_htab,
5095 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5096
5097 add_libcall (libcall_htab,
5098 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5099 add_libcall (libcall_htab,
5100 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5101 add_libcall (libcall_htab,
5102 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5103 add_libcall (libcall_htab,
5104 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5105 add_libcall (libcall_htab,
5106 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5107 add_libcall (libcall_htab,
5108 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5109 add_libcall (libcall_htab,
5110 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5111 add_libcall (libcall_htab,
5112 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5113
5114 /* Values from double-precision helper functions are returned in core
5115 registers if the selected core only supports single-precision
5116 arithmetic, even if we are using the hard-float ABI. The same is
5117 true for single-precision helpers, but we will never be using the
5118 hard-float ABI on a CPU which doesn't support single-precision
5119 operations in hardware. */
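/* For instance (illustrative): on a single-precision-only FPU such as
   the Cortex-M4F's, the DFmode helpers registered below (__aeabi_dadd
   and friends under the ARM EABI) return their results in core
   registers even when -mfloat-abi=hard is in effect.  */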
5120 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5121 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5122 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5123 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5124 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5125 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5126 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5127 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5128 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5129 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5130 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5131 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5132 SFmode));
5133 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5134 DFmode));
5135 }
5136
5137 return libcall && libcall_htab->find (libcall) != NULL;
5138 }
5139
5140 static rtx
5141 arm_libcall_value_1 (machine_mode mode)
5142 {
5143 if (TARGET_AAPCS_BASED)
5144 return aapcs_libcall_value (mode);
5145 else if (TARGET_IWMMXT_ABI
5146 && arm_vector_mode_supported_p (mode))
5147 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5148 else
5149 return gen_rtx_REG (mode, ARG_REGISTER (1));
5150 }
5151
5152 /* Define how to find the value returned by a library function
5153 assuming the value has mode MODE. */
5154
5155 static rtx
5156 arm_libcall_value (machine_mode mode, const_rtx libcall)
5157 {
5158 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5159 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5160 {
5161 /* The following libcalls return their result in integer registers,
5162 even though they return a floating point value. */
5163 if (arm_libcall_uses_aapcs_base (libcall))
5164 return gen_rtx_REG (mode, ARG_REGISTER(1));
5165
5166 }
5167
5168 return arm_libcall_value_1 (mode);
5169 }
5170
5171 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5172
5173 static bool
5174 arm_function_value_regno_p (const unsigned int regno)
5175 {
5176 if (regno == ARG_REGISTER (1)
5177 || (TARGET_32BIT
5178 && TARGET_AAPCS_BASED
5179 && TARGET_VFP
5180 && TARGET_HARD_FLOAT
5181 && regno == FIRST_VFP_REGNUM)
5182 || (TARGET_IWMMXT_ABI
5183 && regno == FIRST_IWMMXT_REGNUM))
5184 return true;
5185
5186 return false;
5187 }
5188
5189 /* Determine the amount of memory needed to store the possible return
5190 registers of an untyped call. */
5191 int
5192 arm_apply_result_size (void)
5193 {
5194 int size = 16;
5195
5196 if (TARGET_32BIT)
5197 {
5198 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5199 size += 32;
5200 if (TARGET_IWMMXT_ABI)
5201 size += 8;
5202 }
5203
5204 return size;
5205 }
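
/* A sketch of what these numbers correspond to (not an ABI statement):
   16 bytes cover r0-r3; with the VFP hard-float ABI another 32 bytes are
   reserved for the VFP return registers, and the iWMMXt ABI adds 8 bytes
   for its 64-bit return register.  */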
5206
5207 /* Decide whether TYPE should be returned in memory (true)
5208 or in a register (false). FNTYPE is the type of the function making
5209 the call. */
5210 static bool
5211 arm_return_in_memory (const_tree type, const_tree fntype)
5212 {
5213 HOST_WIDE_INT size;
5214
5215 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5216
5217 if (TARGET_AAPCS_BASED)
5218 {
5219 /* Simple, non-aggregate types (i.e. not including vectors and
5220 complex) are always returned in a register (or registers).
5221 We don't care about which register here, so we can short-cut
5222 some of the detail. */
5223 if (!AGGREGATE_TYPE_P (type)
5224 && TREE_CODE (type) != VECTOR_TYPE
5225 && TREE_CODE (type) != COMPLEX_TYPE)
5226 return false;
5227
5228 /* Any return value that is no larger than one word can be
5229 returned in r0. */
5230 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5231 return false;
5232
5233 /* Check any available co-processors to see if they accept the
5234 type as a register candidate (VFP, for example, can return
5235 some aggregates in consecutive registers). These aren't
5236 available if the call is variadic. */
5237 if (aapcs_select_return_coproc (type, fntype) >= 0)
5238 return false;
5239
5240 /* Vector values should be returned using ARM registers, not
5241 memory (unless they're over 16 bytes, which will break since
5242 we only have four call-clobbered registers to play with). */
5243 if (TREE_CODE (type) == VECTOR_TYPE)
5244 return (size < 0 || size > (4 * UNITS_PER_WORD));
5245
5246 /* The rest go in memory. */
5247 return true;
5248 }
5249
5250 if (TREE_CODE (type) == VECTOR_TYPE)
5251 return (size < 0 || size > (4 * UNITS_PER_WORD));
5252
5253 if (!AGGREGATE_TYPE_P (type)
5254 && (TREE_CODE (type) != VECTOR_TYPE))
5255 /* All simple types are returned in registers. */
5256 return false;
5257
5258 if (arm_abi != ARM_ABI_APCS)
5259 {
5260 /* ATPCS and later return aggregate types in memory only if they are
5261 larger than a word (or are variable size). */
5262 return (size < 0 || size > UNITS_PER_WORD);
5263 }
5264
5265 /* For the arm-wince targets we choose to be compatible with Microsoft's
5266 ARM and Thumb compilers, which always return aggregates in memory. */
5267 #ifndef ARM_WINCE
5268 /* All structures/unions bigger than one word are returned in memory.
5269 Also catch the case where int_size_in_bytes returns -1. In this case
5270 the aggregate is either huge or of variable size, and in either case
5271 we will want to return it via memory and not in a register. */
5272 if (size < 0 || size > UNITS_PER_WORD)
5273 return true;
5274
5275 if (TREE_CODE (type) == RECORD_TYPE)
5276 {
5277 tree field;
5278
5279 /* For a struct the APCS says that we only return in a register
5280 if the type is 'integer like' and every addressable element
5281 has an offset of zero. For practical purposes this means
5282 that the structure can have at most one non bit-field element
5283 and that this element must be the first one in the structure. */
5284
5285 /* Find the first field, ignoring non FIELD_DECL things which will
5286 have been created by C++. */
5287 for (field = TYPE_FIELDS (type);
5288 field && TREE_CODE (field) != FIELD_DECL;
5289 field = DECL_CHAIN (field))
5290 continue;
5291
5292 if (field == NULL)
5293 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5294
5295 /* Check that the first field is valid for returning in a register. */
5296
5297 /* ... Floats are not allowed. */
5298 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5299 return true;
5300
5301 /* ... Aggregates that are not themselves valid for returning in
5302 a register are not allowed. */
5303 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5304 return true;
5305
5306 /* Now check the remaining fields, if any. Only bitfields are allowed,
5307 since they are not addressable. */
5308 for (field = DECL_CHAIN (field);
5309 field;
5310 field = DECL_CHAIN (field))
5311 {
5312 if (TREE_CODE (field) != FIELD_DECL)
5313 continue;
5314
5315 if (!DECL_BIT_FIELD_TYPE (field))
5316 return true;
5317 }
5318
5319 return false;
5320 }
5321
5322 if (TREE_CODE (type) == UNION_TYPE)
5323 {
5324 tree field;
5325
5326 /* Unions can be returned in registers if every element is
5327 integral, or can be returned in an integer register. */
5328 for (field = TYPE_FIELDS (type);
5329 field;
5330 field = DECL_CHAIN (field))
5331 {
5332 if (TREE_CODE (field) != FIELD_DECL)
5333 continue;
5334
5335 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5336 return true;
5337
5338 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5339 return true;
5340 }
5341
5342 return false;
5343 }
5344 #endif /* not ARM_WINCE */
5345
5346 /* Return all other types in memory. */
5347 return true;
5348 }
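
/* A small illustration of the decisions above; this is a sketch only,
   assuming a 32-bit target where UNITS_PER_WORD is 4, and the structure
   names are hypothetical:

       struct s1 { char c; };       returned in a register under both the
                                    AAPCS and APCS rules (fits in a word).
       struct s2 { int a, b; };     8 bytes: memory under APCS, and under
                                    base AAPCS unless a co-processor
                                    (e.g. VFP) accepts it.
       struct s3 { float f; };      APCS: memory, because the first field
                                    is a float; AAPCS: a register, because
                                    it is no larger than one word.  */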
5349
5350 const struct pcs_attribute_arg
5351 {
5352 const char *arg;
5353 enum arm_pcs value;
5354 } pcs_attribute_args[] =
5355 {
5356 {"aapcs", ARM_PCS_AAPCS},
5357 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5358 #if 0
5359 /* We could recognize these, but changes would be needed elsewhere
5360 * to implement them. */
5361 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5362 {"atpcs", ARM_PCS_ATPCS},
5363 {"apcs", ARM_PCS_APCS},
5364 #endif
5365 {NULL, ARM_PCS_UNKNOWN}
5366 };
5367
5368 static enum arm_pcs
5369 arm_pcs_from_attribute (tree attr)
5370 {
5371 const struct pcs_attribute_arg *ptr;
5372 const char *arg;
5373
5374 /* Get the value of the argument. */
5375 if (TREE_VALUE (attr) == NULL_TREE
5376 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5377 return ARM_PCS_UNKNOWN;
5378
5379 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5380
5381 /* Check it against the list of known arguments. */
5382 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5383 if (streq (arg, ptr->arg))
5384 return ptr->value;
5385
5386 /* An unrecognized PCS variant. */
5387 return ARM_PCS_UNKNOWN;
5388 }
5389
5390 /* Get the PCS variant to use for this call. TYPE is the function's type
5391 specification, DECL is the specific declaration. DECL may be null if
5392 the call could be indirect or if this is a library call. */
5393 static enum arm_pcs
5394 arm_get_pcs_model (const_tree type, const_tree decl)
5395 {
5396 bool user_convention = false;
5397 enum arm_pcs user_pcs = arm_pcs_default;
5398 tree attr;
5399
5400 gcc_assert (type);
5401
5402 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5403 if (attr)
5404 {
5405 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5406 user_convention = true;
5407 }
5408
5409 if (TARGET_AAPCS_BASED)
5410 {
5411 /* Detect varargs functions. These always use the base rules
5412 (no argument is ever a candidate for a co-processor
5413 register). */
5414 bool base_rules = stdarg_p (type);
5415
5416 if (user_convention)
5417 {
5418 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5419 sorry ("non-AAPCS derived PCS variant");
5420 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5421 error ("variadic functions must use the base AAPCS variant");
5422 }
5423
5424 if (base_rules)
5425 return ARM_PCS_AAPCS;
5426 else if (user_convention)
5427 return user_pcs;
5428 else if (decl && flag_unit_at_a_time)
5429 {
5430 /* Local functions never leak outside this compilation unit,
5431 so we are free to use whatever conventions are
5432 appropriate. */
5433 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5434 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5435 if (i && i->local)
5436 return ARM_PCS_AAPCS_LOCAL;
5437 }
5438 }
5439 else if (user_convention && user_pcs != arm_pcs_default)
5440 sorry ("PCS variant");
5441
5442 /* For everything else we use the target's default. */
5443 return arm_pcs_default;
5444 }
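
/* For reference, the "pcs" attribute handled above is written on a
   function type; a sketch of user code (the declarations are
   hypothetical):

       double f (double) __attribute__ ((pcs ("aapcs")));
       double g (double) __attribute__ ((pcs ("aapcs-vfp")));

   On an AAPCS target, calls through f's type use the base (soft-float)
   conventions and calls through g's type use the VFP variant, i.e.
   arm_get_pcs_model returns ARM_PCS_AAPCS and ARM_PCS_AAPCS_VFP
   respectively.  */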
5445
5446
5447 static void
5448 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5449 const_tree fntype ATTRIBUTE_UNUSED,
5450 rtx libcall ATTRIBUTE_UNUSED,
5451 const_tree fndecl ATTRIBUTE_UNUSED)
5452 {
5453 /* Record the unallocated VFP registers. */
5454 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5455 pcum->aapcs_vfp_reg_alloc = 0;
5456 }
5457
5458 /* Walk down the type tree of TYPE counting consecutive base elements.
5459 If *MODEP is VOIDmode, then set it to the first valid floating point
5460 type. If a non-floating point type is found, or if a floating point
5461 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5462 otherwise return the count in the sub-tree. */
5463 static int
5464 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5465 {
5466 machine_mode mode;
5467 HOST_WIDE_INT size;
5468
5469 switch (TREE_CODE (type))
5470 {
5471 case REAL_TYPE:
5472 mode = TYPE_MODE (type);
5473 if (mode != DFmode && mode != SFmode)
5474 return -1;
5475
5476 if (*modep == VOIDmode)
5477 *modep = mode;
5478
5479 if (*modep == mode)
5480 return 1;
5481
5482 break;
5483
5484 case COMPLEX_TYPE:
5485 mode = TYPE_MODE (TREE_TYPE (type));
5486 if (mode != DFmode && mode != SFmode)
5487 return -1;
5488
5489 if (*modep == VOIDmode)
5490 *modep = mode;
5491
5492 if (*modep == mode)
5493 return 2;
5494
5495 break;
5496
5497 case VECTOR_TYPE:
5498 /* Use V2SImode and V4SImode as representatives of all 64-bit
5499 and 128-bit vector types, whether or not those modes are
5500 supported with the present options. */
5501 size = int_size_in_bytes (type);
5502 switch (size)
5503 {
5504 case 8:
5505 mode = V2SImode;
5506 break;
5507 case 16:
5508 mode = V4SImode;
5509 break;
5510 default:
5511 return -1;
5512 }
5513
5514 if (*modep == VOIDmode)
5515 *modep = mode;
5516
5517 /* Vector modes are considered to be opaque: two vectors are
5518 equivalent for the purposes of being homogeneous aggregates
5519 if they are the same size. */
5520 if (*modep == mode)
5521 return 1;
5522
5523 break;
5524
5525 case ARRAY_TYPE:
5526 {
5527 int count;
5528 tree index = TYPE_DOMAIN (type);
5529
5530 /* Can't handle incomplete types nor sizes that are not
5531 fixed. */
5532 if (!COMPLETE_TYPE_P (type)
5533 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5534 return -1;
5535
5536 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5537 if (count == -1
5538 || !index
5539 || !TYPE_MAX_VALUE (index)
5540 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5541 || !TYPE_MIN_VALUE (index)
5542 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5543 || count < 0)
5544 return -1;
5545
5546 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5547 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5548
5549 /* There must be no padding. */
5550 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5551 return -1;
5552
5553 return count;
5554 }
5555
5556 case RECORD_TYPE:
5557 {
5558 int count = 0;
5559 int sub_count;
5560 tree field;
5561
5562 /* Can't handle incomplete types nor sizes that are not
5563 fixed. */
5564 if (!COMPLETE_TYPE_P (type)
5565 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5566 return -1;
5567
5568 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5569 {
5570 if (TREE_CODE (field) != FIELD_DECL)
5571 continue;
5572
5573 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5574 if (sub_count < 0)
5575 return -1;
5576 count += sub_count;
5577 }
5578
5579 /* There must be no padding. */
5580 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5581 return -1;
5582
5583 return count;
5584 }
5585
5586 case UNION_TYPE:
5587 case QUAL_UNION_TYPE:
5588 {
5589 /* These aren't very interesting except in a degenerate case. */
5590 int count = 0;
5591 int sub_count;
5592 tree field;
5593
5594 /* Can't handle incomplete types nor sizes that are not
5595 fixed. */
5596 if (!COMPLETE_TYPE_P (type)
5597 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5598 return -1;
5599
5600 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5601 {
5602 if (TREE_CODE (field) != FIELD_DECL)
5603 continue;
5604
5605 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5606 if (sub_count < 0)
5607 return -1;
5608 count = count > sub_count ? count : sub_count;
5609 }
5610
5611 /* There must be no padding. */
5612 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5613 return -1;
5614
5615 return count;
5616 }
5617
5618 default:
5619 break;
5620 }
5621
5622 return -1;
5623 }
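
/* Some worked examples for the walk above (illustrative only; the type
   names are hypothetical):

       struct hfa  { float a, b, c; };      -> 3, *MODEP == SFmode
       struct darr { double d[4]; };        -> 4, *MODEP == DFmode
       struct mix  { double d; float f; };  -> -1 (mixed base types)

   A count between 1 and 4 with a single base mode is what later makes a
   type a candidate for VFP argument/return registers; larger or mixed
   aggregates fall back to the base AAPCS rules.  */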
5624
5625 /* Return true if PCS_VARIANT should use VFP registers. */
5626 static bool
5627 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5628 {
5629 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5630 {
5631 static bool seen_thumb1_vfp = false;
5632
5633 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5634 {
5635 sorry ("Thumb-1 hard-float VFP ABI");
5636 /* sorry() is not immediately fatal, so only display this once. */
5637 seen_thumb1_vfp = true;
5638 }
5639
5640 return true;
5641 }
5642
5643 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5644 return false;
5645
5646 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
5647 && (TARGET_VFP_DOUBLE || !is_double));
5648 }
5649
5650 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5651 suitable for passing or returning in VFP registers for the PCS
5652 variant selected. If it is, then *BASE_MODE is updated to contain
5653 a machine mode describing each element of the argument's type and
5654 *COUNT to hold the number of such elements. */
5655 static bool
5656 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5657 machine_mode mode, const_tree type,
5658 machine_mode *base_mode, int *count)
5659 {
5660 machine_mode new_mode = VOIDmode;
5661
5662 /* If we have the type information, prefer that to working things
5663 out from the mode. */
5664 if (type)
5665 {
5666 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5667
5668 if (ag_count > 0 && ag_count <= 4)
5669 *count = ag_count;
5670 else
5671 return false;
5672 }
5673 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5674 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5675 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5676 {
5677 *count = 1;
5678 new_mode = mode;
5679 }
5680 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5681 {
5682 *count = 2;
5683 new_mode = (mode == DCmode ? DFmode : SFmode);
5684 }
5685 else
5686 return false;
5687
5688
5689 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5690 return false;
5691
5692 *base_mode = new_mode;
5693 return true;
5694 }
5695
5696 static bool
5697 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5698 machine_mode mode, const_tree type)
5699 {
5700 int count ATTRIBUTE_UNUSED;
5701 machine_mode ag_mode ATTRIBUTE_UNUSED;
5702
5703 if (!use_vfp_abi (pcs_variant, false))
5704 return false;
5705 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5706 &ag_mode, &count);
5707 }
5708
5709 static bool
5710 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5711 const_tree type)
5712 {
5713 if (!use_vfp_abi (pcum->pcs_variant, false))
5714 return false;
5715
5716 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5717 &pcum->aapcs_vfp_rmode,
5718 &pcum->aapcs_vfp_rcount);
5719 }
5720
5721 static bool
5722 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5723 const_tree type ATTRIBUTE_UNUSED)
5724 {
5725 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5726 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5727 int regno;
5728
5729 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5730 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5731 {
5732 pcum->aapcs_vfp_reg_alloc = mask << regno;
5733 if (mode == BLKmode
5734 || (mode == TImode && ! TARGET_NEON)
5735 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5736 {
5737 int i;
5738 int rcount = pcum->aapcs_vfp_rcount;
5739 int rshift = shift;
5740 machine_mode rmode = pcum->aapcs_vfp_rmode;
5741 rtx par;
5742 if (!TARGET_NEON)
5743 {
5744 /* Avoid using unsupported vector modes. */
5745 if (rmode == V2SImode)
5746 rmode = DImode;
5747 else if (rmode == V4SImode)
5748 {
5749 rmode = DImode;
5750 rcount *= 2;
5751 rshift /= 2;
5752 }
5753 }
5754 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5755 for (i = 0; i < rcount; i++)
5756 {
5757 rtx tmp = gen_rtx_REG (rmode,
5758 FIRST_VFP_REGNUM + regno + i * rshift);
5759 tmp = gen_rtx_EXPR_LIST
5760 (VOIDmode, tmp,
5761 GEN_INT (i * GET_MODE_SIZE (rmode)));
5762 XVECEXP (par, 0, i) = tmp;
5763 }
5764
5765 pcum->aapcs_reg = par;
5766 }
5767 else
5768 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5769 return true;
5770 }
5771 return false;
5772 }
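
/* The register accounting above is easiest to see with a concrete case
   (a sketch, not a full statement of the VFP PCS): for an argument
   recorded as two DFmode elements, SHIFT is 2 (a double covers two S
   registers) and MASK is 0xf, so the loop searches for four consecutive
   free S registers starting at an even S-register number, i.e. at a
   D-register boundary such as s0-s3 (the d0/d1 pair).  */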
5773
5774 static rtx
5775 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
5776 machine_mode mode,
5777 const_tree type)
5778 {
5779 if (!use_vfp_abi (pcs_variant, false))
5780 return NULL;
5781
5782 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5783 {
5784 int count;
5785 machine_mode ag_mode;
5786 int i;
5787 rtx par;
5788 int shift;
5789
5790 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5791 &ag_mode, &count);
5792
5793 if (!TARGET_NEON)
5794 {
5795 if (ag_mode == V2SImode)
5796 ag_mode = DImode;
5797 else if (ag_mode == V4SImode)
5798 {
5799 ag_mode = DImode;
5800 count *= 2;
5801 }
5802 }
5803 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
5804 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5805 for (i = 0; i < count; i++)
5806 {
5807 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5808 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5809 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5810 XVECEXP (par, 0, i) = tmp;
5811 }
5812
5813 return par;
5814 }
5815
5816 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5817 }
5818
5819 static void
5820 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
5821 machine_mode mode ATTRIBUTE_UNUSED,
5822 const_tree type ATTRIBUTE_UNUSED)
5823 {
5824 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5825 pcum->aapcs_vfp_reg_alloc = 0;
5826 return;
5827 }
5828
5829 #define AAPCS_CP(X) \
5830 { \
5831 aapcs_ ## X ## _cum_init, \
5832 aapcs_ ## X ## _is_call_candidate, \
5833 aapcs_ ## X ## _allocate, \
5834 aapcs_ ## X ## _is_return_candidate, \
5835 aapcs_ ## X ## _allocate_return_reg, \
5836 aapcs_ ## X ## _advance \
5837 }
5838
5839 /* Table of co-processors that can be used to pass arguments in
5840 registers. Ideally no argument should be a candidate for more than
5841 one co-processor table entry, but the table is processed in order
5842 and stops after the first match. If that entry then fails to put
5843 the argument into a co-processor register, the argument will go on
5844 the stack. */
5845 static struct
5846 {
5847 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5848 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5849
5850 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5851 BLKmode) is a candidate for this co-processor's registers; this
5852 function should ignore any position-dependent state in
5853 CUMULATIVE_ARGS and only use call-type dependent information. */
5854 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5855
5856 /* Return true if the argument does get a co-processor register; it
5857 should set aapcs_reg to an RTX of the register allocated as is
5858 required for a return from FUNCTION_ARG. */
5859 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5860
5861 /* Return true if a result of mode MODE (or type TYPE if MODE is
5862 BLKmode) can be returned in this co-processor's registers. */
5863 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5864
5865 /* Allocate and return an RTX element to hold the return value of a
5866 call; this routine must not fail and will only be called if
5867 is_return_candidate returned true with the same parameters. */
5868 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5869
5870 /* Finish processing this argument and prepare to start processing
5871 the next one. */
5872 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5873 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5874 {
5875 AAPCS_CP(vfp)
5876 };
5877
5878 #undef AAPCS_CP
5879
5880 static int
5881 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5882 const_tree type)
5883 {
5884 int i;
5885
5886 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5887 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5888 return i;
5889
5890 return -1;
5891 }
5892
5893 static int
5894 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5895 {
5896 /* We aren't passed a decl, so we can't check that a call is local.
5897 However, it isn't clear that that would be a win anyway, since it
5898 might limit some tail-calling opportunities. */
5899 enum arm_pcs pcs_variant;
5900
5901 if (fntype)
5902 {
5903 const_tree fndecl = NULL_TREE;
5904
5905 if (TREE_CODE (fntype) == FUNCTION_DECL)
5906 {
5907 fndecl = fntype;
5908 fntype = TREE_TYPE (fntype);
5909 }
5910
5911 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5912 }
5913 else
5914 pcs_variant = arm_pcs_default;
5915
5916 if (pcs_variant != ARM_PCS_AAPCS)
5917 {
5918 int i;
5919
5920 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5921 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5922 TYPE_MODE (type),
5923 type))
5924 return i;
5925 }
5926 return -1;
5927 }
5928
5929 static rtx
5930 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5931 const_tree fntype)
5932 {
5933 /* We aren't passed a decl, so we can't check that a call is local.
5934 However, it isn't clear that that would be a win anyway, since it
5935 might limit some tail-calling opportunities. */
5936 enum arm_pcs pcs_variant;
5937 int unsignedp ATTRIBUTE_UNUSED;
5938
5939 if (fntype)
5940 {
5941 const_tree fndecl = NULL_TREE;
5942
5943 if (TREE_CODE (fntype) == FUNCTION_DECL)
5944 {
5945 fndecl = fntype;
5946 fntype = TREE_TYPE (fntype);
5947 }
5948
5949 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5950 }
5951 else
5952 pcs_variant = arm_pcs_default;
5953
5954 /* Promote integer types. */
5955 if (type && INTEGRAL_TYPE_P (type))
5956 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5957
5958 if (pcs_variant != ARM_PCS_AAPCS)
5959 {
5960 int i;
5961
5962 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5963 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5964 type))
5965 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5966 mode, type);
5967 }
5968
5969 /* Promote small structs returned in a register to full-word size
5970 for big-endian AAPCS. */
5971 if (type && arm_return_in_msb (type))
5972 {
5973 HOST_WIDE_INT size = int_size_in_bytes (type);
5974 if (size % UNITS_PER_WORD != 0)
5975 {
5976 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5977 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5978 }
5979 }
5980
5981 return gen_rtx_REG (mode, R0_REGNUM);
5982 }
5983
5984 static rtx
5985 aapcs_libcall_value (machine_mode mode)
5986 {
5987 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5988 && GET_MODE_SIZE (mode) <= 4)
5989 mode = SImode;
5990
5991 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5992 }
5993
5994 /* Lay out a function argument using the AAPCS rules. The rule
5995 numbers referred to here are those in the AAPCS. */
5996 static void
5997 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5998 const_tree type, bool named)
5999 {
6000 int nregs, nregs2;
6001 int ncrn;
6002
6003 /* We only need to do this once per argument. */
6004 if (pcum->aapcs_arg_processed)
6005 return;
6006
6007 pcum->aapcs_arg_processed = true;
6008
6009 /* Special case: if named is false then we are handling an incoming
6010 anonymous argument which is on the stack. */
6011 if (!named)
6012 return;
6013
6014 /* Is this a potential co-processor register candidate? */
6015 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6016 {
6017 int slot = aapcs_select_call_coproc (pcum, mode, type);
6018 pcum->aapcs_cprc_slot = slot;
6019
6020 /* We don't have to apply any of the rules from part B of the
6021 preparation phase; these are handled elsewhere in the
6022 compiler. */
6023
6024 if (slot >= 0)
6025 {
6026 /* A Co-processor register candidate goes either in its own
6027 class of registers or on the stack. */
6028 if (!pcum->aapcs_cprc_failed[slot])
6029 {
6030 /* C1.cp - Try to allocate the argument to co-processor
6031 registers. */
6032 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6033 return;
6034
6035 /* C2.cp - Put the argument on the stack and note that we
6036 can't assign any more candidates in this slot. We also
6037 need to note that we have allocated stack space, so that
6038 we won't later try to split a non-cprc candidate between
6039 core registers and the stack. */
6040 pcum->aapcs_cprc_failed[slot] = true;
6041 pcum->can_split = false;
6042 }
6043
6044 /* We didn't get a register, so this argument goes on the
6045 stack. */
6046 gcc_assert (pcum->can_split == false);
6047 return;
6048 }
6049 }
6050
6051 /* C3 - For double-word aligned arguments, round the NCRN up to the
6052 next even number. */
6053 ncrn = pcum->aapcs_ncrn;
6054 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6055 ncrn++;
6056
6057 nregs = ARM_NUM_REGS2 (mode, type);
6058
6059 /* Sigh, this test should really assert that nregs > 0, but a GCC
6060 extension allows empty structs and then gives them empty size; it
6061 then allows such a structure to be passed by value. For some of
6062 the code below we have to pretend that such an argument has
6063 non-zero size so that we 'locate' it correctly either in
6064 registers or on the stack. */
6065 gcc_assert (nregs >= 0);
6066
6067 nregs2 = nregs ? nregs : 1;
6068
6069 /* C4 - Argument fits entirely in core registers. */
6070 if (ncrn + nregs2 <= NUM_ARG_REGS)
6071 {
6072 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6073 pcum->aapcs_next_ncrn = ncrn + nregs;
6074 return;
6075 }
6076
6077 /* C5 - Some core registers left and there are no arguments already
6078 on the stack: split this argument between the remaining core
6079 registers and the stack. */
6080 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6081 {
6082 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6083 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6084 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6085 return;
6086 }
6087
6088 /* C6 - NCRN is set to 4. */
6089 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6090
6091 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6092 return;
6093 }
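
/* A worked example of rules C3-C5 for the base (soft-float) variant,
   using the hypothetical prototype f (int a, double b, int c):

     a: NCRN 0, one word  -> r0; NCRN becomes 1.
     b: needs doubleword alignment, so C3 rounds NCRN up to 2; the two
        words go in r2/r3 and NCRN becomes 4.
     c: no core registers are left -> C6/C7 place it on the stack.

   For a 16-byte structure of ints arriving at NCRN 2, C5 applies instead:
   r2/r3 take the first 8 bytes and aapcs_partial records the 8 bytes that
   continue on the stack.  */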
6094
6095 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6096 for a call to a function whose data type is FNTYPE.
6097 For a library call, FNTYPE is NULL. */
6098 void
6099 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6100 rtx libname,
6101 tree fndecl ATTRIBUTE_UNUSED)
6102 {
6103 /* Determine the calling convention (PCS variant) to use. */
6104 if (fntype)
6105 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6106 else
6107 pcum->pcs_variant = arm_pcs_default;
6108
6109 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6110 {
6111 if (arm_libcall_uses_aapcs_base (libname))
6112 pcum->pcs_variant = ARM_PCS_AAPCS;
6113
6114 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6115 pcum->aapcs_reg = NULL_RTX;
6116 pcum->aapcs_partial = 0;
6117 pcum->aapcs_arg_processed = false;
6118 pcum->aapcs_cprc_slot = -1;
6119 pcum->can_split = true;
6120
6121 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6122 {
6123 int i;
6124
6125 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6126 {
6127 pcum->aapcs_cprc_failed[i] = false;
6128 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6129 }
6130 }
6131 return;
6132 }
6133
6134 /* Legacy ABIs */
6135
6136 /* On the ARM, the offset starts at 0. */
6137 pcum->nregs = 0;
6138 pcum->iwmmxt_nregs = 0;
6139 pcum->can_split = true;
6140
6141 /* Varargs vectors are treated the same as long long.
6142 named_count avoids having to change the way arm handles 'named' */
6143 pcum->named_count = 0;
6144 pcum->nargs = 0;
6145
6146 if (TARGET_REALLY_IWMMXT && fntype)
6147 {
6148 tree fn_arg;
6149
6150 for (fn_arg = TYPE_ARG_TYPES (fntype);
6151 fn_arg;
6152 fn_arg = TREE_CHAIN (fn_arg))
6153 pcum->named_count += 1;
6154
6155 if (! pcum->named_count)
6156 pcum->named_count = INT_MAX;
6157 }
6158 }
6159
6160 /* Return true if mode/type need doubleword alignment. */
6161 static bool
6162 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6163 {
6164 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
6165 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
6166 }
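
/* For example, on an AAPCS target (where 64-bit quantities are 64-bit
   aligned) DImode and DFmode arguments, and any type declared with
   __attribute__ ((aligned (8))), exceed PARM_BOUNDARY and so must start
   in an even-numbered core register or at a doubleword-aligned stack
   slot.  */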
6167
6168
6169 /* Determine where to put an argument to a function.
6170 Value is zero to push the argument on the stack,
6171 or a hard register in which to store the argument.
6172
6173 MODE is the argument's machine mode.
6174 TYPE is the data type of the argument (as a tree).
6175 This is null for libcalls where that information may
6176 not be available.
6177 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6178 the preceding args and about the function being called.
6179 NAMED is nonzero if this argument is a named parameter
6180 (otherwise it is an extra parameter matching an ellipsis).
6181
6182 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6183 other arguments are passed on the stack. If (NAMED == 0) (which happens
6184 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6185 defined), say it is passed on the stack (function_prologue will
6186 indeed make it pass on the stack if necessary). */
6187
6188 static rtx
6189 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6190 const_tree type, bool named)
6191 {
6192 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6193 int nregs;
6194
6195 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6196 a call insn (op3 of a call_value insn). */
6197 if (mode == VOIDmode)
6198 return const0_rtx;
6199
6200 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6201 {
6202 aapcs_layout_arg (pcum, mode, type, named);
6203 return pcum->aapcs_reg;
6204 }
6205
6206 /* Varargs vectors are treated the same as long long.
6207 named_count avoids having to change the way arm handles 'named' */
6208 if (TARGET_IWMMXT_ABI
6209 && arm_vector_mode_supported_p (mode)
6210 && pcum->named_count > pcum->nargs + 1)
6211 {
6212 if (pcum->iwmmxt_nregs <= 9)
6213 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6214 else
6215 {
6216 pcum->can_split = false;
6217 return NULL_RTX;
6218 }
6219 }
6220
6221 /* Put doubleword aligned quantities in even register pairs. */
6222 if (pcum->nregs & 1
6223 && ARM_DOUBLEWORD_ALIGN
6224 && arm_needs_doubleword_align (mode, type))
6225 pcum->nregs++;
6226
6227 /* Only allow splitting an arg between regs and memory if all preceding
6228 args were allocated to regs. For args passed by reference we only count
6229 the reference pointer. */
6230 if (pcum->can_split)
6231 nregs = 1;
6232 else
6233 nregs = ARM_NUM_REGS2 (mode, type);
6234
6235 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6236 return NULL_RTX;
6237
6238 return gen_rtx_REG (mode, pcum->nregs);
6239 }
6240
6241 static unsigned int
6242 arm_function_arg_boundary (machine_mode mode, const_tree type)
6243 {
6244 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6245 ? DOUBLEWORD_ALIGNMENT
6246 : PARM_BOUNDARY);
6247 }
6248
6249 static int
6250 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6251 tree type, bool named)
6252 {
6253 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6254 int nregs = pcum->nregs;
6255
6256 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6257 {
6258 aapcs_layout_arg (pcum, mode, type, named);
6259 return pcum->aapcs_partial;
6260 }
6261
6262 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6263 return 0;
6264
6265 if (NUM_ARG_REGS > nregs
6266 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6267 && pcum->can_split)
6268 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6269
6270 return 0;
6271 }
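
/* For instance, if three argument words already occupy r0-r2 and the next
   argument needs two words, only r3 is left: provided every preceding
   argument was passed in registers (pcum->can_split), this hook reports
   4 bytes in registers and the remaining word goes on the stack.  */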
6272
6273 /* Update the data in PCUM to advance over an argument
6274 of mode MODE and data type TYPE.
6275 (TYPE is null for libcalls where that information may not be available.) */
6276
6277 static void
6278 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6279 const_tree type, bool named)
6280 {
6281 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6282
6283 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6284 {
6285 aapcs_layout_arg (pcum, mode, type, named);
6286
6287 if (pcum->aapcs_cprc_slot >= 0)
6288 {
6289 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6290 type);
6291 pcum->aapcs_cprc_slot = -1;
6292 }
6293
6294 /* Generic stuff. */
6295 pcum->aapcs_arg_processed = false;
6296 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6297 pcum->aapcs_reg = NULL_RTX;
6298 pcum->aapcs_partial = 0;
6299 }
6300 else
6301 {
6302 pcum->nargs += 1;
6303 if (arm_vector_mode_supported_p (mode)
6304 && pcum->named_count > pcum->nargs
6305 && TARGET_IWMMXT_ABI)
6306 pcum->iwmmxt_nregs += 1;
6307 else
6308 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6309 }
6310 }
6311
6312 /* Variable sized types are passed by reference. This is a GCC
6313 extension to the ARM ABI. */
6314
6315 static bool
6316 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6317 machine_mode mode ATTRIBUTE_UNUSED,
6318 const_tree type, bool named ATTRIBUTE_UNUSED)
6319 {
6320 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6321 }
6322 \f
6323 /* Encode the current state of the #pragma [no_]long_calls. */
6324 typedef enum
6325 {
6326 OFF, /* No #pragma [no_]long_calls is in effect. */
6327 LONG, /* #pragma long_calls is in effect. */
6328 SHORT /* #pragma no_long_calls is in effect. */
6329 } arm_pragma_enum;
6330
6331 static arm_pragma_enum arm_pragma_long_calls = OFF;
6332
6333 void
6334 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6335 {
6336 arm_pragma_long_calls = LONG;
6337 }
6338
6339 void
6340 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6341 {
6342 arm_pragma_long_calls = SHORT;
6343 }
6344
6345 void
6346 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6347 {
6348 arm_pragma_long_calls = OFF;
6349 }
6350 \f
6351 /* Handle an attribute requiring a FUNCTION_DECL;
6352 arguments as in struct attribute_spec.handler. */
6353 static tree
6354 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6355 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6356 {
6357 if (TREE_CODE (*node) != FUNCTION_DECL)
6358 {
6359 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6360 name);
6361 *no_add_attrs = true;
6362 }
6363
6364 return NULL_TREE;
6365 }
6366
6367 /* Handle an "interrupt" or "isr" attribute;
6368 arguments as in struct attribute_spec.handler. */
6369 static tree
6370 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6371 bool *no_add_attrs)
6372 {
6373 if (DECL_P (*node))
6374 {
6375 if (TREE_CODE (*node) != FUNCTION_DECL)
6376 {
6377 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6378 name);
6379 *no_add_attrs = true;
6380 }
6381 /* FIXME: the argument if any is checked for type attributes;
6382 should it be checked for decl ones? */
6383 }
6384 else
6385 {
6386 if (TREE_CODE (*node) == FUNCTION_TYPE
6387 || TREE_CODE (*node) == METHOD_TYPE)
6388 {
6389 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6390 {
6391 warning (OPT_Wattributes, "%qE attribute ignored",
6392 name);
6393 *no_add_attrs = true;
6394 }
6395 }
6396 else if (TREE_CODE (*node) == POINTER_TYPE
6397 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6398 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6399 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6400 {
6401 *node = build_variant_type_copy (*node);
6402 TREE_TYPE (*node) = build_type_attribute_variant
6403 (TREE_TYPE (*node),
6404 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6405 *no_add_attrs = true;
6406 }
6407 else
6408 {
6409 /* Possibly pass this attribute on from the type to a decl. */
6410 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6411 | (int) ATTR_FLAG_FUNCTION_NEXT
6412 | (int) ATTR_FLAG_ARRAY_NEXT))
6413 {
6414 *no_add_attrs = true;
6415 return tree_cons (name, args, NULL_TREE);
6416 }
6417 else
6418 {
6419 warning (OPT_Wattributes, "%qE attribute ignored",
6420 name);
6421 }
6422 }
6423 }
6424
6425 return NULL_TREE;
6426 }
6427
6428 /* Handle a "pcs" attribute; arguments as in struct
6429 attribute_spec.handler. */
6430 static tree
6431 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6432 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6433 {
6434 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6435 {
6436 warning (OPT_Wattributes, "%qE attribute ignored", name);
6437 *no_add_attrs = true;
6438 }
6439 return NULL_TREE;
6440 }
6441
6442 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6443 /* Handle the "notshared" attribute. This attribute is another way of
6444 requesting hidden visibility. ARM's compiler supports
6445 "__declspec(notshared)"; we support the same thing via an
6446 attribute. */
6447
6448 static tree
6449 arm_handle_notshared_attribute (tree *node,
6450 tree name ATTRIBUTE_UNUSED,
6451 tree args ATTRIBUTE_UNUSED,
6452 int flags ATTRIBUTE_UNUSED,
6453 bool *no_add_attrs)
6454 {
6455 tree decl = TYPE_NAME (*node);
6456
6457 if (decl)
6458 {
6459 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6460 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6461 *no_add_attrs = false;
6462 }
6463 return NULL_TREE;
6464 }
6465 #endif
6466
6467 /* Return 0 if the attributes for two types are incompatible, 1 if they
6468 are compatible, and 2 if they are nearly compatible (which causes a
6469 warning to be generated). */
6470 static int
6471 arm_comp_type_attributes (const_tree type1, const_tree type2)
6472 {
6473 int l1, l2, s1, s2;
6474
6475 /* Check for mismatch of non-default calling convention. */
6476 if (TREE_CODE (type1) != FUNCTION_TYPE)
6477 return 1;
6478
6479 /* Check for mismatched call attributes. */
6480 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6481 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6482 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6483 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6484
6485 /* Only bother to check if an attribute is defined. */
6486 if (l1 | l2 | s1 | s2)
6487 {
6488 /* If one type has an attribute, the other must have the same attribute. */
6489 if ((l1 != l2) || (s1 != s2))
6490 return 0;
6491
6492 /* Disallow mixed attributes. */
6493 if ((l1 & s2) || (l2 & s1))
6494 return 0;
6495 }
6496
6497 /* Check for mismatched ISR attribute. */
6498 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6499 if (! l1)
6500 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6501 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6502 if (! l2)
6503 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6504 if (l1 != l2)
6505 return 0;
6506
6507 return 1;
6508 }
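
/* As an illustration (hypothetical declarations, not from any header):

       typedef void lfn (void) __attribute__ ((long_call));
       typedef void sfn (void) __attribute__ ((short_call));

   Comparing lfn with sfn, or either of them with a plain function type,
   yields 0 (incompatible) here; two plain function types, or two types
   with matching call attributes, yield 1.  */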
6509
6510 /* Assign default attributes to a newly defined type. This is used to
6511 set short_call/long_call attributes for function types of
6512 functions defined inside corresponding #pragma scopes. */
6513 static void
6514 arm_set_default_type_attributes (tree type)
6515 {
6516 /* Add __attribute__ ((long_call)) to all functions when
6517 inside #pragma long_calls, or __attribute__ ((short_call))
6518 when inside #pragma no_long_calls. */
6519 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6520 {
6521 tree type_attr_list, attr_name;
6522 type_attr_list = TYPE_ATTRIBUTES (type);
6523
6524 if (arm_pragma_long_calls == LONG)
6525 attr_name = get_identifier ("long_call");
6526 else if (arm_pragma_long_calls == SHORT)
6527 attr_name = get_identifier ("short_call");
6528 else
6529 return;
6530
6531 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6532 TYPE_ATTRIBUTES (type) = type_attr_list;
6533 }
6534 }
6535 \f
6536 /* Return true if DECL is known to be linked into section SECTION. */
6537
6538 static bool
6539 arm_function_in_section_p (tree decl, section *section)
6540 {
6541 /* We can only be certain about the prevailing symbol definition. */
6542 if (!decl_binds_to_current_def_p (decl))
6543 return false;
6544
6545 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6546 if (!DECL_SECTION_NAME (decl))
6547 {
6548 /* Make sure that we will not create a unique section for DECL. */
6549 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6550 return false;
6551 }
6552
6553 return function_section (decl) == section;
6554 }
6555
6556 /* Return nonzero if a 32-bit "long_call" should be generated for
6557 a call from the current function to DECL. We generate a long_call
6558 if the function:
6559
6560 a. has an __attribute__ ((long_call))
6561 or b. is within the scope of a #pragma long_calls
6562 or c. the -mlong-calls command line switch has been specified
6563
6564 However we do not generate a long call if the function:
6565
6566 d. has an __attribute__ ((short_call))
6567 or e. is inside the scope of a #pragma no_long_calls
6568 or f. is defined in the same section as the current function. */
6569
6570 bool
6571 arm_is_long_call_p (tree decl)
6572 {
6573 tree attrs;
6574
6575 if (!decl)
6576 return TARGET_LONG_CALLS;
6577
6578 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6579 if (lookup_attribute ("short_call", attrs))
6580 return false;
6581
6582 /* For "f", be conservative, and only cater for cases in which the
6583 whole of the current function is placed in the same section. */
6584 if (!flag_reorder_blocks_and_partition
6585 && TREE_CODE (decl) == FUNCTION_DECL
6586 && arm_function_in_section_p (decl, current_function_section ()))
6587 return false;
6588
6589 if (lookup_attribute ("long_call", attrs))
6590 return true;
6591
6592 return TARGET_LONG_CALLS;
6593 }
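
/* The three ways of requesting a long call described above look like this
   in user code (a sketch):

       void far_func (void) __attribute__ ((long_call));   <- case a.

       #pragma long_calls
       void also_far (void);                                <- case b.
       #pragma long_calls_off

   and the -mlong-calls command-line option covers case c.  An
   __attribute__ ((short_call)) or a #pragma no_long_calls region
   overrides all three for the declarations it covers.  */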
6594
6595 /* Return nonzero if it is ok to make a tail-call to DECL. */
6596 static bool
6597 arm_function_ok_for_sibcall (tree decl, tree exp)
6598 {
6599 unsigned long func_type;
6600
6601 if (cfun->machine->sibcall_blocked)
6602 return false;
6603
6604 /* Never tailcall something if we are generating code for Thumb-1. */
6605 if (TARGET_THUMB1)
6606 return false;
6607
6608 /* The PIC register is live on entry to VxWorks PLT entries, so we
6609 must make the call before restoring the PIC register. */
6610 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6611 return false;
6612
6613 /* If we are interworking and the function is not declared static
6614 then we can't tail-call it unless we know that it exists in this
6615 compilation unit (since it might be a Thumb routine). */
6616 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6617 && !TREE_ASM_WRITTEN (decl))
6618 return false;
6619
6620 func_type = arm_current_func_type ();
6621 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6622 if (IS_INTERRUPT (func_type))
6623 return false;
6624
6625 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6626 {
6627 /* Check that the return value locations are the same. For
6628 example that we aren't returning a value from the sibling in
6629 a VFP register but then need to transfer it to a core
6630 register. */
6631 rtx a, b;
6632
6633 a = arm_function_value (TREE_TYPE (exp), decl, false);
6634 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6635 cfun->decl, false);
6636 if (!rtx_equal_p (a, b))
6637 return false;
6638 }
6639
6640 /* Never tailcall if function may be called with a misaligned SP. */
6641 if (IS_STACKALIGN (func_type))
6642 return false;
6643
6644 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6645 references should become a NOP. Don't convert such calls into
6646 sibling calls. */
6647 if (TARGET_AAPCS_BASED
6648 && arm_abi == ARM_ABI_AAPCS
6649 && decl
6650 && DECL_WEAK (decl))
6651 return false;
6652
6653 /* Everything else is ok. */
6654 return true;
6655 }
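
/* As an example of the return-location check above: if the caller returns
   a float in r0 (base AAPCS) but the callee's type carries
   __attribute__ ((pcs ("aapcs-vfp"))) and would return its value in s0,
   the two locations differ, so the tail call is refused; the result would
   otherwise need a move after the call, which a sibcall cannot perform.  */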
6656
6657 \f
6658 /* Addressing mode support functions. */
6659
6660 /* Return nonzero if X is a legitimate immediate operand when compiling
6661 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6662 int
6663 legitimate_pic_operand_p (rtx x)
6664 {
6665 if (GET_CODE (x) == SYMBOL_REF
6666 || (GET_CODE (x) == CONST
6667 && GET_CODE (XEXP (x, 0)) == PLUS
6668 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6669 return 0;
6670
6671 return 1;
6672 }
6673
6674 /* Record that the current function needs a PIC register. Initialize
6675 cfun->machine->pic_reg if we have not already done so. */
6676
6677 static void
6678 require_pic_register (void)
6679 {
6680 /* A lot of the logic here is made obscure by the fact that this
6681 routine gets called as part of the rtx cost estimation process.
6682 We don't want those calls to affect any assumptions about the real
6683 function; and further, we can't call entry_of_function() until we
6684 start the real expansion process. */
6685 if (!crtl->uses_pic_offset_table)
6686 {
6687 gcc_assert (can_create_pseudo_p ());
6688 if (arm_pic_register != INVALID_REGNUM
6689 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6690 {
6691 if (!cfun->machine->pic_reg)
6692 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6693
6694 /* Play games to avoid marking the function as needing pic
6695 if we are being called as part of the cost-estimation
6696 process. */
6697 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6698 crtl->uses_pic_offset_table = 1;
6699 }
6700 else
6701 {
6702 rtx_insn *seq, *insn;
6703
6704 if (!cfun->machine->pic_reg)
6705 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6706
6707 /* Play games to avoid marking the function as needing pic
6708 if we are being called as part of the cost-estimation
6709 process. */
6710 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6711 {
6712 crtl->uses_pic_offset_table = 1;
6713 start_sequence ();
6714
6715 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6716 && arm_pic_register > LAST_LO_REGNUM)
6717 emit_move_insn (cfun->machine->pic_reg,
6718 gen_rtx_REG (Pmode, arm_pic_register));
6719 else
6720 arm_load_pic_register (0UL);
6721
6722 seq = get_insns ();
6723 end_sequence ();
6724
6725 for (insn = seq; insn; insn = NEXT_INSN (insn))
6726 if (INSN_P (insn))
6727 INSN_LOCATION (insn) = prologue_location;
6728
6729 /* We can be called during expansion of PHI nodes, where
6730 we can't yet emit instructions directly in the final
6731 insn stream. Queue the insns on the entry edge, they will
6732 be committed after everything else is expanded. */
6733 insert_insn_on_edge (seq,
6734 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6735 }
6736 }
6737 }
6738 }
6739
6740 rtx
6741 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6742 {
6743 if (GET_CODE (orig) == SYMBOL_REF
6744 || GET_CODE (orig) == LABEL_REF)
6745 {
6746 rtx insn;
6747
6748 if (reg == 0)
6749 {
6750 gcc_assert (can_create_pseudo_p ());
6751 reg = gen_reg_rtx (Pmode);
6752 }
6753
6754 /* VxWorks does not impose a fixed gap between segments; the run-time
6755 gap can be different from the object-file gap. We therefore can't
6756 use GOTOFF unless we are absolutely sure that the symbol is in the
6757 same segment as the GOT. Unfortunately, the flexibility of linker
6758 scripts means that we can't be sure of that in general, so assume
6759 that GOTOFF is never valid on VxWorks. */
6760 if ((GET_CODE (orig) == LABEL_REF
6761 || (GET_CODE (orig) == SYMBOL_REF &&
6762 SYMBOL_REF_LOCAL_P (orig)))
6763 && NEED_GOT_RELOC
6764 && arm_pic_data_is_text_relative)
6765 insn = arm_pic_static_addr (orig, reg);
6766 else
6767 {
6768 rtx pat;
6769 rtx mem;
6770
6771 /* If this function doesn't have a pic register, create one now. */
6772 require_pic_register ();
6773
6774 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6775
6776 /* Make the MEM as close to a constant as possible. */
6777 mem = SET_SRC (pat);
6778 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6779 MEM_READONLY_P (mem) = 1;
6780 MEM_NOTRAP_P (mem) = 1;
6781
6782 insn = emit_insn (pat);
6783 }
6784
6785 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6786 by loop. */
6787 set_unique_reg_note (insn, REG_EQUAL, orig);
6788
6789 return reg;
6790 }
6791 else if (GET_CODE (orig) == CONST)
6792 {
6793 rtx base, offset;
6794
6795 if (GET_CODE (XEXP (orig, 0)) == PLUS
6796 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6797 return orig;
6798
6799 /* Handle the case where we have: const (UNSPEC_TLS). */
6800 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6801 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6802 return orig;
6803
6804 /* Handle the case where we have:
6805 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6806 CONST_INT. */
6807 if (GET_CODE (XEXP (orig, 0)) == PLUS
6808 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6809 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6810 {
6811 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6812 return orig;
6813 }
6814
6815 if (reg == 0)
6816 {
6817 gcc_assert (can_create_pseudo_p ());
6818 reg = gen_reg_rtx (Pmode);
6819 }
6820
6821 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6822
6823 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6824 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6825 base == reg ? 0 : reg);
6826
6827 if (CONST_INT_P (offset))
6828 {
6829 /* The base register doesn't really matter, we only want to
6830 test the index for the appropriate mode. */
6831 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6832 {
6833 gcc_assert (can_create_pseudo_p ());
6834 offset = force_reg (Pmode, offset);
6835 }
6836
6837 if (CONST_INT_P (offset))
6838 return plus_constant (Pmode, base, INTVAL (offset));
6839 }
6840
6841 if (GET_MODE_SIZE (mode) > 4
6842 && (GET_MODE_CLASS (mode) == MODE_INT
6843 || TARGET_SOFT_FLOAT))
6844 {
6845 emit_insn (gen_addsi3 (reg, base, offset));
6846 return reg;
6847 }
6848
6849 return gen_rtx_PLUS (Pmode, base, offset);
6850 }
6851
6852 return orig;
6853 }
6854
6855
6856 /* Find a spare register to use during the prolog of a function. */
6857
6858 static int
6859 thumb_find_work_register (unsigned long pushed_regs_mask)
6860 {
6861 int reg;
6862
6863 /* Check the argument registers first as these are call-used. The
6864 register allocation order means that sometimes r3 might be used
6865 but earlier argument registers might not, so check them all. */
6866 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6867 if (!df_regs_ever_live_p (reg))
6868 return reg;
6869
6870 /* Before going on to check the call-saved registers we can try a couple
6871 more ways of deducing that r3 is available. The first is when we are
6872 pushing anonymous arguments onto the stack and we have less than 4
6873 registers worth of fixed arguments(*). In this case r3 will be part of
6874 the variable argument list and so we can be sure that it will be
6875 pushed right at the start of the function. Hence it will be available
6876 for the rest of the prologue.
6877 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6878 if (cfun->machine->uses_anonymous_args
6879 && crtl->args.pretend_args_size > 0)
6880 return LAST_ARG_REGNUM;
6881
6882 /* The other case is when we have fixed arguments but less than 4 registers
6883 worth. In this case r3 might be used in the body of the function, but
6884 it is not being used to convey an argument into the function. In theory
6885 we could just check crtl->args.size to see how many bytes are
6886 being passed in argument registers, but it seems that it is unreliable.
6887 Sometimes it will have the value 0 when in fact arguments are being
6888 passed. (See testcase execute/20021111-1.c for an example). So we also
6889 check the args_info.nregs field. The problem with this field is
6890 that it makes no allowances for arguments that are passed to the
6891 function but which are not used. Hence we could miss an opportunity
6892 when a function has an unused argument in r3. But it is better to be
6893 safe than to be sorry. */
6894 if (! cfun->machine->uses_anonymous_args
6895 && crtl->args.size >= 0
6896 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6897 && (TARGET_AAPCS_BASED
6898 ? crtl->args.info.aapcs_ncrn < 4
6899 : crtl->args.info.nregs < 4))
6900 return LAST_ARG_REGNUM;
6901
6902 /* Otherwise look for a call-saved register that is going to be pushed. */
6903 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6904 if (pushed_regs_mask & (1 << reg))
6905 return reg;
6906
6907 if (TARGET_THUMB2)
6908 {
6909 /* Thumb-2 can use high regs. */
6910 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6911 if (pushed_regs_mask & (1 << reg))
6912 return reg;
6913 }
6914 /* Something went wrong - thumb_compute_save_reg_mask()
6915 should have arranged for a suitable register to be pushed. */
6916 gcc_unreachable ();
6917 }
6918
6919 static GTY(()) int pic_labelno;
6920
6921 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6922 low register. */
6923
6924 void
6925 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6926 {
6927 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6928
6929 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6930 return;
6931
6932 gcc_assert (flag_pic);
6933
6934 pic_reg = cfun->machine->pic_reg;
6935 if (TARGET_VXWORKS_RTP)
6936 {
6937 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6938 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6939 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6940
6941 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6942
6943 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6944 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6945 }
6946 else
6947 {
6948 /* We use an UNSPEC rather than a LABEL_REF because this label
6949 never appears in the code stream. */
6950
6951 labelno = GEN_INT (pic_labelno++);
6952 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6953 l1 = gen_rtx_CONST (VOIDmode, l1);
6954
6955 /* On the ARM the PC register contains 'dot + 8' at the time of the
6956 addition, on the Thumb it is 'dot + 4'. */
6957 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6958 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6959 UNSPEC_GOTSYM_OFF);
6960 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6961
6962 if (TARGET_32BIT)
6963 {
6964 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6965 }
6966 else /* TARGET_THUMB1 */
6967 {
6968 if (arm_pic_register != INVALID_REGNUM
6969 && REGNO (pic_reg) > LAST_LO_REGNUM)
6970 {
6971 /* We will have pushed the pic register, so we should always be
6972 able to find a work register. */
6973 pic_tmp = gen_rtx_REG (SImode,
6974 thumb_find_work_register (saved_regs));
6975 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6976 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6977 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6978 }
6979 else if (arm_pic_register != INVALID_REGNUM
6980 && arm_pic_register > LAST_LO_REGNUM
6981 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6982 {
6983 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6984 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6985 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6986 }
6987 else
6988 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6989 }
6990 }
6991
6992 /* Need to emit this whether or not we obey regdecls,
6993 since setjmp/longjmp can cause life info to screw up. */
6994 emit_use (pic_reg);
6995 }
6996
6997 /* Generate code to load the address of a static var when flag_pic is set. */
6998 static rtx
6999 arm_pic_static_addr (rtx orig, rtx reg)
7000 {
7001 rtx l1, labelno, offset_rtx, insn;
7002
7003 gcc_assert (flag_pic);
7004
7005 /* We use an UNSPEC rather than a LABEL_REF because this label
7006 never appears in the code stream. */
7007 labelno = GEN_INT (pic_labelno++);
7008 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7009 l1 = gen_rtx_CONST (VOIDmode, l1);
7010
7011 /* On the ARM the PC register contains 'dot + 8' at the time of the
7012 addition, on the Thumb it is 'dot + 4'. */
7013 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7014 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7015 UNSPEC_SYMBOL_OFFSET);
7016 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7017
7018 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7019 return insn;
7020 }
7021
7022 /* Return nonzero if X is valid as an ARM state addressing register. */
7023 static int
7024 arm_address_register_rtx_p (rtx x, int strict_p)
7025 {
7026 int regno;
7027
7028 if (!REG_P (x))
7029 return 0;
7030
7031 regno = REGNO (x);
7032
7033 if (strict_p)
7034 return ARM_REGNO_OK_FOR_BASE_P (regno);
7035
7036 return (regno <= LAST_ARM_REGNUM
7037 || regno >= FIRST_PSEUDO_REGISTER
7038 || regno == FRAME_POINTER_REGNUM
7039 || regno == ARG_POINTER_REGNUM);
7040 }
7041
7042 /* Return TRUE if this rtx is the difference of a symbol and a label,
7043 and will reduce to a PC-relative relocation in the object file.
7044 Expressions like this can be left alone when generating PIC, rather
7045 than forced through the GOT. */
7046 static int
7047 pcrel_constant_p (rtx x)
7048 {
7049 if (GET_CODE (x) == MINUS)
7050 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7051
7052 return FALSE;
7053 }
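/* For example, an expression of the form
       (minus (symbol_ref "sym") (label_ref L))
   satisfies the test above and can be emitted as a plain PC-relative
   relocation, so it does not need to go through the GOT even when
   generating PIC.  */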
7054
7055 /* Return true if X will surely end up in an index register after the next
7056 splitting pass. */
7057 static bool
7058 will_be_in_index_register (const_rtx x)
7059 {
7060 /* arm.md: calculate_pic_address will split this into a register. */
7061 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7062 }
7063
7064 /* Return nonzero if X is a valid ARM state address operand. */
7065 int
7066 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7067 int strict_p)
7068 {
7069 bool use_ldrd;
7070 enum rtx_code code = GET_CODE (x);
7071
7072 if (arm_address_register_rtx_p (x, strict_p))
7073 return 1;
7074
7075 use_ldrd = (TARGET_LDRD
7076 && (mode == DImode
7077 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7078
7079 if (code == POST_INC || code == PRE_DEC
7080 || ((code == PRE_INC || code == POST_DEC)
7081 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7082 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7083
7084 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7085 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7086 && GET_CODE (XEXP (x, 1)) == PLUS
7087 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7088 {
7089 rtx addend = XEXP (XEXP (x, 1), 1);
7090
7091 /* Don't allow ldrd post increment by register because it's hard
7092 	 to fix up invalid register choices.  */
7093 if (use_ldrd
7094 && GET_CODE (x) == POST_MODIFY
7095 && REG_P (addend))
7096 return 0;
7097
7098 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7099 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7100 }
7101
7102 /* After reload constants split into minipools will have addresses
7103 from a LABEL_REF. */
7104 else if (reload_completed
7105 && (code == LABEL_REF
7106 || (code == CONST
7107 && GET_CODE (XEXP (x, 0)) == PLUS
7108 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7109 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7110 return 1;
7111
7112 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7113 return 0;
7114
7115 else if (code == PLUS)
7116 {
7117 rtx xop0 = XEXP (x, 0);
7118 rtx xop1 = XEXP (x, 1);
7119
7120 return ((arm_address_register_rtx_p (xop0, strict_p)
7121 && ((CONST_INT_P (xop1)
7122 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7123 || (!strict_p && will_be_in_index_register (xop1))))
7124 || (arm_address_register_rtx_p (xop1, strict_p)
7125 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7126 }
7127
7128 #if 0
7129 /* Reload currently can't handle MINUS, so disable this for now */
7130 else if (GET_CODE (x) == MINUS)
7131 {
7132 rtx xop0 = XEXP (x, 0);
7133 rtx xop1 = XEXP (x, 1);
7134
7135 return (arm_address_register_rtx_p (xop0, strict_p)
7136 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7137 }
7138 #endif
7139
7140 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7141 && code == SYMBOL_REF
7142 && CONSTANT_POOL_ADDRESS_P (x)
7143 && ! (flag_pic
7144 && symbol_mentioned_p (get_pool_constant (x))
7145 && ! pcrel_constant_p (get_pool_constant (x))))
7146 return 1;
7147
7148 return 0;
7149 }
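/* As a rough illustration (not an exhaustive list), the function above
   accepts ARM-state addresses such as:
       (reg r0)                                        base register
       (post_inc (reg r0))                             auto-increment
       (plus (reg r0) (const_int 8))                   base + immediate offset
       (plus (reg r0) (mult (reg r1) (const_int 4)))   base + scaled index
   subject to the per-mode range and alignment checks performed in
   arm_legitimate_index_p.  */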
7150
7151 /* Return nonzero if X is a valid Thumb-2 address operand. */
7152 static int
7153 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7154 {
7155 bool use_ldrd;
7156 enum rtx_code code = GET_CODE (x);
7157
7158 if (arm_address_register_rtx_p (x, strict_p))
7159 return 1;
7160
7161 use_ldrd = (TARGET_LDRD
7162 && (mode == DImode
7163 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7164
7165 if (code == POST_INC || code == PRE_DEC
7166 || ((code == PRE_INC || code == POST_DEC)
7167 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7168 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7169
7170 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7171 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7172 && GET_CODE (XEXP (x, 1)) == PLUS
7173 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7174 {
7175 /* Thumb-2 only has autoincrement by constant. */
7176 rtx addend = XEXP (XEXP (x, 1), 1);
7177 HOST_WIDE_INT offset;
7178
7179 if (!CONST_INT_P (addend))
7180 return 0;
7181
7182       offset = INTVAL (addend);
7183 if (GET_MODE_SIZE (mode) <= 4)
7184 return (offset > -256 && offset < 256);
7185
7186 return (use_ldrd && offset > -1024 && offset < 1024
7187 && (offset & 3) == 0);
7188 }
7189
7190 /* After reload constants split into minipools will have addresses
7191 from a LABEL_REF. */
7192 else if (reload_completed
7193 && (code == LABEL_REF
7194 || (code == CONST
7195 && GET_CODE (XEXP (x, 0)) == PLUS
7196 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7197 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7198 return 1;
7199
7200 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7201 return 0;
7202
7203 else if (code == PLUS)
7204 {
7205 rtx xop0 = XEXP (x, 0);
7206 rtx xop1 = XEXP (x, 1);
7207
7208 return ((arm_address_register_rtx_p (xop0, strict_p)
7209 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7210 || (!strict_p && will_be_in_index_register (xop1))))
7211 || (arm_address_register_rtx_p (xop1, strict_p)
7212 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7213 }
7214
7215   /* Normally we can assign constant values to target registers without
7216      the help of the constant pool.  But there are cases where we have to
7217      use the constant pool, such as:
7218      1) assigning a label to a register;
7219      2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
7220
7221      A constant pool access of the form:
7222      (set (reg r0) (mem (symbol_ref (".LC0"))))
7223      will cause the use of the literal pool (later, in function arm_reorg).
7224      So here we mark such a form as invalid; the compiler will then
7225      adjust it into:
7226      (set (reg r0) (symbol_ref (".LC0")))
7227      (set (reg r0) (mem (reg r0))).
7228      No extra register is required, and (mem (reg r0)) won't cause the
7229      use of literal pools.  */
7230 else if (arm_disable_literal_pool && code == SYMBOL_REF
7231 && CONSTANT_POOL_ADDRESS_P (x))
7232 return 0;
7233
7234 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7235 && code == SYMBOL_REF
7236 && CONSTANT_POOL_ADDRESS_P (x)
7237 && ! (flag_pic
7238 && symbol_mentioned_p (get_pool_constant (x))
7239 && ! pcrel_constant_p (get_pool_constant (x))))
7240 return 1;
7241
7242 return 0;
7243 }
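/* For illustration, under the rules above a Thumb-2 address such as
       (post_modify (reg r0) (plus (reg r0) (const_int 8)))
   is accepted for SImode (constant addend, |offset| < 256), whereas a
   register addend inside a POST_MODIFY is rejected, since Thumb-2 only
   has auto-increment by constant.  */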
7244
7245 /* Return nonzero if INDEX is valid for an address index operand in
7246 ARM state. */
7247 static int
7248 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7249 int strict_p)
7250 {
7251 HOST_WIDE_INT range;
7252 enum rtx_code code = GET_CODE (index);
7253
7254 /* Standard coprocessor addressing modes. */
7255 if (TARGET_HARD_FLOAT
7256 && TARGET_VFP
7257 && (mode == SFmode || mode == DFmode))
7258 return (code == CONST_INT && INTVAL (index) < 1024
7259 && INTVAL (index) > -1024
7260 && (INTVAL (index) & 3) == 0);
7261
7262 /* For quad modes, we restrict the constant offset to be slightly less
7263 than what the instruction format permits. We do this because for
7264 quad mode moves, we will actually decompose them into two separate
7265 double-mode reads or writes. INDEX must therefore be a valid
7266 (double-mode) offset and so should INDEX+8. */
7267 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7268 return (code == CONST_INT
7269 && INTVAL (index) < 1016
7270 && INTVAL (index) > -1024
7271 && (INTVAL (index) & 3) == 0);
7272
7273 /* We have no such constraint on double mode offsets, so we permit the
7274 full range of the instruction format. */
7275 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7276 return (code == CONST_INT
7277 && INTVAL (index) < 1024
7278 && INTVAL (index) > -1024
7279 && (INTVAL (index) & 3) == 0);
7280
7281 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7282 return (code == CONST_INT
7283 && INTVAL (index) < 1024
7284 && INTVAL (index) > -1024
7285 && (INTVAL (index) & 3) == 0);
7286
7287 if (arm_address_register_rtx_p (index, strict_p)
7288 && (GET_MODE_SIZE (mode) <= 4))
7289 return 1;
7290
7291 if (mode == DImode || mode == DFmode)
7292 {
7293 if (code == CONST_INT)
7294 {
7295 HOST_WIDE_INT val = INTVAL (index);
7296
7297 if (TARGET_LDRD)
7298 return val > -256 && val < 256;
7299 else
7300 return val > -4096 && val < 4092;
7301 }
7302
7303 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7304 }
7305
7306 if (GET_MODE_SIZE (mode) <= 4
7307 && ! (arm_arch4
7308 && (mode == HImode
7309 || mode == HFmode
7310 || (mode == QImode && outer == SIGN_EXTEND))))
7311 {
7312 if (code == MULT)
7313 {
7314 rtx xiop0 = XEXP (index, 0);
7315 rtx xiop1 = XEXP (index, 1);
7316
7317 return ((arm_address_register_rtx_p (xiop0, strict_p)
7318 && power_of_two_operand (xiop1, SImode))
7319 || (arm_address_register_rtx_p (xiop1, strict_p)
7320 && power_of_two_operand (xiop0, SImode)));
7321 }
7322 else if (code == LSHIFTRT || code == ASHIFTRT
7323 || code == ASHIFT || code == ROTATERT)
7324 {
7325 rtx op = XEXP (index, 1);
7326
7327 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7328 && CONST_INT_P (op)
7329 && INTVAL (op) > 0
7330 && INTVAL (op) <= 31);
7331 }
7332 }
7333
7334 /* For ARM v4 we may be doing a sign-extend operation during the
7335 load. */
7336 if (arm_arch4)
7337 {
7338 if (mode == HImode
7339 || mode == HFmode
7340 || (outer == SIGN_EXTEND && mode == QImode))
7341 range = 256;
7342 else
7343 range = 4096;
7344 }
7345 else
7346 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7347
7348 return (code == CONST_INT
7349 && INTVAL (index) < range
7350 && INTVAL (index) > -range);
7351 }
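/* A worked example of the final range check above: for an SImode load on
   an ARMv4 or later core, a constant index must satisfy -4096 < val < 4096,
   so (plus (reg r0) (const_int 4095)) is a legitimate address while
   (plus (reg r0) (const_int 4096)) is not; for HImode the range shrinks
   to -256 < val < 256 because the ldrh offset field is smaller.  */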
7352
7353 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
7354    index operand, i.e. 1, 2, 4 or 8.  */
7355 static bool
7356 thumb2_index_mul_operand (rtx op)
7357 {
7358 HOST_WIDE_INT val;
7359
7360 if (!CONST_INT_P (op))
7361 return false;
7362
7363   val = INTVAL (op);
7364 return (val == 1 || val == 2 || val == 4 || val == 8);
7365 }
7366
7367 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7368 static int
7369 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7370 {
7371 enum rtx_code code = GET_CODE (index);
7372
7373 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7374 /* Standard coprocessor addressing modes. */
7375 if (TARGET_HARD_FLOAT
7376 && TARGET_VFP
7377 && (mode == SFmode || mode == DFmode))
7378 return (code == CONST_INT && INTVAL (index) < 1024
7379 	    /* Thumb-2 allows only > -256 index range for its core register
7380 	       load/stores.  Since we allow SF/DF in core registers, we have
7381 	       to use the intersection between -256~4096 (core) and -1024~1024
7382 	       (coprocessor).  */
7383 && INTVAL (index) > -256
7384 && (INTVAL (index) & 3) == 0);
7385
7386 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7387 {
7388 /* For DImode assume values will usually live in core regs
7389 and only allow LDRD addressing modes. */
7390 if (!TARGET_LDRD || mode != DImode)
7391 return (code == CONST_INT
7392 && INTVAL (index) < 1024
7393 && INTVAL (index) > -1024
7394 && (INTVAL (index) & 3) == 0);
7395 }
7396
7397 /* For quad modes, we restrict the constant offset to be slightly less
7398 than what the instruction format permits. We do this because for
7399 quad mode moves, we will actually decompose them into two separate
7400 double-mode reads or writes. INDEX must therefore be a valid
7401 (double-mode) offset and so should INDEX+8. */
7402 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7403 return (code == CONST_INT
7404 && INTVAL (index) < 1016
7405 && INTVAL (index) > -1024
7406 && (INTVAL (index) & 3) == 0);
7407
7408 /* We have no such constraint on double mode offsets, so we permit the
7409 full range of the instruction format. */
7410 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7411 return (code == CONST_INT
7412 && INTVAL (index) < 1024
7413 && INTVAL (index) > -1024
7414 && (INTVAL (index) & 3) == 0);
7415
7416 if (arm_address_register_rtx_p (index, strict_p)
7417 && (GET_MODE_SIZE (mode) <= 4))
7418 return 1;
7419
7420 if (mode == DImode || mode == DFmode)
7421 {
7422 if (code == CONST_INT)
7423 {
7424 HOST_WIDE_INT val = INTVAL (index);
7425 /* ??? Can we assume ldrd for thumb2? */
7426 /* Thumb-2 ldrd only has reg+const addressing modes. */
7427 /* ldrd supports offsets of +-1020.
7428 However the ldr fallback does not. */
7429 return val > -256 && val < 256 && (val & 3) == 0;
7430 }
7431 else
7432 return 0;
7433 }
7434
7435 if (code == MULT)
7436 {
7437 rtx xiop0 = XEXP (index, 0);
7438 rtx xiop1 = XEXP (index, 1);
7439
7440 return ((arm_address_register_rtx_p (xiop0, strict_p)
7441 && thumb2_index_mul_operand (xiop1))
7442 || (arm_address_register_rtx_p (xiop1, strict_p)
7443 && thumb2_index_mul_operand (xiop0)));
7444 }
7445 else if (code == ASHIFT)
7446 {
7447 rtx op = XEXP (index, 1);
7448
7449 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7450 && CONST_INT_P (op)
7451 && INTVAL (op) > 0
7452 && INTVAL (op) <= 3);
7453 }
7454
7455 return (code == CONST_INT
7456 && INTVAL (index) < 4096
7457 && INTVAL (index) > -256);
7458 }
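/* For illustration, the ASHIFT case above accepts a scaled index of the
   form (plus (reg r0) (ashift (reg r1) (const_int 2))), i.e. the Thumb-2
   addressing mode [r0, r1, lsl #2]; shift amounts greater than 3 are
   rejected.  */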
7459
7460 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7461 static int
7462 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7463 {
7464 int regno;
7465
7466 if (!REG_P (x))
7467 return 0;
7468
7469 regno = REGNO (x);
7470
7471 if (strict_p)
7472 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7473
7474 return (regno <= LAST_LO_REGNUM
7475 || regno > LAST_VIRTUAL_REGISTER
7476 || regno == FRAME_POINTER_REGNUM
7477 || (GET_MODE_SIZE (mode) >= 4
7478 && (regno == STACK_POINTER_REGNUM
7479 || regno >= FIRST_PSEUDO_REGISTER
7480 || x == hard_frame_pointer_rtx
7481 || x == arg_pointer_rtx)));
7482 }
7483
7484 /* Return nonzero if x is a legitimate index register. This is the case
7485 for any base register that can access a QImode object. */
7486 inline static int
7487 thumb1_index_register_rtx_p (rtx x, int strict_p)
7488 {
7489 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7490 }
7491
7492 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7493
7494 The AP may be eliminated to either the SP or the FP, so we use the
7495 least common denominator, e.g. SImode, and offsets from 0 to 64.
7496
7497 ??? Verify whether the above is the right approach.
7498
7499 ??? Also, the FP may be eliminated to the SP, so perhaps that
7500 needs special handling also.
7501
7502 ??? Look at how the mips16 port solves this problem. It probably uses
7503 better ways to solve some of these problems.
7504
7505 Although it is not incorrect, we don't accept QImode and HImode
7506 addresses based on the frame pointer or arg pointer until the
7507 reload pass starts. This is so that eliminating such addresses
7508    into stack-based ones won't produce impossible code.  */
7509 int
7510 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7511 {
7512 /* ??? Not clear if this is right. Experiment. */
7513 if (GET_MODE_SIZE (mode) < 4
7514 && !(reload_in_progress || reload_completed)
7515 && (reg_mentioned_p (frame_pointer_rtx, x)
7516 || reg_mentioned_p (arg_pointer_rtx, x)
7517 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7518 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7519 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7520 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7521 return 0;
7522
7523 /* Accept any base register. SP only in SImode or larger. */
7524 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7525 return 1;
7526
7527 /* This is PC relative data before arm_reorg runs. */
7528 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7529 && GET_CODE (x) == SYMBOL_REF
7530 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7531 return 1;
7532
7533 /* This is PC relative data after arm_reorg runs. */
7534 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7535 && reload_completed
7536 && (GET_CODE (x) == LABEL_REF
7537 || (GET_CODE (x) == CONST
7538 && GET_CODE (XEXP (x, 0)) == PLUS
7539 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7540 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7541 return 1;
7542
7543 /* Post-inc indexing only supported for SImode and larger. */
7544 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7545 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7546 return 1;
7547
7548 else if (GET_CODE (x) == PLUS)
7549 {
7550 /* REG+REG address can be any two index registers. */
7551 /* We disallow FRAME+REG addressing since we know that FRAME
7552 will be replaced with STACK, and SP relative addressing only
7553 permits SP+OFFSET. */
7554 if (GET_MODE_SIZE (mode) <= 4
7555 && XEXP (x, 0) != frame_pointer_rtx
7556 && XEXP (x, 1) != frame_pointer_rtx
7557 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7558 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7559 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7560 return 1;
7561
7562 /* REG+const has 5-7 bit offset for non-SP registers. */
7563 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7564 || XEXP (x, 0) == arg_pointer_rtx)
7565 && CONST_INT_P (XEXP (x, 1))
7566 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7567 return 1;
7568
7569 /* REG+const has 10-bit offset for SP, but only SImode and
7570 larger is supported. */
7571 /* ??? Should probably check for DI/DFmode overflow here
7572 just like GO_IF_LEGITIMATE_OFFSET does. */
7573 else if (REG_P (XEXP (x, 0))
7574 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7575 && GET_MODE_SIZE (mode) >= 4
7576 && CONST_INT_P (XEXP (x, 1))
7577 && INTVAL (XEXP (x, 1)) >= 0
7578 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7579 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7580 return 1;
7581
7582 else if (REG_P (XEXP (x, 0))
7583 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7584 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7585 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7586 && REGNO (XEXP (x, 0))
7587 <= LAST_VIRTUAL_POINTER_REGISTER))
7588 && GET_MODE_SIZE (mode) >= 4
7589 && CONST_INT_P (XEXP (x, 1))
7590 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7591 return 1;
7592 }
7593
7594 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7595 && GET_MODE_SIZE (mode) == 4
7596 && GET_CODE (x) == SYMBOL_REF
7597 && CONSTANT_POOL_ADDRESS_P (x)
7598 && ! (flag_pic
7599 && symbol_mentioned_p (get_pool_constant (x))
7600 && ! pcrel_constant_p (get_pool_constant (x))))
7601 return 1;
7602
7603 return 0;
7604 }
7605
7606 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7607 instruction of mode MODE. */
7608 int
7609 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7610 {
7611 switch (GET_MODE_SIZE (mode))
7612 {
7613 case 1:
7614 return val >= 0 && val < 32;
7615
7616 case 2:
7617 return val >= 0 && val < 64 && (val & 1) == 0;
7618
7619 default:
7620 return (val >= 0
7621 && (val + GET_MODE_SIZE (mode)) <= 128
7622 && (val & 3) == 0);
7623 }
7624 }
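/* Worked examples of the ranges above: a QImode access allows byte offsets
   0..31, an HImode access allows even offsets 0..62, and an SImode access
   allows word-aligned offsets 0..124 (since 124 + 4 == 128), matching the
   5-bit scaled immediate field of the 16-bit Thumb load/store encodings.  */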
7625
7626 bool
7627 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7628 {
7629 if (TARGET_ARM)
7630 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7631 else if (TARGET_THUMB2)
7632 return thumb2_legitimate_address_p (mode, x, strict_p);
7633 else /* if (TARGET_THUMB1) */
7634 return thumb1_legitimate_address_p (mode, x, strict_p);
7635 }
7636
7637 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7638
7639 Given an rtx X being reloaded into a reg required to be
7640 in class CLASS, return the class of reg to actually use.
7641 In general this is just CLASS, but for the Thumb core registers and
7642 immediate constants we prefer a LO_REGS class or a subset. */
7643
7644 static reg_class_t
7645 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7646 {
7647 if (TARGET_32BIT)
7648 return rclass;
7649 else
7650 {
7651 if (rclass == GENERAL_REGS)
7652 return LO_REGS;
7653 else
7654 return rclass;
7655 }
7656 }
7657
7658 /* Build the SYMBOL_REF for __tls_get_addr. */
7659
7660 static GTY(()) rtx tls_get_addr_libfunc;
7661
7662 static rtx
7663 get_tls_get_addr (void)
7664 {
7665 if (!tls_get_addr_libfunc)
7666 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7667 return tls_get_addr_libfunc;
7668 }
7669
7670 rtx
7671 arm_load_tp (rtx target)
7672 {
7673 if (!target)
7674 target = gen_reg_rtx (SImode);
7675
7676 if (TARGET_HARD_TP)
7677 {
7678 /* Can return in any reg. */
7679 emit_insn (gen_load_tp_hard (target));
7680 }
7681 else
7682 {
7683 /* Always returned in r0. Immediately copy the result into a pseudo,
7684 otherwise other uses of r0 (e.g. setting up function arguments) may
7685 clobber the value. */
7686
7687 rtx tmp;
7688
7689 emit_insn (gen_load_tp_soft ());
7690
7691 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7692 emit_move_insn (target, tmp);
7693 }
7694 return target;
7695 }
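/* For reference: with a hardware thread register (TARGET_HARD_TP) the
   load_tp_hard pattern typically reads the CP15 thread ID register
   (mrc p15, 0, <rd>, c13, c0, 3), while the soft variant calls the
   helper __aeabi_read_tp, which returns the thread pointer in r0;
   hence the copy out of r0 above.  */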
7696
7697 static rtx
7698 load_tls_operand (rtx x, rtx reg)
7699 {
7700 rtx tmp;
7701
7702 if (reg == NULL_RTX)
7703 reg = gen_reg_rtx (SImode);
7704
7705 tmp = gen_rtx_CONST (SImode, x);
7706
7707 emit_move_insn (reg, tmp);
7708
7709 return reg;
7710 }
7711
7712 static rtx
7713 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7714 {
7715 rtx insns, label, labelno, sum;
7716
7717 gcc_assert (reloc != TLS_DESCSEQ);
7718 start_sequence ();
7719
7720 labelno = GEN_INT (pic_labelno++);
7721 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7722 label = gen_rtx_CONST (VOIDmode, label);
7723
7724 sum = gen_rtx_UNSPEC (Pmode,
7725 gen_rtvec (4, x, GEN_INT (reloc), label,
7726 GEN_INT (TARGET_ARM ? 8 : 4)),
7727 UNSPEC_TLS);
7728 reg = load_tls_operand (sum, reg);
7729
7730 if (TARGET_ARM)
7731 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7732 else
7733 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7734
7735 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7736 LCT_PURE, /* LCT_CONST? */
7737 Pmode, 1, reg, Pmode);
7738
7739 insns = get_insns ();
7740 end_sequence ();
7741
7742 return insns;
7743 }
7744
7745 static rtx
7746 arm_tls_descseq_addr (rtx x, rtx reg)
7747 {
7748 rtx labelno = GEN_INT (pic_labelno++);
7749 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7750 rtx sum = gen_rtx_UNSPEC (Pmode,
7751 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7752 gen_rtx_CONST (VOIDmode, label),
7753 GEN_INT (!TARGET_ARM)),
7754 UNSPEC_TLS);
7755 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7756
7757 emit_insn (gen_tlscall (x, labelno));
7758 if (!reg)
7759 reg = gen_reg_rtx (SImode);
7760 else
7761 gcc_assert (REGNO (reg) != R0_REGNUM);
7762
7763 emit_move_insn (reg, reg0);
7764
7765 return reg;
7766 }
7767
7768 rtx
7769 legitimize_tls_address (rtx x, rtx reg)
7770 {
7771 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7772 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7773
7774 switch (model)
7775 {
7776 case TLS_MODEL_GLOBAL_DYNAMIC:
7777 if (TARGET_GNU2_TLS)
7778 {
7779 reg = arm_tls_descseq_addr (x, reg);
7780
7781 tp = arm_load_tp (NULL_RTX);
7782
7783 dest = gen_rtx_PLUS (Pmode, tp, reg);
7784 }
7785 else
7786 {
7787 /* Original scheme */
7788 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7789 dest = gen_reg_rtx (Pmode);
7790 emit_libcall_block (insns, dest, ret, x);
7791 }
7792 return dest;
7793
7794 case TLS_MODEL_LOCAL_DYNAMIC:
7795 if (TARGET_GNU2_TLS)
7796 {
7797 reg = arm_tls_descseq_addr (x, reg);
7798
7799 tp = arm_load_tp (NULL_RTX);
7800
7801 dest = gen_rtx_PLUS (Pmode, tp, reg);
7802 }
7803 else
7804 {
7805 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7806
7807 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7808 share the LDM result with other LD model accesses. */
7809 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7810 UNSPEC_TLS);
7811 dest = gen_reg_rtx (Pmode);
7812 emit_libcall_block (insns, dest, ret, eqv);
7813
7814 /* Load the addend. */
7815 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7816 GEN_INT (TLS_LDO32)),
7817 UNSPEC_TLS);
7818 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7819 dest = gen_rtx_PLUS (Pmode, dest, addend);
7820 }
7821 return dest;
7822
7823 case TLS_MODEL_INITIAL_EXEC:
7824 labelno = GEN_INT (pic_labelno++);
7825 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7826 label = gen_rtx_CONST (VOIDmode, label);
7827 sum = gen_rtx_UNSPEC (Pmode,
7828 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7829 GEN_INT (TARGET_ARM ? 8 : 4)),
7830 UNSPEC_TLS);
7831 reg = load_tls_operand (sum, reg);
7832
7833 if (TARGET_ARM)
7834 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7835 else if (TARGET_THUMB2)
7836 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7837 else
7838 {
7839 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7840 emit_move_insn (reg, gen_const_mem (SImode, reg));
7841 }
7842
7843 tp = arm_load_tp (NULL_RTX);
7844
7845 return gen_rtx_PLUS (Pmode, tp, reg);
7846
7847 case TLS_MODEL_LOCAL_EXEC:
7848 tp = arm_load_tp (NULL_RTX);
7849
7850 reg = gen_rtx_UNSPEC (Pmode,
7851 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7852 UNSPEC_TLS);
7853 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7854
7855 return gen_rtx_PLUS (Pmode, tp, reg);
7856
7857 default:
7858 abort ();
7859 }
7860 }
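/* A brief summary of the cases above, for orientation: the dynamic models
   (TLS_GD32 / TLS_LDM32 + TLS_LDO32) go through __tls_get_addr or, with
   -mtls-dialect=gnu2, through a TLS descriptor call; the initial-exec
   model (TLS_IE32) loads the offset from the GOT and adds the thread
   pointer; and the local-exec model (TLS_LE32) adds a link-time constant
   offset directly to the thread pointer.  */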
7861
7862 /* Try machine-dependent ways of modifying an illegitimate address
7863 to be legitimate. If we find one, return the new, valid address. */
7864 rtx
7865 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7866 {
7867 if (arm_tls_referenced_p (x))
7868 {
7869 rtx addend = NULL;
7870
7871 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7872 {
7873 addend = XEXP (XEXP (x, 0), 1);
7874 x = XEXP (XEXP (x, 0), 0);
7875 }
7876
7877 if (GET_CODE (x) != SYMBOL_REF)
7878 return x;
7879
7880 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7881
7882 x = legitimize_tls_address (x, NULL_RTX);
7883
7884 if (addend)
7885 {
7886 x = gen_rtx_PLUS (SImode, x, addend);
7887 orig_x = x;
7888 }
7889 else
7890 return x;
7891 }
7892
7893 if (!TARGET_ARM)
7894 {
7895 /* TODO: legitimize_address for Thumb2. */
7896 if (TARGET_THUMB2)
7897 return x;
7898 return thumb_legitimize_address (x, orig_x, mode);
7899 }
7900
7901 if (GET_CODE (x) == PLUS)
7902 {
7903 rtx xop0 = XEXP (x, 0);
7904 rtx xop1 = XEXP (x, 1);
7905
7906 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7907 xop0 = force_reg (SImode, xop0);
7908
7909 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7910 && !symbol_mentioned_p (xop1))
7911 xop1 = force_reg (SImode, xop1);
7912
7913 if (ARM_BASE_REGISTER_RTX_P (xop0)
7914 && CONST_INT_P (xop1))
7915 {
7916 HOST_WIDE_INT n, low_n;
7917 rtx base_reg, val;
7918 n = INTVAL (xop1);
7919
7920 /* VFP addressing modes actually allow greater offsets, but for
7921 now we just stick with the lowest common denominator. */
7922 if (mode == DImode
7923 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7924 {
7925 low_n = n & 0x0f;
7926 n &= ~0x0f;
7927 if (low_n > 4)
7928 {
7929 n += 16;
7930 low_n -= 16;
7931 }
7932 }
7933 else
7934 {
7935 low_n = ((mode) == TImode ? 0
7936 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7937 n -= low_n;
7938 }
7939
7940 base_reg = gen_reg_rtx (SImode);
7941 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7942 emit_move_insn (base_reg, val);
7943 x = plus_constant (Pmode, base_reg, low_n);
7944 }
7945 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7946 x = gen_rtx_PLUS (SImode, xop0, xop1);
7947 }
7948
7949 /* XXX We don't allow MINUS any more -- see comment in
7950 arm_legitimate_address_outer_p (). */
7951 else if (GET_CODE (x) == MINUS)
7952 {
7953 rtx xop0 = XEXP (x, 0);
7954 rtx xop1 = XEXP (x, 1);
7955
7956 if (CONSTANT_P (xop0))
7957 xop0 = force_reg (SImode, xop0);
7958
7959 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7960 xop1 = force_reg (SImode, xop1);
7961
7962 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7963 x = gen_rtx_MINUS (SImode, xop0, xop1);
7964 }
7965
7966   /* Make sure to take full advantage of the pre-indexed addressing mode
7967      with absolute addresses, which often allows the base register to
7968      be factorized across multiple adjacent memory references, and might
7969      even allow the minipool to be avoided entirely.  */
7970 else if (CONST_INT_P (x) && optimize > 0)
7971 {
7972 unsigned int bits;
7973 HOST_WIDE_INT mask, base, index;
7974 rtx base_reg;
7975
7976       /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7977          use an 8-bit index.  So let's use a 12-bit index for SImode only and
7978          hope that arm_gen_constant will enable ldrb to use more bits.  */
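      /* A worked example, assuming SImode: bits == 12 and mask == 0xfff, so a
         constant address of 0x12345 is split into base == 0x12000 and
         index == 0x345; the base is forced into a register once and the final
         address becomes (plus (reg base) (const_int 0x345)), which several
         neighbouring accesses can then share.  */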
7979 bits = (mode == SImode) ? 12 : 8;
7980 mask = (1 << bits) - 1;
7981 base = INTVAL (x) & ~mask;
7982 index = INTVAL (x) & mask;
7983 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7984 {
7985 /* It'll most probably be more efficient to generate the base
7986 with more bits set and use a negative index instead. */
7987 base |= mask;
7988 index -= mask;
7989 }
7990 base_reg = force_reg (SImode, GEN_INT (base));
7991 x = plus_constant (Pmode, base_reg, index);
7992 }
7993
7994 if (flag_pic)
7995 {
7996 /* We need to find and carefully transform any SYMBOL and LABEL
7997 references; so go back to the original address expression. */
7998 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7999
8000 if (new_x != orig_x)
8001 x = new_x;
8002 }
8003
8004 return x;
8005 }
8006
8007
8008 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8009 to be legitimate. If we find one, return the new, valid address. */
8010 rtx
8011 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8012 {
8013 if (GET_CODE (x) == PLUS
8014 && CONST_INT_P (XEXP (x, 1))
8015 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8016 || INTVAL (XEXP (x, 1)) < 0))
8017 {
8018 rtx xop0 = XEXP (x, 0);
8019 rtx xop1 = XEXP (x, 1);
8020 HOST_WIDE_INT offset = INTVAL (xop1);
8021
8022 /* Try and fold the offset into a biasing of the base register and
8023 then offsetting that. Don't do this when optimizing for space
8024 since it can cause too many CSEs. */
8025 if (optimize_size && offset >= 0
8026 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8027 {
8028 HOST_WIDE_INT delta;
8029
8030 if (offset >= 256)
8031 delta = offset - (256 - GET_MODE_SIZE (mode));
8032 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8033 delta = 31 * GET_MODE_SIZE (mode);
8034 else
8035 delta = offset & (~31 * GET_MODE_SIZE (mode));
8036
8037 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8038 NULL_RTX);
8039 x = plus_constant (Pmode, xop0, delta);
8040 }
8041 else if (offset < 0 && offset > -256)
8042 /* Small negative offsets are best done with a subtract before the
8043 	   dereference; forcing these into a register normally takes two
8044 instructions. */
8045 x = force_operand (x, NULL_RTX);
8046 else
8047 {
8048 /* For the remaining cases, force the constant into a register. */
8049 xop1 = force_reg (SImode, xop1);
8050 x = gen_rtx_PLUS (SImode, xop0, xop1);
8051 }
8052 }
8053 else if (GET_CODE (x) == PLUS
8054 && s_register_operand (XEXP (x, 1), SImode)
8055 && !s_register_operand (XEXP (x, 0), SImode))
8056 {
8057 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8058
8059 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8060 }
8061
8062 if (flag_pic)
8063 {
8064 /* We need to find and carefully transform any SYMBOL and LABEL
8065 references; so go back to the original address expression. */
8066 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8067
8068 if (new_x != orig_x)
8069 x = new_x;
8070 }
8071
8072 return x;
8073 }
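/* A worked example of the offset-folding branch above, assuming SImode and
   a size-optimizing build (e.g. -Os): for (plus (reg r3) (const_int 300)),
   delta == 300 - (256 - 4) == 48, so the base is first advanced by 252 into
   a scratch register and the access then uses the remaining offset of 48,
   which fits the 0..124 word-aligned range accepted by
   thumb_legitimate_offset_p.  */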
8074
8075 /* Return TRUE if X contains any TLS symbol references. */
8076
8077 bool
8078 arm_tls_referenced_p (rtx x)
8079 {
8080 if (! TARGET_HAVE_TLS)
8081 return false;
8082
8083 subrtx_iterator::array_type array;
8084 FOR_EACH_SUBRTX (iter, array, x, ALL)
8085 {
8086 const_rtx x = *iter;
8087 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8088 return true;
8089
8090 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8091 TLS offsets, not real symbol references. */
8092 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8093 iter.skip_subrtxes ();
8094 }
8095 return false;
8096 }
8097
8098 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8099
8100 On the ARM, allow any integer (invalid ones are removed later by insn
8101 patterns), nice doubles and symbol_refs which refer to the function's
8102 constant pool XXX.
8103
8104 When generating pic allow anything. */
8105
8106 static bool
8107 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8108 {
8109 return flag_pic || !label_mentioned_p (x);
8110 }
8111
8112 static bool
8113 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8114 {
8115 return (CONST_INT_P (x)
8116 || CONST_DOUBLE_P (x)
8117 || CONSTANT_ADDRESS_P (x)
8118 || flag_pic);
8119 }
8120
8121 static bool
8122 arm_legitimate_constant_p (machine_mode mode, rtx x)
8123 {
8124 return (!arm_cannot_force_const_mem (mode, x)
8125 && (TARGET_32BIT
8126 ? arm_legitimate_constant_p_1 (mode, x)
8127 : thumb_legitimate_constant_p (mode, x)));
8128 }
8129
8130 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8131
8132 static bool
8133 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8134 {
8135 rtx base, offset;
8136
8137 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8138 {
8139 split_const (x, &base, &offset);
8140 if (GET_CODE (base) == SYMBOL_REF
8141 && !offset_within_block_p (base, INTVAL (offset)))
8142 return true;
8143 }
8144 return arm_tls_referenced_p (x);
8145 }
8146 \f
8147 #define REG_OR_SUBREG_REG(X) \
8148 (REG_P (X) \
8149 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8150
8151 #define REG_OR_SUBREG_RTX(X) \
8152 (REG_P (X) ? (X) : SUBREG_REG (X))
8153
8154 static inline int
8155 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8156 {
8157 machine_mode mode = GET_MODE (x);
8158 int total, words;
8159
8160 switch (code)
8161 {
8162 case ASHIFT:
8163 case ASHIFTRT:
8164 case LSHIFTRT:
8165 case ROTATERT:
8166 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8167
8168 case PLUS:
8169 case MINUS:
8170 case COMPARE:
8171 case NEG:
8172 case NOT:
8173 return COSTS_N_INSNS (1);
8174
8175 case MULT:
8176 if (CONST_INT_P (XEXP (x, 1)))
8177 {
8178 int cycles = 0;
8179 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8180
8181 while (i)
8182 {
8183 i >>= 2;
8184 cycles++;
8185 }
8186 return COSTS_N_INSNS (2) + cycles;
8187 }
8188 return COSTS_N_INSNS (1) + 16;
8189
8190 case SET:
8191 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8192 the mode. */
8193 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8194 return (COSTS_N_INSNS (words)
8195 + 4 * ((MEM_P (SET_SRC (x)))
8196 + MEM_P (SET_DEST (x))));
8197
8198 case CONST_INT:
8199 if (outer == SET)
8200 {
8201 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8202 return 0;
8203 if (thumb_shiftable_const (INTVAL (x)))
8204 return COSTS_N_INSNS (2);
8205 return COSTS_N_INSNS (3);
8206 }
8207 else if ((outer == PLUS || outer == COMPARE)
8208 && INTVAL (x) < 256 && INTVAL (x) > -256)
8209 return 0;
8210 else if ((outer == IOR || outer == XOR || outer == AND)
8211 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8212 return COSTS_N_INSNS (1);
8213 else if (outer == AND)
8214 {
8215 int i;
8216 /* This duplicates the tests in the andsi3 expander. */
8217 for (i = 9; i <= 31; i++)
8218 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8219 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8220 return COSTS_N_INSNS (2);
8221 }
8222 else if (outer == ASHIFT || outer == ASHIFTRT
8223 || outer == LSHIFTRT)
8224 return 0;
8225 return COSTS_N_INSNS (2);
8226
8227 case CONST:
8228 case CONST_DOUBLE:
8229 case LABEL_REF:
8230 case SYMBOL_REF:
8231 return COSTS_N_INSNS (3);
8232
8233 case UDIV:
8234 case UMOD:
8235 case DIV:
8236 case MOD:
8237 return 100;
8238
8239 case TRUNCATE:
8240 return 99;
8241
8242 case AND:
8243 case XOR:
8244 case IOR:
8245 /* XXX guess. */
8246 return 8;
8247
8248 case MEM:
8249 /* XXX another guess. */
8250 /* Memory costs quite a lot for the first word, but subsequent words
8251 load at the equivalent of a single insn each. */
8252 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8253 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8254 ? 4 : 0));
8255
8256 case IF_THEN_ELSE:
8257 /* XXX a guess. */
8258 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8259 return 14;
8260 return 2;
8261
8262 case SIGN_EXTEND:
8263 case ZERO_EXTEND:
8264 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8265 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8266
8267 if (mode == SImode)
8268 return total;
8269
8270 if (arm_arch6)
8271 return total + COSTS_N_INSNS (1);
8272
8273 /* Assume a two-shift sequence. Increase the cost slightly so
8274 we prefer actual shifts over an extend operation. */
8275 return total + 1 + COSTS_N_INSNS (2);
8276
8277 default:
8278 return 99;
8279 }
8280 }
8281
8282 static inline bool
8283 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8284 {
8285 machine_mode mode = GET_MODE (x);
8286 enum rtx_code subcode;
8287 rtx operand;
8288 enum rtx_code code = GET_CODE (x);
8289 *total = 0;
8290
8291 switch (code)
8292 {
8293 case MEM:
8294 /* Memory costs quite a lot for the first word, but subsequent words
8295 load at the equivalent of a single insn each. */
8296 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8297 return true;
8298
8299 case DIV:
8300 case MOD:
8301 case UDIV:
8302 case UMOD:
8303 if (TARGET_HARD_FLOAT && mode == SFmode)
8304 *total = COSTS_N_INSNS (2);
8305 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8306 *total = COSTS_N_INSNS (4);
8307 else
8308 *total = COSTS_N_INSNS (20);
8309 return false;
8310
8311 case ROTATE:
8312 if (REG_P (XEXP (x, 1)))
8313 	*total = COSTS_N_INSNS (1); /* Need to subtract from 32.  */
8314 else if (!CONST_INT_P (XEXP (x, 1)))
8315 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8316
8317 /* Fall through */
8318 case ROTATERT:
8319 if (mode != SImode)
8320 {
8321 *total += COSTS_N_INSNS (4);
8322 return true;
8323 }
8324
8325 /* Fall through */
8326 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8327 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8328 if (mode == DImode)
8329 {
8330 *total += COSTS_N_INSNS (3);
8331 return true;
8332 }
8333
8334 *total += COSTS_N_INSNS (1);
8335       /* Increase the cost of complex shifts because they aren't any faster
8336          and they reduce dual-issue opportunities.  */
8337 if (arm_tune_cortex_a9
8338 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8339 ++*total;
8340
8341 return true;
8342
8343 case MINUS:
8344 if (mode == DImode)
8345 {
8346 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8347 if (CONST_INT_P (XEXP (x, 0))
8348 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8349 {
8350 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8351 return true;
8352 }
8353
8354 if (CONST_INT_P (XEXP (x, 1))
8355 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8356 {
8357 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8358 return true;
8359 }
8360
8361 return false;
8362 }
8363
8364 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8365 {
8366 if (TARGET_HARD_FLOAT
8367 && (mode == SFmode
8368 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8369 {
8370 *total = COSTS_N_INSNS (1);
8371 if (CONST_DOUBLE_P (XEXP (x, 0))
8372 && arm_const_double_rtx (XEXP (x, 0)))
8373 {
8374 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8375 return true;
8376 }
8377
8378 if (CONST_DOUBLE_P (XEXP (x, 1))
8379 && arm_const_double_rtx (XEXP (x, 1)))
8380 {
8381 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8382 return true;
8383 }
8384
8385 return false;
8386 }
8387 *total = COSTS_N_INSNS (20);
8388 return false;
8389 }
8390
8391 *total = COSTS_N_INSNS (1);
8392 if (CONST_INT_P (XEXP (x, 0))
8393 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8394 {
8395 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8396 return true;
8397 }
8398
8399 subcode = GET_CODE (XEXP (x, 1));
8400 if (subcode == ASHIFT || subcode == ASHIFTRT
8401 || subcode == LSHIFTRT
8402 || subcode == ROTATE || subcode == ROTATERT)
8403 {
8404 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8405 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8406 return true;
8407 }
8408
8409 /* A shift as a part of RSB costs no more than RSB itself. */
8410 if (GET_CODE (XEXP (x, 0)) == MULT
8411 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8412 {
8413 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8414 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8415 return true;
8416 }
8417
8418 if (subcode == MULT
8419 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8420 {
8421 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8422 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8423 return true;
8424 }
8425
8426 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8427 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8428 {
8429 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8430 if (REG_P (XEXP (XEXP (x, 1), 0))
8431 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8432 *total += COSTS_N_INSNS (1);
8433
8434 return true;
8435 }
8436
8437 /* Fall through */
8438
8439 case PLUS:
8440 if (code == PLUS && arm_arch6 && mode == SImode
8441 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8442 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8443 {
8444 *total = COSTS_N_INSNS (1);
8445 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8446 0, speed);
8447 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8448 return true;
8449 }
8450
8451       /* MLA: All arguments must be registers.  We filter out
8452 	 multiplication by a power of two, so that we fall through to
8453 	 the code below.  */
8454 if (GET_CODE (XEXP (x, 0)) == MULT
8455 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8456 {
8457 /* The cost comes from the cost of the multiply. */
8458 return false;
8459 }
8460
8461 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8462 {
8463 if (TARGET_HARD_FLOAT
8464 && (mode == SFmode
8465 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8466 {
8467 *total = COSTS_N_INSNS (1);
8468 if (CONST_DOUBLE_P (XEXP (x, 1))
8469 && arm_const_double_rtx (XEXP (x, 1)))
8470 {
8471 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8472 return true;
8473 }
8474
8475 return false;
8476 }
8477
8478 *total = COSTS_N_INSNS (20);
8479 return false;
8480 }
8481
8482 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8483 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8484 {
8485 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8486 if (REG_P (XEXP (XEXP (x, 0), 0))
8487 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8488 *total += COSTS_N_INSNS (1);
8489 return true;
8490 }
8491
8492 /* Fall through */
8493
8494 case AND: case XOR: case IOR:
8495
8496       /* Normally the frame registers will be split into reg+const during
8497 reload, so it is a bad idea to combine them with other instructions,
8498 since then they might not be moved outside of loops. As a compromise
8499 we allow integration with ops that have a constant as their second
8500 operand. */
8501 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8502 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8503 && !CONST_INT_P (XEXP (x, 1)))
8504 *total = COSTS_N_INSNS (1);
8505
8506 if (mode == DImode)
8507 {
8508 *total += COSTS_N_INSNS (2);
8509 if (CONST_INT_P (XEXP (x, 1))
8510 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8511 {
8512 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8513 return true;
8514 }
8515
8516 return false;
8517 }
8518
8519 *total += COSTS_N_INSNS (1);
8520 if (CONST_INT_P (XEXP (x, 1))
8521 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8522 {
8523 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8524 return true;
8525 }
8526 subcode = GET_CODE (XEXP (x, 0));
8527 if (subcode == ASHIFT || subcode == ASHIFTRT
8528 || subcode == LSHIFTRT
8529 || subcode == ROTATE || subcode == ROTATERT)
8530 {
8531 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8532 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8533 return true;
8534 }
8535
8536 if (subcode == MULT
8537 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8538 {
8539 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8540 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8541 return true;
8542 }
8543
8544 if (subcode == UMIN || subcode == UMAX
8545 || subcode == SMIN || subcode == SMAX)
8546 {
8547 *total = COSTS_N_INSNS (3);
8548 return true;
8549 }
8550
8551 return false;
8552
8553 case MULT:
8554 /* This should have been handled by the CPU specific routines. */
8555 gcc_unreachable ();
8556
8557 case TRUNCATE:
8558 if (arm_arch3m && mode == SImode
8559 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8560 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8561 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8562 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8563 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8564 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8565 {
8566 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8567 return true;
8568 }
8569       *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT.  */
8570 return false;
8571
8572 case NEG:
8573 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8574 {
8575 if (TARGET_HARD_FLOAT
8576 && (mode == SFmode
8577 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8578 {
8579 *total = COSTS_N_INSNS (1);
8580 return false;
8581 }
8582 *total = COSTS_N_INSNS (2);
8583 return false;
8584 }
8585
8586 /* Fall through */
8587 case NOT:
8588 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8589 if (mode == SImode && code == NOT)
8590 {
8591 subcode = GET_CODE (XEXP (x, 0));
8592 if (subcode == ASHIFT || subcode == ASHIFTRT
8593 || subcode == LSHIFTRT
8594 || subcode == ROTATE || subcode == ROTATERT
8595 || (subcode == MULT
8596 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8597 {
8598 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8599 /* Register shifts cost an extra cycle. */
8600 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8601 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8602 subcode, 1, speed);
8603 return true;
8604 }
8605 }
8606
8607 return false;
8608
8609 case IF_THEN_ELSE:
8610 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8611 {
8612 *total = COSTS_N_INSNS (4);
8613 return true;
8614 }
8615
8616 operand = XEXP (x, 0);
8617
8618 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8619 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8620 && REG_P (XEXP (operand, 0))
8621 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8622 *total += COSTS_N_INSNS (1);
8623 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8624 + rtx_cost (XEXP (x, 2), code, 2, speed));
8625 return true;
8626
8627 case NE:
8628 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8629 {
8630 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8631 return true;
8632 }
8633 goto scc_insn;
8634
8635 case GE:
8636 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8637 && mode == SImode && XEXP (x, 1) == const0_rtx)
8638 {
8639 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8640 return true;
8641 }
8642 goto scc_insn;
8643
8644 case LT:
8645 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8646 && mode == SImode && XEXP (x, 1) == const0_rtx)
8647 {
8648 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8649 return true;
8650 }
8651 goto scc_insn;
8652
8653 case EQ:
8654 case GT:
8655 case LE:
8656 case GEU:
8657 case LTU:
8658 case GTU:
8659 case LEU:
8660 case UNORDERED:
8661 case ORDERED:
8662 case UNEQ:
8663 case UNGE:
8664 case UNLT:
8665 case UNGT:
8666 case UNLE:
8667 scc_insn:
8668       /* SCC insns.  If the comparison has already been performed, they
8669 	 cost 2 instructions.  Otherwise they need an additional
8670 	 comparison before them.  */
8671 *total = COSTS_N_INSNS (2);
8672 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8673 {
8674 return true;
8675 }
8676
8677 /* Fall through */
8678 case COMPARE:
8679 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8680 {
8681 *total = 0;
8682 return true;
8683 }
8684
8685 *total += COSTS_N_INSNS (1);
8686 if (CONST_INT_P (XEXP (x, 1))
8687 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8688 {
8689 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8690 return true;
8691 }
8692
8693 subcode = GET_CODE (XEXP (x, 0));
8694 if (subcode == ASHIFT || subcode == ASHIFTRT
8695 || subcode == LSHIFTRT
8696 || subcode == ROTATE || subcode == ROTATERT)
8697 {
8698 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8699 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8700 return true;
8701 }
8702
8703 if (subcode == MULT
8704 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8705 {
8706 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8707 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8708 return true;
8709 }
8710
8711 return false;
8712
8713 case UMIN:
8714 case UMAX:
8715 case SMIN:
8716 case SMAX:
8717 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8718 if (!CONST_INT_P (XEXP (x, 1))
8719 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8720 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8721 return true;
8722
8723 case ABS:
8724 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8725 {
8726 if (TARGET_HARD_FLOAT
8727 && (mode == SFmode
8728 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8729 {
8730 *total = COSTS_N_INSNS (1);
8731 return false;
8732 }
8733 *total = COSTS_N_INSNS (20);
8734 return false;
8735 }
8736 *total = COSTS_N_INSNS (1);
8737 if (mode == DImode)
8738 *total += COSTS_N_INSNS (3);
8739 return false;
8740
8741 case SIGN_EXTEND:
8742 case ZERO_EXTEND:
8743 *total = 0;
8744 if (GET_MODE_CLASS (mode) == MODE_INT)
8745 {
8746 rtx op = XEXP (x, 0);
8747 machine_mode opmode = GET_MODE (op);
8748
8749 if (mode == DImode)
8750 *total += COSTS_N_INSNS (1);
8751
8752 if (opmode != SImode)
8753 {
8754 if (MEM_P (op))
8755 {
8756 /* If !arm_arch4, we use one of the extendhisi2_mem
8757 or movhi_bytes patterns for HImode. For a QImode
8758 sign extension, we first zero-extend from memory
8759 and then perform a shift sequence. */
8760 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8761 *total += COSTS_N_INSNS (2);
8762 }
8763 else if (arm_arch6)
8764 *total += COSTS_N_INSNS (1);
8765
8766 /* We don't have the necessary insn, so we need to perform some
8767 other operation. */
8768 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8769 /* An and with constant 255. */
8770 *total += COSTS_N_INSNS (1);
8771 else
8772 /* A shift sequence. Increase costs slightly to avoid
8773 combining two shifts into an extend operation. */
8774 *total += COSTS_N_INSNS (2) + 1;
8775 }
8776
8777 return false;
8778 }
8779
8780 switch (GET_MODE (XEXP (x, 0)))
8781 {
8782 case V8QImode:
8783 case V4HImode:
8784 case V2SImode:
8785 case V4QImode:
8786 case V2HImode:
8787 *total = COSTS_N_INSNS (1);
8788 return false;
8789
8790 default:
8791 gcc_unreachable ();
8792 }
8793 gcc_unreachable ();
8794
8795 case ZERO_EXTRACT:
8796 case SIGN_EXTRACT:
8797 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8798 return true;
8799
8800 case CONST_INT:
8801 if (const_ok_for_arm (INTVAL (x))
8802 || const_ok_for_arm (~INTVAL (x)))
8803 *total = COSTS_N_INSNS (1);
8804 else
8805 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8806 INTVAL (x), NULL_RTX,
8807 NULL_RTX, 0, 0));
8808 return true;
8809
8810 case CONST:
8811 case LABEL_REF:
8812 case SYMBOL_REF:
8813 *total = COSTS_N_INSNS (3);
8814 return true;
8815
8816 case HIGH:
8817 *total = COSTS_N_INSNS (1);
8818 return true;
8819
8820 case LO_SUM:
8821 *total = COSTS_N_INSNS (1);
8822 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8823 return true;
8824
8825 case CONST_DOUBLE:
8826 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8827 && (mode == SFmode || !TARGET_VFP_SINGLE))
8828 *total = COSTS_N_INSNS (1);
8829 else
8830 *total = COSTS_N_INSNS (4);
8831 return true;
8832
8833 case SET:
8834 /* The vec_extract patterns accept memory operands that require an
8835 address reload. Account for the cost of that reload to give the
8836 auto-inc-dec pass an incentive to try to replace them. */
8837 if (TARGET_NEON && MEM_P (SET_DEST (x))
8838 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8839 {
8840 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8841 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8842 *total += COSTS_N_INSNS (1);
8843 return true;
8844 }
8845 /* Likewise for the vec_set patterns. */
8846 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8847 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8848 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8849 {
8850 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8851 *total = rtx_cost (mem, code, 0, speed);
8852 if (!neon_vector_mem_operand (mem, 2, true))
8853 *total += COSTS_N_INSNS (1);
8854 return true;
8855 }
8856 return false;
8857
8858 case UNSPEC:
8859       /* We cost this as high as our memory costs so that it can
8860 	 be hoisted out of loops.  */
8861 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8862 {
8863 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8864 }
8865 return true;
8866
8867 case CONST_VECTOR:
8868 if (TARGET_NEON
8869 && TARGET_HARD_FLOAT
8870 && outer == SET
8871 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8872 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8873 *total = COSTS_N_INSNS (1);
8874 else
8875 *total = COSTS_N_INSNS (4);
8876 return true;
8877
8878 default:
8879 *total = COSTS_N_INSNS (4);
8880 return false;
8881 }
8882 }
8883
8884 /* Estimate the size cost of Thumb-1 instructions.
8885    For now most of the code is copied from thumb1_rtx_costs.  We need more
8886    fine-grained tuning when we have more related test cases.  */
8887 static inline int
8888 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8889 {
8890 machine_mode mode = GET_MODE (x);
8891 int words;
8892
8893 switch (code)
8894 {
8895 case ASHIFT:
8896 case ASHIFTRT:
8897 case LSHIFTRT:
8898 case ROTATERT:
8899 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8900
8901 case PLUS:
8902 case MINUS:
8903       /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8904 	 patterns defined by RTL expansion, especially for the expansion of
8905 	 multiplication.  */
8906 if ((GET_CODE (XEXP (x, 0)) == MULT
8907 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8908 || (GET_CODE (XEXP (x, 1)) == MULT
8909 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8910 return COSTS_N_INSNS (2);
8911       /* Deliberately fall through for normal RTXes.  */
8912 case COMPARE:
8913 case NEG:
8914 case NOT:
8915 return COSTS_N_INSNS (1);
8916
8917 case MULT:
8918 if (CONST_INT_P (XEXP (x, 1)))
8919 {
8920           /* The Thumb-1 mul instruction can't operate on a constant.  We must
8921              load it into a register first.  */
8922 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8923           /* For targets with a very small and high-latency multiply unit,
8924              we prefer to synthesize the multiplication with up to 5 instructions,
8925              giving a good balance between size and performance.  */
8926 if (arm_arch6m && arm_m_profile_small_mul)
8927 return COSTS_N_INSNS (5);
8928 else
8929 return COSTS_N_INSNS (1) + const_size;
8930 }
8931 return COSTS_N_INSNS (1);
8932
8933 case SET:
8934 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8935 the mode. */
8936 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8937 return COSTS_N_INSNS (words)
8938 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8939 || satisfies_constraint_K (SET_SRC (x))
8940 /* thumb1_movdi_insn. */
8941 || ((words > 1) && MEM_P (SET_SRC (x))));
8942
8943 case CONST_INT:
8944 if (outer == SET)
8945 {
8946 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8947 return COSTS_N_INSNS (1);
8948 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8949 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8950 return COSTS_N_INSNS (2);
8951 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8952 if (thumb_shiftable_const (INTVAL (x)))
8953 return COSTS_N_INSNS (2);
8954 return COSTS_N_INSNS (3);
8955 }
8956 else if ((outer == PLUS || outer == COMPARE)
8957 && INTVAL (x) < 256 && INTVAL (x) > -256)
8958 return 0;
8959 else if ((outer == IOR || outer == XOR || outer == AND)
8960 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8961 return COSTS_N_INSNS (1);
8962 else if (outer == AND)
8963 {
8964 int i;
8965 /* This duplicates the tests in the andsi3 expander. */
8966 for (i = 9; i <= 31; i++)
8967 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8968 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8969 return COSTS_N_INSNS (2);
8970 }
8971 else if (outer == ASHIFT || outer == ASHIFTRT
8972 || outer == LSHIFTRT)
8973 return 0;
8974 return COSTS_N_INSNS (2);
8975
8976 case CONST:
8977 case CONST_DOUBLE:
8978 case LABEL_REF:
8979 case SYMBOL_REF:
8980 return COSTS_N_INSNS (3);
8981
8982 case UDIV:
8983 case UMOD:
8984 case DIV:
8985 case MOD:
8986 return 100;
8987
8988 case TRUNCATE:
8989 return 99;
8990
8991 case AND:
8992 case XOR:
8993 case IOR:
8994 return COSTS_N_INSNS (1);
8995
8996 case MEM:
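/* One insn for the access itself, plus one for each additional word of
the mode, plus one more when the access is a constant-pool reference;
e.g. an SImode load costs one insn while a DImode load costs two. */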
8997 return (COSTS_N_INSNS (1)
8998 + COSTS_N_INSNS (1)
8999 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9000 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9001 ? COSTS_N_INSNS (1) : 0));
9002
9003 case IF_THEN_ELSE:
9004 /* XXX a guess. */
9005 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9006 return 14;
9007 return 2;
9008
9009 case ZERO_EXTEND:
9010 /* XXX still guessing. */
9011 switch (GET_MODE (XEXP (x, 0)))
9012 {
9013 case QImode:
9014 return (1 + (mode == DImode ? 4 : 0)
9015 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9016
9017 case HImode:
9018 return (4 + (mode == DImode ? 4 : 0)
9019 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9020
9021 case SImode:
9022 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9023
9024 default:
9025 return 99;
9026 }
9027
9028 default:
9029 return 99;
9030 }
9031 }
9032
9033 /* RTX costs when optimizing for size. */
9034 static bool
9035 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9036 int *total)
9037 {
9038 machine_mode mode = GET_MODE (x);
9039 if (TARGET_THUMB1)
9040 {
9041 *total = thumb1_size_rtx_costs (x, code, outer_code);
9042 return true;
9043 }
9044
9045 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9046 switch (code)
9047 {
9048 case MEM:
9049 /* A memory access costs 1 insn if the mode is small or the address is
9050 a single register; otherwise it costs one insn per word. */
9051 if (REG_P (XEXP (x, 0)))
9052 *total = COSTS_N_INSNS (1);
9053 else if (flag_pic
9054 && GET_CODE (XEXP (x, 0)) == PLUS
9055 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9056 /* This will be split into two instructions.
9057 See arm.md:calculate_pic_address. */
9058 *total = COSTS_N_INSNS (2);
9059 else
9060 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9061 return true;
9062
9063 case DIV:
9064 case MOD:
9065 case UDIV:
9066 case UMOD:
9067 /* Needs a libcall, so it costs about this. */
9068 *total = COSTS_N_INSNS (2);
9069 return false;
9070
9071 case ROTATE:
9072 if (mode == SImode && REG_P (XEXP (x, 1)))
9073 {
9074 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9075 return true;
9076 }
9077 /* Fall through */
9078 case ROTATERT:
9079 case ASHIFT:
9080 case LSHIFTRT:
9081 case ASHIFTRT:
9082 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9083 {
9084 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9085 return true;
9086 }
9087 else if (mode == SImode)
9088 {
9089 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9090 /* Slightly disparage register shifts, but not by much. */
9091 if (!CONST_INT_P (XEXP (x, 1)))
9092 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9093 return true;
9094 }
9095
9096 /* Needs a libcall. */
9097 *total = COSTS_N_INSNS (2);
9098 return false;
9099
9100 case MINUS:
9101 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9102 && (mode == SFmode || !TARGET_VFP_SINGLE))
9103 {
9104 *total = COSTS_N_INSNS (1);
9105 return false;
9106 }
9107
9108 if (mode == SImode)
9109 {
9110 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9111 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9112
9113 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9114 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9115 || subcode1 == ROTATE || subcode1 == ROTATERT
9116 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9117 || subcode1 == ASHIFTRT)
9118 {
9119 /* It's just the cost of the two operands. */
9120 *total = 0;
9121 return false;
9122 }
9123
9124 *total = COSTS_N_INSNS (1);
9125 return false;
9126 }
9127
9128 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9129 return false;
9130
9131 case PLUS:
9132 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9133 && (mode == SFmode || !TARGET_VFP_SINGLE))
9134 {
9135 *total = COSTS_N_INSNS (1);
9136 return false;
9137 }
9138
9139 /* A shift as a part of ADD costs nothing. */
9140 if (GET_CODE (XEXP (x, 0)) == MULT
9141 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9142 {
9143 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9144 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9145 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9146 return true;
9147 }
9148
9149 /* Fall through */
9150 case AND: case XOR: case IOR:
9151 if (mode == SImode)
9152 {
9153 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9154
9155 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9156 || subcode == LSHIFTRT || subcode == ASHIFTRT
9157 || (code == AND && subcode == NOT))
9158 {
9159 /* It's just the cost of the two operands. */
9160 *total = 0;
9161 return false;
9162 }
9163 }
9164
9165 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9166 return false;
9167
9168 case MULT:
9169 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9170 return false;
9171
9172 case NEG:
9173 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9174 && (mode == SFmode || !TARGET_VFP_SINGLE))
9175 {
9176 *total = COSTS_N_INSNS (1);
9177 return false;
9178 }
9179
9180 /* Fall through */
9181 case NOT:
9182 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9183
9184 return false;
9185
9186 case IF_THEN_ELSE:
9187 *total = 0;
9188 return false;
9189
9190 case COMPARE:
9191 if (cc_register (XEXP (x, 0), VOIDmode))
9192 *total = 0;
9193 else
9194 *total = COSTS_N_INSNS (1);
9195 return false;
9196
9197 case ABS:
9198 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9199 && (mode == SFmode || !TARGET_VFP_SINGLE))
9200 *total = COSTS_N_INSNS (1);
9201 else
9202 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9203 return false;
9204
9205 case SIGN_EXTEND:
9206 case ZERO_EXTEND:
9207 return arm_rtx_costs_1 (x, outer_code, total, 0);
9208
9209 case CONST_INT:
9210 if (const_ok_for_arm (INTVAL (x)))
9211 /* A multiplication by a constant requires another instruction
9212 to load the constant to a register. */
9213 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9214 ? 1 : 0);
9215 else if (const_ok_for_arm (~INTVAL (x)))
9216 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9217 else if (const_ok_for_arm (-INTVAL (x)))
9218 {
9219 if (outer_code == COMPARE || outer_code == PLUS
9220 || outer_code == MINUS)
9221 *total = 0;
9222 else
9223 *total = COSTS_N_INSNS (1);
9224 }
9225 else
9226 *total = COSTS_N_INSNS (2);
9227 return true;
9228
9229 case CONST:
9230 case LABEL_REF:
9231 case SYMBOL_REF:
9232 *total = COSTS_N_INSNS (2);
9233 return true;
9234
9235 case CONST_DOUBLE:
9236 *total = COSTS_N_INSNS (4);
9237 return true;
9238
9239 case CONST_VECTOR:
9240 if (TARGET_NEON
9241 && TARGET_HARD_FLOAT
9242 && outer_code == SET
9243 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9244 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9245 *total = COSTS_N_INSNS (1);
9246 else
9247 *total = COSTS_N_INSNS (4);
9248 return true;
9249
9250 case HIGH:
9251 case LO_SUM:
9252 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9253 cost of these slightly. */
9254 *total = COSTS_N_INSNS (1) + 1;
9255 return true;
9256
9257 case SET:
9258 return false;
9259
9260 default:
9261 if (mode != VOIDmode)
9262 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9263 else
9264 *total = COSTS_N_INSNS (4); /* Who knows? */
9265 return false;
9266 }
9267 }
9268
9269 /* Helper function for arm_rtx_costs. If OP is a valid shifter
9270 operand, return the operand that is being shifted. If the shift
9271 amount is not a constant, store the register holding it in
9272 *SHIFT_REG. Return NULL if OP is not a shifter operand. */
9273 static rtx
9274 shifter_op_p (rtx op, rtx *shift_reg)
9275 {
9276 enum rtx_code code = GET_CODE (op);
9277
9278 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9279 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9280 return XEXP (op, 0);
9281 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9282 return XEXP (op, 0);
9283 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9284 || code == ASHIFTRT)
9285 {
9286 if (!CONST_INT_P (XEXP (op, 1)))
9287 *shift_reg = XEXP (op, 1);
9288 return XEXP (op, 0);
9289 }
9290
9291 return NULL;
9292 }
9293
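/* Cost routine for UNSPEC and UNSPEC_VOLATILE expressions. X is the
expression, OUTER_CODE its enclosing code (currently unused) and SPEED_P
selects between performance and size costs. The computed cost is stored
in *COST; the return value follows the same convention as
arm_new_rtx_costs below. */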
9294 static bool
9295 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9296 {
9297 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9298 rtx_code code = GET_CODE (x);
9299 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9300
9301 switch (XINT (x, 1))
9302 {
9303 case UNSPEC_UNALIGNED_LOAD:
9304 /* We can only do unaligned loads into the integer unit, and we can't
9305 use LDM or LDRD. */
9306 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9307 if (speed_p)
9308 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9309 + extra_cost->ldst.load_unaligned);
9310
9311 #ifdef NOT_YET
9312 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9313 ADDR_SPACE_GENERIC, speed_p);
9314 #endif
9315 return true;
9316
9317 case UNSPEC_UNALIGNED_STORE:
9318 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9319 if (speed_p)
9320 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9321 + extra_cost->ldst.store_unaligned);
9322
9323 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9324 #ifdef NOT_YET
9325 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9326 ADDR_SPACE_GENERIC, speed_p);
9327 #endif
9328 return true;
9329
9330 case UNSPEC_VRINTZ:
9331 case UNSPEC_VRINTP:
9332 case UNSPEC_VRINTM:
9333 case UNSPEC_VRINTR:
9334 case UNSPEC_VRINTX:
9335 case UNSPEC_VRINTA:
9336 *cost = COSTS_N_INSNS (1);
9337 if (speed_p)
9338 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9339
9340 return true;
9341 default:
9342 *cost = COSTS_N_INSNS (2);
9343 break;
9344 }
9345 return true;
9346 }
9347
9348 /* Cost of a libcall. We assume one insn per argument, an amount for the
9349 call (one insn for -Os) and then one for processing the result. */
9350 #define LIBCALL_COST(N) COSTS_N_INSNS ((N) + (speed_p ? 18 : 2))
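/* For example, a two-argument libcall such as a DImode division is
costed as COSTS_N_INSNS (20) when optimizing for speed and as
COSTS_N_INSNS (4) at -Os. */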
9351
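/* Handle the narrow-mode PLUS/MINUS cases below: if operand IDX of X is
a shifter operand representing a left shift (the only shift form that is
useful in the narrow modes), accumulate the shift-plus-arithmetic cost
together with the costs of both operands and return from the calling
cost function. OP is the code used when costing the other operand. The
macro relies on the local variables of its caller (x, cost, shift_op,
shift_reg, extra_cost and speed_p). */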
9352 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9353 do \
9354 { \
9355 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9356 if (shift_op != NULL \
9357 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9358 { \
9359 if (shift_reg) \
9360 { \
9361 if (speed_p) \
9362 *cost += extra_cost->alu.arith_shift_reg; \
9363 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9364 } \
9365 else if (speed_p) \
9366 *cost += extra_cost->alu.arith_shift; \
9367 \
9368 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9369 + rtx_cost (XEXP (x, 1 - IDX), \
9370 OP, 1, speed_p)); \
9371 return true; \
9372 } \
9373 } \
9374 while (0);
9375
9376 /* RTX costs. Make an estimate of the cost of executing the operation
9377 X, which is contained within an operation with code OUTER_CODE.
9378 SPEED_P indicates whether the cost desired is the performance cost,
9379 or the size cost. The estimate is stored in COST and the return
9380 value is TRUE if the cost calculation is final, or FALSE if the
9381 caller should recurse through the operands of X to add additional
9382 costs.
9383
9384 We currently make no attempt to model the size savings of Thumb-2
9385 16-bit instructions. At the normal points in compilation where
9386 this code is called we have no measure of whether the condition
9387 flags are live or not, and thus no realistic way to determine what
9388 the size will eventually be. */
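/* For example, an SImode (plus (reg) (reg)) is handled by the PLUS case
below, which adds the ALU cost and returns FALSE so that the generic
code recurses into the register operands, whereas an SImode
(plus (reg) (const_int)) is costed completely there and returns TRUE. */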
9389 static bool
9390 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9391 const struct cpu_cost_table *extra_cost,
9392 int *cost, bool speed_p)
9393 {
9394 machine_mode mode = GET_MODE (x);
9395
9396 if (TARGET_THUMB1)
9397 {
9398 if (speed_p)
9399 *cost = thumb1_rtx_costs (x, code, outer_code);
9400 else
9401 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9402 return true;
9403 }
9404
9405 switch (code)
9406 {
9407 case SET:
9408 *cost = 0;
9409 /* SET RTXs don't have a mode so we get it from the destination. */
9410 mode = GET_MODE (SET_DEST (x));
9411
9412 if (REG_P (SET_SRC (x))
9413 && REG_P (SET_DEST (x)))
9414 {
9415 /* Assume that most copies can be done with a single insn,
9416 unless we don't have HW FP, in which case everything
9417 larger than word mode will require two insns. */
9418 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9419 && GET_MODE_SIZE (mode) > 4)
9420 || mode == DImode)
9421 ? 2 : 1);
9422 /* Conditional register moves can be encoded
9423 in 16 bits in Thumb mode. */
9424 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9425 *cost >>= 1;
9426
9427 return true;
9428 }
9429
9430 if (CONST_INT_P (SET_SRC (x)))
9431 {
9432 /* Handle CONST_INT here, since the value doesn't have a mode
9433 and we would otherwise be unable to work out the true cost. */
9434 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9435 outer_code = SET;
9436 /* Slightly lower the cost of setting a core reg to a constant.
9437 This helps break up chains and allows for better scheduling. */
9438 if (REG_P (SET_DEST (x))
9439 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9440 *cost -= 1;
9441 x = SET_SRC (x);
9442 /* Immediate moves with an immediate in the range [0, 255] can be
9443 encoded in 16 bits in Thumb mode. */
9444 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9445 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9446 *cost >>= 1;
9447 goto const_int_cost;
9448 }
9449
9450 return false;
9451
9452 case MEM:
9453 /* A memory access costs 1 insn if the mode is small or the address is
9454 a single register; otherwise it costs one insn per word. */
9455 if (REG_P (XEXP (x, 0)))
9456 *cost = COSTS_N_INSNS (1);
9457 else if (flag_pic
9458 && GET_CODE (XEXP (x, 0)) == PLUS
9459 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9460 /* This will be split into two instructions.
9461 See arm.md:calculate_pic_address. */
9462 *cost = COSTS_N_INSNS (2);
9463 else
9464 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9465
9466 /* For speed optimizations, add the costs of the address and
9467 accessing memory. */
9468 if (speed_p)
9469 #ifdef NOT_YET
9470 *cost += (extra_cost->ldst.load
9471 + arm_address_cost (XEXP (x, 0), mode,
9472 ADDR_SPACE_GENERIC, speed_p));
9473 #else
9474 *cost += extra_cost->ldst.load;
9475 #endif
9476 return true;
9477
9478 case PARALLEL:
9479 {
9480 /* Calculations of LDM costs are complex. We assume an initial cost
9481 (ldm_1st) which will load the number of registers mentioned in
9482 ldm_regs_per_insn_1st registers; then each additional
9483 ldm_regs_per_insn_subsequent registers cost one more insn. The
9484 formula for N regs is thus:
9485
9486 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9487 + ldm_regs_per_insn_subsequent - 1)
9488 / ldm_regs_per_insn_subsequent).
9489
9490 Additional costs may also be added for addressing. A similar
9491 formula is used for STM. */
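/* For illustration, with hypothetical table values
ldm_regs_per_insn_1st == 3 and ldm_regs_per_insn_subsequent == 2,
a five-register LDM comes out as
ldm_1st + COSTS_N_INSNS ((MAX (5 - 3, 0) + 2 - 1) / 2)
== ldm_1st + COSTS_N_INSNS (1). */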
9492
9493 bool is_ldm = load_multiple_operation (x, SImode);
9494 bool is_stm = store_multiple_operation (x, SImode);
9495
9496 *cost = COSTS_N_INSNS (1);
9497
9498 if (is_ldm || is_stm)
9499 {
9500 if (speed_p)
9501 {
9502 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9503 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9504 ? extra_cost->ldst.ldm_regs_per_insn_1st
9505 : extra_cost->ldst.stm_regs_per_insn_1st;
9506 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9507 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9508 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9509
9510 *cost += regs_per_insn_1st
9511 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9512 + regs_per_insn_sub - 1)
9513 / regs_per_insn_sub);
9514 return true;
9515 }
9516
9517 }
9518 return false;
9519 }
9520 case DIV:
9521 case UDIV:
9522 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9523 && (mode == SFmode || !TARGET_VFP_SINGLE))
9524 *cost = COSTS_N_INSNS (speed_p
9525 ? extra_cost->fp[mode != SFmode].div : 1);
9526 else if (mode == SImode && TARGET_IDIV)
9527 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9528 else
9529 *cost = LIBCALL_COST (2);
9530 return false; /* All arguments must be in registers. */
9531
9532 case MOD:
9533 case UMOD:
9534 *cost = LIBCALL_COST (2);
9535 return false; /* All arguments must be in registers. */
9536
9537 case ROTATE:
9538 if (mode == SImode && REG_P (XEXP (x, 1)))
9539 {
9540 *cost = (COSTS_N_INSNS (2)
9541 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9542 if (speed_p)
9543 *cost += extra_cost->alu.shift_reg;
9544 return true;
9545 }
9546 /* Fall through */
9547 case ROTATERT:
9548 case ASHIFT:
9549 case LSHIFTRT:
9550 case ASHIFTRT:
9551 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9552 {
9553 *cost = (COSTS_N_INSNS (3)
9554 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9555 if (speed_p)
9556 *cost += 2 * extra_cost->alu.shift;
9557 return true;
9558 }
9559 else if (mode == SImode)
9560 {
9561 *cost = (COSTS_N_INSNS (1)
9562 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9563 /* Slightly disparage register shifts at -Os, but not by much. */
9564 if (!CONST_INT_P (XEXP (x, 1)))
9565 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9566 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9567 return true;
9568 }
9569 else if (GET_MODE_CLASS (mode) == MODE_INT
9570 && GET_MODE_SIZE (mode) < 4)
9571 {
9572 if (code == ASHIFT)
9573 {
9574 *cost = (COSTS_N_INSNS (1)
9575 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9576 /* Slightly disparage register shifts at -Os, but not by
9577 much. */
9578 if (!CONST_INT_P (XEXP (x, 1)))
9579 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9580 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9581 }
9582 else if (code == LSHIFTRT || code == ASHIFTRT)
9583 {
9584 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9585 {
9586 /* Can use SBFX/UBFX. */
9587 *cost = COSTS_N_INSNS (1);
9588 if (speed_p)
9589 *cost += extra_cost->alu.bfx;
9590 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9591 }
9592 else
9593 {
9594 *cost = COSTS_N_INSNS (2);
9595 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9596 if (speed_p)
9597 {
9598 if (CONST_INT_P (XEXP (x, 1)))
9599 *cost += 2 * extra_cost->alu.shift;
9600 else
9601 *cost += (extra_cost->alu.shift
9602 + extra_cost->alu.shift_reg);
9603 }
9604 else
9605 /* Slightly disparage register shifts. */
9606 *cost += !CONST_INT_P (XEXP (x, 1));
9607 }
9608 }
9609 else /* Rotates. */
9610 {
9611 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9612 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9613 if (speed_p)
9614 {
9615 if (CONST_INT_P (XEXP (x, 1)))
9616 *cost += (2 * extra_cost->alu.shift
9617 + extra_cost->alu.log_shift);
9618 else
9619 *cost += (extra_cost->alu.shift
9620 + extra_cost->alu.shift_reg
9621 + extra_cost->alu.log_shift_reg);
9622 }
9623 }
9624 return true;
9625 }
9626
9627 *cost = LIBCALL_COST (2);
9628 return false;
9629
9630 case BSWAP:
9631 if (arm_arch6)
9632 {
9633 if (mode == SImode)
9634 {
9635 *cost = COSTS_N_INSNS (1);
9636 if (speed_p)
9637 *cost += extra_cost->alu.rev;
9638
9639 return false;
9640 }
9641 }
9642 else
9643 {
9644 /* No rev instruction available. Look at arm_legacy_rev
9645 and thumb_legacy_rev for the form of RTL used then. */
9646 if (TARGET_THUMB)
9647 {
9648 *cost = COSTS_N_INSNS (10);
9649
9650 if (speed_p)
9651 {
9652 *cost += 6 * extra_cost->alu.shift;
9653 *cost += 3 * extra_cost->alu.logical;
9654 }
9655 }
9656 else
9657 {
9658 *cost = COSTS_N_INSNS (5);
9659
9660 if (speed_p)
9661 {
9662 *cost += 2 * extra_cost->alu.shift;
9663 *cost += extra_cost->alu.arith_shift;
9664 *cost += 2 * extra_cost->alu.logical;
9665 }
9666 }
9667 return true;
9668 }
9669 return false;
9670
9671 case MINUS:
9672 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9673 && (mode == SFmode || !TARGET_VFP_SINGLE))
9674 {
9675 *cost = COSTS_N_INSNS (1);
9676 if (GET_CODE (XEXP (x, 0)) == MULT
9677 || GET_CODE (XEXP (x, 1)) == MULT)
9678 {
9679 rtx mul_op0, mul_op1, sub_op;
9680
9681 if (speed_p)
9682 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9683
9684 if (GET_CODE (XEXP (x, 0)) == MULT)
9685 {
9686 mul_op0 = XEXP (XEXP (x, 0), 0);
9687 mul_op1 = XEXP (XEXP (x, 0), 1);
9688 sub_op = XEXP (x, 1);
9689 }
9690 else
9691 {
9692 mul_op0 = XEXP (XEXP (x, 1), 0);
9693 mul_op1 = XEXP (XEXP (x, 1), 1);
9694 sub_op = XEXP (x, 0);
9695 }
9696
9697 /* The first operand of the multiply may be optionally
9698 negated. */
9699 if (GET_CODE (mul_op0) == NEG)
9700 mul_op0 = XEXP (mul_op0, 0);
9701
9702 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9703 + rtx_cost (mul_op1, code, 0, speed_p)
9704 + rtx_cost (sub_op, code, 0, speed_p));
9705
9706 return true;
9707 }
9708
9709 if (speed_p)
9710 *cost += extra_cost->fp[mode != SFmode].addsub;
9711 return false;
9712 }
9713
9714 if (mode == SImode)
9715 {
9716 rtx shift_by_reg = NULL;
9717 rtx shift_op;
9718 rtx non_shift_op;
9719
9720 *cost = COSTS_N_INSNS (1);
9721
9722 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9723 if (shift_op == NULL)
9724 {
9725 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9726 non_shift_op = XEXP (x, 0);
9727 }
9728 else
9729 non_shift_op = XEXP (x, 1);
9730
9731 if (shift_op != NULL)
9732 {
9733 if (shift_by_reg != NULL)
9734 {
9735 if (speed_p)
9736 *cost += extra_cost->alu.arith_shift_reg;
9737 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9738 }
9739 else if (speed_p)
9740 *cost += extra_cost->alu.arith_shift;
9741
9742 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9743 + rtx_cost (non_shift_op, code, 0, speed_p));
9744 return true;
9745 }
9746
9747 if (arm_arch_thumb2
9748 && GET_CODE (XEXP (x, 1)) == MULT)
9749 {
9750 /* MLS. */
9751 if (speed_p)
9752 *cost += extra_cost->mult[0].add;
9753 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9754 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9755 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9756 return true;
9757 }
9758
9759 if (CONST_INT_P (XEXP (x, 0)))
9760 {
9761 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9762 INTVAL (XEXP (x, 0)), NULL_RTX,
9763 NULL_RTX, 1, 0);
9764 *cost = COSTS_N_INSNS (insns);
9765 if (speed_p)
9766 *cost += insns * extra_cost->alu.arith;
9767 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9768 return true;
9769 }
9770 else if (speed_p)
9771 *cost += extra_cost->alu.arith;
9772
9773 return false;
9774 }
9775
9776 if (GET_MODE_CLASS (mode) == MODE_INT
9777 && GET_MODE_SIZE (mode) < 4)
9778 {
9779 rtx shift_op, shift_reg;
9780 shift_reg = NULL;
9781
9782 /* We check both sides of the MINUS for shifter operands since,
9783 unlike PLUS, it's not commutative. */
9784
9785 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9786 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9787
9788 /* Slightly disparage, as we might need to widen the result. */
9789 *cost = 1 + COSTS_N_INSNS (1);
9790 if (speed_p)
9791 *cost += extra_cost->alu.arith;
9792
9793 if (CONST_INT_P (XEXP (x, 0)))
9794 {
9795 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9796 return true;
9797 }
9798
9799 return false;
9800 }
9801
9802 if (mode == DImode)
9803 {
9804 *cost = COSTS_N_INSNS (2);
9805
9806 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9807 {
9808 rtx op1 = XEXP (x, 1);
9809
9810 if (speed_p)
9811 *cost += 2 * extra_cost->alu.arith;
9812
9813 if (GET_CODE (op1) == ZERO_EXTEND)
9814 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9815 else
9816 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9817 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9818 0, speed_p);
9819 return true;
9820 }
9821 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9822 {
9823 if (speed_p)
9824 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9825 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9826 0, speed_p)
9827 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9828 return true;
9829 }
9830 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9831 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9832 {
9833 if (speed_p)
9834 *cost += (extra_cost->alu.arith
9835 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9836 ? extra_cost->alu.arith
9837 : extra_cost->alu.arith_shift));
9838 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9839 + rtx_cost (XEXP (XEXP (x, 1), 0),
9840 GET_CODE (XEXP (x, 1)), 0, speed_p));
9841 return true;
9842 }
9843
9844 if (speed_p)
9845 *cost += 2 * extra_cost->alu.arith;
9846 return false;
9847 }
9848
9849 /* Vector mode? */
9850
9851 *cost = LIBCALL_COST (2);
9852 return false;
9853
9854 case PLUS:
9855 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9856 && (mode == SFmode || !TARGET_VFP_SINGLE))
9857 {
9858 *cost = COSTS_N_INSNS (1);
9859 if (GET_CODE (XEXP (x, 0)) == MULT)
9860 {
9861 rtx mul_op0, mul_op1, add_op;
9862
9863 if (speed_p)
9864 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9865
9866 mul_op0 = XEXP (XEXP (x, 0), 0);
9867 mul_op1 = XEXP (XEXP (x, 0), 1);
9868 add_op = XEXP (x, 1);
9869
9870 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9871 + rtx_cost (mul_op1, code, 0, speed_p)
9872 + rtx_cost (add_op, code, 0, speed_p));
9873
9874 return true;
9875 }
9876
9877 if (speed_p)
9878 *cost += extra_cost->fp[mode != SFmode].addsub;
9879 return false;
9880 }
9881 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9882 {
9883 *cost = LIBCALL_COST (2);
9884 return false;
9885 }
9886
9887 /* Narrow modes can be synthesized in SImode, but the range
9888 of useful sub-operations is limited. Check for shift operations
9889 on one of the operands. Only left shifts can be used in the
9890 narrow modes. */
9891 if (GET_MODE_CLASS (mode) == MODE_INT
9892 && GET_MODE_SIZE (mode) < 4)
9893 {
9894 rtx shift_op, shift_reg;
9895 shift_reg = NULL;
9896
9897 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9898
9899 if (CONST_INT_P (XEXP (x, 1)))
9900 {
9901 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9902 INTVAL (XEXP (x, 1)), NULL_RTX,
9903 NULL_RTX, 1, 0);
9904 *cost = COSTS_N_INSNS (insns);
9905 if (speed_p)
9906 *cost += insns * extra_cost->alu.arith;
9907 /* Slightly penalize a narrow operation as the result may
9908 need widening. */
9909 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9910 return true;
9911 }
9912
9913 /* Slightly penalize a narrow operation as the result may
9914 need widening. */
9915 *cost = 1 + COSTS_N_INSNS (1);
9916 if (speed_p)
9917 *cost += extra_cost->alu.arith;
9918
9919 return false;
9920 }
9921
9922 if (mode == SImode)
9923 {
9924 rtx shift_op, shift_reg;
9925
9926 *cost = COSTS_N_INSNS (1);
9927 if (TARGET_INT_SIMD
9928 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9929 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9930 {
9931 /* UXTA[BH] or SXTA[BH]. */
9932 if (speed_p)
9933 *cost += extra_cost->alu.extend_arith;
9934 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9935 speed_p)
9936 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9937 return true;
9938 }
9939
9940 shift_reg = NULL;
9941 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9942 if (shift_op != NULL)
9943 {
9944 if (shift_reg)
9945 {
9946 if (speed_p)
9947 *cost += extra_cost->alu.arith_shift_reg;
9948 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9949 }
9950 else if (speed_p)
9951 *cost += extra_cost->alu.arith_shift;
9952
9953 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9954 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9955 return true;
9956 }
9957 if (GET_CODE (XEXP (x, 0)) == MULT)
9958 {
9959 rtx mul_op = XEXP (x, 0);
9960
9961 *cost = COSTS_N_INSNS (1);
9962
9963 if (TARGET_DSP_MULTIPLY
9964 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9965 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9966 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9967 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9968 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9969 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9970 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9971 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9972 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9973 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9974 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9975 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9976 == 16))))))
9977 {
9978 /* SMLA[BT][BT]. */
9979 if (speed_p)
9980 *cost += extra_cost->mult[0].extend_add;
9981 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9982 SIGN_EXTEND, 0, speed_p)
9983 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9984 SIGN_EXTEND, 0, speed_p)
9985 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9986 return true;
9987 }
9988
9989 if (speed_p)
9990 *cost += extra_cost->mult[0].add;
9991 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9992 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9993 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9994 return true;
9995 }
9996 if (CONST_INT_P (XEXP (x, 1)))
9997 {
9998 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9999 INTVAL (XEXP (x, 1)), NULL_RTX,
10000 NULL_RTX, 1, 0);
10001 *cost = COSTS_N_INSNS (insns);
10002 if (speed_p)
10003 *cost += insns * extra_cost->alu.arith;
10004 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
10005 return true;
10006 }
10007 else if (speed_p)
10008 *cost += extra_cost->alu.arith;
10009
10010 return false;
10011 }
10012
10013 if (mode == DImode)
10014 {
10015 if (arm_arch3m
10016 && GET_CODE (XEXP (x, 0)) == MULT
10017 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10018 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10019 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10020 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10021 {
10022 *cost = COSTS_N_INSNS (1);
10023 if (speed_p)
10024 *cost += extra_cost->mult[1].extend_add;
10025 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10026 ZERO_EXTEND, 0, speed_p)
10027 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10028 ZERO_EXTEND, 0, speed_p)
10029 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10030 return true;
10031 }
10032
10033 *cost = COSTS_N_INSNS (2);
10034
10035 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10036 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10037 {
10038 if (speed_p)
10039 *cost += (extra_cost->alu.arith
10040 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10041 ? extra_cost->alu.arith
10042 : extra_cost->alu.arith_shift));
10043
10044 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10045 speed_p)
10046 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10047 return true;
10048 }
10049
10050 if (speed_p)
10051 *cost += 2 * extra_cost->alu.arith;
10052 return false;
10053 }
10054
10055 /* Vector mode? */
10056 *cost = LIBCALL_COST (2);
10057 return false;
10058 case IOR:
10059 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10060 {
10061 *cost = COSTS_N_INSNS (1);
10062 if (speed_p)
10063 *cost += extra_cost->alu.rev;
10064
10065 return true;
10066 }
10067 /* Fall through. */
10068 case AND: case XOR:
10069 if (mode == SImode)
10070 {
10071 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10072 rtx op0 = XEXP (x, 0);
10073 rtx shift_op, shift_reg;
10074
10075 *cost = COSTS_N_INSNS (1);
10076
10077 if (subcode == NOT
10078 && (code == AND
10079 || (code == IOR && TARGET_THUMB2)))
10080 op0 = XEXP (op0, 0);
10081
10082 shift_reg = NULL;
10083 shift_op = shifter_op_p (op0, &shift_reg);
10084 if (shift_op != NULL)
10085 {
10086 if (shift_reg)
10087 {
10088 if (speed_p)
10089 *cost += extra_cost->alu.log_shift_reg;
10090 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10091 }
10092 else if (speed_p)
10093 *cost += extra_cost->alu.log_shift;
10094
10095 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10096 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10097 return true;
10098 }
10099
10100 if (CONST_INT_P (XEXP (x, 1)))
10101 {
10102 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10103 INTVAL (XEXP (x, 1)), NULL_RTX,
10104 NULL_RTX, 1, 0);
10105
10106 *cost = COSTS_N_INSNS (insns);
10107 if (speed_p)
10108 *cost += insns * extra_cost->alu.logical;
10109 *cost += rtx_cost (op0, code, 0, speed_p);
10110 return true;
10111 }
10112
10113 if (speed_p)
10114 *cost += extra_cost->alu.logical;
10115 *cost += (rtx_cost (op0, code, 0, speed_p)
10116 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10117 return true;
10118 }
10119
10120 if (mode == DImode)
10121 {
10122 rtx op0 = XEXP (x, 0);
10123 enum rtx_code subcode = GET_CODE (op0);
10124
10125 *cost = COSTS_N_INSNS (2);
10126
10127 if (subcode == NOT
10128 && (code == AND
10129 || (code == IOR && TARGET_THUMB2)))
10130 op0 = XEXP (op0, 0);
10131
10132 if (GET_CODE (op0) == ZERO_EXTEND)
10133 {
10134 if (speed_p)
10135 *cost += 2 * extra_cost->alu.logical;
10136
10137 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10138 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10139 return true;
10140 }
10141 else if (GET_CODE (op0) == SIGN_EXTEND)
10142 {
10143 if (speed_p)
10144 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10145
10146 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10147 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10148 return true;
10149 }
10150
10151 if (speed_p)
10152 *cost += 2 * extra_cost->alu.logical;
10153
10154 return true;
10155 }
10156 /* Vector mode? */
10157
10158 *cost = LIBCALL_COST (2);
10159 return false;
10160
10161 case MULT:
10162 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10163 && (mode == SFmode || !TARGET_VFP_SINGLE))
10164 {
10165 rtx op0 = XEXP (x, 0);
10166
10167 *cost = COSTS_N_INSNS (1);
10168
10169 if (GET_CODE (op0) == NEG)
10170 op0 = XEXP (op0, 0);
10171
10172 if (speed_p)
10173 *cost += extra_cost->fp[mode != SFmode].mult;
10174
10175 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10176 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10177 return true;
10178 }
10179 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10180 {
10181 *cost = LIBCALL_COST (2);
10182 return false;
10183 }
10184
10185 if (mode == SImode)
10186 {
10187 *cost = COSTS_N_INSNS (1);
10188 if (TARGET_DSP_MULTIPLY
10189 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10190 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10191 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10192 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10193 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10194 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10195 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10196 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10197 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10198 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10199 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10200 && (INTVAL (XEXP (XEXP (x, 1), 1))
10201 == 16))))))
10202 {
10203 /* SMUL[TB][TB]. */
10204 if (speed_p)
10205 *cost += extra_cost->mult[0].extend;
10206 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10207 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10208 return true;
10209 }
10210 if (speed_p)
10211 *cost += extra_cost->mult[0].simple;
10212 return false;
10213 }
10214
10215 if (mode == DImode)
10216 {
10217 if (arm_arch3m
10218 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10219 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10220 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10221 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10222 {
10223 *cost = COSTS_N_INSNS (1);
10224 if (speed_p)
10225 *cost += extra_cost->mult[1].extend;
10226 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10227 ZERO_EXTEND, 0, speed_p)
10228 + rtx_cost (XEXP (XEXP (x, 1), 0),
10229 ZERO_EXTEND, 0, speed_p));
10230 return true;
10231 }
10232
10233 *cost = LIBCALL_COST (2);
10234 return false;
10235 }
10236
10237 /* Vector mode? */
10238 *cost = LIBCALL_COST (2);
10239 return false;
10240
10241 case NEG:
10242 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10243 && (mode == SFmode || !TARGET_VFP_SINGLE))
10244 {
10245 *cost = COSTS_N_INSNS (1);
10246 if (speed_p)
10247 *cost += extra_cost->fp[mode != SFmode].neg;
10248
10249 return false;
10250 }
10251 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10252 {
10253 *cost = LIBCALL_COST (1);
10254 return false;
10255 }
10256
10257 if (mode == SImode)
10258 {
10259 if (GET_CODE (XEXP (x, 0)) == ABS)
10260 {
10261 *cost = COSTS_N_INSNS (2);
10262 /* Assume the non-flag-changing variant. */
10263 if (speed_p)
10264 *cost += (extra_cost->alu.log_shift
10265 + extra_cost->alu.arith_shift);
10266 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10267 return true;
10268 }
10269
10270 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10271 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10272 {
10273 *cost = COSTS_N_INSNS (2);
10274 /* No extra cost for MOV imm and MVN imm. */
10275 /* If the comparison op is using the flags, there's no further
10276 cost, otherwise we need to add the cost of the comparison. */
10277 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10278 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10279 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10280 {
10281 *cost += (COSTS_N_INSNS (1)
10282 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10283 speed_p)
10284 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10285 speed_p));
10286 if (speed_p)
10287 *cost += extra_cost->alu.arith;
10288 }
10289 return true;
10290 }
10291 *cost = COSTS_N_INSNS (1);
10292 if (speed_p)
10293 *cost += extra_cost->alu.arith;
10294 return false;
10295 }
10296
10297 if (GET_MODE_CLASS (mode) == MODE_INT
10298 && GET_MODE_SIZE (mode) < 4)
10299 {
10300 /* Slightly disparage, as we might need an extend operation. */
10301 *cost = 1 + COSTS_N_INSNS (1);
10302 if (speed_p)
10303 *cost += extra_cost->alu.arith;
10304 return false;
10305 }
10306
10307 if (mode == DImode)
10308 {
10309 *cost = COSTS_N_INSNS (2);
10310 if (speed_p)
10311 *cost += 2 * extra_cost->alu.arith;
10312 return false;
10313 }
10314
10315 /* Vector mode? */
10316 *cost = LIBCALL_COST (1);
10317 return false;
10318
10319 case NOT:
10320 if (mode == SImode)
10321 {
10322 rtx shift_op;
10323 rtx shift_reg = NULL;
10324
10325 *cost = COSTS_N_INSNS (1);
10326 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10327
10328 if (shift_op)
10329 {
10330 if (shift_reg != NULL)
10331 {
10332 if (speed_p)
10333 *cost += extra_cost->alu.log_shift_reg;
10334 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10335 }
10336 else if (speed_p)
10337 *cost += extra_cost->alu.log_shift;
10338 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10339 return true;
10340 }
10341
10342 if (speed_p)
10343 *cost += extra_cost->alu.logical;
10344 return false;
10345 }
10346 if (mode == DImode)
10347 {
10348 *cost = COSTS_N_INSNS (2);
10349 return false;
10350 }
10351
10352 /* Vector mode? */
10353
10354 *cost += LIBCALL_COST (1);
10355 return false;
10356
10357 case IF_THEN_ELSE:
10358 {
10359 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10360 {
10361 *cost = COSTS_N_INSNS (4);
10362 return true;
10363 }
10364 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10365 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10366
10367 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10368 /* Assume that if one arm of the if_then_else is a register,
10369 that it will be tied with the result and eliminate the
10370 conditional insn. */
10371 if (REG_P (XEXP (x, 1)))
10372 *cost += op2cost;
10373 else if (REG_P (XEXP (x, 2)))
10374 *cost += op1cost;
10375 else
10376 {
10377 if (speed_p)
10378 {
10379 if (extra_cost->alu.non_exec_costs_exec)
10380 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10381 else
10382 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10383 }
10384 else
10385 *cost += op1cost + op2cost;
10386 }
10387 }
10388 return true;
10389
10390 case COMPARE:
10391 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10392 *cost = 0;
10393 else
10394 {
10395 machine_mode op0mode;
10396 /* We'll mostly assume that the cost of a compare is the cost of the
10397 LHS. However, there are some notable exceptions. */
10398
10399 /* Floating point compares are never done as side-effects. */
10400 op0mode = GET_MODE (XEXP (x, 0));
10401 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10402 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10403 {
10404 *cost = COSTS_N_INSNS (1);
10405 if (speed_p)
10406 *cost += extra_cost->fp[op0mode != SFmode].compare;
10407
10408 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10409 {
10410 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10411 return true;
10412 }
10413
10414 return false;
10415 }
10416 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10417 {
10418 *cost = LIBCALL_COST (2);
10419 return false;
10420 }
10421
10422 /* DImode compares normally take two insns. */
10423 if (op0mode == DImode)
10424 {
10425 *cost = COSTS_N_INSNS (2);
10426 if (speed_p)
10427 *cost += 2 * extra_cost->alu.arith;
10428 return false;
10429 }
10430
10431 if (op0mode == SImode)
10432 {
10433 rtx shift_op;
10434 rtx shift_reg;
10435
10436 if (XEXP (x, 1) == const0_rtx
10437 && !(REG_P (XEXP (x, 0))
10438 || (GET_CODE (XEXP (x, 0)) == SUBREG
10439 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10440 {
10441 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10442
10443 /* Multiply operations that set the flags are often
10444 significantly more expensive. */
10445 if (speed_p
10446 && GET_CODE (XEXP (x, 0)) == MULT
10447 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10448 *cost += extra_cost->mult[0].flag_setting;
10449
10450 if (speed_p
10451 && GET_CODE (XEXP (x, 0)) == PLUS
10452 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10453 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10454 0), 1), mode))
10455 *cost += extra_cost->mult[0].flag_setting;
10456 return true;
10457 }
10458
10459 shift_reg = NULL;
10460 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10461 if (shift_op != NULL)
10462 {
10463 *cost = COSTS_N_INSNS (1);
10464 if (shift_reg != NULL)
10465 {
10466 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10467 if (speed_p)
10468 *cost += extra_cost->alu.arith_shift_reg;
10469 }
10470 else if (speed_p)
10471 *cost += extra_cost->alu.arith_shift;
10472 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10473 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10474 return true;
10475 }
10476
10477 *cost = COSTS_N_INSNS (1);
10478 if (speed_p)
10479 *cost += extra_cost->alu.arith;
10480 if (CONST_INT_P (XEXP (x, 1))
10481 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10482 {
10483 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10484 return true;
10485 }
10486 return false;
10487 }
10488
10489 /* Vector mode? */
10490
10491 *cost = LIBCALL_COST (2);
10492 return false;
10493 }
10494 return true;
10495
10496 case EQ:
10497 case NE:
10498 case LT:
10499 case LE:
10500 case GT:
10501 case GE:
10502 case LTU:
10503 case LEU:
10504 case GEU:
10505 case GTU:
10506 case ORDERED:
10507 case UNORDERED:
10508 case UNEQ:
10509 case UNLE:
10510 case UNLT:
10511 case UNGE:
10512 case UNGT:
10513 case LTGT:
10514 if (outer_code == SET)
10515 {
10516 /* Is it a store-flag operation? */
10517 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10518 && XEXP (x, 1) == const0_rtx)
10519 {
10520 /* Thumb also needs an IT insn. */
10521 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10522 return true;
10523 }
10524 if (XEXP (x, 1) == const0_rtx)
10525 {
10526 switch (code)
10527 {
10528 case LT:
10529 /* LSR Rd, Rn, #31. */
10530 *cost = COSTS_N_INSNS (1);
10531 if (speed_p)
10532 *cost += extra_cost->alu.shift;
10533 break;
10534
10535 case EQ:
10536 /* RSBS T1, Rn, #0
10537 ADC Rd, Rn, T1. */
10538
10539 case NE:
10540 /* SUBS T1, Rn, #1
10541 SBC Rd, Rn, T1. */
10542 *cost = COSTS_N_INSNS (2);
10543 break;
10544
10545 case LE:
10546 /* RSBS T1, Rn, Rn, LSR #31
10547 ADC Rd, Rn, T1. */
10548 *cost = COSTS_N_INSNS (2);
10549 if (speed_p)
10550 *cost += extra_cost->alu.arith_shift;
10551 break;
10552
10553 case GT:
10554 /* RSB Rd, Rn, Rn, ASR #1
10555 LSR Rd, Rd, #31. */
10556 *cost = COSTS_N_INSNS (2);
10557 if (speed_p)
10558 *cost += (extra_cost->alu.arith_shift
10559 + extra_cost->alu.shift);
10560 break;
10561
10562 case GE:
10563 /* ASR Rd, Rn, #31
10564 ADD Rd, Rn, #1. */
10565 *cost = COSTS_N_INSNS (2);
10566 if (speed_p)
10567 *cost += extra_cost->alu.shift;
10568 break;
10569
10570 default:
10571 /* Remaining cases are either meaningless or would take
10572 three insns anyway. */
10573 *cost = COSTS_N_INSNS (3);
10574 break;
10575 }
10576 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10577 return true;
10578 }
10579 else
10580 {
10581 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10582 if (CONST_INT_P (XEXP (x, 1))
10583 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10584 {
10585 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10586 return true;
10587 }
10588
10589 return false;
10590 }
10591 }
10592 /* Not directly inside a set. If it involves the condition code
10593 register it must be the condition for a branch, cond_exec or
10594 I_T_E operation. Since the comparison is performed elsewhere
10595 this is just the control part which has no additional
10596 cost. */
10597 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10598 && XEXP (x, 1) == const0_rtx)
10599 {
10600 *cost = 0;
10601 return true;
10602 }
10603 return false;
10604
10605 case ABS:
10606 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10607 && (mode == SFmode || !TARGET_VFP_SINGLE))
10608 {
10609 *cost = COSTS_N_INSNS (1);
10610 if (speed_p)
10611 *cost += extra_cost->fp[mode != SFmode].neg;
10612
10613 return false;
10614 }
10615 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10616 {
10617 *cost = LIBCALL_COST (1);
10618 return false;
10619 }
10620
10621 if (mode == SImode)
10622 {
10623 *cost = COSTS_N_INSNS (1);
10624 if (speed_p)
10625 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10626 return false;
10627 }
10628 /* Vector mode? */
10629 *cost = LIBCALL_COST (1);
10630 return false;
10631
10632 case SIGN_EXTEND:
10633 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10634 && MEM_P (XEXP (x, 0)))
10635 {
10636 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10637
10638 if (mode == DImode)
10639 *cost += COSTS_N_INSNS (1);
10640
10641 if (!speed_p)
10642 return true;
10643
10644 if (GET_MODE (XEXP (x, 0)) == SImode)
10645 *cost += extra_cost->ldst.load;
10646 else
10647 *cost += extra_cost->ldst.load_sign_extend;
10648
10649 if (mode == DImode)
10650 *cost += extra_cost->alu.shift;
10651
10652 return true;
10653 }
10654
10655 /* Widening from less than 32-bits requires an extend operation. */
10656 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10657 {
10658 /* We have SXTB/SXTH. */
10659 *cost = COSTS_N_INSNS (1);
10660 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10661 if (speed_p)
10662 *cost += extra_cost->alu.extend;
10663 }
10664 else if (GET_MODE (XEXP (x, 0)) != SImode)
10665 {
10666 /* Needs two shifts. */
10667 *cost = COSTS_N_INSNS (2);
10668 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10669 if (speed_p)
10670 *cost += 2 * extra_cost->alu.shift;
10671 }
10672
10673 /* Widening beyond 32-bits requires one more insn. */
10674 if (mode == DImode)
10675 {
10676 *cost += COSTS_N_INSNS (1);
10677 if (speed_p)
10678 *cost += extra_cost->alu.shift;
10679 }
10680
10681 return true;
10682
10683 case ZERO_EXTEND:
10684 if ((arm_arch4
10685 || GET_MODE (XEXP (x, 0)) == SImode
10686 || GET_MODE (XEXP (x, 0)) == QImode)
10687 && MEM_P (XEXP (x, 0)))
10688 {
10689 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10690
10691 if (mode == DImode)
10692 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10693
10694 return true;
10695 }
10696
10697 /* Widening from less than 32-bits requires an extend operation. */
10698 if (GET_MODE (XEXP (x, 0)) == QImode)
10699 {
10700 /* UXTB can be a shorter instruction in Thumb2, but it might
10701 be slower than the AND Rd, Rn, #255 alternative. When
10702 optimizing for speed it should never be slower to use
10703 AND, and we don't really model 16-bit vs 32-bit insns
10704 here. */
10705 *cost = COSTS_N_INSNS (1);
10706 if (speed_p)
10707 *cost += extra_cost->alu.logical;
10708 }
10709 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10710 {
10711 /* We have UXTB/UXTH. */
10712 *cost = COSTS_N_INSNS (1);
10713 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10714 if (speed_p)
10715 *cost += extra_cost->alu.extend;
10716 }
10717 else if (GET_MODE (XEXP (x, 0)) != SImode)
10718 {
10719 /* Needs two shifts. It's marginally preferable to use
10720 shifts rather than two BIC instructions as the second
10721 shift may merge with a subsequent insn as a shifter
10722 op. */
10723 *cost = COSTS_N_INSNS (2);
10724 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10725 if (speed_p)
10726 *cost += 2 * extra_cost->alu.shift;
10727 }
10728 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10729 *cost = COSTS_N_INSNS (1);
10730
10731 /* Widening beyond 32-bits requires one more insn. */
10732 if (mode == DImode)
10733 {
10734 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10735 }
10736
10737 return true;
10738
10739 case CONST_INT:
10740 *cost = 0;
10741 /* CONST_INT has no mode, so we cannot tell for sure how many
10742 insns are really going to be needed. The best we can do is
10743 look at the value passed. If it fits in SImode, then assume
10744 that's the mode it will be used for. Otherwise assume it
10745 will be used in DImode. */
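/* For example, a value such as 0x1ffffffff does not fit in SImode, so
below it is costed as two SImode constants: the low word obtained via
trunc_int_for_mode and the high word obtained by INTVAL (x) >> 32. */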
10746 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10747 mode = SImode;
10748 else
10749 mode = DImode;
10750
10751 /* Avoid blowing up in arm_gen_constant (). */
10752 if (!(outer_code == PLUS
10753 || outer_code == AND
10754 || outer_code == IOR
10755 || outer_code == XOR
10756 || outer_code == MINUS))
10757 outer_code = SET;
10758
10759 const_int_cost:
10760 if (mode == SImode)
10761 {
10762 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10763 INTVAL (x), NULL, NULL,
10764 0, 0));
10765 /* Extra costs? */
10766 }
10767 else
10768 {
10769 *cost += COSTS_N_INSNS (arm_gen_constant
10770 (outer_code, SImode, NULL,
10771 trunc_int_for_mode (INTVAL (x), SImode),
10772 NULL, NULL, 0, 0)
10773 + arm_gen_constant (outer_code, SImode, NULL,
10774 INTVAL (x) >> 32, NULL,
10775 NULL, 0, 0));
10776 /* Extra costs? */
10777 }
10778
10779 return true;
10780
10781 case CONST:
10782 case LABEL_REF:
10783 case SYMBOL_REF:
10784 if (speed_p)
10785 {
10786 if (arm_arch_thumb2 && !flag_pic)
10787 *cost = COSTS_N_INSNS (2);
10788 else
10789 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10790 }
10791 else
10792 *cost = COSTS_N_INSNS (2);
10793
10794 if (flag_pic)
10795 {
10796 *cost += COSTS_N_INSNS (1);
10797 if (speed_p)
10798 *cost += extra_cost->alu.arith;
10799 }
10800
10801 return true;
10802
10803 case CONST_FIXED:
10804 *cost = COSTS_N_INSNS (4);
10805 /* Fixme. */
10806 return true;
10807
10808 case CONST_DOUBLE:
10809 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10810 && (mode == SFmode || !TARGET_VFP_SINGLE))
10811 {
10812 if (vfp3_const_double_rtx (x))
10813 {
10814 *cost = COSTS_N_INSNS (1);
10815 if (speed_p)
10816 *cost += extra_cost->fp[mode == DFmode].fpconst;
10817 return true;
10818 }
10819
10820 if (speed_p)
10821 {
10822 *cost = COSTS_N_INSNS (1);
10823 if (mode == DFmode)
10824 *cost += extra_cost->ldst.loadd;
10825 else
10826 *cost += extra_cost->ldst.loadf;
10827 }
10828 else
10829 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10830
10831 return true;
10832 }
10833 *cost = COSTS_N_INSNS (4);
10834 return true;
10835
10836 case CONST_VECTOR:
10837 /* Fixme. */
10838 if (TARGET_NEON
10839 && TARGET_HARD_FLOAT
10840 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10841 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10842 *cost = COSTS_N_INSNS (1);
10843 else
10844 *cost = COSTS_N_INSNS (4);
10845 return true;
10846
10847 case HIGH:
10848 case LO_SUM:
10849 *cost = COSTS_N_INSNS (1);
10850 /* When optimizing for size, we prefer constant pool entries to
10851 MOVW/MOVT pairs, so bump the cost of these slightly. */
10852 if (!speed_p)
10853 *cost += 1;
10854 return true;
10855
10856 case CLZ:
10857 *cost = COSTS_N_INSNS (1);
10858 if (speed_p)
10859 *cost += extra_cost->alu.clz;
10860 return false;
10861
10862 case SMIN:
10863 if (XEXP (x, 1) == const0_rtx)
10864 {
10865 *cost = COSTS_N_INSNS (1);
10866 if (speed_p)
10867 *cost += extra_cost->alu.log_shift;
10868 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10869 return true;
10870 }
10871 /* Fall through. */
10872 case SMAX:
10873 case UMIN:
10874 case UMAX:
10875 *cost = COSTS_N_INSNS (2);
10876 return false;
10877
10878 case TRUNCATE:
10879 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10880 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10881 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10882 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10883 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10884 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10885 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10886 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10887 == ZERO_EXTEND))))
10888 {
10889 *cost = COSTS_N_INSNS (1);
10890 if (speed_p)
10891 *cost += extra_cost->mult[1].extend;
10892 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10893 speed_p)
10894 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10895 0, speed_p));
10896 return true;
10897 }
10898 *cost = LIBCALL_COST (1);
10899 return false;
10900
10901 case UNSPEC_VOLATILE:
10902 case UNSPEC:
10903 return arm_unspec_cost (x, outer_code, speed_p, cost);
10904
10905 case PC:
10906 /* Reading the PC is like reading any other register. Writing it
10907 is more expensive, but we take that into account elsewhere. */
10908 *cost = 0;
10909 return true;
10910
10911 case ZERO_EXTRACT:
10912 /* TODO: Simple zero_extract of bottom bits using AND. */
10913 /* Fall through. */
10914 case SIGN_EXTRACT:
10915 if (arm_arch6
10916 && mode == SImode
10917 && CONST_INT_P (XEXP (x, 1))
10918 && CONST_INT_P (XEXP (x, 2)))
10919 {
10920 *cost = COSTS_N_INSNS (1);
10921 if (speed_p)
10922 *cost += extra_cost->alu.bfx;
10923 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10924 return true;
10925 }
10926 /* Without UBFX/SBFX, need to resort to shift operations. */
10927 *cost = COSTS_N_INSNS (2);
10928 if (speed_p)
10929 *cost += 2 * extra_cost->alu.shift;
10930 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10931 return true;
10932
10933 case FLOAT_EXTEND:
10934 if (TARGET_HARD_FLOAT)
10935 {
10936 *cost = COSTS_N_INSNS (1);
10937 if (speed_p)
10938 *cost += extra_cost->fp[mode == DFmode].widen;
10939 if (!TARGET_FPU_ARMV8
10940 && GET_MODE (XEXP (x, 0)) == HFmode)
10941 {
10942 /* Pre v8, widening HF->DF is a two-step process, first
10943 widening to SFmode. */
10944 *cost += COSTS_N_INSNS (1);
10945 if (speed_p)
10946 *cost += extra_cost->fp[0].widen;
10947 }
10948 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10949 return true;
10950 }
10951
10952 *cost = LIBCALL_COST (1);
10953 return false;
10954
10955 case FLOAT_TRUNCATE:
10956 if (TARGET_HARD_FLOAT)
10957 {
10958 *cost = COSTS_N_INSNS (1);
10959 if (speed_p)
10960 *cost += extra_cost->fp[mode == DFmode].narrow;
10961 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10962 return true;
10963 /* Vector modes? */
10964 }
10965 *cost = LIBCALL_COST (1);
10966 return false;
10967
10968 case FMA:
10969 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10970 {
10971 rtx op0 = XEXP (x, 0);
10972 rtx op1 = XEXP (x, 1);
10973 rtx op2 = XEXP (x, 2);
10974
10975 *cost = COSTS_N_INSNS (1);
10976
10977 /* vfms or vfnma. */
10978 if (GET_CODE (op0) == NEG)
10979 op0 = XEXP (op0, 0);
10980
10981 /* vfnms or vfnma. */
10982 if (GET_CODE (op2) == NEG)
10983 op2 = XEXP (op2, 0);
10984
10985 *cost += rtx_cost (op0, FMA, 0, speed_p);
10986 *cost += rtx_cost (op1, FMA, 1, speed_p);
10987 *cost += rtx_cost (op2, FMA, 2, speed_p);
10988
10989 if (speed_p)
10990 *cost += extra_cost->fp[mode == DFmode].fma;
10991
10992 return true;
10993 }
10994
10995 *cost = LIBCALL_COST (3);
10996 return false;
10997
10998 case FIX:
10999 case UNSIGNED_FIX:
11000 if (TARGET_HARD_FLOAT)
11001 {
11002 if (GET_MODE_CLASS (mode) == MODE_INT)
11003 {
11004 *cost = COSTS_N_INSNS (1);
11005 if (speed_p)
11006 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
11007 /* Strip off the 'cost' of rounding towards zero. */
11008 if (GET_CODE (XEXP (x, 0)) == FIX)
11009 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
11010 else
11011 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
11012 /* ??? Increase the cost to deal with transferring from
11013 FP -> CORE registers? */
11014 return true;
11015 }
11016 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11017 && TARGET_FPU_ARMV8)
11018 {
11019 *cost = COSTS_N_INSNS (1);
11020 if (speed_p)
11021 *cost += extra_cost->fp[mode == DFmode].roundint;
11022 return false;
11023 }
11024 /* Vector costs? */
11025 }
11026 *cost = LIBCALL_COST (1);
11027 return false;
11028
11029 case FLOAT:
11030 case UNSIGNED_FLOAT:
11031 if (TARGET_HARD_FLOAT)
11032 {
11033 /* ??? Increase the cost to deal with transferring from CORE
11034 -> FP registers? */
11035 *cost = COSTS_N_INSNS (1);
11036 if (speed_p)
11037 *cost += extra_cost->fp[mode == DFmode].fromint;
11038 return false;
11039 }
11040 *cost = LIBCALL_COST (1);
11041 return false;
11042
11043 case CALL:
11044 *cost = COSTS_N_INSNS (1);
11045 return true;
11046
11047 case ASM_OPERANDS:
11048 {
11049 /* Just a guess: count the instructions in the asm template
11050 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11051 though (see PR60663). */
11052 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11053 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11054
11055 *cost = COSTS_N_INSNS (asm_length + num_operands);
11056 return true;
11057 }
11058 default:
11059 if (mode != VOIDmode)
11060 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11061 else
11062 *cost = COSTS_N_INSNS (4); /* Who knows? */
11063 return false;
11064 }
11065 }
11066
11067 #undef HANDLE_NARROW_SHIFT_ARITH
11068
11069 /* RTX costs. Dispatch to the appropriate cost routine, depending on the tuning and on whether we are optimizing for size or speed. */
11070 static bool
11071 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11072 int *total, bool speed)
11073 {
11074 bool result;
11075
11076 if (TARGET_OLD_RTX_COSTS
11077 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11078 {
11079 /* Old way. (Deprecated.) */
11080 if (!speed)
11081 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11082 (enum rtx_code) outer_code, total);
11083 else
11084 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11085 (enum rtx_code) outer_code, total,
11086 speed);
11087 }
11088 else
11089 {
11090 /* New way. */
11091 if (current_tune->insn_extra_cost)
11092 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11093 (enum rtx_code) outer_code,
11094 current_tune->insn_extra_cost,
11095 total, speed);
11096 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11097 && current_tune->insn_extra_cost == NULL */
11098 else
11099 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11100 (enum rtx_code) outer_code,
11101 &generic_extra_costs, total, speed);
11102 }
11103
11104 if (dump_file && (dump_flags & TDF_DETAILS))
11105 {
11106 print_rtl_single (dump_file, x);
11107 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11108 *total, result ? "final" : "partial");
11109 }
11110 return result;
11111 }
11112
11113 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11114 supported on any "slowmul" cores, so it can be ignored. */
11115
11116 static bool
11117 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11118 int *total, bool speed)
11119 {
11120 machine_mode mode = GET_MODE (x);
11121
11122 if (TARGET_THUMB)
11123 {
11124 *total = thumb1_rtx_costs (x, code, outer_code);
11125 return true;
11126 }
11127
11128 switch (code)
11129 {
11130 case MULT:
11131 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11132 || mode == DImode)
11133 {
11134 *total = COSTS_N_INSNS (20);
11135 return false;
11136 }
11137
11138 if (CONST_INT_P (XEXP (x, 1)))
11139 {
11140 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11141 & (unsigned HOST_WIDE_INT) 0xffffffff);
11142 int cost, const_ok = const_ok_for_arm (i);
11143 int j, booth_unit_size;
11144
11145 /* Tune as appropriate. */
11146 cost = const_ok ? 4 : 8;
11147 booth_unit_size = 2;
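/* For example (a trace of the loop below, not an exhaustive tuning
claim): a multiplier of 0xff is a valid ARM immediate, so the base
cost is 4; the loop then adds one cycle per non-zero 2-bit Booth
chunk, i.e. four more, giving COSTS_N_INSNS (8) in total. */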
11148 for (j = 0; i && j < 32; j += booth_unit_size)
11149 {
11150 i >>= booth_unit_size;
11151 cost++;
11152 }
11153
11154 *total = COSTS_N_INSNS (cost);
11155 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11156 return true;
11157 }
11158
11159 *total = COSTS_N_INSNS (20);
11160 return false;
11161
11162 default:
11163 return arm_rtx_costs_1 (x, outer_code, total, speed);
11164 }
11165 }
11166
11167
11168 /* RTX cost for cores with a fast multiply unit (M variants). */
11169
11170 static bool
11171 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11172 int *total, bool speed)
11173 {
11174 machine_mode mode = GET_MODE (x);
11175
11176 if (TARGET_THUMB1)
11177 {
11178 *total = thumb1_rtx_costs (x, code, outer_code);
11179 return true;
11180 }
11181
11182 /* ??? Should Thumb-2 use different costs? */
11183 switch (code)
11184 {
11185 case MULT:
11186 /* There is no point basing this on the tuning, since it is always the
11187 fast variant if it exists at all. */
11188 if (mode == DImode
11189 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11190 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11191 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11192 {
11193 *total = COSTS_N_INSNS (2);
11194 return false;
11195 }
11196
11197
11198 if (mode == DImode)
11199 {
11200 *total = COSTS_N_INSNS (5);
11201 return false;
11202 }
11203
11204 if (CONST_INT_P (XEXP (x, 1)))
11205 {
11206 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11207 & (unsigned HOST_WIDE_INT) 0xffffffff);
11208 int cost, const_ok = const_ok_for_arm (i);
11209 int j, booth_unit_size;
11210
11211 /* Tune as appropriate. */
11212 cost = const_ok ? 4 : 8;
11213 booth_unit_size = 8;
11214 for (j = 0; i && j < 32; j += booth_unit_size)
11215 {
11216 i >>= booth_unit_size;
11217 cost++;
11218 }
11219
11220 *total = COSTS_N_INSNS (cost);
11221 return false;
11222 }
11223
11224 if (mode == SImode)
11225 {
11226 *total = COSTS_N_INSNS (4);
11227 return false;
11228 }
11229
11230 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11231 {
11232 if (TARGET_HARD_FLOAT
11233 && (mode == SFmode
11234 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11235 {
11236 *total = COSTS_N_INSNS (1);
11237 return false;
11238 }
11239 }
11240
11241 /* Requires a lib call. */
11242 *total = COSTS_N_INSNS (20);
11243 return false;
11244
11245 default:
11246 return arm_rtx_costs_1 (x, outer_code, total, speed);
11247 }
11248 }
11249
11250
11251 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11252 so it can be ignored. */
11253
11254 static bool
11255 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11256 int *total, bool speed)
11257 {
11258 machine_mode mode = GET_MODE (x);
11259
11260 if (TARGET_THUMB)
11261 {
11262 *total = thumb1_rtx_costs (x, code, outer_code);
11263 return true;
11264 }
11265
11266 switch (code)
11267 {
11268 case COMPARE:
11269 if (GET_CODE (XEXP (x, 0)) != MULT)
11270 return arm_rtx_costs_1 (x, outer_code, total, speed);
11271
11272 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11273 will stall until the multiplication is complete. */
11274 *total = COSTS_N_INSNS (3);
11275 return false;
11276
11277 case MULT:
11278 /* There is no point basing this on the tuning, since it is always the
11279 fast variant if it exists at all. */
11280 if (mode == DImode
11281 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11282 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11283 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11284 {
11285 *total = COSTS_N_INSNS (2);
11286 return false;
11287 }
11288
11289
11290 if (mode == DImode)
11291 {
11292 *total = COSTS_N_INSNS (5);
11293 return false;
11294 }
11295
11296 if (CONST_INT_P (XEXP (x, 1)))
11297 {
11298 /* If operand 1 is a constant we can more accurately
11299 calculate the cost of the multiply. The multiplier can
11300 retire 15 bits on the first cycle and a further 12 on the
11301 second. We do, of course, have to load the constant into
11302 a register first. */
11303 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11304 /* There's a general overhead of one cycle. */
11305 int cost = 1;
11306 unsigned HOST_WIDE_INT masked_const;
11307
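/* For example (tracing the checks below): 0x7fff fits entirely below
bit 15, so neither mask matches and the cost stays at the 1-cycle
overhead; 0x12345678 matches both 0xffff8000 and 0xf8000000, so both
checks fire and the cost becomes COSTS_N_INSNS (3). */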
11308 if (i & 0x80000000)
11309 i = ~i;
11310
11311 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11312
11313 masked_const = i & 0xffff8000;
11314 if (masked_const != 0)
11315 {
11316 cost++;
11317 masked_const = i & 0xf8000000;
11318 if (masked_const != 0)
11319 cost++;
11320 }
11321 *total = COSTS_N_INSNS (cost);
11322 return false;
11323 }
11324
11325 if (mode == SImode)
11326 {
11327 *total = COSTS_N_INSNS (3);
11328 return false;
11329 }
11330
11331 /* Requires a lib call. */
11332 *total = COSTS_N_INSNS (20);
11333 return false;
11334
11335 default:
11336 return arm_rtx_costs_1 (x, outer_code, total, speed);
11337 }
11338 }
11339
11340
11341 /* RTX costs for 9e (and later) cores. */
11342
11343 static bool
11344 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11345 int *total, bool speed)
11346 {
11347 machine_mode mode = GET_MODE (x);
11348
11349 if (TARGET_THUMB1)
11350 {
11351 switch (code)
11352 {
11353 case MULT:
11354 /* Small multiply: 32 cycles for an integer multiply inst. */
11355 if (arm_arch6m && arm_m_profile_small_mul)
11356 *total = COSTS_N_INSNS (32);
11357 else
11358 *total = COSTS_N_INSNS (3);
11359 return true;
11360
11361 default:
11362 *total = thumb1_rtx_costs (x, code, outer_code);
11363 return true;
11364 }
11365 }
11366
11367 switch (code)
11368 {
11369 case MULT:
11370 /* There is no point basing this on the tuning, since it is always the
11371 fast variant if it exists at all. */
11372 if (mode == DImode
11373 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11374 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11375 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11376 {
11377 *total = COSTS_N_INSNS (2);
11378 return false;
11379 }
11380
11381
11382 if (mode == DImode)
11383 {
11384 *total = COSTS_N_INSNS (5);
11385 return false;
11386 }
11387
11388 if (mode == SImode)
11389 {
11390 *total = COSTS_N_INSNS (2);
11391 return false;
11392 }
11393
11394 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11395 {
11396 if (TARGET_HARD_FLOAT
11397 && (mode == SFmode
11398 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11399 {
11400 *total = COSTS_N_INSNS (1);
11401 return false;
11402 }
11403 }
11404
11405 *total = COSTS_N_INSNS (20);
11406 return false;
11407
11408 default:
11409 return arm_rtx_costs_1 (x, outer_code, total, speed);
11410 }
11411 }
11412 /* All address computations that can be done are free, but rtx cost returns
11413 the same for practically all of them. So we weight the different types
11414 of address here in the order (most preferred first):
11415 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
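/* As a rough illustration of that ordering: a post-increment address
costs 0, reg + constant costs 2, reg + shifted reg (e.g.
reg + (reg << 2)) costs 3, reg + reg costs 4, a bare register costs 6,
and a label or symbol reference costs 10. */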
11416 static inline int
11417 arm_arm_address_cost (rtx x)
11418 {
11419 enum rtx_code c = GET_CODE (x);
11420
11421 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11422 return 0;
11423 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11424 return 10;
11425
11426 if (c == PLUS)
11427 {
11428 if (CONST_INT_P (XEXP (x, 1)))
11429 return 2;
11430
11431 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11432 return 3;
11433
11434 return 4;
11435 }
11436
11437 return 6;
11438 }
11439
11440 static inline int
11441 arm_thumb_address_cost (rtx x)
11442 {
11443 enum rtx_code c = GET_CODE (x);
11444
11445 if (c == REG)
11446 return 1;
11447 if (c == PLUS
11448 && REG_P (XEXP (x, 0))
11449 && CONST_INT_P (XEXP (x, 1)))
11450 return 1;
11451
11452 return 2;
11453 }
11454
11455 static int
11456 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11457 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11458 {
11459 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11460 }
11461
11462 /* Adjust cost hook for XScale. */
11463 static bool
11464 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11465 {
11466 /* Some true dependencies can have a higher cost depending
11467 on precisely how certain input operands are used. */
11468 if (REG_NOTE_KIND (link) == 0
11469 && recog_memoized (insn) >= 0
11470 && recog_memoized (dep) >= 0)
11471 {
11472 int shift_opnum = get_attr_shift (insn);
11473 enum attr_type attr_type = get_attr_type (dep);
11474
11475 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11476 operand for INSN. If we have a shifted input operand and the
11477 instruction we depend on is another ALU instruction, then we may
11478 have to account for an additional stall. */
11479 if (shift_opnum != 0
11480 && (attr_type == TYPE_ALU_SHIFT_IMM
11481 || attr_type == TYPE_ALUS_SHIFT_IMM
11482 || attr_type == TYPE_LOGIC_SHIFT_IMM
11483 || attr_type == TYPE_LOGICS_SHIFT_IMM
11484 || attr_type == TYPE_ALU_SHIFT_REG
11485 || attr_type == TYPE_ALUS_SHIFT_REG
11486 || attr_type == TYPE_LOGIC_SHIFT_REG
11487 || attr_type == TYPE_LOGICS_SHIFT_REG
11488 || attr_type == TYPE_MOV_SHIFT
11489 || attr_type == TYPE_MVN_SHIFT
11490 || attr_type == TYPE_MOV_SHIFT_REG
11491 || attr_type == TYPE_MVN_SHIFT_REG))
11492 {
11493 rtx shifted_operand;
11494 int opno;
11495
11496 /* Get the shifted operand. */
11497 extract_insn (insn);
11498 shifted_operand = recog_data.operand[shift_opnum];
11499
11500 /* Iterate over all the operands in DEP. If we write an operand
11501 that overlaps with SHIFTED_OPERAND, then we have to increase the
11502 cost of this dependency. */
11503 extract_insn (dep);
11504 preprocess_constraints (dep);
11505 for (opno = 0; opno < recog_data.n_operands; opno++)
11506 {
11507 /* We can ignore strict inputs. */
11508 if (recog_data.operand_type[opno] == OP_IN)
11509 continue;
11510
11511 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11512 shifted_operand))
11513 {
11514 *cost = 2;
11515 return false;
11516 }
11517 }
11518 }
11519 }
11520 return true;
11521 }
11522
11523 /* Adjust cost hook for Cortex A9. */
11524 static bool
11525 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11526 {
11527 switch (REG_NOTE_KIND (link))
11528 {
11529 case REG_DEP_ANTI:
11530 *cost = 0;
11531 return false;
11532
11533 case REG_DEP_TRUE:
11534 case REG_DEP_OUTPUT:
11535 if (recog_memoized (insn) >= 0
11536 && recog_memoized (dep) >= 0)
11537 {
11538 if (GET_CODE (PATTERN (insn)) == SET)
11539 {
11540 if (GET_MODE_CLASS
11541 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11542 || GET_MODE_CLASS
11543 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11544 {
11545 enum attr_type attr_type_insn = get_attr_type (insn);
11546 enum attr_type attr_type_dep = get_attr_type (dep);
11547
11548 /* By default all dependencies of the form
11549 s0 = s0 <op> s1
11550 s0 = s0 <op> s2
11551 have an extra latency of 1 cycle because
11552 of the input and output dependency in this
11553 case. However this gets modeled as a true
11554 dependency and hence all these checks. */
11555 if (REG_P (SET_DEST (PATTERN (insn)))
11556 && REG_P (SET_DEST (PATTERN (dep)))
11557 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11558 SET_DEST (PATTERN (dep))))
11559 {
11560 /* FMACS is a special case where the dependent
11561 instruction can be issued 3 cycles before
11562 the normal latency in case of an output
11563 dependency. */
11564 if ((attr_type_insn == TYPE_FMACS
11565 || attr_type_insn == TYPE_FMACD)
11566 && (attr_type_dep == TYPE_FMACS
11567 || attr_type_dep == TYPE_FMACD))
11568 {
11569 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11570 *cost = insn_default_latency (dep) - 3;
11571 else
11572 *cost = insn_default_latency (dep);
11573 return false;
11574 }
11575 else
11576 {
11577 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11578 *cost = insn_default_latency (dep) + 1;
11579 else
11580 *cost = insn_default_latency (dep);
11581 }
11582 return false;
11583 }
11584 }
11585 }
11586 }
11587 break;
11588
11589 default:
11590 gcc_unreachable ();
11591 }
11592
11593 return true;
11594 }
11595
11596 /* Adjust cost hook for FA726TE. */
11597 static bool
11598 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11599 {
11600 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
11601 has a penalty of 3. */
11602 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11603 && recog_memoized (insn) >= 0
11604 && recog_memoized (dep) >= 0
11605 && get_attr_conds (dep) == CONDS_SET)
11606 {
11607 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11608 if (get_attr_conds (insn) == CONDS_USE
11609 && get_attr_type (insn) != TYPE_BRANCH)
11610 {
11611 *cost = 3;
11612 return false;
11613 }
11614
11615 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11616 || get_attr_conds (insn) == CONDS_USE)
11617 {
11618 *cost = 0;
11619 return false;
11620 }
11621 }
11622
11623 return true;
11624 }
11625
11626 /* Implement TARGET_REGISTER_MOVE_COST.
11627
11628 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11629 it is typically more expensive than a single memory access. We set
11630 the cost to less than two memory accesses so that floating
11631 point to integer conversion does not go through memory. */
11632
11633 int
11634 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11635 reg_class_t from, reg_class_t to)
11636 {
11637 if (TARGET_32BIT)
11638 {
11639 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11640 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11641 return 15;
11642 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11643 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11644 return 4;
11645 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11646 return 20;
11647 else
11648 return 2;
11649 }
11650 else
11651 {
11652 if (from == HI_REGS || to == HI_REGS)
11653 return 4;
11654 else
11655 return 2;
11656 }
11657 }
11658
11659 /* Implement TARGET_MEMORY_MOVE_COST. */
11660
11661 int
11662 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11663 bool in ATTRIBUTE_UNUSED)
11664 {
11665 if (TARGET_32BIT)
11666 return 10;
11667 else
11668 {
11669 if (GET_MODE_SIZE (mode) < 4)
11670 return 8;
11671 else
11672 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11673 }
11674 }
11675
11676 /* Vectorizer cost model implementation. */
11677
11678 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11679 static int
11680 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11681 tree vectype,
11682 int misalign ATTRIBUTE_UNUSED)
11683 {
11684 unsigned elements;
11685
11686 switch (type_of_cost)
11687 {
11688 case scalar_stmt:
11689 return current_tune->vec_costs->scalar_stmt_cost;
11690
11691 case scalar_load:
11692 return current_tune->vec_costs->scalar_load_cost;
11693
11694 case scalar_store:
11695 return current_tune->vec_costs->scalar_store_cost;
11696
11697 case vector_stmt:
11698 return current_tune->vec_costs->vec_stmt_cost;
11699
11700 case vector_load:
11701 return current_tune->vec_costs->vec_align_load_cost;
11702
11703 case vector_store:
11704 return current_tune->vec_costs->vec_store_cost;
11705
11706 case vec_to_scalar:
11707 return current_tune->vec_costs->vec_to_scalar_cost;
11708
11709 case scalar_to_vec:
11710 return current_tune->vec_costs->scalar_to_vec_cost;
11711
11712 case unaligned_load:
11713 return current_tune->vec_costs->vec_unalign_load_cost;
11714
11715 case unaligned_store:
11716 return current_tune->vec_costs->vec_unalign_store_cost;
11717
11718 case cond_branch_taken:
11719 return current_tune->vec_costs->cond_taken_branch_cost;
11720
11721 case cond_branch_not_taken:
11722 return current_tune->vec_costs->cond_not_taken_branch_cost;
11723
11724 case vec_perm:
11725 case vec_promote_demote:
11726 return current_tune->vec_costs->vec_stmt_cost;
11727
11728 case vec_construct:
11729 elements = TYPE_VECTOR_SUBPARTS (vectype);
11730 return elements / 2 + 1;
11731
11732 default:
11733 gcc_unreachable ();
11734 }
11735 }
11736
11737 /* Implement targetm.vectorize.add_stmt_cost. */
11738
11739 static unsigned
11740 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11741 struct _stmt_vec_info *stmt_info, int misalign,
11742 enum vect_cost_model_location where)
11743 {
11744 unsigned *cost = (unsigned *) data;
11745 unsigned retval = 0;
11746
11747 if (flag_vect_cost_model)
11748 {
11749 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11750 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11751
11752 /* Statements in an inner loop relative to the loop being
11753 vectorized are weighted more heavily. The value here is
11754 arbitrary and could potentially be improved with analysis. */
11755 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11756 count *= 50; /* FIXME. */
11757
11758 retval = (unsigned) (count * stmt_cost);
11759 cost[where] += retval;
11760 }
11761
11762 return retval;
11763 }
11764
11765 /* Return true if and only if this insn can dual-issue only as older. */
11766 static bool
11767 cortexa7_older_only (rtx_insn *insn)
11768 {
11769 if (recog_memoized (insn) < 0)
11770 return false;
11771
11772 switch (get_attr_type (insn))
11773 {
11774 case TYPE_ALU_DSP_REG:
11775 case TYPE_ALU_SREG:
11776 case TYPE_ALUS_SREG:
11777 case TYPE_LOGIC_REG:
11778 case TYPE_LOGICS_REG:
11779 case TYPE_ADC_REG:
11780 case TYPE_ADCS_REG:
11781 case TYPE_ADR:
11782 case TYPE_BFM:
11783 case TYPE_REV:
11784 case TYPE_MVN_REG:
11785 case TYPE_SHIFT_IMM:
11786 case TYPE_SHIFT_REG:
11787 case TYPE_LOAD_BYTE:
11788 case TYPE_LOAD1:
11789 case TYPE_STORE1:
11790 case TYPE_FFARITHS:
11791 case TYPE_FADDS:
11792 case TYPE_FFARITHD:
11793 case TYPE_FADDD:
11794 case TYPE_FMOV:
11795 case TYPE_F_CVT:
11796 case TYPE_FCMPS:
11797 case TYPE_FCMPD:
11798 case TYPE_FCONSTS:
11799 case TYPE_FCONSTD:
11800 case TYPE_FMULS:
11801 case TYPE_FMACS:
11802 case TYPE_FMULD:
11803 case TYPE_FMACD:
11804 case TYPE_FDIVS:
11805 case TYPE_FDIVD:
11806 case TYPE_F_MRC:
11807 case TYPE_F_MRRC:
11808 case TYPE_F_FLAG:
11809 case TYPE_F_LOADS:
11810 case TYPE_F_STORES:
11811 return true;
11812 default:
11813 return false;
11814 }
11815 }
11816
11817 /* Return true if and only if this insn can dual-issue as younger. */
11818 static bool
11819 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11820 {
11821 if (recog_memoized (insn) < 0)
11822 {
11823 if (verbose > 5)
11824 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11825 return false;
11826 }
11827
11828 switch (get_attr_type (insn))
11829 {
11830 case TYPE_ALU_IMM:
11831 case TYPE_ALUS_IMM:
11832 case TYPE_LOGIC_IMM:
11833 case TYPE_LOGICS_IMM:
11834 case TYPE_EXTEND:
11835 case TYPE_MVN_IMM:
11836 case TYPE_MOV_IMM:
11837 case TYPE_MOV_REG:
11838 case TYPE_MOV_SHIFT:
11839 case TYPE_MOV_SHIFT_REG:
11840 case TYPE_BRANCH:
11841 case TYPE_CALL:
11842 return true;
11843 default:
11844 return false;
11845 }
11846 }
11847
11848
11849 /* Look for an instruction that can dual issue only as an older
11850 instruction, and move it in front of any instructions that can
11851 dual-issue as younger, while preserving the relative order of all
11852 other instructions in the ready list. This is a heuristic to help
11853 dual-issue in later cycles, by postponing issue of more flexible
11854 instructions. This heuristic may affect dual issue opportunities
11855 in the current cycle. */
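/* As an illustration: if the ready list holds a mov-immediate (which
cortexa7_younger accepts) ahead of a floating-point add (which
cortexa7_older_only accepts), the FP add is moved in front of the mov,
leaving the mov free to pair as the younger partner of another insn in
a later cycle. */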
11856 static void
11857 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11858 int *n_readyp, int clock)
11859 {
11860 int i;
11861 int first_older_only = -1, first_younger = -1;
11862
11863 if (verbose > 5)
11864 fprintf (file,
11865 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11866 clock,
11867 *n_readyp);
11868
11869 /* Traverse the ready list from the head (the instruction to issue
11870 first), and look for the first instruction that can issue as
11871 younger and the first instruction that can dual-issue only as
11872 older. */
11873 for (i = *n_readyp - 1; i >= 0; i--)
11874 {
11875 rtx_insn *insn = ready[i];
11876 if (cortexa7_older_only (insn))
11877 {
11878 first_older_only = i;
11879 if (verbose > 5)
11880 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11881 break;
11882 }
11883 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11884 first_younger = i;
11885 }
11886
11887 /* Nothing to reorder because either no younger insn was found, or an
11888 insn that can dual-issue only as older appears before any insn that
11889 can dual-issue as younger. */
11890 if (first_younger == -1)
11891 {
11892 if (verbose > 5)
11893 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11894 return;
11895 }
11896
11897 /* Nothing to reorder because no older-only insn in the ready list. */
11898 if (first_older_only == -1)
11899 {
11900 if (verbose > 5)
11901 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11902 return;
11903 }
11904
11905 /* Move first_older_only insn before first_younger. */
11906 if (verbose > 5)
11907 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11908 INSN_UID (ready[first_older_only]),
11909 INSN_UID (ready[first_younger]));
11910 rtx_insn *first_older_only_insn = ready[first_older_only];
11911 for (i = first_older_only; i < first_younger; i++)
11912 {
11913 ready[i] = ready[i+1];
11914 }
11915
11916 ready[i] = first_older_only_insn;
11917 return;
11918 }
11919
11920 /* Implement TARGET_SCHED_REORDER. */
11921 static int
11922 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11923 int clock)
11924 {
11925 switch (arm_tune)
11926 {
11927 case cortexa7:
11928 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11929 break;
11930 default:
11931 /* Do nothing for other cores. */
11932 break;
11933 }
11934
11935 return arm_issue_rate ();
11936 }
11937
11938 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11939 It corrects the value of COST based on the relationship between
11940 INSN and DEP through the dependence LINK. It returns the new
11941 value. There is a per-core adjust_cost hook to adjust scheduler costs
11942 and the per-core hook can choose to completely override the generic
11943 adjust_cost function. Only put bits of code into arm_adjust_cost that
11944 are common across all cores. */
11945 static int
11946 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11947 {
11948 rtx i_pat, d_pat;
11949
11950 /* When generating Thumb-1 code, we want to place flag-setting operations
11951 close to a conditional branch which depends on them, so that we can
11952 omit the comparison. */
11953 if (TARGET_THUMB1
11954 && REG_NOTE_KIND (link) == 0
11955 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11956 && recog_memoized (dep) >= 0
11957 && get_attr_conds (dep) == CONDS_SET)
11958 return 0;
11959
11960 if (current_tune->sched_adjust_cost != NULL)
11961 {
11962 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11963 return cost;
11964 }
11965
11966 /* XXX Is this strictly true? */
11967 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11968 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11969 return 0;
11970
11971 /* Call insns don't incur a stall, even if they follow a load. */
11972 if (REG_NOTE_KIND (link) == 0
11973 && CALL_P (insn))
11974 return 1;
11975
11976 if ((i_pat = single_set (insn)) != NULL
11977 && MEM_P (SET_SRC (i_pat))
11978 && (d_pat = single_set (dep)) != NULL
11979 && MEM_P (SET_DEST (d_pat)))
11980 {
11981 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11982 /* This is a load after a store, there is no conflict if the load reads
11983 from a cached area. Assume that loads from the stack, and from the
11984 constant pool are cached, and that others will miss. This is a
11985 hack. */
11986
11987 if ((GET_CODE (src_mem) == SYMBOL_REF
11988 && CONSTANT_POOL_ADDRESS_P (src_mem))
11989 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11990 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11991 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11992 return 1;
11993 }
11994
11995 return cost;
11996 }
11997
11998 int
11999 arm_max_conditional_execute (void)
12000 {
12001 return max_insns_skipped;
12002 }
12003
12004 static int
12005 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
12006 {
12007 if (TARGET_32BIT)
12008 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
12009 else
12010 return (optimize > 0) ? 2 : 0;
12011 }
12012
12013 static int
12014 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12015 {
12016 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12017 }
12018
12019 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12020 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12021 sequences of non-executed instructions in IT blocks probably take the same
12022 amount of time as executed instructions (and the IT instruction itself takes
12023 space in icache). This function was experimentally determined to give good
12024 results on a popular embedded benchmark. */
12025
12026 static int
12027 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12028 {
12029 return (TARGET_32BIT && speed_p) ? 1
12030 : arm_default_branch_cost (speed_p, predictable_p);
12031 }
12032
12033 static int
12034 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
12035 {
12036 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12037 }
12038
12039 static bool fp_consts_inited = false;
12040
12041 static REAL_VALUE_TYPE value_fp0;
12042
12043 static void
12044 init_fp_table (void)
12045 {
12046 REAL_VALUE_TYPE r;
12047
12048 r = REAL_VALUE_ATOF ("0", DFmode);
12049 value_fp0 = r;
12050 fp_consts_inited = true;
12051 }
12052
12053 /* Return TRUE if rtx X is a valid immediate FP constant. */
12054 int
12055 arm_const_double_rtx (rtx x)
12056 {
12057 REAL_VALUE_TYPE r;
12058
12059 if (!fp_consts_inited)
12060 init_fp_table ();
12061
12062 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12063 if (REAL_VALUE_MINUS_ZERO (r))
12064 return 0;
12065
12066 if (REAL_VALUES_EQUAL (r, value_fp0))
12067 return 1;
12068
12069 return 0;
12070 }
12071
12072 /* VFPv3 has a fairly wide range of representable immediates, formed from
12073 "quarter-precision" floating-point values. These can be evaluated using this
12074 formula (with ^ for exponentiation):
12075
12076 -1^s * n * 2^-r
12077
12078 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12079 16 <= n <= 31 and 0 <= r <= 7.
12080
12081 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12082
12083 - A (most-significant) is the sign bit.
12084 - BCD are the exponent (encoded as r XOR 3).
12085 - EFGH are the mantissa (encoded as n - 16).
12086 */
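/* As a worked example of that encoding: 1.5 = +1 * 24 * 2^-4, i.e.
s = 0, n = 24, r = 4, which encodes as
(0 << 7) | ((4 ^ 3) << 4) | (24 - 16) = 0x78; this is the index
vfp3_const_double_index below returns for the constant 1.5. */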
12087
12088 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12089 fconst[sd] instruction, or -1 if X isn't suitable. */
12090 static int
12091 vfp3_const_double_index (rtx x)
12092 {
12093 REAL_VALUE_TYPE r, m;
12094 int sign, exponent;
12095 unsigned HOST_WIDE_INT mantissa, mant_hi;
12096 unsigned HOST_WIDE_INT mask;
12097 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12098 bool fail;
12099
12100 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12101 return -1;
12102
12103 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12104
12105 /* We can't represent these things, so detect them first. */
12106 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12107 return -1;
12108
12109 /* Extract sign, exponent and mantissa. */
12110 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12111 r = real_value_abs (&r);
12112 exponent = REAL_EXP (&r);
12113 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12114 highest (sign) bit, with a fixed binary point at bit point_pos.
12115 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12116 bits for the mantissa, this may fail (low bits would be lost). */
12117 real_ldexp (&m, &r, point_pos - exponent);
12118 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12119 mantissa = w.elt (0);
12120 mant_hi = w.elt (1);
12121
12122 /* If there are bits set in the low part of the mantissa, we can't
12123 represent this value. */
12124 if (mantissa != 0)
12125 return -1;
12126
12127 /* Now make it so that mantissa contains the most-significant bits, and move
12128 the point_pos to indicate that the least-significant bits have been
12129 discarded. */
12130 point_pos -= HOST_BITS_PER_WIDE_INT;
12131 mantissa = mant_hi;
12132
12133 /* We can permit four significant bits of mantissa only, plus a high bit
12134 which is always 1. */
12135 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12136 if ((mantissa & mask) != 0)
12137 return -1;
12138
12139 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12140 mantissa >>= point_pos - 5;
12141
12142 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12143 floating-point immediate zero with Neon using an integer-zero load, but
12144 that case is handled elsewhere.) */
12145 if (mantissa == 0)
12146 return -1;
12147
12148 gcc_assert (mantissa >= 16 && mantissa <= 31);
12149
12150 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12151 normalized significands are in the range [1, 2). (Our mantissa is shifted
12152 left 4 places at this point relative to normalized IEEE754 values). GCC
12153 internally uses [0.5, 1) (see real.c), so the exponent returned from
12154 REAL_EXP must be altered. */
12155 exponent = 5 - exponent;
12156
12157 if (exponent < 0 || exponent > 7)
12158 return -1;
12159
12160 /* Sign, mantissa and exponent are now in the correct form to plug into the
12161 formula described in the comment above. */
12162 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12163 }
12164
12165 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12166 int
12167 vfp3_const_double_rtx (rtx x)
12168 {
12169 if (!TARGET_VFP3)
12170 return 0;
12171
12172 return vfp3_const_double_index (x) != -1;
12173 }
12174
12175 /* Recognize immediates which can be used in various Neon instructions. Legal
12176 immediates are described by the following table (for VMVN variants, the
12177 bitwise inverse of the constant shown is recognized. In either case, VMOV
12178 is output and the correct instruction to use for a given constant is chosen
12179 by the assembler). The constant shown is replicated across all elements of
12180 the destination vector.
12181
12182 insn elems variant constant (binary)
12183 ---- ----- ------- -----------------
12184 vmov i32 0 00000000 00000000 00000000 abcdefgh
12185 vmov i32 1 00000000 00000000 abcdefgh 00000000
12186 vmov i32 2 00000000 abcdefgh 00000000 00000000
12187 vmov i32 3 abcdefgh 00000000 00000000 00000000
12188 vmov i16 4 00000000 abcdefgh
12189 vmov i16 5 abcdefgh 00000000
12190 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12191 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12192 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12193 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12194 vmvn i16 10 00000000 abcdefgh
12195 vmvn i16 11 abcdefgh 00000000
12196 vmov i32 12 00000000 00000000 abcdefgh 11111111
12197 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12198 vmov i32 14 00000000 abcdefgh 11111111 11111111
12199 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12200 vmov i8 16 abcdefgh
12201 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12202 eeeeeeee ffffffff gggggggg hhhhhhhh
12203 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12204 vmov f32 19 00000000 00000000 00000000 00000000
12205
12206 For case 18, B = !b. Representable values are exactly those accepted by
12207 vfp3_const_double_index, but are output as floating-point numbers rather
12208 than indices.
12209
12210 For case 19, we will change it to vmov.i32 when assembling.
12211
12212 Variants 0-5 (inclusive) may also be used as immediates for the second
12213 operand of VORR/VBIC instructions.
12214
12215 The INVERSE argument causes the bitwise inverse of the given operand to be
12216 recognized instead (used for recognizing legal immediates for the VAND/VORN
12217 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12218 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12219 output, rather than the real insns vbic/vorr).
12220
12221 INVERSE makes no difference to the recognition of float vectors.
12222
12223 The return value is the variant of immediate as shown in the above table, or
12224 -1 if the given value doesn't match any of the listed patterns.
12225 */
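/* As a worked example of the table above: a V4SImode vector with every
element equal to 0x0000ab00 expands to the repeating byte pattern
00 ab 00 00, which matches variant 1, so the element width is 32 and
*MODCONST is set to 0x0000ab00. */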
12226 static int
12227 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12228 rtx *modconst, int *elementwidth)
12229 {
12230 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12231 matches = 1; \
12232 for (i = 0; i < idx; i += (STRIDE)) \
12233 if (!(TEST)) \
12234 matches = 0; \
12235 if (matches) \
12236 { \
12237 immtype = (CLASS); \
12238 elsize = (ELSIZE); \
12239 break; \
12240 }
12241
12242 unsigned int i, elsize = 0, idx = 0, n_elts;
12243 unsigned int innersize;
12244 unsigned char bytes[16];
12245 int immtype = -1, matches;
12246 unsigned int invmask = inverse ? 0xff : 0;
12247 bool vector = GET_CODE (op) == CONST_VECTOR;
12248
12249 if (vector)
12250 {
12251 n_elts = CONST_VECTOR_NUNITS (op);
12252 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12253 }
12254 else
12255 {
12256 n_elts = 1;
12257 if (mode == VOIDmode)
12258 mode = DImode;
12259 innersize = GET_MODE_SIZE (mode);
12260 }
12261
12262 /* Vectors of float constants. */
12263 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12264 {
12265 rtx el0 = CONST_VECTOR_ELT (op, 0);
12266 REAL_VALUE_TYPE r0;
12267
12268 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12269 return -1;
12270
12271 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12272
12273 for (i = 1; i < n_elts; i++)
12274 {
12275 rtx elt = CONST_VECTOR_ELT (op, i);
12276 REAL_VALUE_TYPE re;
12277
12278 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12279
12280 if (!REAL_VALUES_EQUAL (r0, re))
12281 return -1;
12282 }
12283
12284 if (modconst)
12285 *modconst = CONST_VECTOR_ELT (op, 0);
12286
12287 if (elementwidth)
12288 *elementwidth = 0;
12289
12290 if (el0 == CONST0_RTX (GET_MODE (el0)))
12291 return 19;
12292 else
12293 return 18;
12294 }
12295
12296 /* Splat vector constant out into a byte vector. */
12297 for (i = 0; i < n_elts; i++)
12298 {
12299 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12300 unsigned HOST_WIDE_INT elpart;
12301 unsigned int part, parts;
12302
12303 if (CONST_INT_P (el))
12304 {
12305 elpart = INTVAL (el);
12306 parts = 1;
12307 }
12308 else if (CONST_DOUBLE_P (el))
12309 {
12310 elpart = CONST_DOUBLE_LOW (el);
12311 parts = 2;
12312 }
12313 else
12314 gcc_unreachable ();
12315
12316 for (part = 0; part < parts; part++)
12317 {
12318 unsigned int byte;
12319 for (byte = 0; byte < innersize; byte++)
12320 {
12321 bytes[idx++] = (elpart & 0xff) ^ invmask;
12322 elpart >>= BITS_PER_UNIT;
12323 }
12324 if (CONST_DOUBLE_P (el))
12325 elpart = CONST_DOUBLE_HIGH (el);
12326 }
12327 }
12328
12329 /* Sanity check. */
12330 gcc_assert (idx == GET_MODE_SIZE (mode));
12331
12332 do
12333 {
12334 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12335 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12336
12337 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12338 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12339
12340 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12341 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12342
12343 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12344 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12345
12346 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12347
12348 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12349
12350 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12351 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12352
12353 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12354 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12355
12356 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12357 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12358
12359 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12360 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12361
12362 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12363
12364 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12365
12366 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12367 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12368
12369 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12370 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12371
12372 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12373 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12374
12375 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12376 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12377
12378 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12379
12380 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12381 && bytes[i] == bytes[(i + 8) % idx]);
12382 }
12383 while (0);
12384
12385 if (immtype == -1)
12386 return -1;
12387
12388 if (elementwidth)
12389 *elementwidth = elsize;
12390
12391 if (modconst)
12392 {
12393 unsigned HOST_WIDE_INT imm = 0;
12394
12395 /* Un-invert bytes of recognized vector, if necessary. */
12396 if (invmask != 0)
12397 for (i = 0; i < idx; i++)
12398 bytes[i] ^= invmask;
12399
12400 if (immtype == 17)
12401 {
12402 /* FIXME: Broken on 32-bit H_W_I hosts. */
12403 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12404
12405 for (i = 0; i < 8; i++)
12406 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12407 << (i * BITS_PER_UNIT);
12408
12409 *modconst = GEN_INT (imm);
12410 }
12411 else
12412 {
12413 unsigned HOST_WIDE_INT imm = 0;
12414
12415 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12416 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12417
12418 *modconst = GEN_INT (imm);
12419 }
12420 }
12421
12422 return immtype;
12423 #undef CHECK
12424 }
12425
12426 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12427 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12428 float elements), and a modified constant (whatever should be output for a
12429 VMOV) in *MODCONST. */
12430
12431 int
12432 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12433 rtx *modconst, int *elementwidth)
12434 {
12435 rtx tmpconst;
12436 int tmpwidth;
12437 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12438
12439 if (retval == -1)
12440 return 0;
12441
12442 if (modconst)
12443 *modconst = tmpconst;
12444
12445 if (elementwidth)
12446 *elementwidth = tmpwidth;
12447
12448 return 1;
12449 }
12450
12451 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12452 the immediate is valid, write a constant suitable for using as an operand
12453 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12454 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12455
12456 int
12457 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12458 rtx *modconst, int *elementwidth)
12459 {
12460 rtx tmpconst;
12461 int tmpwidth;
12462 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12463
12464 if (retval < 0 || retval > 5)
12465 return 0;
12466
12467 if (modconst)
12468 *modconst = tmpconst;
12469
12470 if (elementwidth)
12471 *elementwidth = tmpwidth;
12472
12473 return 1;
12474 }
12475
12476 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12477 the immediate is valid, write a constant suitable for using as an operand
12478 to VSHR/VSHL to *MODCONST and the corresponding element width to
12479 *ELEMENTWIDTH. ISLEFTSHIFT determines whether this is a left or right shift,
12480 because they have different limitations. */
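/* For example, with V8QImode (8-bit elements) every element must hold
the same shift count: 0..7 is accepted for a left shift (VSHL), while
1..8 is accepted for a right shift (VSHR). */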
12481
12482 int
12483 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12484 rtx *modconst, int *elementwidth,
12485 bool isleftshift)
12486 {
12487 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12488 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12489 unsigned HOST_WIDE_INT last_elt = 0;
12490 unsigned HOST_WIDE_INT maxshift;
12491
12492 /* Split vector constant out into a byte vector. */
12493 for (i = 0; i < n_elts; i++)
12494 {
12495 rtx el = CONST_VECTOR_ELT (op, i);
12496 unsigned HOST_WIDE_INT elpart;
12497
12498 if (CONST_INT_P (el))
12499 elpart = INTVAL (el);
12500 else if (CONST_DOUBLE_P (el))
12501 return 0;
12502 else
12503 gcc_unreachable ();
12504
12505 if (i != 0 && elpart != last_elt)
12506 return 0;
12507
12508 last_elt = elpart;
12509 }
12510
12511 /* Shift less than element size. */
12512 maxshift = innersize * 8;
12513
12514 if (isleftshift)
12515 {
12516 /* Left shift immediate value can be from 0 to <size>-1. */
12517 if (last_elt >= maxshift)
12518 return 0;
12519 }
12520 else
12521 {
12522 /* Right shift immediate value can be from 1 to <size>. */
12523 if (last_elt == 0 || last_elt > maxshift)
12524 return 0;
12525 }
12526
12527 if (elementwidth)
12528 *elementwidth = innersize * 8;
12529
12530 if (modconst)
12531 *modconst = CONST_VECTOR_ELT (op, 0);
12532
12533 return 1;
12534 }
12535
12536 /* Return a string suitable for output of Neon immediate logic operation
12537 MNEM. */
12538
12539 char *
12540 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12541 int inverse, int quad)
12542 {
12543 int width, is_valid;
12544 static char templ[40];
12545
12546 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12547
12548 gcc_assert (is_valid != 0);
12549
12550 if (quad)
12551 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12552 else
12553 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12554
12555 return templ;
12556 }
12557
12558 /* Return a string suitable for output of Neon immediate shift operation
12559 (VSHR or VSHL) MNEM. */
12560
12561 char *
12562 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12563 machine_mode mode, int quad,
12564 bool isleftshift)
12565 {
12566 int width, is_valid;
12567 static char templ[40];
12568
12569 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12570 gcc_assert (is_valid != 0);
12571
12572 if (quad)
12573 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12574 else
12575 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12576
12577 return templ;
12578 }
12579
12580 /* Output a sequence of pairwise operations to implement a reduction.
12581 NOTE: We do "too much work" here, because pairwise operations work on two
12582 registers' worth of operands in one go. Unfortunately, I don't think we can
12583 exploit those extra calculations to do the full operation in fewer steps.
12584 Although all vector elements of the result but the first are ignored, we
12585 actually calculate the same result in each of the elements. An alternative
12586 such as initially loading a vector with zero to use as each of the second
12587 operands would use up an additional register and take an extra instruction,
12588 for no particular gain. */
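/* For example, reducing a V4SFmode vector takes two steps: PARTS is 4,
so the loop below emits REDUC for i = 2 into a fresh register and then
for i = 1 into OP0. */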
12589
12590 void
12591 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12592 rtx (*reduc) (rtx, rtx, rtx))
12593 {
12594 machine_mode inner = GET_MODE_INNER (mode);
12595 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12596 rtx tmpsum = op1;
12597
12598 for (i = parts / 2; i >= 1; i /= 2)
12599 {
12600 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12601 emit_insn (reduc (dest, tmpsum, tmpsum));
12602 tmpsum = dest;
12603 }
12604 }
12605
12606 /* If VALS is a vector constant that can be loaded into a register
12607 using VDUP, generate instructions to do so and return an RTX to
12608 assign to the register. Otherwise return NULL_RTX. */
12609
12610 static rtx
12611 neon_vdup_constant (rtx vals)
12612 {
12613 machine_mode mode = GET_MODE (vals);
12614 machine_mode inner_mode = GET_MODE_INNER (mode);
12615 int n_elts = GET_MODE_NUNITS (mode);
12616 bool all_same = true;
12617 rtx x;
12618 int i;
12619
12620 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12621 return NULL_RTX;
12622
12623 for (i = 0; i < n_elts; ++i)
12624 {
12625 x = XVECEXP (vals, 0, i);
12626 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12627 all_same = false;
12628 }
12629
12630 if (!all_same)
12631 /* The elements are not all the same. We could handle repeating
12632 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12633 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12634 vdup.i16). */
12635 return NULL_RTX;
12636
12637 /* We can load this constant by using VDUP and a constant in a
12638 single ARM register. This will be cheaper than a vector
12639 load. */
12640
12641 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12642 return gen_rtx_VEC_DUPLICATE (mode, x);
12643 }
12644
12645 /* Generate code to load VALS, which is a PARALLEL containing only
12646 constants (for vec_init) or CONST_VECTOR, efficiently into a
12647 register. Returns an RTX to copy into the register, or NULL_RTX
12648 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12649
12650 rtx
12651 neon_make_constant (rtx vals)
12652 {
12653 machine_mode mode = GET_MODE (vals);
12654 rtx target;
12655 rtx const_vec = NULL_RTX;
12656 int n_elts = GET_MODE_NUNITS (mode);
12657 int n_const = 0;
12658 int i;
12659
12660 if (GET_CODE (vals) == CONST_VECTOR)
12661 const_vec = vals;
12662 else if (GET_CODE (vals) == PARALLEL)
12663 {
12664 /* A CONST_VECTOR must contain only CONST_INTs and
12665 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12666 Only store valid constants in a CONST_VECTOR. */
12667 for (i = 0; i < n_elts; ++i)
12668 {
12669 rtx x = XVECEXP (vals, 0, i);
12670 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12671 n_const++;
12672 }
12673 if (n_const == n_elts)
12674 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12675 }
12676 else
12677 gcc_unreachable ();
12678
12679 if (const_vec != NULL
12680 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12681 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12682 return const_vec;
12683 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12684 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12685 pipeline cycle; creating the constant takes one or two ARM
12686 pipeline cycles. */
12687 return target;
12688 else if (const_vec != NULL_RTX)
12689 /* Load from constant pool. On Cortex-A8 this takes two cycles
12690 (for either double or quad vectors). We can not take advantage
12691 of single-cycle VLD1 because we need a PC-relative addressing
12692 mode. */
12693 return const_vec;
12694 else
12695 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12696 We can not construct an initializer. */
12697 return NULL_RTX;
12698 }
12699
12700 /* Initialize vector TARGET to VALS. */
12701
12702 void
12703 neon_expand_vector_init (rtx target, rtx vals)
12704 {
12705 machine_mode mode = GET_MODE (target);
12706 machine_mode inner_mode = GET_MODE_INNER (mode);
12707 int n_elts = GET_MODE_NUNITS (mode);
12708 int n_var = 0, one_var = -1;
12709 bool all_same = true;
12710 rtx x, mem;
12711 int i;
12712
12713 for (i = 0; i < n_elts; ++i)
12714 {
12715 x = XVECEXP (vals, 0, i);
12716 if (!CONSTANT_P (x))
12717 ++n_var, one_var = i;
12718
12719 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12720 all_same = false;
12721 }
12722
12723 if (n_var == 0)
12724 {
12725 rtx constant = neon_make_constant (vals);
12726 if (constant != NULL_RTX)
12727 {
12728 emit_move_insn (target, constant);
12729 return;
12730 }
12731 }
12732
12733 /* Splat a single non-constant element if we can. */
12734 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12735 {
12736 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12737 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12738 return;
12739 }
12740
12741 /* One field is non-constant. Load constant then overwrite varying
12742 field. This is more efficient than using the stack. */
12743 if (n_var == 1)
12744 {
12745 rtx copy = copy_rtx (vals);
12746 rtx index = GEN_INT (one_var);
12747
12748 /* Load constant part of vector, substitute neighboring value for
12749 varying element. */
12750 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12751 neon_expand_vector_init (target, copy);
12752
12753 /* Insert variable. */
12754 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12755 switch (mode)
12756 {
12757 case V8QImode:
12758 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12759 break;
12760 case V16QImode:
12761 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12762 break;
12763 case V4HImode:
12764 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12765 break;
12766 case V8HImode:
12767 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12768 break;
12769 case V2SImode:
12770 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12771 break;
12772 case V4SImode:
12773 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12774 break;
12775 case V2SFmode:
12776 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12777 break;
12778 case V4SFmode:
12779 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12780 break;
12781 case V2DImode:
12782 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12783 break;
12784 default:
12785 gcc_unreachable ();
12786 }
12787 return;
12788 }
12789
12790 /* Construct the vector in memory one field at a time
12791 and load the whole vector. */
12792 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12793 for (i = 0; i < n_elts; i++)
12794 emit_move_insn (adjust_address_nv (mem, inner_mode,
12795 i * GET_MODE_SIZE (inner_mode)),
12796 XVECEXP (vals, 0, i));
12797 emit_move_insn (target, mem);
12798 }
12799
12800 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12801 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12802 reported source locations are bogus. */
12803
12804 static void
12805 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12806 const char *err)
12807 {
12808 HOST_WIDE_INT lane;
12809
12810 gcc_assert (CONST_INT_P (operand));
12811
12812 lane = INTVAL (operand);
12813
12814 if (lane < low || lane >= high)
12815 error (err);
12816 }
12817
12818 /* Bounds-check lanes. */
12819
12820 void
12821 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12822 {
12823 bounds_check (operand, low, high, "lane out of range");
12824 }
12825
12826 /* Bounds-check constants. */
12827
12828 void
12829 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12830 {
12831 bounds_check (operand, low, high, "constant out of range");
12832 }
12833
12834 HOST_WIDE_INT
12835 neon_element_bits (machine_mode mode)
12836 {
12837 if (mode == DImode)
12838 return GET_MODE_BITSIZE (mode);
12839 else
12840 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12841 }
12842
12843 \f
12844 /* Predicates for `match_operand' and `match_operator'. */
12845
12846 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12847 WB is true if full writeback address modes are allowed and is false
12848 if limited writeback address modes (POST_INC and PRE_DEC) are
12849 allowed. */
12850
12851 int
12852 arm_coproc_mem_operand (rtx op, bool wb)
12853 {
12854 rtx ind;
12855
12856 /* Reject eliminable registers. */
12857 if (! (reload_in_progress || reload_completed || lra_in_progress)
12858 && ( reg_mentioned_p (frame_pointer_rtx, op)
12859 || reg_mentioned_p (arg_pointer_rtx, op)
12860 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12861 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12862 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12863 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12864 return FALSE;
12865
12866 /* Constants are converted into offsets from labels. */
12867 if (!MEM_P (op))
12868 return FALSE;
12869
12870 ind = XEXP (op, 0);
12871
12872 if (reload_completed
12873 && (GET_CODE (ind) == LABEL_REF
12874 || (GET_CODE (ind) == CONST
12875 && GET_CODE (XEXP (ind, 0)) == PLUS
12876 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12877 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12878 return TRUE;
12879
12880 /* Match: (mem (reg)). */
12881 if (REG_P (ind))
12882 return arm_address_register_rtx_p (ind, 0);
12883
12884 Autoincrement addressing modes. POST_INC and PRE_DEC are
12885 acceptable in any case (subject to verification by
12886 arm_address_register_rtx_p). We need WB to be true to accept
12887 PRE_INC and POST_DEC. */
12888 if (GET_CODE (ind) == POST_INC
12889 || GET_CODE (ind) == PRE_DEC
12890 || (wb
12891 && (GET_CODE (ind) == PRE_INC
12892 || GET_CODE (ind) == POST_DEC)))
12893 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12894
12895 if (wb
12896 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12897 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12898 && GET_CODE (XEXP (ind, 1)) == PLUS
12899 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12900 ind = XEXP (ind, 1);
12901
12902 /* Match:
12903 (plus (reg)
12904 (const)). */
12905 if (GET_CODE (ind) == PLUS
12906 && REG_P (XEXP (ind, 0))
12907 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12908 && CONST_INT_P (XEXP (ind, 1))
12909 && INTVAL (XEXP (ind, 1)) > -1024
12910 && INTVAL (XEXP (ind, 1)) < 1024
12911 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12912 return TRUE;
12913
12914 return FALSE;
12915 }
12916
12917 /* Return TRUE if OP is a memory operand which we can load or store a vector
12918 to/from. TYPE is one of the following values:
12919 0 - Vector load/store (vldr)
12920 1 - Core registers (ldm)
12921 2 - Element/structure loads (vld1)
12922 STRICT is true if operands that mention not-yet-eliminated registers must be rejected, false if they may be provisionally accepted.  */
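/* For example, (mem (reg)) is accepted for every TYPE; POST_INC for TYPEs
   0 and 2 and PRE_DEC for TYPE 0 only; post-increment by a register
   (POST_MODIFY) only for TYPE 2; and word-aligned constant offsets in
   [-1020, 1020] (capped at 1012 for quad modes) only for TYPE 0.  */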
12923 int
12924 neon_vector_mem_operand (rtx op, int type, bool strict)
12925 {
12926 rtx ind;
12927
12928 /* Reject eliminable registers. */
12929 if (! (reload_in_progress || reload_completed)
12930 && ( reg_mentioned_p (frame_pointer_rtx, op)
12931 || reg_mentioned_p (arg_pointer_rtx, op)
12932 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12933 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12934 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12935 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12936 return !strict;
12937
12938 /* Constants are converted into offsets from labels. */
12939 if (!MEM_P (op))
12940 return FALSE;
12941
12942 ind = XEXP (op, 0);
12943
12944 if (reload_completed
12945 && (GET_CODE (ind) == LABEL_REF
12946 || (GET_CODE (ind) == CONST
12947 && GET_CODE (XEXP (ind, 0)) == PLUS
12948 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12949 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12950 return TRUE;
12951
12952 /* Match: (mem (reg)). */
12953 if (REG_P (ind))
12954 return arm_address_register_rtx_p (ind, 0);
12955
12956 /* Allow post-increment with Neon registers. */
12957 if ((type != 1 && GET_CODE (ind) == POST_INC)
12958 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12959 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12960
12961 /* Allow post-increment by register for VLDn */
12962 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12963 && GET_CODE (XEXP (ind, 1)) == PLUS
12964 && REG_P (XEXP (XEXP (ind, 1), 1)))
12965 return true;
12966
12967 /* Match:
12968 (plus (reg)
12969 (const)). */
12970 if (type == 0
12971 && GET_CODE (ind) == PLUS
12972 && REG_P (XEXP (ind, 0))
12973 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12974 && CONST_INT_P (XEXP (ind, 1))
12975 && INTVAL (XEXP (ind, 1)) > -1024
12976 /* For quad modes, we restrict the constant offset to be slightly less
12977 than what the instruction format permits. We have no such constraint
12978 on double mode offsets. (This must match arm_legitimate_index_p.) */
12979 && (INTVAL (XEXP (ind, 1))
12980 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12981 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12982 return TRUE;
12983
12984 return FALSE;
12985 }
12986
12987 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12988 type. */
12989 int
12990 neon_struct_mem_operand (rtx op)
12991 {
12992 rtx ind;
12993
12994 /* Reject eliminable registers. */
12995 if (! (reload_in_progress || reload_completed)
12996 && ( reg_mentioned_p (frame_pointer_rtx, op)
12997 || reg_mentioned_p (arg_pointer_rtx, op)
12998 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12999 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
13000 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
13001 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
13002 return FALSE;
13003
13004 /* Constants are converted into offsets from labels. */
13005 if (!MEM_P (op))
13006 return FALSE;
13007
13008 ind = XEXP (op, 0);
13009
13010 if (reload_completed
13011 && (GET_CODE (ind) == LABEL_REF
13012 || (GET_CODE (ind) == CONST
13013 && GET_CODE (XEXP (ind, 0)) == PLUS
13014 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
13015 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
13016 return TRUE;
13017
13018 /* Match: (mem (reg)). */
13019 if (REG_P (ind))
13020 return arm_address_register_rtx_p (ind, 0);
13021
13022 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13023 if (GET_CODE (ind) == POST_INC
13024 || GET_CODE (ind) == PRE_DEC)
13025 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13026
13027 return FALSE;
13028 }
13029
13030 /* Return true if X is a register that will be eliminated later on. */
13031 int
13032 arm_eliminable_register (rtx x)
13033 {
13034 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13035 || REGNO (x) == ARG_POINTER_REGNUM
13036 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13037 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13038 }
13039
13040 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13041 coprocessor registers. Otherwise return NO_REGS. */
13042
13043 enum reg_class
13044 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13045 {
13046 if (mode == HFmode)
13047 {
13048 if (!TARGET_NEON_FP16)
13049 return GENERAL_REGS;
13050 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13051 return NO_REGS;
13052 return GENERAL_REGS;
13053 }
13054
13055 /* The neon move patterns handle all legitimate vector and struct
13056 addresses. */
13057 if (TARGET_NEON
13058 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13059 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13060 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13061 || VALID_NEON_STRUCT_MODE (mode)))
13062 return NO_REGS;
13063
13064 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13065 return NO_REGS;
13066
13067 return GENERAL_REGS;
13068 }
13069
13070 /* Values which must be returned in the most-significant end of the return
13071 register. */
13072
13073 static bool
13074 arm_return_in_msb (const_tree valtype)
13075 {
13076 return (TARGET_AAPCS_BASED
13077 && BYTES_BIG_ENDIAN
13078 && (AGGREGATE_TYPE_P (valtype)
13079 || TREE_CODE (valtype) == COMPLEX_TYPE
13080 || FIXED_POINT_TYPE_P (valtype)));
13081 }
13082
13083 /* Return TRUE if X references a SYMBOL_REF. */
13084 int
13085 symbol_mentioned_p (rtx x)
13086 {
13087 const char * fmt;
13088 int i;
13089
13090 if (GET_CODE (x) == SYMBOL_REF)
13091 return 1;
13092
13093 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13094 are constant offsets, not symbols. */
13095 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13096 return 0;
13097
13098 fmt = GET_RTX_FORMAT (GET_CODE (x));
13099
13100 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13101 {
13102 if (fmt[i] == 'E')
13103 {
13104 int j;
13105
13106 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13107 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13108 return 1;
13109 }
13110 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13111 return 1;
13112 }
13113
13114 return 0;
13115 }
13116
13117 /* Return TRUE if X references a LABEL_REF. */
13118 int
13119 label_mentioned_p (rtx x)
13120 {
13121 const char * fmt;
13122 int i;
13123
13124 if (GET_CODE (x) == LABEL_REF)
13125 return 1;
13126
13127 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13128 instruction, but they are constant offsets, not symbols. */
13129 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13130 return 0;
13131
13132 fmt = GET_RTX_FORMAT (GET_CODE (x));
13133 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13134 {
13135 if (fmt[i] == 'E')
13136 {
13137 int j;
13138
13139 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13140 if (label_mentioned_p (XVECEXP (x, i, j)))
13141 return 1;
13142 }
13143 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13144 return 1;
13145 }
13146
13147 return 0;
13148 }
13149
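/* Return 1 if X is a TLS UNSPEC, possibly wrapped in a CONST.  */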
13150 int
13151 tls_mentioned_p (rtx x)
13152 {
13153 switch (GET_CODE (x))
13154 {
13155 case CONST:
13156 return tls_mentioned_p (XEXP (x, 0));
13157
13158 case UNSPEC:
13159 if (XINT (x, 1) == UNSPEC_TLS)
13160 return 1;
13161
13162 default:
13163 return 0;
13164 }
13165 }
13166
13167 /* Return TRUE if INSN must not be duplicated: it is a TLS call or its pattern uses a pc-relative address. */
13168
13169 static bool
13170 arm_cannot_copy_insn_p (rtx_insn *insn)
13171 {
13172 /* The tls call insn cannot be copied, as it is paired with a data
13173 word. */
13174 if (recog_memoized (insn) == CODE_FOR_tlscall)
13175 return true;
13176
13177 subrtx_iterator::array_type array;
13178 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13179 {
13180 const_rtx x = *iter;
13181 if (GET_CODE (x) == UNSPEC
13182 && (XINT (x, 1) == UNSPEC_PIC_BASE
13183 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13184 return true;
13185 }
13186 return false;
13187 }
13188
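/* Map the rtx code of the min/max operation X to the corresponding
   comparison code: SMAX -> GE, SMIN -> LE, UMIN -> LEU, UMAX -> GEU.  */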
13189 enum rtx_code
13190 minmax_code (rtx x)
13191 {
13192 enum rtx_code code = GET_CODE (x);
13193
13194 switch (code)
13195 {
13196 case SMAX:
13197 return GE;
13198 case SMIN:
13199 return LE;
13200 case UMIN:
13201 return LEU;
13202 case UMAX:
13203 return GEU;
13204 default:
13205 gcc_unreachable ();
13206 }
13207 }
13208
13209 /* Match pair of min/max operators that can be implemented via usat/ssat. */
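/* For instance, bounds 0 and 255 match an 8-bit usat (*MASK = 8,
   *SIGNED_SAT = false), while bounds -128 and 127 match an 8-bit ssat
   (*MASK = 8, *SIGNED_SAT = true).  */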
13210
13211 bool
13212 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13213 int *mask, bool *signed_sat)
13214 {
13215 /* The high bound must be a power of two minus one. */
13216 int log = exact_log2 (INTVAL (hi_bound) + 1);
13217 if (log == -1)
13218 return false;
13219
13220 /* The low bound is either zero (for usat) or one less than the
13221 negation of the high bound (for ssat). */
13222 if (INTVAL (lo_bound) == 0)
13223 {
13224 if (mask)
13225 *mask = log;
13226 if (signed_sat)
13227 *signed_sat = false;
13228
13229 return true;
13230 }
13231
13232 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13233 {
13234 if (mask)
13235 *mask = log + 1;
13236 if (signed_sat)
13237 *signed_sat = true;
13238
13239 return true;
13240 }
13241
13242 return false;
13243 }
13244
13245 /* Return 1 if memory locations are adjacent. */
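/* For example, [r1, #4] and [r1, #8] use the same base register and differ
   by exactly one word, so they qualify (in either order); [r1, #4] paired
   with [r2, #8] or with [r1, #12] does not.  On cores with load delay slots
   the transformation is further restricted to the size-optimized cases
   handled below.  */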
13246 int
13247 adjacent_mem_locations (rtx a, rtx b)
13248 {
13249 /* We don't guarantee to preserve the order of these memory refs. */
13250 if (volatile_refs_p (a) || volatile_refs_p (b))
13251 return 0;
13252
13253 if ((REG_P (XEXP (a, 0))
13254 || (GET_CODE (XEXP (a, 0)) == PLUS
13255 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13256 && (REG_P (XEXP (b, 0))
13257 || (GET_CODE (XEXP (b, 0)) == PLUS
13258 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13259 {
13260 HOST_WIDE_INT val0 = 0, val1 = 0;
13261 rtx reg0, reg1;
13262 int val_diff;
13263
13264 if (GET_CODE (XEXP (a, 0)) == PLUS)
13265 {
13266 reg0 = XEXP (XEXP (a, 0), 0);
13267 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13268 }
13269 else
13270 reg0 = XEXP (a, 0);
13271
13272 if (GET_CODE (XEXP (b, 0)) == PLUS)
13273 {
13274 reg1 = XEXP (XEXP (b, 0), 0);
13275 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13276 }
13277 else
13278 reg1 = XEXP (b, 0);
13279
13280 /* Don't accept any offset that will require multiple
13281 instructions to handle, since this would cause the
13282 arith_adjacentmem pattern to output an overlong sequence. */
13283 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13284 return 0;
13285
13286 /* Don't allow an eliminable register: register elimination can make
13287 the offset too large. */
13288 if (arm_eliminable_register (reg0))
13289 return 0;
13290
13291 val_diff = val1 - val0;
13292
13293 if (arm_ld_sched)
13294 {
13295 /* If the target has load delay slots, then there's no benefit
13296 to using an ldm instruction unless the offset is zero and
13297 we are optimizing for size. */
13298 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13299 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13300 && (val_diff == 4 || val_diff == -4));
13301 }
13302
13303 return ((REGNO (reg0) == REGNO (reg1))
13304 && (val_diff == 4 || val_diff == -4));
13305 }
13306
13307 return 0;
13308 }
13309
13310 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13311 for load operations, false for store operations. CONSECUTIVE is true
13312 if the register numbers in the operation must be consecutive in the register
13313 bank. RETURN_PC is true if the value is to be loaded into the PC.
13314 The pattern we are trying to match for load is:
13315 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13316 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13317 :
13318 :
13319 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13320 ]
13321 where
13322 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13323 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13324 3. If consecutive is TRUE, then for kth register being loaded,
13325 REGNO (R_dk) = REGNO (R_d0) + k.
13326 The pattern for store is similar. */
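/* For example, a two-register load with write-back, roughly equivalent to
   "ldmia r0!, {r4, r5}", is matched as:
   [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
   (set (reg:SI r4) (mem:SI (reg:SI r0)))
   (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))].  */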
13327 bool
13328 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13329 bool consecutive, bool return_pc)
13330 {
13331 HOST_WIDE_INT count = XVECLEN (op, 0);
13332 rtx reg, mem, addr;
13333 unsigned regno;
13334 unsigned first_regno;
13335 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13336 rtx elt;
13337 bool addr_reg_in_reglist = false;
13338 bool update = false;
13339 int reg_increment;
13340 int offset_adj;
13341 int regs_per_val;
13342
13343 /* If not in SImode, then registers must be consecutive
13344 (e.g., VLDM instructions for DFmode). */
13345 gcc_assert ((mode == SImode) || consecutive);
13346 /* Setting return_pc for stores is illegal. */
13347 gcc_assert (!return_pc || load);
13348
13349 /* Set up the increments and the regs per val based on the mode. */
13350 reg_increment = GET_MODE_SIZE (mode);
13351 regs_per_val = reg_increment / 4;
13352 offset_adj = return_pc ? 1 : 0;
13353
13354 if (count <= 1
13355 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13356 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13357 return false;
13358
13359 /* Check if this is a write-back. */
13360 elt = XVECEXP (op, 0, offset_adj);
13361 if (GET_CODE (SET_SRC (elt)) == PLUS)
13362 {
13363 i++;
13364 base = 1;
13365 update = true;
13366
13367 /* The offset adjustment must be the number of registers being
13368 popped times the size of a single register. */
13369 if (!REG_P (SET_DEST (elt))
13370 || !REG_P (XEXP (SET_SRC (elt), 0))
13371 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13372 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13373 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13374 ((count - 1 - offset_adj) * reg_increment))
13375 return false;
13376 }
13377
13378 i = i + offset_adj;
13379 base = base + offset_adj;
13380 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13381 success depends on the type: VLDM can do just one reg,
13382 LDM must do at least two. */
13383 if ((count <= i) && (mode == SImode))
13384 return false;
13385
13386 elt = XVECEXP (op, 0, i - 1);
13387 if (GET_CODE (elt) != SET)
13388 return false;
13389
13390 if (load)
13391 {
13392 reg = SET_DEST (elt);
13393 mem = SET_SRC (elt);
13394 }
13395 else
13396 {
13397 reg = SET_SRC (elt);
13398 mem = SET_DEST (elt);
13399 }
13400
13401 if (!REG_P (reg) || !MEM_P (mem))
13402 return false;
13403
13404 regno = REGNO (reg);
13405 first_regno = regno;
13406 addr = XEXP (mem, 0);
13407 if (GET_CODE (addr) == PLUS)
13408 {
13409 if (!CONST_INT_P (XEXP (addr, 1)))
13410 return false;
13411
13412 offset = INTVAL (XEXP (addr, 1));
13413 addr = XEXP (addr, 0);
13414 }
13415
13416 if (!REG_P (addr))
13417 return false;
13418
13419 /* Don't allow SP to be loaded unless it is also the base register. It
13420 guarantees that SP is reset correctly when an LDM instruction
13421 is interrupted. Otherwise, we might end up with a corrupt stack. */
13422 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13423 return false;
13424
13425 for (; i < count; i++)
13426 {
13427 elt = XVECEXP (op, 0, i);
13428 if (GET_CODE (elt) != SET)
13429 return false;
13430
13431 if (load)
13432 {
13433 reg = SET_DEST (elt);
13434 mem = SET_SRC (elt);
13435 }
13436 else
13437 {
13438 reg = SET_SRC (elt);
13439 mem = SET_DEST (elt);
13440 }
13441
13442 if (!REG_P (reg)
13443 || GET_MODE (reg) != mode
13444 || REGNO (reg) <= regno
13445 || (consecutive
13446 && (REGNO (reg) !=
13447 (unsigned int) (first_regno + regs_per_val * (i - base))))
13448 /* Don't allow SP to be loaded unless it is also the base register. It
13449 guarantees that SP is reset correctly when an LDM instruction
13450 is interrupted. Otherwise, we might end up with a corrupt stack. */
13451 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13452 || !MEM_P (mem)
13453 || GET_MODE (mem) != mode
13454 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13455 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13456 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13457 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13458 offset + (i - base) * reg_increment))
13459 && (!REG_P (XEXP (mem, 0))
13460 || offset + (i - base) * reg_increment != 0)))
13461 return false;
13462
13463 regno = REGNO (reg);
13464 if (regno == REGNO (addr))
13465 addr_reg_in_reglist = true;
13466 }
13467
13468 if (load)
13469 {
13470 if (update && addr_reg_in_reglist)
13471 return false;
13472
13473 /* For Thumb-1, the address register is always modified, either by
13474 write-back or by an explicit load. If the pattern does not describe an
13475 update, then the address register must be in the list of loaded registers. */
13476 if (TARGET_THUMB1)
13477 return update || addr_reg_in_reglist;
13478 }
13479
13480 return true;
13481 }
13482
13483 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13484 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13485 instruction. ADD_OFFSET is nonzero if the base address register needs
13486 to be modified with an add instruction before we can use it. */
13487
13488 static bool
13489 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13490 int nops, HOST_WIDE_INT add_offset)
13491 {
13492 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13493 if the offset isn't small enough. The reason 2 ldrs are faster
13494 is because these ARMs are able to do more than one cache access
13495 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13496 whilst the ARM8 has a double bandwidth cache. This means that
13497 these cores can do both an instruction fetch and a data fetch in
13498 a single cycle, so the trick of calculating the address into a
13499 scratch register (one of the result regs) and then doing a load
13500 multiple actually becomes slower (and no smaller in code size).
13501 That is the transformation
13502
13503 ldr rd1, [rbase + offset]
13504 ldr rd2, [rbase + offset + 4]
13505
13506 to
13507
13508 add rd1, rbase, offset
13509 ldmia rd1, {rd1, rd2}
13510
13511 produces worse code -- '3 cycles + any stalls on rd2' instead of
13512 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13513 access per cycle, the first sequence could never complete in less
13514 than 6 cycles, whereas the ldm sequence would only take 5 and
13515 would make better use of sequential accesses if not hitting the
13516 cache.
13517
13518 We cheat here and test 'arm_ld_sched' which we currently know to
13519 only be true for the ARM8, ARM9 and StrongARM. If this ever
13520 changes, then the test below needs to be reworked. */
13521 if (nops == 2 && arm_ld_sched && add_offset != 0)
13522 return false;
13523
13524 /* XScale has load-store double instructions, but they have stricter
13525 alignment requirements than load-store multiple, so we cannot
13526 use them.
13527
13528 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13529 the pipeline until completion.
13530
13531 NREGS CYCLES
13532 1 3
13533 2 4
13534 3 5
13535 4 6
13536
13537 An ldr instruction takes 1-3 cycles, but does not block the
13538 pipeline.
13539
13540 NREGS CYCLES
13541 1 1-3
13542 2 2-6
13543 3 3-9
13544 4 4-12
13545
13546 Best case ldr will always win. However, the more ldr instructions
13547 we issue, the less likely we are to be able to schedule them well.
13548 Using ldr instructions also increases code size.
13549
13550 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13551 for counts of 3 or 4 regs. */
13552 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13553 return false;
13554 return true;
13555 }
13556
13557 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13558 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13559 an array ORDER which describes the sequence to use when accessing the
13560 offsets that produces an ascending order. In this sequence, each
13561 offset must be larger by exactly 4 than the previous one. ORDER[0]
13562 must have been filled in with the lowest offset by the caller.
13563 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13564 we use to verify that ORDER produces an ascending order of registers.
13565 Return true if it was possible to construct such an order, false if
13566 not. */
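/* For example, UNSORTED_OFFSETS {8, 0, 12, 4} with ORDER[0] = 1 yields
   ORDER = {1, 3, 0, 2}, whereas {8, 0, 16, 4} fails because no offset
   follows 8 by exactly 4.  */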
13567
13568 static bool
13569 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13570 int *unsorted_regs)
13571 {
13572 int i;
13573 for (i = 1; i < nops; i++)
13574 {
13575 int j;
13576
13577 order[i] = order[i - 1];
13578 for (j = 0; j < nops; j++)
13579 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13580 {
13581 /* We must find exactly one offset that is higher than the
13582 previous one by 4. */
13583 if (order[i] != order[i - 1])
13584 return false;
13585 order[i] = j;
13586 }
13587 if (order[i] == order[i - 1])
13588 return false;
13589 /* The register numbers must be ascending. */
13590 if (unsorted_regs != NULL
13591 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13592 return false;
13593 }
13594 return true;
13595 }
13596
13597 /* Used to determine in a peephole whether a sequence of load
13598 instructions can be changed into a load-multiple instruction.
13599 NOPS is the number of separate load instructions we are examining. The
13600 first NOPS entries in OPERANDS are the destination registers, the
13601 next NOPS entries are memory operands. If this function is
13602 successful, *BASE is set to the common base register of the memory
13603 accesses; *LOAD_OFFSET is set to the first memory location's offset
13604 from that base register.
13605 REGS is an array filled in with the destination register numbers.
13606 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13607 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13608 the sequence of registers in REGS matches the loads from ascending memory
13609 locations, and the function verifies that the register numbers are
13610 themselves ascending. If CHECK_REGS is false, the register numbers
13611 are stored in the order they are found in the operands. */
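/* The nonzero return value identifies the addressing variant: 1 for ldmia,
   2 for ldmib, 3 for ldmda, 4 for ldmdb, or 5 when the base address must
   first be adjusted by a separate add.  Zero means no load-multiple can be
   used.  */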
13612 static int
13613 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13614 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13615 {
13616 int unsorted_regs[MAX_LDM_STM_OPS];
13617 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13618 int order[MAX_LDM_STM_OPS];
13619 rtx base_reg_rtx = NULL;
13620 int base_reg = -1;
13621 int i, ldm_case;
13622
13623 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13624 easily extended if required. */
13625 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13626
13627 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13628
13629 /* Loop over the operands and check that the memory references are
13630 suitable (i.e. immediate offsets from the same base register). At
13631 the same time, extract the target register, and the memory
13632 offsets. */
13633 for (i = 0; i < nops; i++)
13634 {
13635 rtx reg;
13636 rtx offset;
13637
13638 /* Convert a subreg of a mem into the mem itself. */
13639 if (GET_CODE (operands[nops + i]) == SUBREG)
13640 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13641
13642 gcc_assert (MEM_P (operands[nops + i]));
13643
13644 /* Don't reorder volatile memory references; it doesn't seem worth
13645 looking for the case where the order is ok anyway. */
13646 if (MEM_VOLATILE_P (operands[nops + i]))
13647 return 0;
13648
13649 offset = const0_rtx;
13650
13651 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13652 || (GET_CODE (reg) == SUBREG
13653 && REG_P (reg = SUBREG_REG (reg))))
13654 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13655 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13656 || (GET_CODE (reg) == SUBREG
13657 && REG_P (reg = SUBREG_REG (reg))))
13658 && (CONST_INT_P (offset
13659 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13660 {
13661 if (i == 0)
13662 {
13663 base_reg = REGNO (reg);
13664 base_reg_rtx = reg;
13665 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13666 return 0;
13667 }
13668 else if (base_reg != (int) REGNO (reg))
13669 /* Not addressed from the same base register. */
13670 return 0;
13671
13672 unsorted_regs[i] = (REG_P (operands[i])
13673 ? REGNO (operands[i])
13674 : REGNO (SUBREG_REG (operands[i])));
13675
13676 /* If it isn't an integer register, or if it overwrites the
13677 base register but isn't the last insn in the list, then
13678 we can't do this. */
13679 if (unsorted_regs[i] < 0
13680 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13681 || unsorted_regs[i] > 14
13682 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13683 return 0;
13684
13685 /* Don't allow SP to be loaded unless it is also the base
13686 register. It guarantees that SP is reset correctly when
13687 an LDM instruction is interrupted. Otherwise, we might
13688 end up with a corrupt stack. */
13689 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13690 return 0;
13691
13692 unsorted_offsets[i] = INTVAL (offset);
13693 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13694 order[0] = i;
13695 }
13696 else
13697 /* Not a suitable memory address. */
13698 return 0;
13699 }
13700
13701 /* All the useful information has now been extracted from the
13702 operands into unsorted_regs and unsorted_offsets; additionally,
13703 order[0] has been set to the lowest offset in the list. Sort
13704 the offsets into order, verifying that they are adjacent, and
13705 check that the register numbers are ascending. */
13706 if (!compute_offset_order (nops, unsorted_offsets, order,
13707 check_regs ? unsorted_regs : NULL))
13708 return 0;
13709
13710 if (saved_order)
13711 memcpy (saved_order, order, sizeof order);
13712
13713 if (base)
13714 {
13715 *base = base_reg;
13716
13717 for (i = 0; i < nops; i++)
13718 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13719
13720 *load_offset = unsorted_offsets[order[0]];
13721 }
13722
13723 if (TARGET_THUMB1
13724 && !peep2_reg_dead_p (nops, base_reg_rtx))
13725 return 0;
13726
13727 if (unsorted_offsets[order[0]] == 0)
13728 ldm_case = 1; /* ldmia */
13729 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13730 ldm_case = 2; /* ldmib */
13731 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13732 ldm_case = 3; /* ldmda */
13733 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13734 ldm_case = 4; /* ldmdb */
13735 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13736 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13737 ldm_case = 5;
13738 else
13739 return 0;
13740
13741 if (!multiple_operation_profitable_p (false, nops,
13742 ldm_case == 5
13743 ? unsorted_offsets[order[0]] : 0))
13744 return 0;
13745
13746 return ldm_case;
13747 }
13748
13749 /* Used to determine in a peephole whether a sequence of store instructions can
13750 be changed into a store-multiple instruction.
13751 NOPS is the number of separate store instructions we are examining.
13752 NOPS_TOTAL is the total number of instructions recognized by the peephole
13753 pattern.
13754 The first NOPS entries in OPERANDS are the source registers, the next
13755 NOPS entries are memory operands. If this function is successful, *BASE is
13756 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13757 to the first memory location's offset from that base register. REGS is an
13758 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13759 likewise filled with the corresponding rtx's.
13760 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13761 numbers to an ascending order of stores.
13762 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13763 from ascending memory locations, and the function verifies that the register
13764 numbers are themselves ascending. If CHECK_REGS is false, the register
13765 numbers are stored in the order they are found in the operands. */
13766 static int
13767 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13768 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13769 HOST_WIDE_INT *load_offset, bool check_regs)
13770 {
13771 int unsorted_regs[MAX_LDM_STM_OPS];
13772 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13773 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13774 int order[MAX_LDM_STM_OPS];
13775 int base_reg = -1;
13776 rtx base_reg_rtx = NULL;
13777 int i, stm_case;
13778
13779 /* Write back of base register is currently only supported for Thumb 1. */
13780 int base_writeback = TARGET_THUMB1;
13781
13782 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13783 easily extended if required. */
13784 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13785
13786 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13787
13788 /* Loop over the operands and check that the memory references are
13789 suitable (i.e. immediate offsets from the same base register). At
13790 the same time, extract the target register, and the memory
13791 offsets. */
13792 for (i = 0; i < nops; i++)
13793 {
13794 rtx reg;
13795 rtx offset;
13796
13797 /* Convert a subreg of a mem into the mem itself. */
13798 if (GET_CODE (operands[nops + i]) == SUBREG)
13799 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13800
13801 gcc_assert (MEM_P (operands[nops + i]));
13802
13803 /* Don't reorder volatile memory references; it doesn't seem worth
13804 looking for the case where the order is ok anyway. */
13805 if (MEM_VOLATILE_P (operands[nops + i]))
13806 return 0;
13807
13808 offset = const0_rtx;
13809
13810 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13811 || (GET_CODE (reg) == SUBREG
13812 && REG_P (reg = SUBREG_REG (reg))))
13813 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13814 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13815 || (GET_CODE (reg) == SUBREG
13816 && REG_P (reg = SUBREG_REG (reg))))
13817 && (CONST_INT_P (offset
13818 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13819 {
13820 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13821 ? operands[i] : SUBREG_REG (operands[i]));
13822 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13823
13824 if (i == 0)
13825 {
13826 base_reg = REGNO (reg);
13827 base_reg_rtx = reg;
13828 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13829 return 0;
13830 }
13831 else if (base_reg != (int) REGNO (reg))
13832 /* Not addressed from the same base register. */
13833 return 0;
13834
13835 /* If it isn't an integer register, then we can't do this. */
13836 if (unsorted_regs[i] < 0
13837 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13838 /* The effects are unpredictable if the base register is
13839 both updated and stored. */
13840 || (base_writeback && unsorted_regs[i] == base_reg)
13841 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13842 || unsorted_regs[i] > 14)
13843 return 0;
13844
13845 unsorted_offsets[i] = INTVAL (offset);
13846 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13847 order[0] = i;
13848 }
13849 else
13850 /* Not a suitable memory address. */
13851 return 0;
13852 }
13853
13854 /* All the useful information has now been extracted from the
13855 operands into unsorted_regs and unsorted_offsets; additionally,
13856 order[0] has been set to the lowest offset in the list. Sort
13857 the offsets into order, verifying that they are adjacent, and
13858 check that the register numbers are ascending. */
13859 if (!compute_offset_order (nops, unsorted_offsets, order,
13860 check_regs ? unsorted_regs : NULL))
13861 return 0;
13862
13863 if (saved_order)
13864 memcpy (saved_order, order, sizeof order);
13865
13866 if (base)
13867 {
13868 *base = base_reg;
13869
13870 for (i = 0; i < nops; i++)
13871 {
13872 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13873 if (reg_rtxs)
13874 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13875 }
13876
13877 *load_offset = unsorted_offsets[order[0]];
13878 }
13879
13880 if (TARGET_THUMB1
13881 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13882 return 0;
13883
13884 if (unsorted_offsets[order[0]] == 0)
13885 stm_case = 1; /* stmia */
13886 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13887 stm_case = 2; /* stmib */
13888 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13889 stm_case = 3; /* stmda */
13890 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13891 stm_case = 4; /* stmdb */
13892 else
13893 return 0;
13894
13895 if (!multiple_operation_profitable_p (false, nops, 0))
13896 return 0;
13897
13898 return stm_case;
13899 }
13900 \f
13901 /* Routines for use in generating RTL. */
13902
13903 /* Generate a load-multiple instruction. COUNT is the number of loads in
13904 the instruction; REGS and MEMS are arrays containing the operands.
13905 BASEREG is the base register to be used in addressing the memory operands.
13906 WBACK_OFFSET is nonzero if the instruction should update the base
13907 register. */
13908
13909 static rtx
13910 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13911 HOST_WIDE_INT wback_offset)
13912 {
13913 int i = 0, j;
13914 rtx result;
13915
13916 if (!multiple_operation_profitable_p (false, count, 0))
13917 {
13918 rtx seq;
13919
13920 start_sequence ();
13921
13922 for (i = 0; i < count; i++)
13923 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13924
13925 if (wback_offset != 0)
13926 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13927
13928 seq = get_insns ();
13929 end_sequence ();
13930
13931 return seq;
13932 }
13933
13934 result = gen_rtx_PARALLEL (VOIDmode,
13935 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13936 if (wback_offset != 0)
13937 {
13938 XVECEXP (result, 0, 0)
13939 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13940 i = 1;
13941 count++;
13942 }
13943
13944 for (j = 0; i < count; i++, j++)
13945 XVECEXP (result, 0, i)
13946 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13947
13948 return result;
13949 }
13950
13951 /* Generate a store-multiple instruction. COUNT is the number of stores in
13952 the instruction; REGS and MEMS are arrays containing the operands.
13953 BASEREG is the base register to be used in addressing the memory operands.
13954 WBACK_OFFSET is nonzero if the instruction should update the base
13955 register. */
13956
13957 static rtx
13958 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13959 HOST_WIDE_INT wback_offset)
13960 {
13961 int i = 0, j;
13962 rtx result;
13963
13964 if (GET_CODE (basereg) == PLUS)
13965 basereg = XEXP (basereg, 0);
13966
13967 if (!multiple_operation_profitable_p (false, count, 0))
13968 {
13969 rtx seq;
13970
13971 start_sequence ();
13972
13973 for (i = 0; i < count; i++)
13974 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13975
13976 if (wback_offset != 0)
13977 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13978
13979 seq = get_insns ();
13980 end_sequence ();
13981
13982 return seq;
13983 }
13984
13985 result = gen_rtx_PARALLEL (VOIDmode,
13986 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13987 if (wback_offset != 0)
13988 {
13989 XVECEXP (result, 0, 0)
13990 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13991 i = 1;
13992 count++;
13993 }
13994
13995 for (j = 0; i < count; i++, j++)
13996 XVECEXP (result, 0, i)
13997 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13998
13999 return result;
14000 }
14001
14002 /* Generate either a load-multiple or a store-multiple instruction. This
14003 function can be used in situations where we can start with a single MEM
14004 rtx and adjust its address upwards.
14005 COUNT is the number of operations in the instruction, not counting a
14006 possible update of the base register. REGS is an array containing the
14007 register operands.
14008 BASEREG is the base register to be used in addressing the memory operands,
14009 which are constructed from BASEMEM.
14010 WRITE_BACK specifies whether the generated instruction should include an
14011 update of the base register.
14012 OFFSETP is used to pass an offset to and from this function; this offset
14013 is not used when constructing the address (instead BASEMEM should have an
14014 appropriate offset in its address), it is used only for setting
14015 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
14016
14017 static rtx
14018 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14019 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14020 {
14021 rtx mems[MAX_LDM_STM_OPS];
14022 HOST_WIDE_INT offset = *offsetp;
14023 int i;
14024
14025 gcc_assert (count <= MAX_LDM_STM_OPS);
14026
14027 if (GET_CODE (basereg) == PLUS)
14028 basereg = XEXP (basereg, 0);
14029
14030 for (i = 0; i < count; i++)
14031 {
14032 rtx addr = plus_constant (Pmode, basereg, i * 4);
14033 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14034 offset += 4;
14035 }
14036
14037 if (write_back)
14038 *offsetp = offset;
14039
14040 if (is_load)
14041 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14042 write_back ? 4 * count : 0);
14043 else
14044 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14045 write_back ? 4 * count : 0);
14046 }
14047
14048 rtx
14049 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14050 rtx basemem, HOST_WIDE_INT *offsetp)
14051 {
14052 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14053 offsetp);
14054 }
14055
14056 rtx
14057 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14058 rtx basemem, HOST_WIDE_INT *offsetp)
14059 {
14060 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14061 offsetp);
14062 }
14063
14064 /* Called from a peephole2 expander to turn a sequence of loads into an
14065 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14066 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14067 is true if we can reorder the registers because they are used commutatively
14068 subsequently.
14069 Returns true iff we could generate a new instruction. */
14070
14071 bool
14072 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14073 {
14074 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14075 rtx mems[MAX_LDM_STM_OPS];
14076 int i, j, base_reg;
14077 rtx base_reg_rtx;
14078 HOST_WIDE_INT offset;
14079 int write_back = FALSE;
14080 int ldm_case;
14081 rtx addr;
14082
14083 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14084 &base_reg, &offset, !sort_regs);
14085
14086 if (ldm_case == 0)
14087 return false;
14088
14089 if (sort_regs)
14090 for (i = 0; i < nops - 1; i++)
14091 for (j = i + 1; j < nops; j++)
14092 if (regs[i] > regs[j])
14093 {
14094 int t = regs[i];
14095 regs[i] = regs[j];
14096 regs[j] = t;
14097 }
14098 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14099
14100 if (TARGET_THUMB1)
14101 {
14102 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14103 gcc_assert (ldm_case == 1 || ldm_case == 5);
14104 write_back = TRUE;
14105 }
14106
14107 if (ldm_case == 5)
14108 {
14109 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14110 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14111 offset = 0;
14112 if (!TARGET_THUMB1)
14113 {
14114 base_reg = regs[0];
14115 base_reg_rtx = newbase;
14116 }
14117 }
14118
14119 for (i = 0; i < nops; i++)
14120 {
14121 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14122 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14123 SImode, addr, 0);
14124 }
14125 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14126 write_back ? offset + i * 4 : 0));
14127 return true;
14128 }
14129
14130 /* Called from a peephole2 expander to turn a sequence of stores into an
14131 STM instruction. OPERANDS are the operands found by the peephole matcher;
14132 NOPS indicates how many separate stores we are trying to combine.
14133 Returns true iff we could generate a new instruction. */
14134
14135 bool
14136 gen_stm_seq (rtx *operands, int nops)
14137 {
14138 int i;
14139 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14140 rtx mems[MAX_LDM_STM_OPS];
14141 int base_reg;
14142 rtx base_reg_rtx;
14143 HOST_WIDE_INT offset;
14144 int write_back = FALSE;
14145 int stm_case;
14146 rtx addr;
14147 bool base_reg_dies;
14148
14149 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14150 mem_order, &base_reg, &offset, true);
14151
14152 if (stm_case == 0)
14153 return false;
14154
14155 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14156
14157 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14158 if (TARGET_THUMB1)
14159 {
14160 gcc_assert (base_reg_dies);
14161 write_back = TRUE;
14162 }
14163
14164 if (stm_case == 5)
14165 {
14166 gcc_assert (base_reg_dies);
14167 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14168 offset = 0;
14169 }
14170
14171 addr = plus_constant (Pmode, base_reg_rtx, offset);
14172
14173 for (i = 0; i < nops; i++)
14174 {
14175 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14176 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14177 SImode, addr, 0);
14178 }
14179 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14180 write_back ? offset + i * 4 : 0));
14181 return true;
14182 }
14183
14184 /* Called from a peephole2 expander to turn a sequence of stores that are
14185 preceded by constant loads into an STM instruction. OPERANDS are the
14186 operands found by the peephole matcher; NOPS indicates how many
14187 separate stores we are trying to combine; there are 2 * NOPS
14188 instructions in the peephole.
14189 Returns true iff we could generate a new instruction. */
14190
14191 bool
14192 gen_const_stm_seq (rtx *operands, int nops)
14193 {
14194 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14195 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14196 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14197 rtx mems[MAX_LDM_STM_OPS];
14198 int base_reg;
14199 rtx base_reg_rtx;
14200 HOST_WIDE_INT offset;
14201 int write_back = FALSE;
14202 int stm_case;
14203 rtx addr;
14204 bool base_reg_dies;
14205 int i, j;
14206 HARD_REG_SET allocated;
14207
14208 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14209 mem_order, &base_reg, &offset, false);
14210
14211 if (stm_case == 0)
14212 return false;
14213
14214 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14215
14216 /* If the same register is used more than once, try to find a free
14217 register. */
14218 CLEAR_HARD_REG_SET (allocated);
14219 for (i = 0; i < nops; i++)
14220 {
14221 for (j = i + 1; j < nops; j++)
14222 if (regs[i] == regs[j])
14223 {
14224 rtx t = peep2_find_free_register (0, nops * 2,
14225 TARGET_THUMB1 ? "l" : "r",
14226 SImode, &allocated);
14227 if (t == NULL_RTX)
14228 return false;
14229 reg_rtxs[i] = t;
14230 regs[i] = REGNO (t);
14231 }
14232 }
14233
14234 /* Compute an ordering that maps the register numbers to an ascending
14235 sequence. */
14236 reg_order[0] = 0;
14237 for (i = 0; i < nops; i++)
14238 if (regs[i] < regs[reg_order[0]])
14239 reg_order[0] = i;
14240
14241 for (i = 1; i < nops; i++)
14242 {
14243 int this_order = reg_order[i - 1];
14244 for (j = 0; j < nops; j++)
14245 if (regs[j] > regs[reg_order[i - 1]]
14246 && (this_order == reg_order[i - 1]
14247 || regs[j] < regs[this_order]))
14248 this_order = j;
14249 reg_order[i] = this_order;
14250 }
14251
14252 /* Ensure that registers that must be live after the instruction end
14253 up with the correct value. */
14254 for (i = 0; i < nops; i++)
14255 {
14256 int this_order = reg_order[i];
14257 if ((this_order != mem_order[i]
14258 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14259 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14260 return false;
14261 }
14262
14263 /* Load the constants. */
14264 for (i = 0; i < nops; i++)
14265 {
14266 rtx op = operands[2 * nops + mem_order[i]];
14267 sorted_regs[i] = regs[reg_order[i]];
14268 emit_move_insn (reg_rtxs[reg_order[i]], op);
14269 }
14270
14271 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14272
14273 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14274 if (TARGET_THUMB1)
14275 {
14276 gcc_assert (base_reg_dies);
14277 write_back = TRUE;
14278 }
14279
14280 if (stm_case == 5)
14281 {
14282 gcc_assert (base_reg_dies);
14283 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14284 offset = 0;
14285 }
14286
14287 addr = plus_constant (Pmode, base_reg_rtx, offset);
14288
14289 for (i = 0; i < nops; i++)
14290 {
14291 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14292 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14293 SImode, addr, 0);
14294 }
14295 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14296 write_back ? offset + i * 4 : 0));
14297 return true;
14298 }
14299
14300 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14301 unaligned copies on processors which support unaligned semantics for those
14302 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14303 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14304 An interleave factor of 1 (the minimum) will perform no interleaving.
14305 Load/store multiple are used for aligned addresses where possible. */
14306
14307 static void
14308 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14309 HOST_WIDE_INT length,
14310 unsigned int interleave_factor)
14311 {
14312 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14313 int *regnos = XALLOCAVEC (int, interleave_factor);
14314 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14315 HOST_WIDE_INT i, j;
14316 HOST_WIDE_INT remaining = length, words;
14317 rtx halfword_tmp = NULL, byte_tmp = NULL;
14318 rtx dst, src;
14319 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14320 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14321 HOST_WIDE_INT srcoffset, dstoffset;
14322 HOST_WIDE_INT src_autoinc, dst_autoinc;
14323 rtx mem, addr;
14324
14325 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14326
14327 /* Use hard registers if we have aligned source or destination so we can use
14328 load/store multiple with contiguous registers. */
14329 if (dst_aligned || src_aligned)
14330 for (i = 0; i < interleave_factor; i++)
14331 regs[i] = gen_rtx_REG (SImode, i);
14332 else
14333 for (i = 0; i < interleave_factor; i++)
14334 regs[i] = gen_reg_rtx (SImode);
14335
14336 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14337 src = copy_addr_to_reg (XEXP (srcbase, 0));
14338
14339 srcoffset = dstoffset = 0;
14340
14341 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14342 For copying the last bytes we want to subtract this offset again. */
14343 src_autoinc = dst_autoinc = 0;
14344
14345 for (i = 0; i < interleave_factor; i++)
14346 regnos[i] = i;
14347
14348 /* Copy BLOCK_SIZE_BYTES chunks. */
14349
14350 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14351 {
14352 /* Load words. */
14353 if (src_aligned && interleave_factor > 1)
14354 {
14355 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14356 TRUE, srcbase, &srcoffset));
14357 src_autoinc += UNITS_PER_WORD * interleave_factor;
14358 }
14359 else
14360 {
14361 for (j = 0; j < interleave_factor; j++)
14362 {
14363 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14364 - src_autoinc));
14365 mem = adjust_automodify_address (srcbase, SImode, addr,
14366 srcoffset + j * UNITS_PER_WORD);
14367 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14368 }
14369 srcoffset += block_size_bytes;
14370 }
14371
14372 /* Store words. */
14373 if (dst_aligned && interleave_factor > 1)
14374 {
14375 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14376 TRUE, dstbase, &dstoffset));
14377 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14378 }
14379 else
14380 {
14381 for (j = 0; j < interleave_factor; j++)
14382 {
14383 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14384 - dst_autoinc));
14385 mem = adjust_automodify_address (dstbase, SImode, addr,
14386 dstoffset + j * UNITS_PER_WORD);
14387 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14388 }
14389 dstoffset += block_size_bytes;
14390 }
14391
14392 remaining -= block_size_bytes;
14393 }
14394
14395 /* Copy any whole words left (note these aren't interleaved with any
14396 subsequent halfword/byte load/stores in the interests of simplicity). */
14397
14398 words = remaining / UNITS_PER_WORD;
14399
14400 gcc_assert (words < interleave_factor);
14401
14402 if (src_aligned && words > 1)
14403 {
14404 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14405 &srcoffset));
14406 src_autoinc += UNITS_PER_WORD * words;
14407 }
14408 else
14409 {
14410 for (j = 0; j < words; j++)
14411 {
14412 addr = plus_constant (Pmode, src,
14413 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14414 mem = adjust_automodify_address (srcbase, SImode, addr,
14415 srcoffset + j * UNITS_PER_WORD);
14416 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14417 }
14418 srcoffset += words * UNITS_PER_WORD;
14419 }
14420
14421 if (dst_aligned && words > 1)
14422 {
14423 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14424 &dstoffset));
14425 dst_autoinc += words * UNITS_PER_WORD;
14426 }
14427 else
14428 {
14429 for (j = 0; j < words; j++)
14430 {
14431 addr = plus_constant (Pmode, dst,
14432 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14433 mem = adjust_automodify_address (dstbase, SImode, addr,
14434 dstoffset + j * UNITS_PER_WORD);
14435 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14436 }
14437 dstoffset += words * UNITS_PER_WORD;
14438 }
14439
14440 remaining -= words * UNITS_PER_WORD;
14441
14442 gcc_assert (remaining < 4);
14443
14444 /* Copy a halfword if necessary. */
14445
14446 if (remaining >= 2)
14447 {
14448 halfword_tmp = gen_reg_rtx (SImode);
14449
14450 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14451 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14452 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14453
14454 /* Either write out immediately, or delay until we've loaded the last
14455 byte, depending on interleave factor. */
14456 if (interleave_factor == 1)
14457 {
14458 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14459 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14460 emit_insn (gen_unaligned_storehi (mem,
14461 gen_lowpart (HImode, halfword_tmp)));
14462 halfword_tmp = NULL;
14463 dstoffset += 2;
14464 }
14465
14466 remaining -= 2;
14467 srcoffset += 2;
14468 }
14469
14470 gcc_assert (remaining < 2);
14471
14472 /* Copy last byte. */
14473
14474 if ((remaining & 1) != 0)
14475 {
14476 byte_tmp = gen_reg_rtx (SImode);
14477
14478 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14479 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14480 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14481
14482 if (interleave_factor == 1)
14483 {
14484 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14485 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14486 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14487 byte_tmp = NULL;
14488 dstoffset++;
14489 }
14490
14491 remaining--;
14492 srcoffset++;
14493 }
14494
14495 /* Store last halfword if we haven't done so already. */
14496
14497 if (halfword_tmp)
14498 {
14499 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14500 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14501 emit_insn (gen_unaligned_storehi (mem,
14502 gen_lowpart (HImode, halfword_tmp)));
14503 dstoffset += 2;
14504 }
14505
14506 /* Likewise for last byte. */
14507
14508 if (byte_tmp)
14509 {
14510 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14511 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14512 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14513 dstoffset++;
14514 }
14515
14516 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14517 }
14518
14519 /* From mips_adjust_block_mem:
14520
14521 Helper function for doing a loop-based block operation on memory
14522 reference MEM. Each iteration of the loop will operate on LENGTH
14523 bytes of MEM.
14524
14525 Create a new base register for use within the loop and point it to
14526 the start of MEM. Create a new memory reference that uses this
14527 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14528
14529 static void
14530 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14531 rtx *loop_mem)
14532 {
14533 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14534
14535 /* Although the new mem does not refer to a known location,
14536 it does keep up to LENGTH bytes of alignment. */
14537 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14538 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14539 }
14540
14541 /* From mips_block_move_loop:
14542
14543 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14544 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14545 the memory regions do not overlap. */
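/* The generated code is roughly:

       final_src = src + (length - length % bytes_per_iter);
     loop:
       copy bytes_per_iter bytes;   (arm_block_move_unaligned_straight)
       src += bytes_per_iter;  dest += bytes_per_iter;
       if (src != final_src) goto loop;
       copy the leftover bytes, if any.  */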
14546
14547 static void
14548 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14549 unsigned int interleave_factor,
14550 HOST_WIDE_INT bytes_per_iter)
14551 {
14552 rtx src_reg, dest_reg, final_src, test;
14553 HOST_WIDE_INT leftover;
14554
14555 leftover = length % bytes_per_iter;
14556 length -= leftover;
14557
14558 /* Create registers and memory references for use within the loop. */
14559 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14560 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14561
14562 /* Calculate the value that SRC_REG should have after the last iteration of
14563 the loop. */
14564 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14565 0, 0, OPTAB_WIDEN);
14566
14567 /* Emit the start of the loop. */
14568 rtx_code_label *label = gen_label_rtx ();
14569 emit_label (label);
14570
14571 /* Emit the loop body. */
14572 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14573 interleave_factor);
14574
14575 /* Move on to the next block. */
14576 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14577 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14578
14579 /* Emit the loop condition. */
14580 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14581 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14582
14583 /* Mop up any left-over bytes. */
14584 if (leftover)
14585 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14586 }
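
/* Illustrative sketch only: the register names and scheduling below are
   assumptions, not necessarily what the compiler emits.  A call with
   LENGTH == 40 and BYTES_PER_ITER == 16 produces a structure roughly like

	mov	ip, <src>		@ src_reg
	mov	r3, <dst>		@ dest_reg
	add	lr, ip, #32		@ final_src = src_reg + (40 - 40 % 16)
   .L1:
	<straight 16-byte copy from [ip] to [r3]>
	add	ip, ip, #16
	add	r3, r3, #16
	cmp	ip, lr
	bne	.L1
	<straight 8-byte copy of the leftover bytes>  */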
14587
14588 /* Emit a block move when either the source or destination is unaligned (not
14589 aligned to a four-byte boundary). This may need further tuning depending on
14590 core type, optimize_size setting, etc. */
14591
14592 static int
14593 arm_movmemqi_unaligned (rtx *operands)
14594 {
14595 HOST_WIDE_INT length = INTVAL (operands[2]);
14596
14597 if (optimize_size)
14598 {
14599 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14600 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14601 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14602 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14603 or dst_aligned though: allow more interleaving in those cases since the
14604 resulting code can be smaller. */
14605 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14606 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14607
14608 if (length > 12)
14609 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14610 interleave_factor, bytes_per_iter);
14611 else
14612 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14613 interleave_factor);
14614 }
14615 else
14616 {
14617 /* Note that the loop created by arm_block_move_unaligned_loop may be
14618 subject to loop unrolling, which makes tuning this condition a little
14619 redundant. */
14620 if (length > 32)
14621 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14622 else
14623 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14624 }
14625
14626 return 1;
14627 }
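
/* Worked example of the thresholds above (illustrative only): with -Os an
   unaligned 10-byte copy takes the "straight" path with interleave factor 1,
   while a 40-byte copy uses the loop moving 4 bytes per iteration (or 8 if
   one side happens to be word aligned).  Without -Os the loop is only used
   once the copy exceeds 32 bytes, and then moves 16 bytes per iteration with
   an interleave factor of 4.  */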
14628
14629 int
14630 arm_gen_movmemqi (rtx *operands)
14631 {
14632 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14633 HOST_WIDE_INT srcoffset, dstoffset;
14634 int i;
14635 rtx src, dst, srcbase, dstbase;
14636 rtx part_bytes_reg = NULL;
14637 rtx mem;
14638
14639 if (!CONST_INT_P (operands[2])
14640 || !CONST_INT_P (operands[3])
14641 || INTVAL (operands[2]) > 64)
14642 return 0;
14643
14644 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14645 return arm_movmemqi_unaligned (operands);
14646
14647 if (INTVAL (operands[3]) & 3)
14648 return 0;
14649
14650 dstbase = operands[0];
14651 srcbase = operands[1];
14652
14653 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14654 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14655
14656 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14657 out_words_to_go = INTVAL (operands[2]) / 4;
14658 last_bytes = INTVAL (operands[2]) & 3;
14659 dstoffset = srcoffset = 0;
14660
14661 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14662 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14663
14664 for (i = 0; in_words_to_go >= 2; i+=4)
14665 {
14666 if (in_words_to_go > 4)
14667 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14668 TRUE, srcbase, &srcoffset));
14669 else
14670 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14671 src, FALSE, srcbase,
14672 &srcoffset));
14673
14674 if (out_words_to_go)
14675 {
14676 if (out_words_to_go > 4)
14677 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14678 TRUE, dstbase, &dstoffset));
14679 else if (out_words_to_go != 1)
14680 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14681 out_words_to_go, dst,
14682 (last_bytes == 0
14683 ? FALSE : TRUE),
14684 dstbase, &dstoffset));
14685 else
14686 {
14687 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14688 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14689 if (last_bytes != 0)
14690 {
14691 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14692 dstoffset += 4;
14693 }
14694 }
14695 }
14696
14697 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14698 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14699 }
14700
14701 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14702 if (out_words_to_go)
14703 {
14704 rtx sreg;
14705
14706 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14707 sreg = copy_to_reg (mem);
14708
14709 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14710 emit_move_insn (mem, sreg);
14711 in_words_to_go--;
14712
14713 gcc_assert (!in_words_to_go); /* Sanity check */
14714 }
14715
14716 if (in_words_to_go)
14717 {
14718 gcc_assert (in_words_to_go > 0);
14719
14720 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14721 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14722 }
14723
14724 gcc_assert (!last_bytes || part_bytes_reg);
14725
14726 if (BYTES_BIG_ENDIAN && last_bytes)
14727 {
14728 rtx tmp = gen_reg_rtx (SImode);
14729
14730 /* The bytes we want are in the top end of the word. */
14731 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14732 GEN_INT (8 * (4 - last_bytes))));
14733 part_bytes_reg = tmp;
14734
14735 while (last_bytes)
14736 {
14737 mem = adjust_automodify_address (dstbase, QImode,
14738 plus_constant (Pmode, dst,
14739 last_bytes - 1),
14740 dstoffset + last_bytes - 1);
14741 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14742
14743 if (--last_bytes)
14744 {
14745 tmp = gen_reg_rtx (SImode);
14746 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14747 part_bytes_reg = tmp;
14748 }
14749 }
14750
14751 }
14752 else
14753 {
14754 if (last_bytes > 1)
14755 {
14756 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14757 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14758 last_bytes -= 2;
14759 if (last_bytes)
14760 {
14761 rtx tmp = gen_reg_rtx (SImode);
14762 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14763 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14764 part_bytes_reg = tmp;
14765 dstoffset += 2;
14766 }
14767 }
14768
14769 if (last_bytes)
14770 {
14771 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14772 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14773 }
14774 }
14775
14776 return 1;
14777 }
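
/* Rough illustration of the expansion above (hard registers r0-r3 carry the
   data because arm_regs_in_sequence is used; the addressing registers are
   pseudos, so register allocation will differ): a 14-byte word-aligned copy
   becomes approximately

	ldmia	src, {r0, r1, r2, r3}	@ 14 bytes rounded up to 4 words in
	stmia	dst!, {r0, r1, r2}	@ 3 complete words out
	strh	r3, [dst]		@ trailing halfword (little-endian)

   On big-endian targets the wanted bytes are first shifted down from the
   top of r3 and then stored one byte at a time.  */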
14778
14779 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14780 by mode size. */
14781 inline static rtx
14782 next_consecutive_mem (rtx mem)
14783 {
14784 machine_mode mode = GET_MODE (mem);
14785 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14786 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14787
14788 return adjust_automodify_address (mem, mode, addr, offset);
14789 }
14790
14791 /* Copy using LDRD/STRD instructions whenever possible.
14792 Returns true upon success. */
14793 bool
14794 gen_movmem_ldrd_strd (rtx *operands)
14795 {
14796 unsigned HOST_WIDE_INT len;
14797 HOST_WIDE_INT align;
14798 rtx src, dst, base;
14799 rtx reg0;
14800 bool src_aligned, dst_aligned;
14801 bool src_volatile, dst_volatile;
14802
14803 gcc_assert (CONST_INT_P (operands[2]));
14804 gcc_assert (CONST_INT_P (operands[3]));
14805
14806 len = UINTVAL (operands[2]);
14807 if (len > 64)
14808 return false;
14809
14810 /* Maximum alignment we can assume for both src and dst buffers. */
14811 align = INTVAL (operands[3]);
14812
14813 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14814 return false;
14815
14816 /* Place src and dst addresses in registers
14817 and update the corresponding mem rtx. */
14818 dst = operands[0];
14819 dst_volatile = MEM_VOLATILE_P (dst);
14820 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14821 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14822 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14823
14824 src = operands[1];
14825 src_volatile = MEM_VOLATILE_P (src);
14826 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14827 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14828 src = adjust_automodify_address (src, VOIDmode, base, 0);
14829
14830 if (!unaligned_access && !(src_aligned && dst_aligned))
14831 return false;
14832
14833 if (src_volatile || dst_volatile)
14834 return false;
14835
14836 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14837 if (!(dst_aligned || src_aligned))
14838 return arm_gen_movmemqi (operands);
14839
14840 src = adjust_address (src, DImode, 0);
14841 dst = adjust_address (dst, DImode, 0);
14842 while (len >= 8)
14843 {
14844 len -= 8;
14845 reg0 = gen_reg_rtx (DImode);
14846 if (src_aligned)
14847 emit_move_insn (reg0, src);
14848 else
14849 emit_insn (gen_unaligned_loaddi (reg0, src));
14850
14851 if (dst_aligned)
14852 emit_move_insn (dst, reg0);
14853 else
14854 emit_insn (gen_unaligned_storedi (dst, reg0));
14855
14856 src = next_consecutive_mem (src);
14857 dst = next_consecutive_mem (dst);
14858 }
14859
14860 gcc_assert (len < 8);
14861 if (len >= 4)
14862 {
14863 /* More than a word but less than a double-word to copy. Copy a word. */
14864 reg0 = gen_reg_rtx (SImode);
14865 src = adjust_address (src, SImode, 0);
14866 dst = adjust_address (dst, SImode, 0);
14867 if (src_aligned)
14868 emit_move_insn (reg0, src);
14869 else
14870 emit_insn (gen_unaligned_loadsi (reg0, src));
14871
14872 if (dst_aligned)
14873 emit_move_insn (dst, reg0);
14874 else
14875 emit_insn (gen_unaligned_storesi (dst, reg0));
14876
14877 src = next_consecutive_mem (src);
14878 dst = next_consecutive_mem (dst);
14879 len -= 4;
14880 }
14881
14882 if (len == 0)
14883 return true;
14884
14885 /* Copy the remaining bytes. */
14886 if (len >= 2)
14887 {
14888 dst = adjust_address (dst, HImode, 0);
14889 src = adjust_address (src, HImode, 0);
14890 reg0 = gen_reg_rtx (SImode);
14891 if (src_aligned)
14892 emit_insn (gen_zero_extendhisi2 (reg0, src));
14893 else
14894 emit_insn (gen_unaligned_loadhiu (reg0, src));
14895
14896 if (dst_aligned)
14897 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14898 else
14899 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14900
14901 src = next_consecutive_mem (src);
14902 dst = next_consecutive_mem (dst);
14903 if (len == 2)
14904 return true;
14905 }
14906
14907 dst = adjust_address (dst, QImode, 0);
14908 src = adjust_address (src, QImode, 0);
14909 reg0 = gen_reg_rtx (QImode);
14910 emit_move_insn (reg0, src);
14911 emit_move_insn (dst, reg0);
14912 return true;
14913 }
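
/* Illustrative sketch only (register numbers are made up): copying 14 bytes
   between two word-aligned buffers through this routine gives approximately

	ldrd	r4, r5, [src]		@ bytes 0-7
	strd	r4, r5, [dst]
	ldr	r4, [src, #8]		@ bytes 8-11
	str	r4, [dst, #8]
	ldrh	r4, [src, #12]		@ bytes 12-13
	strh	r4, [dst, #12]

   with the unaligned load/store patterns substituted on whichever side is
   not known to be word aligned.  */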
14914
14915 /* Select a dominance comparison mode if possible for a test of the general
14916 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14917 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14918 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14919 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14920 In all cases OP will be either EQ or NE, but we don't need to know which
14921 here. If we are unable to support a dominance comparison we return
14922 CC mode. This will then fail to match for the RTL expressions that
14923 generate this call. */
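/* For instance (illustration of the mapping implemented below):
     (eq r0 r1) && (eq r2 r3)  selects CC_DEQmode,
     (lt r0 r1) || (le r2 r3)  selects CC_DLEmode, since LE dominates LT,
     (ltu ...)  || (geu ...)   yields CCmode because neither condition
   dominates the other, and the caller's pattern then fails to match.  */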
14924 machine_mode
14925 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14926 {
14927 enum rtx_code cond1, cond2;
14928 int swapped = 0;
14929
14930 /* Currently we will probably get the wrong result if the individual
14931 comparisons are not simple. This also ensures that it is safe to
14932 reverse a comparison if necessary. */
14933 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14934 != CCmode)
14935 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14936 != CCmode))
14937 return CCmode;
14938
14939 /* The if_then_else variant of this tests the second condition if the
14940 first passes, but is true if the first fails. Reverse the first
14941 condition to get a true "inclusive-or" expression. */
14942 if (cond_or == DOM_CC_NX_OR_Y)
14943 cond1 = reverse_condition (cond1);
14944
14945 /* If the comparisons are not equal, and one doesn't dominate the other,
14946 then we can't do this. */
14947 if (cond1 != cond2
14948 && !comparison_dominates_p (cond1, cond2)
14949 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14950 return CCmode;
14951
14952 if (swapped)
14953 std::swap (cond1, cond2);
14954
14955 switch (cond1)
14956 {
14957 case EQ:
14958 if (cond_or == DOM_CC_X_AND_Y)
14959 return CC_DEQmode;
14960
14961 switch (cond2)
14962 {
14963 case EQ: return CC_DEQmode;
14964 case LE: return CC_DLEmode;
14965 case LEU: return CC_DLEUmode;
14966 case GE: return CC_DGEmode;
14967 case GEU: return CC_DGEUmode;
14968 default: gcc_unreachable ();
14969 }
14970
14971 case LT:
14972 if (cond_or == DOM_CC_X_AND_Y)
14973 return CC_DLTmode;
14974
14975 switch (cond2)
14976 {
14977 case LT:
14978 return CC_DLTmode;
14979 case LE:
14980 return CC_DLEmode;
14981 case NE:
14982 return CC_DNEmode;
14983 default:
14984 gcc_unreachable ();
14985 }
14986
14987 case GT:
14988 if (cond_or == DOM_CC_X_AND_Y)
14989 return CC_DGTmode;
14990
14991 switch (cond2)
14992 {
14993 case GT:
14994 return CC_DGTmode;
14995 case GE:
14996 return CC_DGEmode;
14997 case NE:
14998 return CC_DNEmode;
14999 default:
15000 gcc_unreachable ();
15001 }
15002
15003 case LTU:
15004 if (cond_or == DOM_CC_X_AND_Y)
15005 return CC_DLTUmode;
15006
15007 switch (cond2)
15008 {
15009 case LTU:
15010 return CC_DLTUmode;
15011 case LEU:
15012 return CC_DLEUmode;
15013 case NE:
15014 return CC_DNEmode;
15015 default:
15016 gcc_unreachable ();
15017 }
15018
15019 case GTU:
15020 if (cond_or == DOM_CC_X_AND_Y)
15021 return CC_DGTUmode;
15022
15023 switch (cond2)
15024 {
15025 case GTU:
15026 return CC_DGTUmode;
15027 case GEU:
15028 return CC_DGEUmode;
15029 case NE:
15030 return CC_DNEmode;
15031 default:
15032 gcc_unreachable ();
15033 }
15034
15035 /* The remaining cases only occur when both comparisons are the
15036 same. */
15037 case NE:
15038 gcc_assert (cond1 == cond2);
15039 return CC_DNEmode;
15040
15041 case LE:
15042 gcc_assert (cond1 == cond2);
15043 return CC_DLEmode;
15044
15045 case GE:
15046 gcc_assert (cond1 == cond2);
15047 return CC_DGEmode;
15048
15049 case LEU:
15050 gcc_assert (cond1 == cond2);
15051 return CC_DLEUmode;
15052
15053 case GEU:
15054 gcc_assert (cond1 == cond2);
15055 return CC_DGEUmode;
15056
15057 default:
15058 gcc_unreachable ();
15059 }
15060 }
15061
15062 machine_mode
15063 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15064 {
15065 /* All floating point compares return CCFP if it is an equality
15066 comparison, and CCFPE otherwise. */
15067 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15068 {
15069 switch (op)
15070 {
15071 case EQ:
15072 case NE:
15073 case UNORDERED:
15074 case ORDERED:
15075 case UNLT:
15076 case UNLE:
15077 case UNGT:
15078 case UNGE:
15079 case UNEQ:
15080 case LTGT:
15081 return CCFPmode;
15082
15083 case LT:
15084 case LE:
15085 case GT:
15086 case GE:
15087 return CCFPEmode;
15088
15089 default:
15090 gcc_unreachable ();
15091 }
15092 }
15093
15094 /* A compare with a shifted operand. Because of canonicalization, the
15095 comparison will have to be swapped when we emit the assembler. */
15096 if (GET_MODE (y) == SImode
15097 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15098 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15099 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15100 || GET_CODE (x) == ROTATERT))
15101 return CC_SWPmode;
15102
15103 /* This operation is performed swapped, but since we only rely on the Z
15104 flag we don't need an additional mode. */
15105 if (GET_MODE (y) == SImode
15106 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15107 && GET_CODE (x) == NEG
15108 && (op == EQ || op == NE))
15109 return CC_Zmode;
15110
15111 /* This is a special case that is used by combine to allow a
15112 comparison of a shifted byte load to be split into a zero-extend
15113 followed by a comparison of the shifted integer (only valid for
15114 equalities and unsigned inequalities). */
15115 if (GET_MODE (x) == SImode
15116 && GET_CODE (x) == ASHIFT
15117 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15118 && GET_CODE (XEXP (x, 0)) == SUBREG
15119 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15120 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15121 && (op == EQ || op == NE
15122 || op == GEU || op == GTU || op == LTU || op == LEU)
15123 && CONST_INT_P (y))
15124 return CC_Zmode;
15125
15126 /* A construct for a conditional compare, if the false arm contains
15127 0, then both conditions must be true, otherwise either condition
15128 must be true. Not all conditions are possible, so CCmode is
15129 returned if it can't be done. */
15130 if (GET_CODE (x) == IF_THEN_ELSE
15131 && (XEXP (x, 2) == const0_rtx
15132 || XEXP (x, 2) == const1_rtx)
15133 && COMPARISON_P (XEXP (x, 0))
15134 && COMPARISON_P (XEXP (x, 1)))
15135 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15136 INTVAL (XEXP (x, 2)));
15137
15138 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15139 if (GET_CODE (x) == AND
15140 && (op == EQ || op == NE)
15141 && COMPARISON_P (XEXP (x, 0))
15142 && COMPARISON_P (XEXP (x, 1)))
15143 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15144 DOM_CC_X_AND_Y);
15145
15146 if (GET_CODE (x) == IOR
15147 && (op == EQ || op == NE)
15148 && COMPARISON_P (XEXP (x, 0))
15149 && COMPARISON_P (XEXP (x, 1)))
15150 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15151 DOM_CC_X_OR_Y);
15152
15153 /* An operation (on Thumb) where we want to test for a single bit.
15154 This is done by shifting that bit up into the top bit of a
15155 scratch register; we can then branch on the sign bit. */
15156 if (TARGET_THUMB1
15157 && GET_MODE (x) == SImode
15158 && (op == EQ || op == NE)
15159 && GET_CODE (x) == ZERO_EXTRACT
15160 && XEXP (x, 1) == const1_rtx)
15161 return CC_Nmode;
15162
15163 /* An operation that sets the condition codes as a side-effect, the
15164 V flag is not set correctly, so we can only use comparisons where
15165 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15166 instead.) */
15167 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15168 if (GET_MODE (x) == SImode
15169 && y == const0_rtx
15170 && (op == EQ || op == NE || op == LT || op == GE)
15171 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15172 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15173 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15174 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15175 || GET_CODE (x) == LSHIFTRT
15176 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15177 || GET_CODE (x) == ROTATERT
15178 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15179 return CC_NOOVmode;
15180
15181 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15182 return CC_Zmode;
15183
15184 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15185 && GET_CODE (x) == PLUS
15186 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15187 return CC_Cmode;
15188
15189 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15190 {
15191 switch (op)
15192 {
15193 case EQ:
15194 case NE:
15195 /* A DImode comparison against zero can be implemented by
15196 or'ing the two halves together. */
15197 if (y == const0_rtx)
15198 return CC_Zmode;
15199
15200 /* We can do an equality test in three Thumb instructions. */
15201 if (!TARGET_32BIT)
15202 return CC_Zmode;
15203
15204 /* FALLTHROUGH */
15205
15206 case LTU:
15207 case LEU:
15208 case GTU:
15209 case GEU:
15210 /* DImode unsigned comparisons can be implemented by cmp +
15211 cmpeq without a scratch register. Not worth doing in
15212 Thumb-2. */
15213 if (TARGET_32BIT)
15214 return CC_CZmode;
15215
15216 /* FALLTHROUGH */
15217
15218 case LT:
15219 case LE:
15220 case GT:
15221 case GE:
15222 /* DImode signed and unsigned comparisons can be implemented
15223 by cmp + sbcs with a scratch register, but that does not
15224 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15225 gcc_assert (op != EQ && op != NE);
15226 return CC_NCVmode;
15227
15228 default:
15229 gcc_unreachable ();
15230 }
15231 }
15232
15233 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15234 return GET_MODE (x);
15235
15236 return CCmode;
15237 }
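
/* A few concrete outcomes of the selection above, for illustration (the RTL
   is written loosely):
     GT on two SFmode values		-> CCFPEmode (exception-raising compare)
     NE of (ashift r0 3) against r1	-> CC_SWPmode (operands swapped on output)
     LTU of (plus r0 r1) against r1	-> CC_Cmode   (carry-out test)
     EQ of a DImode value against 0	-> CC_Zmode   (OR the halves together).  */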
15238
15239 /* X and Y are two things to compare using CODE. Emit the compare insn and
15240 return the rtx for register 0 in the proper mode. FP means this is a
15241 floating point compare: I don't think that it is needed on the arm. */
15242 rtx
15243 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15244 {
15245 machine_mode mode;
15246 rtx cc_reg;
15247 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15248
15249 /* We might have X as a constant, Y as a register because of the predicates
15250 used for cmpdi. If so, force X to a register here. */
15251 if (dimode_comparison && !REG_P (x))
15252 x = force_reg (DImode, x);
15253
15254 mode = SELECT_CC_MODE (code, x, y);
15255 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15256
15257 if (dimode_comparison
15258 && mode != CC_CZmode)
15259 {
15260 rtx clobber, set;
15261
15262 /* To compare two non-zero values for equality, XOR them and
15263 then compare against zero. Not used for ARM mode; there
15264 CC_CZmode is cheaper. */
15265 if (mode == CC_Zmode && y != const0_rtx)
15266 {
15267 gcc_assert (!reload_completed);
15268 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15269 y = const0_rtx;
15270 }
15271
15272 /* A scratch register is required. */
15273 if (reload_completed)
15274 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15275 else
15276 scratch = gen_rtx_SCRATCH (SImode);
15277
15278 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15279 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15280 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15281 }
15282 else
15283 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15284
15285 return cc_reg;
15286 }
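
/* Sketch of the DImode handling above (illustrative, not literal RTL):
   comparing a DImode value X against zero for equality selects CC_Zmode and
   emits

	(parallel [(set (reg CC_REGNUM) (compare X (const_int 0)))
		   (clobber (scratch:SI))])

   so that the matching pattern can OR the two halves into the scratch.  On
   Thumb-1, equality against a non-zero Y is first rewritten as X ^ Y by the
   expand_binop call; 32-bit targets instead select CC_CZmode and keep both
   operands in the compare.  */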
15287
15288 /* Generate a sequence of insns that will generate the correct return
15289 address mask depending on the physical architecture that the program
15290 is running on. */
15291 rtx
15292 arm_gen_return_addr_mask (void)
15293 {
15294 rtx reg = gen_reg_rtx (Pmode);
15295
15296 emit_insn (gen_return_addr_mask (reg));
15297 return reg;
15298 }
15299
15300 void
15301 arm_reload_in_hi (rtx *operands)
15302 {
15303 rtx ref = operands[1];
15304 rtx base, scratch;
15305 HOST_WIDE_INT offset = 0;
15306
15307 if (GET_CODE (ref) == SUBREG)
15308 {
15309 offset = SUBREG_BYTE (ref);
15310 ref = SUBREG_REG (ref);
15311 }
15312
15313 if (REG_P (ref))
15314 {
15315 /* We have a pseudo which has been spilt onto the stack; there
15316 are two cases here: the first where there is a simple
15317 stack-slot replacement and a second where the stack-slot is
15318 out of range, or is used as a subreg. */
15319 if (reg_equiv_mem (REGNO (ref)))
15320 {
15321 ref = reg_equiv_mem (REGNO (ref));
15322 base = find_replacement (&XEXP (ref, 0));
15323 }
15324 else
15325 /* The slot is out of range, or was dressed up in a SUBREG. */
15326 base = reg_equiv_address (REGNO (ref));
15327 }
15328 else
15329 base = find_replacement (&XEXP (ref, 0));
15330
15331 /* Handle the case where the address is too complex to be offset by 1. */
15332 if (GET_CODE (base) == MINUS
15333 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15334 {
15335 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15336
15337 emit_set_insn (base_plus, base);
15338 base = base_plus;
15339 }
15340 else if (GET_CODE (base) == PLUS)
15341 {
15342 /* The addend must be CONST_INT, or we would have dealt with it above. */
15343 HOST_WIDE_INT hi, lo;
15344
15345 offset += INTVAL (XEXP (base, 1));
15346 base = XEXP (base, 0);
15347
15348 /* Rework the address into a legal sequence of insns. */
15349 /* Valid range for lo is -4095 -> 4095 */
15350 lo = (offset >= 0
15351 ? (offset & 0xfff)
15352 : -((-offset) & 0xfff));
15353
15354 /* Corner case, if lo is the max offset then we would be out of range
15355 once we have added the additional 1 below, so bump the msb into the
15356 pre-loading insn(s). */
15357 if (lo == 4095)
15358 lo &= 0x7ff;
15359
15360 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15361 ^ (HOST_WIDE_INT) 0x80000000)
15362 - (HOST_WIDE_INT) 0x80000000);
15363
15364 gcc_assert (hi + lo == offset);
15365
15366 if (hi != 0)
15367 {
15368 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15369
15370 /* Get the base address; addsi3 knows how to handle constants
15371 that require more than one insn. */
15372 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15373 base = base_plus;
15374 offset = lo;
15375 }
15376 }
15377
15378 /* Operands[2] may overlap operands[0] (though it won't overlap
15379 operands[1]), that's why we asked for a DImode reg -- so we can
15380 use the bit that does not overlap. */
15381 if (REGNO (operands[2]) == REGNO (operands[0]))
15382 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15383 else
15384 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15385
15386 emit_insn (gen_zero_extendqisi2 (scratch,
15387 gen_rtx_MEM (QImode,
15388 plus_constant (Pmode, base,
15389 offset))));
15390 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15391 gen_rtx_MEM (QImode,
15392 plus_constant (Pmode, base,
15393 offset + 1))));
15394 if (!BYTES_BIG_ENDIAN)
15395 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15396 gen_rtx_IOR (SImode,
15397 gen_rtx_ASHIFT
15398 (SImode,
15399 gen_rtx_SUBREG (SImode, operands[0], 0),
15400 GEN_INT (8)),
15401 scratch));
15402 else
15403 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15404 gen_rtx_IOR (SImode,
15405 gen_rtx_ASHIFT (SImode, scratch,
15406 GEN_INT (8)),
15407 gen_rtx_SUBREG (SImode, operands[0], 0)));
15408 }
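
/* Worked example of the offset splitting above: reloading a halfword from
   [rb, #4100] splits 4100 into hi = 4096 and lo = 4, since lo must stay
   inside the -4095..+4095 LDRB range even after the extra +1 for the second
   byte.  The emitted sequence is then roughly (names illustrative)

	add	base_plus, rb, #4096
	ldrb	scratch, [base_plus, #4]
	ldrb	out, [base_plus, #5]
	orr	out, scratch, out, lsl #8	@ little-endian combine

   with the shift applied to the other byte on big-endian targets.  */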
15409
15410 /* Handle storing a half-word to memory during reload by synthesizing as two
15411 byte stores. Take care not to clobber the input values until after we
15412 have moved them somewhere safe. This code assumes that if the DImode
15413 scratch in operands[2] overlaps either the input value or output address
15414 in some way, then that value must die in this insn (we absolutely need
15415 two scratch registers for some corner cases). */
15416 void
15417 arm_reload_out_hi (rtx *operands)
15418 {
15419 rtx ref = operands[0];
15420 rtx outval = operands[1];
15421 rtx base, scratch;
15422 HOST_WIDE_INT offset = 0;
15423
15424 if (GET_CODE (ref) == SUBREG)
15425 {
15426 offset = SUBREG_BYTE (ref);
15427 ref = SUBREG_REG (ref);
15428 }
15429
15430 if (REG_P (ref))
15431 {
15432 /* We have a pseudo which has been spilt onto the stack; there
15433 are two cases here: the first where there is a simple
15434 stack-slot replacement and a second where the stack-slot is
15435 out of range, or is used as a subreg. */
15436 if (reg_equiv_mem (REGNO (ref)))
15437 {
15438 ref = reg_equiv_mem (REGNO (ref));
15439 base = find_replacement (&XEXP (ref, 0));
15440 }
15441 else
15442 /* The slot is out of range, or was dressed up in a SUBREG. */
15443 base = reg_equiv_address (REGNO (ref));
15444 }
15445 else
15446 base = find_replacement (&XEXP (ref, 0));
15447
15448 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15449
15450 /* Handle the case where the address is too complex to be offset by 1. */
15451 if (GET_CODE (base) == MINUS
15452 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15453 {
15454 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15455
15456 /* Be careful not to destroy OUTVAL. */
15457 if (reg_overlap_mentioned_p (base_plus, outval))
15458 {
15459 /* Updating base_plus might destroy outval, see if we can
15460 swap the scratch and base_plus. */
15461 if (!reg_overlap_mentioned_p (scratch, outval))
15462 std::swap (scratch, base_plus);
15463 else
15464 {
15465 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15466
15467 /* Be conservative and copy OUTVAL into the scratch now,
15468 this should only be necessary if outval is a subreg
15469 of something larger than a word. */
15470 /* XXX Might this clobber base? I can't see how it can,
15471 since scratch is known to overlap with OUTVAL, and
15472 must be wider than a word. */
15473 emit_insn (gen_movhi (scratch_hi, outval));
15474 outval = scratch_hi;
15475 }
15476 }
15477
15478 emit_set_insn (base_plus, base);
15479 base = base_plus;
15480 }
15481 else if (GET_CODE (base) == PLUS)
15482 {
15483 /* The addend must be CONST_INT, or we would have dealt with it above. */
15484 HOST_WIDE_INT hi, lo;
15485
15486 offset += INTVAL (XEXP (base, 1));
15487 base = XEXP (base, 0);
15488
15489 /* Rework the address into a legal sequence of insns. */
15490 /* Valid range for lo is -4095 -> 4095 */
15491 lo = (offset >= 0
15492 ? (offset & 0xfff)
15493 : -((-offset) & 0xfff));
15494
15495 /* Corner case, if lo is the max offset then we would be out of range
15496 once we have added the additional 1 below, so bump the msb into the
15497 pre-loading insn(s). */
15498 if (lo == 4095)
15499 lo &= 0x7ff;
15500
15501 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15502 ^ (HOST_WIDE_INT) 0x80000000)
15503 - (HOST_WIDE_INT) 0x80000000);
15504
15505 gcc_assert (hi + lo == offset);
15506
15507 if (hi != 0)
15508 {
15509 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15510
15511 /* Be careful not to destroy OUTVAL. */
15512 if (reg_overlap_mentioned_p (base_plus, outval))
15513 {
15514 /* Updating base_plus might destroy outval, see if we
15515 can swap the scratch and base_plus. */
15516 if (!reg_overlap_mentioned_p (scratch, outval))
15517 std::swap (scratch, base_plus);
15518 else
15519 {
15520 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15521
15522 /* Be conservative and copy outval into scratch now,
15523 this should only be necessary if outval is a
15524 subreg of something larger than a word. */
15525 /* XXX Might this clobber base? I can't see how it
15526 can, since scratch is known to overlap with
15527 outval. */
15528 emit_insn (gen_movhi (scratch_hi, outval));
15529 outval = scratch_hi;
15530 }
15531 }
15532
15533 /* Get the base address; addsi3 knows how to handle constants
15534 that require more than one insn. */
15535 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15536 base = base_plus;
15537 offset = lo;
15538 }
15539 }
15540
15541 if (BYTES_BIG_ENDIAN)
15542 {
15543 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15544 plus_constant (Pmode, base,
15545 offset + 1)),
15546 gen_lowpart (QImode, outval)));
15547 emit_insn (gen_lshrsi3 (scratch,
15548 gen_rtx_SUBREG (SImode, outval, 0),
15549 GEN_INT (8)));
15550 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15551 offset)),
15552 gen_lowpart (QImode, scratch)));
15553 }
15554 else
15555 {
15556 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15557 offset)),
15558 gen_lowpart (QImode, outval)));
15559 emit_insn (gen_lshrsi3 (scratch,
15560 gen_rtx_SUBREG (SImode, outval, 0),
15561 GEN_INT (8)));
15562 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15563 plus_constant (Pmode, base,
15564 offset + 1)),
15565 gen_lowpart (QImode, scratch)));
15566 }
15567 }
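
/* Sketch of the stores above for a little-endian target with a simple
   in-range address (register names are illustrative):

	strb	outval, [base, #offset]		@ low byte
	lsr	scratch, outval, #8
	strb	scratch, [base, #offset + 1]	@ high byte

   Big-endian targets put the unshifted low byte of OUTVAL at offset + 1 and
   the shifted byte at offset.  */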
15568
15569 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15570 (padded to the size of a word) should be passed in a register. */
15571
15572 static bool
15573 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15574 {
15575 if (TARGET_AAPCS_BASED)
15576 return must_pass_in_stack_var_size (mode, type);
15577 else
15578 return must_pass_in_stack_var_size_or_pad (mode, type);
15579 }
15580
15581
15582 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15583 Return true if an argument passed on the stack should be padded upwards,
15584 i.e. if the least-significant byte has useful data.
15585 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15586 aggregate types are placed in the lowest memory address. */
15587
15588 bool
15589 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15590 {
15591 if (!TARGET_AAPCS_BASED)
15592 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15593
15594 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15595 return false;
15596
15597 return true;
15598 }
15599
15600
15601 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15602 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15603 register has useful data, and return the opposite if the most
15604 significant byte does. */
15605
15606 bool
15607 arm_pad_reg_upward (machine_mode mode,
15608 tree type, int first ATTRIBUTE_UNUSED)
15609 {
15610 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15611 {
15612 /* For AAPCS, small aggregates, small fixed-point types,
15613 and small complex types are always padded upwards. */
15614 if (type)
15615 {
15616 if ((AGGREGATE_TYPE_P (type)
15617 || TREE_CODE (type) == COMPLEX_TYPE
15618 || FIXED_POINT_TYPE_P (type))
15619 && int_size_in_bytes (type) <= 4)
15620 return true;
15621 }
15622 else
15623 {
15624 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15625 && GET_MODE_SIZE (mode) <= 4)
15626 return true;
15627 }
15628 }
15629
15630 /* Otherwise, use default padding. */
15631 return !BYTES_BIG_ENDIAN;
15632 }
15633
15634 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15635 assuming that the address in the base register is word aligned. */
15636 bool
15637 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15638 {
15639 HOST_WIDE_INT max_offset;
15640
15641 /* Offset must be a multiple of 4 in Thumb mode. */
15642 if (TARGET_THUMB2 && ((offset & 3) != 0))
15643 return false;
15644
15645 if (TARGET_THUMB2)
15646 max_offset = 1020;
15647 else if (TARGET_ARM)
15648 max_offset = 255;
15649 else
15650 return false;
15651
15652 return ((offset <= max_offset) && (offset >= -max_offset));
15653 }
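
/* For instance, offset 256 is rejected in ARM state (limit 255) but accepted
   in Thumb-2 (a multiple of 4 within 1020), while offset 250 is fine for ARM
   yet rejected for Thumb-2 because it is not a multiple of 4.  */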
15654
15655 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15656 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15657 Assumes that the address in the base register RN is word aligned. Pattern
15658 guarantees that both memory accesses use the same base register,
15659 the offsets are constants within the range, and the gap between the offsets is 4.
15660 If reload is complete then check that the registers are legal. WBACK indicates whether
15661 address is updated. LOAD indicates whether memory access is load or store. */
15662 bool
15663 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15664 bool wback, bool load)
15665 {
15666 unsigned int t, t2, n;
15667
15668 if (!reload_completed)
15669 return true;
15670
15671 if (!offset_ok_for_ldrd_strd (offset))
15672 return false;
15673
15674 t = REGNO (rt);
15675 t2 = REGNO (rt2);
15676 n = REGNO (rn);
15677
15678 if ((TARGET_THUMB2)
15679 && ((wback && (n == t || n == t2))
15680 || (t == SP_REGNUM)
15681 || (t == PC_REGNUM)
15682 || (t2 == SP_REGNUM)
15683 || (t2 == PC_REGNUM)
15684 || (!load && (n == PC_REGNUM))
15685 || (load && (t == t2))
15686 /* Triggers Cortex-M3 LDRD errata. */
15687 || (!wback && load && fix_cm3_ldrd && (n == t))))
15688 return false;
15689
15690 if ((TARGET_ARM)
15691 && ((wback && (n == t || n == t2))
15692 || (t2 == PC_REGNUM)
15693 || (t % 2 != 0) /* First destination register is not even. */
15694 || (t2 != t + 1)
15695 /* PC can be used as base register (for offset addressing only),
15696 but it is deprecated. */
15697 || (n == PC_REGNUM)))
15698 return false;
15699
15700 return true;
15701 }
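
/* Example of the ARM-state checks above: an LDRD needs an even-numbered
   first register followed by its pair, so r4/r5 with an in-range offset is
   acceptable, whereas r5/r6 (odd first register) or r4/r6 (not consecutive)
   is rejected, as is using PC as the base register or as the second
   transfer register.  */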
15702
15703 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15704 operand MEM's address contains an immediate offset from the base
15705 register and has no side effects, in which case it sets BASE and
15706 OFFSET accordingly. */
15707 static bool
15708 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15709 {
15710 rtx addr;
15711
15712 gcc_assert (base != NULL && offset != NULL);
15713
15714 /* TODO: Handle more general memory operand patterns, such as
15715 PRE_DEC and PRE_INC. */
15716
15717 if (side_effects_p (mem))
15718 return false;
15719
15720 /* Can't deal with subregs. */
15721 if (GET_CODE (mem) == SUBREG)
15722 return false;
15723
15724 gcc_assert (MEM_P (mem));
15725
15726 *offset = const0_rtx;
15727
15728 addr = XEXP (mem, 0);
15729
15730 /* If addr isn't valid for DImode, then we can't handle it. */
15731 if (!arm_legitimate_address_p (DImode, addr,
15732 reload_in_progress || reload_completed))
15733 return false;
15734
15735 if (REG_P (addr))
15736 {
15737 *base = addr;
15738 return true;
15739 }
15740 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15741 {
15742 *base = XEXP (addr, 0);
15743 *offset = XEXP (addr, 1);
15744 return (REG_P (*base) && CONST_INT_P (*offset));
15745 }
15746
15747 return false;
15748 }
15749
15750 /* Called from a peephole2 to replace two word-size accesses with a
15751 single LDRD/STRD instruction. Returns true iff we can generate a
15752 new instruction sequence. That is, both accesses use the same base
15753 register and the gap between constant offsets is 4. This function
15754 may reorder its operands to match ldrd/strd RTL templates.
15755 OPERANDS are the operands found by the peephole matcher;
15756 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15757 corresponding memory operands. LOAD indicates whether the access
15758 is a load or a store. CONST_STORE indicates a store of constant
15759 integer values held in OPERANDS[4,5], and assumes that the pattern
15760 is four insns long, for the purpose of checking dead registers.
15761 COMMUTE indicates that register operands may be reordered. */
15762 bool
15763 gen_operands_ldrd_strd (rtx *operands, bool load,
15764 bool const_store, bool commute)
15765 {
15766 int nops = 2;
15767 HOST_WIDE_INT offsets[2], offset;
15768 rtx base = NULL_RTX;
15769 rtx cur_base, cur_offset, tmp;
15770 int i, gap;
15771 HARD_REG_SET regset;
15772
15773 gcc_assert (!const_store || !load);
15774 /* Check that the memory references are immediate offsets from the
15775 same base register. Extract the base register, the destination
15776 registers, and the corresponding memory offsets. */
15777 for (i = 0; i < nops; i++)
15778 {
15779 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15780 return false;
15781
15782 if (i == 0)
15783 base = cur_base;
15784 else if (REGNO (base) != REGNO (cur_base))
15785 return false;
15786
15787 offsets[i] = INTVAL (cur_offset);
15788 if (GET_CODE (operands[i]) == SUBREG)
15789 {
15790 tmp = SUBREG_REG (operands[i]);
15791 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15792 operands[i] = tmp;
15793 }
15794 }
15795
15796 /* Make sure there is no dependency between the individual loads. */
15797 if (load && REGNO (operands[0]) == REGNO (base))
15798 return false; /* RAW */
15799
15800 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15801 return false; /* WAW */
15802
15803 /* If the same input register is used in both stores
15804 when storing different constants, try to find a free register.
15805 For example, the code
15806 mov r0, 0
15807 str r0, [r2]
15808 mov r0, 1
15809 str r0, [r2, #4]
15810 can be transformed into
15811 mov r1, 0
15812 strd r1, r0, [r2]
15813 in Thumb mode assuming that r1 is free. */
15814 if (const_store
15815 && REGNO (operands[0]) == REGNO (operands[1])
15816 && INTVAL (operands[4]) != INTVAL (operands[5]))
15817 {
15818 if (TARGET_THUMB2)
15819 {
15820 CLEAR_HARD_REG_SET (regset);
15821 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15822 if (tmp == NULL_RTX)
15823 return false;
15824
15825 /* Use the new register in the first load to ensure that
15826 if the original input register is not dead after peephole,
15827 then it will have the correct constant value. */
15828 operands[0] = tmp;
15829 }
15830 else if (TARGET_ARM)
15831 {
15832 return false;
15833 int regno = REGNO (operands[0]);
15834 if (!peep2_reg_dead_p (4, operands[0]))
15835 {
15836 /* When the input register is even and is not dead after the
15837 pattern, it has to hold the second constant but we cannot
15838 form a legal STRD in ARM mode with this register as the second
15839 register. */
15840 if (regno % 2 == 0)
15841 return false;
15842
15843 /* Is regno-1 free? */
15844 SET_HARD_REG_SET (regset);
15845 CLEAR_HARD_REG_BIT(regset, regno - 1);
15846 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15847 if (tmp == NULL_RTX)
15848 return false;
15849
15850 operands[0] = tmp;
15851 }
15852 else
15853 {
15854 /* Find a DImode register. */
15855 CLEAR_HARD_REG_SET (regset);
15856 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15857 if (tmp != NULL_RTX)
15858 {
15859 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15860 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15861 }
15862 else
15863 {
15864 /* Can we use the input register to form a DI register? */
15865 SET_HARD_REG_SET (regset);
15866 CLEAR_HARD_REG_BIT(regset,
15867 regno % 2 == 0 ? regno + 1 : regno - 1);
15868 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15869 if (tmp == NULL_RTX)
15870 return false;
15871 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15872 }
15873 }
15874
15875 gcc_assert (operands[0] != NULL_RTX);
15876 gcc_assert (operands[1] != NULL_RTX);
15877 gcc_assert (REGNO (operands[0]) % 2 == 0);
15878 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15879 }
15880 }
15881
15882 /* Make sure the instructions are ordered with lower memory access first. */
15883 if (offsets[0] > offsets[1])
15884 {
15885 gap = offsets[0] - offsets[1];
15886 offset = offsets[1];
15887
15888 /* Swap the instructions such that lower memory is accessed first. */
15889 std::swap (operands[0], operands[1]);
15890 std::swap (operands[2], operands[3]);
15891 if (const_store)
15892 std::swap (operands[4], operands[5]);
15893 }
15894 else
15895 {
15896 gap = offsets[1] - offsets[0];
15897 offset = offsets[0];
15898 }
15899
15900 /* Make sure accesses are to consecutive memory locations. */
15901 if (gap != 4)
15902 return false;
15903
15904 /* Make sure we generate legal instructions. */
15905 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15906 false, load))
15907 return true;
15908
15909 /* In Thumb state, where registers are almost unconstrained, there
15910 is little hope of fixing it. */
15911 if (TARGET_THUMB2)
15912 return false;
15913
15914 if (load && commute)
15915 {
15916 /* Try reordering registers. */
15917 std::swap (operands[0], operands[1]);
15918 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15919 false, load))
15920 return true;
15921 }
15922
15923 if (const_store)
15924 {
15925 /* If input registers are dead after this pattern, they can be
15926 reordered or replaced by other registers that are free in the
15927 current pattern. */
15928 if (!peep2_reg_dead_p (4, operands[0])
15929 || !peep2_reg_dead_p (4, operands[1]))
15930 return false;
15931
15932 /* Try to reorder the input registers. */
15933 /* For example, the code
15934 mov r0, 0
15935 mov r1, 1
15936 str r1, [r2]
15937 str r0, [r2, #4]
15938 can be transformed into
15939 mov r1, 0
15940 mov r0, 1
15941 strd r0, [r2]
15942 */
15943 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15944 false, false))
15945 {
15946 std::swap (operands[0], operands[1]);
15947 return true;
15948 }
15949
15950 /* Try to find a free DI register. */
15951 CLEAR_HARD_REG_SET (regset);
15952 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15953 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15954 while (true)
15955 {
15956 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15957 if (tmp == NULL_RTX)
15958 return false;
15959
15960 /* DREG must be an even-numbered register in DImode.
15961 Split it into SI registers. */
15962 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15963 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15964 gcc_assert (operands[0] != NULL_RTX);
15965 gcc_assert (operands[1] != NULL_RTX);
15966 gcc_assert (REGNO (operands[0]) % 2 == 0);
15967 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15968
15969 return (operands_ok_ldrd_strd (operands[0], operands[1],
15970 base, offset,
15971 false, load));
15972 }
15973 }
15974
15975 return false;
15976 }
15977
15978
15979
15980 \f
15981 /* Print a symbolic form of X to the debug file, F. */
15982 static void
15983 arm_print_value (FILE *f, rtx x)
15984 {
15985 switch (GET_CODE (x))
15986 {
15987 case CONST_INT:
15988 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15989 return;
15990
15991 case CONST_DOUBLE:
15992 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15993 return;
15994
15995 case CONST_VECTOR:
15996 {
15997 int i;
15998
15999 fprintf (f, "<");
16000 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
16001 {
16002 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
16003 if (i < (CONST_VECTOR_NUNITS (x) - 1))
16004 fputc (',', f);
16005 }
16006 fprintf (f, ">");
16007 }
16008 return;
16009
16010 case CONST_STRING:
16011 fprintf (f, "\"%s\"", XSTR (x, 0));
16012 return;
16013
16014 case SYMBOL_REF:
16015 fprintf (f, "`%s'", XSTR (x, 0));
16016 return;
16017
16018 case LABEL_REF:
16019 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16020 return;
16021
16022 case CONST:
16023 arm_print_value (f, XEXP (x, 0));
16024 return;
16025
16026 case PLUS:
16027 arm_print_value (f, XEXP (x, 0));
16028 fprintf (f, "+");
16029 arm_print_value (f, XEXP (x, 1));
16030 return;
16031
16032 case PC:
16033 fprintf (f, "pc");
16034 return;
16035
16036 default:
16037 fprintf (f, "????");
16038 return;
16039 }
16040 }
16041 \f
16042 /* Routines for manipulation of the constant pool. */
16043
16044 /* Arm instructions cannot load a large constant directly into a
16045 register; they have to come from a pc relative load. The constant
16046 must therefore be placed in the addressable range of the pc
16047 relative load. Depending on the precise pc relative load
16048 instruction the range is somewhere between 256 bytes and 4k. This
16049 means that we often have to dump a constant inside a function, and
16050 generate code to branch around it.
16051
16052 It is important to minimize this, since the branches will slow
16053 things down and make the code larger.
16054
16055 Normally we can hide the table after an existing unconditional
16056 branch so that there is no interruption of the flow, but in the
16057 worst case the code looks like this:
16058
16059 ldr rn, L1
16060 ...
16061 b L2
16062 align
16063 L1: .long value
16064 L2:
16065 ...
16066
16067 ldr rn, L3
16068 ...
16069 b L4
16070 align
16071 L3: .long value
16072 L4:
16073 ...
16074
16075 We fix this by performing a scan after scheduling, which notices
16076 which instructions need to have their operands fetched from the
16077 constant table and builds the table.
16078
16079 The algorithm starts by building a table of all the constants that
16080 need fixing up and all the natural barriers in the function (places
16081 where a constant table can be dropped without breaking the flow).
16082 For each fixup we note how far the pc-relative replacement will be
16083 able to reach and the offset of the instruction into the function.
16084
16085 Having built the table we then group the fixes together to form
16086 tables that are as large as possible (subject to addressing
16087 constraints) and emit each table of constants after the last
16088 barrier that is within range of all the instructions in the group.
16089 If a group does not contain a barrier, then we forcibly create one
16090 by inserting a jump instruction into the flow. Once the table has
16091 been inserted, the insns are then modified to reference the
16092 relevant entry in the pool.
16093
16094 Possible enhancements to the algorithm (not implemented) are:
16095
16096 1) For some processors and object formats, there may be benefit in
16097 aligning the pools to the start of cache lines; this alignment
16098 would need to be taken into account when calculating addressability
16099 of a pool. */
16100
16101 /* These typedefs are located at the start of this file, so that
16102 they can be used in the prototypes there. This comment is to
16103 remind readers of that fact so that the following structures
16104 can be understood more easily.
16105
16106 typedef struct minipool_node Mnode;
16107 typedef struct minipool_fixup Mfix; */
16108
16109 struct minipool_node
16110 {
16111 /* Doubly linked chain of entries. */
16112 Mnode * next;
16113 Mnode * prev;
16114 /* The maximum offset into the code at which this entry can be placed. While
16115 pushing fixes for forward references, all entries are sorted in order
16116 of increasing max_address. */
16117 HOST_WIDE_INT max_address;
16118 /* Similarly for an entry inserted for a backwards ref. */
16119 HOST_WIDE_INT min_address;
16120 /* The number of fixes referencing this entry. This can become zero
16121 if we "unpush" an entry. In this case we ignore the entry when we
16122 come to emit the code. */
16123 int refcount;
16124 /* The offset from the start of the minipool. */
16125 HOST_WIDE_INT offset;
16126 /* The value in the table. */
16127 rtx value;
16128 /* The mode of value. */
16129 machine_mode mode;
16130 /* The size of the value. With iWMMXt enabled
16131 sizes > 4 also imply an alignment of 8 bytes. */
16132 int fix_size;
16133 };
16134
16135 struct minipool_fixup
16136 {
16137 Mfix * next;
16138 rtx_insn * insn;
16139 HOST_WIDE_INT address;
16140 rtx * loc;
16141 machine_mode mode;
16142 int fix_size;
16143 rtx value;
16144 Mnode * minipool;
16145 HOST_WIDE_INT forwards;
16146 HOST_WIDE_INT backwards;
16147 };
16148
16149 /* Fixes less than a word need padding out to a word boundary. */
16150 #define MINIPOOL_FIX_SIZE(mode) \
16151 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
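
/* So, for example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   are both 4, while MINIPOOL_FIX_SIZE (DImode) and MINIPOOL_FIX_SIZE (DFmode)
   are 8.  */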
16152
16153 static Mnode * minipool_vector_head;
16154 static Mnode * minipool_vector_tail;
16155 static rtx_code_label *minipool_vector_label;
16156 static int minipool_pad;
16157
16158 /* The linked list of all minipool fixes required for this function. */
16159 Mfix * minipool_fix_head;
16160 Mfix * minipool_fix_tail;
16161 /* The fix entry for the current minipool, once it has been placed. */
16162 Mfix * minipool_barrier;
16163
16164 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16165 #define JUMP_TABLES_IN_TEXT_SECTION 0
16166 #endif
16167
16168 static HOST_WIDE_INT
16169 get_jump_table_size (rtx_jump_table_data *insn)
16170 {
16171 /* ADDR_VECs only take room if read-only data goes into the text
16172 section. */
16173 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16174 {
16175 rtx body = PATTERN (insn);
16176 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16177 HOST_WIDE_INT size;
16178 HOST_WIDE_INT modesize;
16179
16180 modesize = GET_MODE_SIZE (GET_MODE (body));
16181 size = modesize * XVECLEN (body, elt);
16182 switch (modesize)
16183 {
16184 case 1:
16185 /* Round up size of TBB table to a halfword boundary. */
16186 size = (size + 1) & ~(HOST_WIDE_INT)1;
16187 break;
16188 case 2:
16189 /* No padding necessary for TBH. */
16190 break;
16191 case 4:
16192 /* Add two bytes for alignment on Thumb. */
16193 if (TARGET_THUMB)
16194 size += 2;
16195 break;
16196 default:
16197 gcc_unreachable ();
16198 }
16199 return size;
16200 }
16201
16202 return 0;
16203 }
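
/* Worked example of the sizing above: a TBB-style table of 7 QImode entries
   occupies 7 bytes and is rounded up to 8 to keep the following code
   halfword aligned, while the same 7 entries in SImode would need 28 bytes
   plus 2 bytes of alignment padding on Thumb.  */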
16204
16205 /* Return the maximum amount of padding that will be inserted before
16206 label LABEL. */
16207
16208 static HOST_WIDE_INT
16209 get_label_padding (rtx label)
16210 {
16211 HOST_WIDE_INT align, min_insn_size;
16212
16213 align = 1 << label_to_alignment (label);
16214 min_insn_size = TARGET_THUMB ? 2 : 4;
16215 return align > min_insn_size ? align - min_insn_size : 0;
16216 }
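
/* E.g. for a label aligned to 8 bytes the maximum padding is 8 - 2 = 6 bytes
   on Thumb (minimum instruction size 2) and 8 - 4 = 4 bytes on ARM.  */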
16217
16218 /* Move a minipool fix MP from its current location to before MAX_MP.
16219 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16220 constraints may need updating. */
16221 static Mnode *
16222 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16223 HOST_WIDE_INT max_address)
16224 {
16225 /* The code below assumes these are different. */
16226 gcc_assert (mp != max_mp);
16227
16228 if (max_mp == NULL)
16229 {
16230 if (max_address < mp->max_address)
16231 mp->max_address = max_address;
16232 }
16233 else
16234 {
16235 if (max_address > max_mp->max_address - mp->fix_size)
16236 mp->max_address = max_mp->max_address - mp->fix_size;
16237 else
16238 mp->max_address = max_address;
16239
16240 /* Unlink MP from its current position. Since max_mp is non-null,
16241 mp->prev must be non-null. */
16242 mp->prev->next = mp->next;
16243 if (mp->next != NULL)
16244 mp->next->prev = mp->prev;
16245 else
16246 minipool_vector_tail = mp->prev;
16247
16248 /* Re-insert it before MAX_MP. */
16249 mp->next = max_mp;
16250 mp->prev = max_mp->prev;
16251 max_mp->prev = mp;
16252
16253 if (mp->prev != NULL)
16254 mp->prev->next = mp;
16255 else
16256 minipool_vector_head = mp;
16257 }
16258
16259 /* Save the new entry. */
16260 max_mp = mp;
16261
16262 /* Scan over the preceding entries and adjust their addresses as
16263 required. */
16264 while (mp->prev != NULL
16265 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16266 {
16267 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16268 mp = mp->prev;
16269 }
16270
16271 return max_mp;
16272 }
16273
16274 /* Add a constant to the minipool for a forward reference. Returns the
16275 node added or NULL if the constant will not fit in this pool. */
16276 static Mnode *
16277 add_minipool_forward_ref (Mfix *fix)
16278 {
16279 /* If set, max_mp is the first pool_entry that has a lower
16280 constraint than the one we are trying to add. */
16281 Mnode * max_mp = NULL;
16282 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16283 Mnode * mp;
16284
16285 /* If the minipool starts before the end of FIX->INSN then this FIX
16286 cannot be placed into the current pool. Furthermore, adding the
16287 new constant pool entry may cause the pool to start FIX_SIZE bytes
16288 earlier. */
16289 if (minipool_vector_head &&
16290 (fix->address + get_attr_length (fix->insn)
16291 >= minipool_vector_head->max_address - fix->fix_size))
16292 return NULL;
16293
16294 /* Scan the pool to see if a constant with the same value has
16295 already been added. While we are doing this, also note the
16296 location where we must insert the constant if it doesn't already
16297 exist. */
16298 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16299 {
16300 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16301 && fix->mode == mp->mode
16302 && (!LABEL_P (fix->value)
16303 || (CODE_LABEL_NUMBER (fix->value)
16304 == CODE_LABEL_NUMBER (mp->value)))
16305 && rtx_equal_p (fix->value, mp->value))
16306 {
16307 /* More than one fix references this entry. */
16308 mp->refcount++;
16309 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16310 }
16311
16312 /* Note the insertion point if necessary. */
16313 if (max_mp == NULL
16314 && mp->max_address > max_address)
16315 max_mp = mp;
16316
16317 /* If we are inserting an 8-byte aligned quantity and
16318 we have not already found an insertion point, then
16319 make sure that all such 8-byte aligned quantities are
16320 placed at the start of the pool. */
16321 if (ARM_DOUBLEWORD_ALIGN
16322 && max_mp == NULL
16323 && fix->fix_size >= 8
16324 && mp->fix_size < 8)
16325 {
16326 max_mp = mp;
16327 max_address = mp->max_address;
16328 }
16329 }
16330
16331 /* The value is not currently in the minipool, so we need to create
16332 a new entry for it. If MAX_MP is NULL, the entry will be put on
16333 the end of the list since the placement is less constrained than
16334 any existing entry. Otherwise, we insert the new fix before
16335 MAX_MP and, if necessary, adjust the constraints on the other
16336 entries. */
16337 mp = XNEW (Mnode);
16338 mp->fix_size = fix->fix_size;
16339 mp->mode = fix->mode;
16340 mp->value = fix->value;
16341 mp->refcount = 1;
16342 /* Not yet required for a backwards ref. */
16343 mp->min_address = -65536;
16344
16345 if (max_mp == NULL)
16346 {
16347 mp->max_address = max_address;
16348 mp->next = NULL;
16349 mp->prev = minipool_vector_tail;
16350
16351 if (mp->prev == NULL)
16352 {
16353 minipool_vector_head = mp;
16354 minipool_vector_label = gen_label_rtx ();
16355 }
16356 else
16357 mp->prev->next = mp;
16358
16359 minipool_vector_tail = mp;
16360 }
16361 else
16362 {
16363 if (max_address > max_mp->max_address - mp->fix_size)
16364 mp->max_address = max_mp->max_address - mp->fix_size;
16365 else
16366 mp->max_address = max_address;
16367
16368 mp->next = max_mp;
16369 mp->prev = max_mp->prev;
16370 max_mp->prev = mp;
16371 if (mp->prev != NULL)
16372 mp->prev->next = mp;
16373 else
16374 minipool_vector_head = mp;
16375 }
16376
16377 /* Save the new entry. */
16378 max_mp = mp;
16379
16380 /* Scan over the preceding entries and adjust their addresses as
16381 required. */
16382 while (mp->prev != NULL
16383 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16384 {
16385 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16386 mp = mp->prev;
16387 }
16388
16389 return max_mp;
16390 }
16391
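/* MP is an existing entry in the current minipool that is also needed
   by a fix whose backward range imposes the constraint MIN_ADDRESS.
   MIN_MP, if non-NULL, is the entry after which MP must now be placed.
   Tighten MP's min_address, move MP after MIN_MP if necessary, then
   recompute the offsets and propagate the min_address constraints of
   the whole pool.  Returns MP, which becomes the new MIN_MP for the
   caller.  */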
16392 static Mnode *
16393 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16394 HOST_WIDE_INT min_address)
16395 {
16396 HOST_WIDE_INT offset;
16397
16398 /* The code below assumes these are different. */
16399 gcc_assert (mp != min_mp);
16400
16401 if (min_mp == NULL)
16402 {
16403 if (min_address > mp->min_address)
16404 mp->min_address = min_address;
16405 }
16406 else
16407 {
16408 /* We will adjust this below if it is too loose. */
16409 mp->min_address = min_address;
16410
16411 /* Unlink MP from its current position. Since min_mp is non-null,
16412 mp->next must be non-null. */
16413 mp->next->prev = mp->prev;
16414 if (mp->prev != NULL)
16415 mp->prev->next = mp->next;
16416 else
16417 minipool_vector_head = mp->next;
16418
16419 /* Reinsert it after MIN_MP. */
16420 mp->prev = min_mp;
16421 mp->next = min_mp->next;
16422 min_mp->next = mp;
16423 if (mp->next != NULL)
16424 mp->next->prev = mp;
16425 else
16426 minipool_vector_tail = mp;
16427 }
16428
16429 min_mp = mp;
16430
16431 offset = 0;
16432 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16433 {
16434 mp->offset = offset;
16435 if (mp->refcount > 0)
16436 offset += mp->fix_size;
16437
16438 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16439 mp->next->min_address = mp->min_address + mp->fix_size;
16440 }
16441
16442 return min_mp;
16443 }
16444
16445 /* Add a constant to the minipool for a backward reference. Returns the
16446 node added or NULL if the constant will not fit in this pool.
16447
16448 Note that the code for insertion for a backwards reference can be
16449 somewhat confusing because the calculated offsets for each fix do
16450 not take into account the size of the pool (which is still under
16451 construction). */
16452 static Mnode *
16453 add_minipool_backward_ref (Mfix *fix)
16454 {
16455 /* If set, min_mp is the last pool_entry that has a lower constraint
16456 than the one we are trying to add. */
16457 Mnode *min_mp = NULL;
16458 /* This can be negative, since it is only a constraint. */
16459 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16460 Mnode *mp;
16461
16462 /* If we can't reach the current pool from this insn, or if we can't
16463 insert this entry at the end of the pool without pushing other
16464 fixes out of range, then we don't try. This ensures that we
16465 can't fail later on. */
16466 if (min_address >= minipool_barrier->address
16467 || (minipool_vector_tail->min_address + fix->fix_size
16468 >= minipool_barrier->address))
16469 return NULL;
16470
16471 /* Scan the pool to see if a constant with the same value has
16472 already been added. While we are doing this, also note the
16473 location where we must insert the constant if it doesn't already
16474 exist. */
16475 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16476 {
16477 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16478 && fix->mode == mp->mode
16479 && (!LABEL_P (fix->value)
16480 || (CODE_LABEL_NUMBER (fix->value)
16481 == CODE_LABEL_NUMBER (mp->value)))
16482 && rtx_equal_p (fix->value, mp->value)
16483 /* Check that there is enough slack to move this entry to the
16484 end of the table (this is conservative). */
16485 && (mp->max_address
16486 > (minipool_barrier->address
16487 + minipool_vector_tail->offset
16488 + minipool_vector_tail->fix_size)))
16489 {
16490 mp->refcount++;
16491 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16492 }
16493
16494 if (min_mp != NULL)
16495 mp->min_address += fix->fix_size;
16496 else
16497 {
16498 /* Note the insertion point if necessary. */
16499 if (mp->min_address < min_address)
16500 {
16501 /* For now, we do not allow the insertion of nodes requiring
16502 8-byte alignment anywhere but at the start of the pool. */
16503 if (ARM_DOUBLEWORD_ALIGN
16504 && fix->fix_size >= 8 && mp->fix_size < 8)
16505 return NULL;
16506 else
16507 min_mp = mp;
16508 }
16509 else if (mp->max_address
16510 < minipool_barrier->address + mp->offset + fix->fix_size)
16511 {
16512 /* Inserting before this entry would push the fix beyond
16513 its maximum address (which can happen if we have
16514 re-located a forwards fix); force the new fix to come
16515 after it. */
16516 if (ARM_DOUBLEWORD_ALIGN
16517 && fix->fix_size >= 8 && mp->fix_size < 8)
16518 return NULL;
16519 else
16520 {
16521 min_mp = mp;
16522 min_address = mp->min_address + fix->fix_size;
16523 }
16524 }
16525 /* Do not insert a non-8-byte aligned quantity before 8-byte
16526 aligned quantities. */
16527 else if (ARM_DOUBLEWORD_ALIGN
16528 && fix->fix_size < 8
16529 && mp->fix_size >= 8)
16530 {
16531 min_mp = mp;
16532 min_address = mp->min_address + fix->fix_size;
16533 }
16534 }
16535 }
16536
16537 /* We need to create a new entry. */
16538 mp = XNEW (Mnode);
16539 mp->fix_size = fix->fix_size;
16540 mp->mode = fix->mode;
16541 mp->value = fix->value;
16542 mp->refcount = 1;
16543 mp->max_address = minipool_barrier->address + 65536;
16544
16545 mp->min_address = min_address;
16546
16547 if (min_mp == NULL)
16548 {
16549 mp->prev = NULL;
16550 mp->next = minipool_vector_head;
16551
16552 if (mp->next == NULL)
16553 {
16554 minipool_vector_tail = mp;
16555 minipool_vector_label = gen_label_rtx ();
16556 }
16557 else
16558 mp->next->prev = mp;
16559
16560 minipool_vector_head = mp;
16561 }
16562 else
16563 {
16564 mp->next = min_mp->next;
16565 mp->prev = min_mp;
16566 min_mp->next = mp;
16567
16568 if (mp->next != NULL)
16569 mp->next->prev = mp;
16570 else
16571 minipool_vector_tail = mp;
16572 }
16573
16574 /* Save the new entry. */
16575 min_mp = mp;
16576
16577 if (mp->prev)
16578 mp = mp->prev;
16579 else
16580 mp->offset = 0;
16581
16582 /* Scan over the following entries and adjust their offsets. */
16583 while (mp->next != NULL)
16584 {
16585 if (mp->next->min_address < mp->min_address + mp->fix_size)
16586 mp->next->min_address = mp->min_address + mp->fix_size;
16587
16588 if (mp->refcount)
16589 mp->next->offset = mp->offset + mp->fix_size;
16590 else
16591 mp->next->offset = mp->offset;
16592
16593 mp = mp->next;
16594 }
16595
16596 return min_mp;
16597 }
16598
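/* Assign an offset to every entry in the current minipool, relative to
   the start of the pool, and record BARRIER as the fix that terminates
   it.  Entries whose refcount has dropped to zero take up no space.  */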
16599 static void
16600 assign_minipool_offsets (Mfix *barrier)
16601 {
16602 HOST_WIDE_INT offset = 0;
16603 Mnode *mp;
16604
16605 minipool_barrier = barrier;
16606
16607 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16608 {
16609 mp->offset = offset;
16610
16611 if (mp->refcount > 0)
16612 offset += mp->fix_size;
16613 }
16614 }
16615
16616 /* Output the literal table after insn SCAN and free the pool entries.  */
16617 static void
16618 dump_minipool (rtx_insn *scan)
16619 {
16620 Mnode * mp;
16621 Mnode * nmp;
16622 int align64 = 0;
16623
16624 if (ARM_DOUBLEWORD_ALIGN)
16625 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16626 if (mp->refcount > 0 && mp->fix_size >= 8)
16627 {
16628 align64 = 1;
16629 break;
16630 }
16631
16632 if (dump_file)
16633 fprintf (dump_file,
16634 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16635 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16636
16637 scan = emit_label_after (gen_label_rtx (), scan);
16638 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16639 scan = emit_label_after (minipool_vector_label, scan);
16640
16641 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16642 {
16643 if (mp->refcount > 0)
16644 {
16645 if (dump_file)
16646 {
16647 fprintf (dump_file,
16648 ";; Offset %u, min %ld, max %ld ",
16649 (unsigned) mp->offset, (unsigned long) mp->min_address,
16650 (unsigned long) mp->max_address);
16651 arm_print_value (dump_file, mp->value);
16652 fputc ('\n', dump_file);
16653 }
16654
16655 switch (GET_MODE_SIZE (mp->mode))
16656 {
16657 #ifdef HAVE_consttable_1
16658 case 1:
16659 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16660 break;
16661
16662 #endif
16663 #ifdef HAVE_consttable_2
16664 case 2:
16665 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16666 break;
16667
16668 #endif
16669 #ifdef HAVE_consttable_4
16670 case 4:
16671 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16672 break;
16673
16674 #endif
16675 #ifdef HAVE_consttable_8
16676 case 8:
16677 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16678 break;
16679
16680 #endif
16681 #ifdef HAVE_consttable_16
16682 case 16:
16683 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16684 break;
16685
16686 #endif
16687 default:
16688 gcc_unreachable ();
16689 }
16690 }
16691
16692 nmp = mp->next;
16693 free (mp);
16694 }
16695
16696 minipool_vector_head = minipool_vector_tail = NULL;
16697 scan = emit_insn_after (gen_consttable_end (), scan);
16698 scan = emit_barrier_after (scan);
16699 }
16700
16701 /* Return the cost of forcibly inserting a barrier after INSN. */
16702 static int
16703 arm_barrier_cost (rtx_insn *insn)
16704 {
16705 /* Basing the location of the pool on the loop depth is preferable,
16706 but at the moment, the basic block information seems to be
16707 corrupted by this stage of the compilation. */
16708 int base_cost = 50;
16709 rtx_insn *next = next_nonnote_insn (insn);
16710
16711 if (next != NULL && LABEL_P (next))
16712 base_cost -= 20;
16713
16714 switch (GET_CODE (insn))
16715 {
16716 case CODE_LABEL:
16717 /* It will always be better to place the table before the label, rather
16718 than after it. */
16719 return 50;
16720
16721 case INSN:
16722 case CALL_INSN:
16723 return base_cost;
16724
16725 case JUMP_INSN:
16726 return base_cost - 10;
16727
16728 default:
16729 return base_cost + 10;
16730 }
16731 }
16732
16733 /* Find the best place in the insn stream in the range
16734 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16735 Create the barrier by inserting a jump and add a new fix entry for
16736 it. */
16737 static Mfix *
16738 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16739 {
16740 HOST_WIDE_INT count = 0;
16741 rtx_barrier *barrier;
16742 rtx_insn *from = fix->insn;
16743 /* The instruction after which we will insert the jump. */
16744 rtx_insn *selected = NULL;
16745 int selected_cost;
16746 /* The address at which the jump instruction will be placed. */
16747 HOST_WIDE_INT selected_address;
16748 Mfix * new_fix;
16749 HOST_WIDE_INT max_count = max_address - fix->address;
16750 rtx_code_label *label = gen_label_rtx ();
16751
16752 selected_cost = arm_barrier_cost (from);
16753 selected_address = fix->address;
16754
16755 while (from && count < max_count)
16756 {
16757 rtx_jump_table_data *tmp;
16758 int new_cost;
16759
16760 /* This code shouldn't have been called if there was a natural barrier
16761 within range. */
16762 gcc_assert (!BARRIER_P (from));
16763
16764 /* Count the length of this insn. This must stay in sync with the
16765 code that pushes minipool fixes. */
16766 if (LABEL_P (from))
16767 count += get_label_padding (from);
16768 else
16769 count += get_attr_length (from);
16770
16771 /* If there is a jump table, add its length. */
16772 if (tablejump_p (from, NULL, &tmp))
16773 {
16774 count += get_jump_table_size (tmp);
16775
16776 /* Jump tables aren't in a basic block, so base the cost on
16777 the dispatch insn. If we select this location, we will
16778 still put the pool after the table. */
16779 new_cost = arm_barrier_cost (from);
16780
16781 if (count < max_count
16782 && (!selected || new_cost <= selected_cost))
16783 {
16784 selected = tmp;
16785 selected_cost = new_cost;
16786 selected_address = fix->address + count;
16787 }
16788
16789 /* Continue after the dispatch table. */
16790 from = NEXT_INSN (tmp);
16791 continue;
16792 }
16793
16794 new_cost = arm_barrier_cost (from);
16795
16796 if (count < max_count
16797 && (!selected || new_cost <= selected_cost))
16798 {
16799 selected = from;
16800 selected_cost = new_cost;
16801 selected_address = fix->address + count;
16802 }
16803
16804 from = NEXT_INSN (from);
16805 }
16806
16807 /* Make sure that we found a place to insert the jump. */
16808 gcc_assert (selected);
16809
16810 /* Make sure we do not split a call and its corresponding
16811 CALL_ARG_LOCATION note. */
16812 if (CALL_P (selected))
16813 {
16814 rtx_insn *next = NEXT_INSN (selected);
16815 if (next && NOTE_P (next)
16816 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16817 selected = next;
16818 }
16819
16820 /* Create a new JUMP_INSN that branches around a barrier. */
16821 from = emit_jump_insn_after (gen_jump (label), selected);
16822 JUMP_LABEL (from) = label;
16823 barrier = emit_barrier_after (from);
16824 emit_label_after (label, barrier);
16825
16826 /* Create a minipool barrier entry for the new barrier. */
16827 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16828 new_fix->insn = barrier;
16829 new_fix->address = selected_address;
16830 new_fix->next = fix->next;
16831 fix->next = new_fix;
16832
16833 return new_fix;
16834 }
16835
16836 /* Record that there is a natural barrier in the insn stream at
16837 ADDRESS. */
16838 static void
16839 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16840 {
16841 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16842
16843 fix->insn = insn;
16844 fix->address = address;
16845
16846 fix->next = NULL;
16847 if (minipool_fix_head != NULL)
16848 minipool_fix_tail->next = fix;
16849 else
16850 minipool_fix_head = fix;
16851
16852 minipool_fix_tail = fix;
16853 }
16854
16855 /* Record INSN, which will need fixing up to load a value from the
16856 minipool. ADDRESS is the offset of the insn since the start of the
16857 function; LOC is a pointer to the part of the insn which requires
16858 fixing; VALUE is the constant that must be loaded, which is of type
16859 MODE. */
16860 static void
16861 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16862 machine_mode mode, rtx value)
16863 {
16864 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16865
16866 fix->insn = insn;
16867 fix->address = address;
16868 fix->loc = loc;
16869 fix->mode = mode;
16870 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16871 fix->value = value;
16872 fix->forwards = get_attr_pool_range (insn);
16873 fix->backwards = get_attr_neg_pool_range (insn);
16874 fix->minipool = NULL;
16875
16876 /* If an insn doesn't have a range defined for it, then it isn't
16877 expecting to be reworked by this code. Better to stop now than
16878 to generate duff assembly code. */
16879 gcc_assert (fix->forwards || fix->backwards);
16880
16881 /* If an entry requires 8-byte alignment then assume all constant pools
16882 require 4 bytes of padding. Trying to do this later on a per-pool
16883 basis is awkward because existing pool entries have to be modified. */
16884 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16885 minipool_pad = 4;
16886
16887 if (dump_file)
16888 {
16889 fprintf (dump_file,
16890 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16891 GET_MODE_NAME (mode),
16892 INSN_UID (insn), (unsigned long) address,
16893 -1 * (long)fix->backwards, (long)fix->forwards);
16894 arm_print_value (dump_file, fix->value);
16895 fprintf (dump_file, "\n");
16896 }
16897
16898 /* Add it to the chain of fixes. */
16899 fix->next = NULL;
16900
16901 if (minipool_fix_head != NULL)
16902 minipool_fix_tail->next = fix;
16903 else
16904 minipool_fix_head = fix;
16905
16906 minipool_fix_tail = fix;
16907 }
16908
16909 /* Return the maximum allowed cost of synthesizing a 64-bit constant inline,
16910 expressed as a number of insns. Returns 99 if we always want the value
16911 synthesized rather than placed in a literal pool. */
16912 int
16913 arm_max_const_double_inline_cost ()
16914 {
16915 /* Let the value get synthesized to avoid the use of literal pools. */
16916 if (arm_disable_literal_pool)
16917 return 99;
16918
16919 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16920 }
16921
16922 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16923 Returns the number of insns needed, or 99 if we don't know how to
16924 do it. */
16925 int
16926 arm_const_double_inline_cost (rtx val)
16927 {
16928 rtx lowpart, highpart;
16929 machine_mode mode;
16930
16931 mode = GET_MODE (val);
16932
16933 if (mode == VOIDmode)
16934 mode = DImode;
16935
16936 gcc_assert (GET_MODE_SIZE (mode) == 8);
16937
16938 lowpart = gen_lowpart (SImode, val);
16939 highpart = gen_highpart_mode (SImode, mode, val);
16940
16941 gcc_assert (CONST_INT_P (lowpart));
16942 gcc_assert (CONST_INT_P (highpart));
16943
16944 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16945 NULL_RTX, NULL_RTX, 0, 0)
16946 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16947 NULL_RTX, NULL_RTX, 0, 0));
16948 }
16949
16950 /* Cost of loading a SImode constant. */
16951 static inline int
16952 arm_const_inline_cost (enum rtx_code code, rtx val)
16953 {
16954 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16955 NULL_RTX, NULL_RTX, 1, 0);
16956 }
16957
16958 /* Return true if it is worthwhile to split a 64-bit constant into two
16959 32-bit operations. This is the case if optimizing for size, or
16960 if we have load delay slots, or if one 32-bit part can be done with
16961 a single data operation. */
16962 bool
16963 arm_const_double_by_parts (rtx val)
16964 {
16965 machine_mode mode = GET_MODE (val);
16966 rtx part;
16967
16968 if (optimize_size || arm_ld_sched)
16969 return true;
16970
16971 if (mode == VOIDmode)
16972 mode = DImode;
16973
16974 part = gen_highpart_mode (SImode, mode, val);
16975
16976 gcc_assert (CONST_INT_P (part));
16977
16978 if (const_ok_for_arm (INTVAL (part))
16979 || const_ok_for_arm (~INTVAL (part)))
16980 return true;
16981
16982 part = gen_lowpart (SImode, val);
16983
16984 gcc_assert (CONST_INT_P (part));
16985
16986 if (const_ok_for_arm (INTVAL (part))
16987 || const_ok_for_arm (~INTVAL (part)))
16988 return true;
16989
16990 return false;
16991 }
16992
16993 /* Return true if it is possible to inline both the high and low parts
16994 of a 64-bit constant into 32-bit data processing instructions. */
16995 bool
16996 arm_const_double_by_immediates (rtx val)
16997 {
16998 machine_mode mode = GET_MODE (val);
16999 rtx part;
17000
17001 if (mode == VOIDmode)
17002 mode = DImode;
17003
17004 part = gen_highpart_mode (SImode, mode, val);
17005
17006 gcc_assert (CONST_INT_P (part));
17007
17008 if (!const_ok_for_arm (INTVAL (part)))
17009 return false;
17010
17011 part = gen_lowpart (SImode, val);
17012
17013 gcc_assert (CONST_INT_P (part));
17014
17015 if (!const_ok_for_arm (INTVAL (part)))
17016 return false;
17017
17018 return true;
17019 }
17020
17021 /* Scan INSN and note any of its operands that need fixing.
17022 If DO_PUSHES is false we do not actually push any of the fixups
17023 needed. */
17024 static void
17025 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17026 {
17027 int opno;
17028
17029 extract_constrain_insn (insn);
17030
17031 if (recog_data.n_alternatives == 0)
17032 return;
17033
17034 /* Fill in recog_op_alt with information about the constraints of
17035 this insn. */
17036 preprocess_constraints (insn);
17037
17038 const operand_alternative *op_alt = which_op_alt ();
17039 for (opno = 0; opno < recog_data.n_operands; opno++)
17040 {
17041 /* Things we need to fix can only occur in inputs. */
17042 if (recog_data.operand_type[opno] != OP_IN)
17043 continue;
17044
17045 /* If this alternative is a memory reference, then any mention
17046 of constants in this alternative is really to fool reload
17047 into allowing us to accept one there. We need to fix them up
17048 now so that we output the right code. */
17049 if (op_alt[opno].memory_ok)
17050 {
17051 rtx op = recog_data.operand[opno];
17052
17053 if (CONSTANT_P (op))
17054 {
17055 if (do_pushes)
17056 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17057 recog_data.operand_mode[opno], op);
17058 }
17059 else if (MEM_P (op)
17060 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17061 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17062 {
17063 if (do_pushes)
17064 {
17065 rtx cop = avoid_constant_pool_reference (op);
17066
17067 /* Casting the address of something to a mode narrower
17068 than a word can cause avoid_constant_pool_reference()
17069 to return the pool reference itself. That's no good to
17070 us here. Let's just hope that we can use the
17071 constant pool value directly. */
17072 if (op == cop)
17073 cop = get_pool_constant (XEXP (op, 0));
17074
17075 push_minipool_fix (insn, address,
17076 recog_data.operand_loc[opno],
17077 recog_data.operand_mode[opno], cop);
17078 }
17079
17080 }
17081 }
17082 }
17083
17084 return;
17085 }
17086
17087 /* Rewrite a move insn into a subtract of 0 if the condition codes
17088 will be useful in the next conditional jump insn. */
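/* For example (illustrative only), in a Thumb-1 sequence such as

	movs	r1, r0
	...
	cmp	r0, #0
	bne	.L2

   the move can be rewritten as "subs r1, r0, #0", which sets the
   condition codes itself and so makes them available to the following
   conditional branch.  */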
17089
17090 static void
17091 thumb1_reorg (void)
17092 {
17093 basic_block bb;
17094
17095 FOR_EACH_BB_FN (bb, cfun)
17096 {
17097 rtx dest, src;
17098 rtx pat, op0, set = NULL;
17099 rtx_insn *prev, *insn = BB_END (bb);
17100 bool insn_clobbered = false;
17101
17102 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17103 insn = PREV_INSN (insn);
17104
17105 /* Find the last cbranchsi4_insn in basic block BB. */
17106 if (insn == BB_HEAD (bb)
17107 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17108 continue;
17109
17110 /* Get the register with which we are comparing. */
17111 pat = PATTERN (insn);
17112 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17113
17114 /* Find the first flag setting insn before INSN in basic block BB. */
17115 gcc_assert (insn != BB_HEAD (bb));
17116 for (prev = PREV_INSN (insn);
17117 (!insn_clobbered
17118 && prev != BB_HEAD (bb)
17119 && (NOTE_P (prev)
17120 || DEBUG_INSN_P (prev)
17121 || ((set = single_set (prev)) != NULL
17122 && get_attr_conds (prev) == CONDS_NOCOND)));
17123 prev = PREV_INSN (prev))
17124 {
17125 if (reg_set_p (op0, prev))
17126 insn_clobbered = true;
17127 }
17128
17129 /* Skip if op0 is clobbered by an insn other than prev. */
17130 if (insn_clobbered)
17131 continue;
17132
17133 if (!set)
17134 continue;
17135
17136 dest = SET_DEST (set);
17137 src = SET_SRC (set);
17138 if (!low_register_operand (dest, SImode)
17139 || !low_register_operand (src, SImode))
17140 continue;
17141
17142 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17143 in INSN. Both src and dest of the move insn are checked. */
17144 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17145 {
17146 dest = copy_rtx (dest);
17147 src = copy_rtx (src);
17148 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17149 PATTERN (prev) = gen_rtx_SET (dest, src);
17150 INSN_CODE (prev) = -1;
17151 /* Set test register in INSN to dest. */
17152 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17153 INSN_CODE (insn) = -1;
17154 }
17155 }
17156 }
17157
17158 /* Convert instructions to their cc-clobbering variant if possible, since
17159 that allows us to use smaller encodings. */
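/* For example (illustrative only), rewriting "add r0, r1, r2" as the
   flag-setting "adds r0, r1, r2" when the condition codes are dead
   allows a 16-bit Thumb-2 encoding instead of a 32-bit one.  */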
17160
17161 static void
17162 thumb2_reorg (void)
17163 {
17164 basic_block bb;
17165 regset_head live;
17166
17167 INIT_REG_SET (&live);
17168
17169 /* We are freeing block_for_insn in the toplev to keep compatibility
17170 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17171 compute_bb_for_insn ();
17172 df_analyze ();
17173
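  /* SKIP leaves the insn alone; CONV adds a clobber of CC so that the
     flag-setting (16-bit) form can be used; SWAP_CONV additionally
     swaps the operands of a commutative operation before converting.  */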
17174 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17175
17176 FOR_EACH_BB_FN (bb, cfun)
17177 {
17178 if ((current_tune->disparage_flag_setting_t16_encodings
17179 == tune_params::DISPARAGE_FLAGS_ALL)
17180 && optimize_bb_for_speed_p (bb))
17181 continue;
17182
17183 rtx_insn *insn;
17184 Convert_Action action = SKIP;
17185 Convert_Action action_for_partial_flag_setting
17186 = ((current_tune->disparage_flag_setting_t16_encodings
17187 != tune_params::DISPARAGE_FLAGS_NEITHER)
17188 && optimize_bb_for_speed_p (bb))
17189 ? SKIP : CONV;
17190
17191 COPY_REG_SET (&live, DF_LR_OUT (bb));
17192 df_simulate_initialize_backwards (bb, &live);
17193 FOR_BB_INSNS_REVERSE (bb, insn)
17194 {
17195 if (NONJUMP_INSN_P (insn)
17196 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17197 && GET_CODE (PATTERN (insn)) == SET)
17198 {
17199 action = SKIP;
17200 rtx pat = PATTERN (insn);
17201 rtx dst = XEXP (pat, 0);
17202 rtx src = XEXP (pat, 1);
17203 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17204
17205 if (UNARY_P (src) || BINARY_P (src))
17206 op0 = XEXP (src, 0);
17207
17208 if (BINARY_P (src))
17209 op1 = XEXP (src, 1);
17210
17211 if (low_register_operand (dst, SImode))
17212 {
17213 switch (GET_CODE (src))
17214 {
17215 case PLUS:
17216 /* Adding two registers and storing the result
17217 in the first source is already a 16-bit
17218 operation. */
17219 if (rtx_equal_p (dst, op0)
17220 && register_operand (op1, SImode))
17221 break;
17222
17223 if (low_register_operand (op0, SImode))
17224 {
17225 /* ADDS <Rd>,<Rn>,<Rm> */
17226 if (low_register_operand (op1, SImode))
17227 action = CONV;
17228 /* ADDS <Rdn>,#<imm8> */
17229 /* SUBS <Rdn>,#<imm8> */
17230 else if (rtx_equal_p (dst, op0)
17231 && CONST_INT_P (op1)
17232 && IN_RANGE (INTVAL (op1), -255, 255))
17233 action = CONV;
17234 /* ADDS <Rd>,<Rn>,#<imm3> */
17235 /* SUBS <Rd>,<Rn>,#<imm3> */
17236 else if (CONST_INT_P (op1)
17237 && IN_RANGE (INTVAL (op1), -7, 7))
17238 action = CONV;
17239 }
17240 /* ADCS <Rd>, <Rn> */
17241 else if (GET_CODE (XEXP (src, 0)) == PLUS
17242 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17243 && low_register_operand (XEXP (XEXP (src, 0), 1),
17244 SImode)
17245 && COMPARISON_P (op1)
17246 && cc_register (XEXP (op1, 0), VOIDmode)
17247 && maybe_get_arm_condition_code (op1) == ARM_CS
17248 && XEXP (op1, 1) == const0_rtx)
17249 action = CONV;
17250 break;
17251
17252 case MINUS:
17253 /* RSBS <Rd>,<Rn>,#0
17254 Not handled here: see NEG below. */
17255 /* SUBS <Rd>,<Rn>,#<imm3>
17256 SUBS <Rdn>,#<imm8>
17257 Not handled here: see PLUS above. */
17258 /* SUBS <Rd>,<Rn>,<Rm> */
17259 if (low_register_operand (op0, SImode)
17260 && low_register_operand (op1, SImode))
17261 action = CONV;
17262 break;
17263
17264 case MULT:
17265 /* MULS <Rdm>,<Rn>,<Rdm>
17266 As an exception to the rule, this is only used
17267 when optimizing for size since MULS is slow on all
17268 known implementations. We do not even want to use
17269 MULS in cold code, if optimizing for speed, so we
17270 test the global flag here. */
17271 if (!optimize_size)
17272 break;
17273 /* else fall through. */
17274 case AND:
17275 case IOR:
17276 case XOR:
17277 /* ANDS <Rdn>,<Rm> */
17278 if (rtx_equal_p (dst, op0)
17279 && low_register_operand (op1, SImode))
17280 action = action_for_partial_flag_setting;
17281 else if (rtx_equal_p (dst, op1)
17282 && low_register_operand (op0, SImode))
17283 action = action_for_partial_flag_setting == SKIP
17284 ? SKIP : SWAP_CONV;
17285 break;
17286
17287 case ASHIFTRT:
17288 case ASHIFT:
17289 case LSHIFTRT:
17290 /* ASRS <Rdn>,<Rm> */
17291 /* LSRS <Rdn>,<Rm> */
17292 /* LSLS <Rdn>,<Rm> */
17293 if (rtx_equal_p (dst, op0)
17294 && low_register_operand (op1, SImode))
17295 action = action_for_partial_flag_setting;
17296 /* ASRS <Rd>,<Rm>,#<imm5> */
17297 /* LSRS <Rd>,<Rm>,#<imm5> */
17298 /* LSLS <Rd>,<Rm>,#<imm5> */
17299 else if (low_register_operand (op0, SImode)
17300 && CONST_INT_P (op1)
17301 && IN_RANGE (INTVAL (op1), 0, 31))
17302 action = action_for_partial_flag_setting;
17303 break;
17304
17305 case ROTATERT:
17306 /* RORS <Rdn>,<Rm> */
17307 if (rtx_equal_p (dst, op0)
17308 && low_register_operand (op1, SImode))
17309 action = action_for_partial_flag_setting;
17310 break;
17311
17312 case NOT:
17313 /* MVNS <Rd>,<Rm> */
17314 if (low_register_operand (op0, SImode))
17315 action = action_for_partial_flag_setting;
17316 break;
17317
17318 case NEG:
17319 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17320 if (low_register_operand (op0, SImode))
17321 action = CONV;
17322 break;
17323
17324 case CONST_INT:
17325 /* MOVS <Rd>,#<imm8> */
17326 if (CONST_INT_P (src)
17327 && IN_RANGE (INTVAL (src), 0, 255))
17328 action = action_for_partial_flag_setting;
17329 break;
17330
17331 case REG:
17332 /* MOVS and MOV<c> with registers have different
17333 encodings, so are not relevant here. */
17334 break;
17335
17336 default:
17337 break;
17338 }
17339 }
17340
17341 if (action != SKIP)
17342 {
17343 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17344 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17345 rtvec vec;
17346
17347 if (action == SWAP_CONV)
17348 {
17349 src = copy_rtx (src);
17350 XEXP (src, 0) = op1;
17351 XEXP (src, 1) = op0;
17352 pat = gen_rtx_SET (dst, src);
17353 vec = gen_rtvec (2, pat, clobber);
17354 }
17355 else /* action == CONV */
17356 vec = gen_rtvec (2, pat, clobber);
17357
17358 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17359 INSN_CODE (insn) = -1;
17360 }
17361 }
17362
17363 if (NONDEBUG_INSN_P (insn))
17364 df_simulate_one_insn_backwards (bb, insn, &live);
17365 }
17366 }
17367
17368 CLEAR_REG_SET (&live);
17369 }
17370
17371 /* GCC puts the pool in the wrong place for ARM, since we can only
17372 load addresses a limited distance around the pc. We do some
17373 special munging to move the constant pool values to the correct
17374 point in the code. */
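/* In outline: after the Thumb-specific reorganizations, every insn is
   scanned and each constant that must come from a pool is recorded as
   a fix, along with any existing barriers.  The fixes are then
   processed in address order: as many as possible are assigned to a
   pool, the pool is placed at a natural barrier (or a branch-around
   barrier is created), offsets are assigned, each fix is rewritten to
   address its pool entry, and the pool is emitted.  This repeats until
   no fixes remain.  */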
17375 static void
17376 arm_reorg (void)
17377 {
17378 rtx_insn *insn;
17379 HOST_WIDE_INT address = 0;
17380 Mfix * fix;
17381
17382 if (TARGET_THUMB1)
17383 thumb1_reorg ();
17384 else if (TARGET_THUMB2)
17385 thumb2_reorg ();
17386
17387 /* Ensure all insns that must be split have been split at this point.
17388 Otherwise, the pool placement code below may compute incorrect
17389 insn lengths. Note that when optimizing, all insns have already
17390 been split at this point. */
17391 if (!optimize)
17392 split_all_insns_noflow ();
17393
17394 minipool_fix_head = minipool_fix_tail = NULL;
17395
17396 /* The first insn must always be a note, or the code below won't
17397 scan it properly. */
17398 insn = get_insns ();
17399 gcc_assert (NOTE_P (insn));
17400 minipool_pad = 0;
17401
17402 /* Scan all the insns and record the operands that will need fixing. */
17403 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17404 {
17405 if (BARRIER_P (insn))
17406 push_minipool_barrier (insn, address);
17407 else if (INSN_P (insn))
17408 {
17409 rtx_jump_table_data *table;
17410
17411 note_invalid_constants (insn, address, true);
17412 address += get_attr_length (insn);
17413
17414 /* If the insn is a vector jump, add the size of the table
17415 and skip the table. */
17416 if (tablejump_p (insn, NULL, &table))
17417 {
17418 address += get_jump_table_size (table);
17419 insn = table;
17420 }
17421 }
17422 else if (LABEL_P (insn))
17423 /* Add the worst-case padding due to alignment. We don't add
17424 the _current_ padding because the minipool insertions
17425 themselves might change it. */
17426 address += get_label_padding (insn);
17427 }
17428
17429 fix = minipool_fix_head;
17430
17431 /* Now scan the fixups and perform the required changes. */
17432 while (fix)
17433 {
17434 Mfix * ftmp;
17435 Mfix * fdel;
17436 Mfix * last_added_fix;
17437 Mfix * last_barrier = NULL;
17438 Mfix * this_fix;
17439
17440 /* Skip any further barriers before the next fix. */
17441 while (fix && BARRIER_P (fix->insn))
17442 fix = fix->next;
17443
17444 /* No more fixes. */
17445 if (fix == NULL)
17446 break;
17447
17448 last_added_fix = NULL;
17449
17450 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17451 {
17452 if (BARRIER_P (ftmp->insn))
17453 {
17454 if (ftmp->address >= minipool_vector_head->max_address)
17455 break;
17456
17457 last_barrier = ftmp;
17458 }
17459 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17460 break;
17461
17462 last_added_fix = ftmp; /* Keep track of the last fix added. */
17463 }
17464
17465 /* If we found a barrier, drop back to that; any fixes that we
17466 could have reached but come after the barrier will now go in
17467 the next mini-pool. */
17468 if (last_barrier != NULL)
17469 {
17470 /* Reduce the refcount for those fixes that won't go into this
17471 pool after all. */
17472 for (fdel = last_barrier->next;
17473 fdel && fdel != ftmp;
17474 fdel = fdel->next)
17475 {
17476 fdel->minipool->refcount--;
17477 fdel->minipool = NULL;
17478 }
17479
17480 ftmp = last_barrier;
17481 }
17482 else
17483 {
17484 /* ftmp is the first fix that we can't fit into this pool and
17485 there are no natural barriers that we could use. Insert a
17486 new barrier in the code somewhere between the previous
17487 fix and this one, and arrange to jump around it. */
17488 HOST_WIDE_INT max_address;
17489
17490 /* The last item on the list of fixes must be a barrier, so
17491 we can never run off the end of the list of fixes without
17492 last_barrier being set. */
17493 gcc_assert (ftmp);
17494
17495 max_address = minipool_vector_head->max_address;
17496 /* Check that there isn't another fix that is in range that
17497 we couldn't fit into this pool because the pool was
17498 already too large: we need to put the pool before such an
17499 instruction. The pool itself may come just after the
17500 fix because create_fix_barrier also allows space for a
17501 jump instruction. */
17502 if (ftmp->address < max_address)
17503 max_address = ftmp->address + 1;
17504
17505 last_barrier = create_fix_barrier (last_added_fix, max_address);
17506 }
17507
17508 assign_minipool_offsets (last_barrier);
17509
17510 while (ftmp)
17511 {
17512 if (!BARRIER_P (ftmp->insn)
17513 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17514 == NULL))
17515 break;
17516
17517 ftmp = ftmp->next;
17518 }
17519
17520 /* Scan over the fixes we have identified for this pool, fixing them
17521 up and adding the constants to the pool itself. */
17522 for (this_fix = fix; this_fix && ftmp != this_fix;
17523 this_fix = this_fix->next)
17524 if (!BARRIER_P (this_fix->insn))
17525 {
17526 rtx addr
17527 = plus_constant (Pmode,
17528 gen_rtx_LABEL_REF (VOIDmode,
17529 minipool_vector_label),
17530 this_fix->minipool->offset);
17531 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17532 }
17533
17534 dump_minipool (last_barrier->insn);
17535 fix = ftmp;
17536 }
17537
17538 /* From now on we must synthesize any constants that we can't handle
17539 directly. This can happen if the RTL gets split during final
17540 instruction generation. */
17541 cfun->machine->after_arm_reorg = 1;
17542
17543 /* Free the minipool memory. */
17544 obstack_free (&minipool_obstack, minipool_startobj);
17545 }
17546 \f
17547 /* Routines to output assembly language. */
17548
17549 /* Return string representation of passed in real value. */
17550 static const char *
17551 fp_const_from_val (REAL_VALUE_TYPE *r)
17552 {
17553 if (!fp_consts_inited)
17554 init_fp_table ();
17555
17556 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17557 return "0";
17558 }
17559
17560 /* OPERANDS[0] is the entire list of insns that constitute pop,
17561 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17562 is in the list, UPDATE is true iff the list contains explicit
17563 update of base register. */
17564 void
17565 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17566 bool update)
17567 {
17568 int i;
17569 char pattern[100];
17570 int offset;
17571 const char *conditional;
17572 int num_saves = XVECLEN (operands[0], 0);
17573 unsigned int regno;
17574 unsigned int regno_base = REGNO (operands[1]);
17575
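  /* The leading elements of the PARALLEL describe the optional base
     register update and the return; the registers to pop follow them.  */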
17576 offset = 0;
17577 offset += update ? 1 : 0;
17578 offset += return_pc ? 1 : 0;
17579
17580 /* Is the base register in the list? */
17581 for (i = offset; i < num_saves; i++)
17582 {
17583 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17584 /* If SP is in the list, then the base register must be SP. */
17585 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17586 /* If base register is in the list, there must be no explicit update. */
17587 if (regno == regno_base)
17588 gcc_assert (!update);
17589 }
17590
17591 conditional = reverse ? "%?%D0" : "%?%d0";
17592 if ((regno_base == SP_REGNUM) && TARGET_THUMB)
17593 {
17594 /* Output pop (not stmfd) because it has a shorter encoding. */
17595 gcc_assert (update);
17596 sprintf (pattern, "pop%s\t{", conditional);
17597 }
17598 else
17599 {
17600 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17601 It's just a convention; their semantics are identical. */
17602 if (regno_base == SP_REGNUM)
17603 sprintf (pattern, "ldm%sfd\t", conditional);
17604 else if (TARGET_UNIFIED_ASM)
17605 sprintf (pattern, "ldmia%s\t", conditional);
17606 else
17607 sprintf (pattern, "ldm%sia\t", conditional);
17608
17609 strcat (pattern, reg_names[regno_base]);
17610 if (update)
17611 strcat (pattern, "!, {");
17612 else
17613 strcat (pattern, ", {");
17614 }
17615
17616 /* Output the first destination register. */
17617 strcat (pattern,
17618 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17619
17620 /* Output the rest of the destination registers. */
17621 for (i = offset + 1; i < num_saves; i++)
17622 {
17623 strcat (pattern, ", ");
17624 strcat (pattern,
17625 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17626 }
17627
17628 strcat (pattern, "}");
17629
17630 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17631 strcat (pattern, "^");
17632
17633 output_asm_insn (pattern, &cond);
17634 }
17635
17636
17637 /* Output the assembly for a VFP double-precision store multiple (vpush or vstmdb). */
17638
17639 const char *
17640 vfp_output_vstmd (rtx * operands)
17641 {
17642 char pattern[100];
17643 int p;
17644 int base;
17645 int i;
17646 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17647 ? XEXP (operands[0], 0)
17648 : XEXP (XEXP (operands[0], 0), 0);
17649 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17650
17651 if (push_p)
17652 strcpy (pattern, "vpush%?.64\t{%P1");
17653 else
17654 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17655
17656 p = strlen (pattern);
17657
17658 gcc_assert (REG_P (operands[1]));
17659
17660 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17661 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17662 {
17663 p += sprintf (&pattern[p], ", d%d", base + i);
17664 }
17665 strcpy (&pattern[p], "}");
17666
17667 output_asm_insn (pattern, operands);
17668 return "";
17669 }
17670
17671
17672 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17673 number of bytes pushed. */
17674
17675 static int
17676 vfp_emit_fstmd (int base_reg, int count)
17677 {
17678 rtx par;
17679 rtx dwarf;
17680 rtx tmp, reg;
17681 int i;
17682
17683 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17684 register pairs are stored by a store multiple insn. We avoid this
17685 by pushing an extra pair. */
17686 if (count == 2 && !arm_arch6)
17687 {
17688 if (base_reg == LAST_VFP_REGNUM - 3)
17689 base_reg -= 2;
17690 count++;
17691 }
17692
17693 /* FSTMD may not store more than 16 doubleword registers at once. Split
17694 larger stores into multiple parts (up to a maximum of two, in
17695 practice). */
17696 if (count > 16)
17697 {
17698 int saved;
17699 /* NOTE: base_reg is an internal register number, so each D register
17700 counts as 2. */
17701 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17702 saved += vfp_emit_fstmd (base_reg, 16);
17703 return saved;
17704 }
17705
17706 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17707 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17708
17709 reg = gen_rtx_REG (DFmode, base_reg);
17710 base_reg += 2;
17711
17712 XVECEXP (par, 0, 0)
17713 = gen_rtx_SET (gen_frame_mem
17714 (BLKmode,
17715 gen_rtx_PRE_MODIFY (Pmode,
17716 stack_pointer_rtx,
17717 plus_constant
17718 (Pmode, stack_pointer_rtx,
17719 - (count * 8)))
17720 ),
17721 gen_rtx_UNSPEC (BLKmode,
17722 gen_rtvec (1, reg),
17723 UNSPEC_PUSH_MULT));
17724
17725 tmp = gen_rtx_SET (stack_pointer_rtx,
17726 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17727 RTX_FRAME_RELATED_P (tmp) = 1;
17728 XVECEXP (dwarf, 0, 0) = tmp;
17729
17730 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17731 RTX_FRAME_RELATED_P (tmp) = 1;
17732 XVECEXP (dwarf, 0, 1) = tmp;
17733
17734 for (i = 1; i < count; i++)
17735 {
17736 reg = gen_rtx_REG (DFmode, base_reg);
17737 base_reg += 2;
17738 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17739
17740 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17741 plus_constant (Pmode,
17742 stack_pointer_rtx,
17743 i * 8)),
17744 reg);
17745 RTX_FRAME_RELATED_P (tmp) = 1;
17746 XVECEXP (dwarf, 0, i + 1) = tmp;
17747 }
17748
17749 par = emit_insn (par);
17750 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17751 RTX_FRAME_RELATED_P (par) = 1;
17752
17753 return count * 8;
17754 }
17755
17756 /* Emit a call instruction with pattern PAT. ADDR is the address of
17757 the call target. */
17758
17759 void
17760 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17761 {
17762 rtx insn;
17763
17764 insn = emit_call_insn (pat);
17765
17766 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17767 If the call might use such an entry, add a use of the PIC register
17768 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17769 if (TARGET_VXWORKS_RTP
17770 && flag_pic
17771 && !sibcall
17772 && GET_CODE (addr) == SYMBOL_REF
17773 && (SYMBOL_REF_DECL (addr)
17774 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17775 : !SYMBOL_REF_LOCAL_P (addr)))
17776 {
17777 require_pic_register ();
17778 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17779 }
17780
17781 if (TARGET_AAPCS_BASED)
17782 {
17783 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17784 linker. We need to add an IP clobber to allow setting
17785 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17786 is not needed since it's a fixed register. */
17787 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17788 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17789 }
17790 }
17791
17792 /* Output a 'call' insn. */
17793 const char *
17794 output_call (rtx *operands)
17795 {
17796 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17797
17798 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17799 if (REGNO (operands[0]) == LR_REGNUM)
17800 {
17801 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17802 output_asm_insn ("mov%?\t%0, %|lr", operands);
17803 }
17804
17805 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17806
17807 if (TARGET_INTERWORK || arm_arch4t)
17808 output_asm_insn ("bx%?\t%0", operands);
17809 else
17810 output_asm_insn ("mov%?\t%|pc, %0", operands);
17811
17812 return "";
17813 }
17814
17815 /* Output a 'call' insn that is a reference in memory. This is
17816 disabled for ARMv5, where we prefer a blx instead, because otherwise
17817 there's a significant performance overhead. */
17818 const char *
17819 output_call_mem (rtx *operands)
17820 {
17821 gcc_assert (!arm_arch5);
17822 if (TARGET_INTERWORK)
17823 {
17824 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17825 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17826 output_asm_insn ("bx%?\t%|ip", operands);
17827 }
17828 else if (regno_use_in (LR_REGNUM, operands[0]))
17829 {
17830 /* LR is used in the memory address. We load the address in the
17831 first instruction. It's safe to use IP as the target of the
17832 load since the call will kill it anyway. */
17833 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17834 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17835 if (arm_arch4t)
17836 output_asm_insn ("bx%?\t%|ip", operands);
17837 else
17838 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17839 }
17840 else
17841 {
17842 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17843 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17844 }
17845
17846 return "";
17847 }
17848
17849
17850 /* Output a move from arm registers to arm registers of a long double
17851 OPERANDS[0] is the destination.
17852 OPERANDS[1] is the source. */
17853 const char *
17854 output_mov_long_double_arm_from_arm (rtx *operands)
17855 {
17856 /* We have to be careful here because the two might overlap. */
17857 int dest_start = REGNO (operands[0]);
17858 int src_start = REGNO (operands[1]);
17859 rtx ops[2];
17860 int i;
17861
17862 if (dest_start < src_start)
17863 {
17864 for (i = 0; i < 3; i++)
17865 {
17866 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17867 ops[1] = gen_rtx_REG (SImode, src_start + i);
17868 output_asm_insn ("mov%?\t%0, %1", ops);
17869 }
17870 }
17871 else
17872 {
17873 for (i = 2; i >= 0; i--)
17874 {
17875 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17876 ops[1] = gen_rtx_REG (SImode, src_start + i);
17877 output_asm_insn ("mov%?\t%0, %1", ops);
17878 }
17879 }
17880
17881 return "";
17882 }
17883
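/* Emit RTL to set DEST to the value SRC using, roughly, a movw/movt
   style pair: the low 16 bits are set first, then the high 16 bits are
   inserted with a ZERO_EXTRACT (for constants) or via HIGH/LO_SUM (for
   symbolic values).  */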
17884 void
17885 arm_emit_movpair (rtx dest, rtx src)
17886 {
17887 /* If the src is an immediate, simplify it. */
17888 if (CONST_INT_P (src))
17889 {
17890 HOST_WIDE_INT val = INTVAL (src);
17891 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17892 if ((val >> 16) & 0x0000ffff)
17893 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17894 GEN_INT (16)),
17895 GEN_INT ((val >> 16) & 0x0000ffff));
17896 return;
17897 }
17898 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17899 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17900 }
17901
17902 /* Output a move between double words. It must be REG<-MEM
17903 or MEM<-REG. */
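/* The strategy is to use LDRD/STRD when the target supports them and
   the address is suitable, to fall back to LDM/STM for simple register
   addresses, and otherwise to emit a pair of single-word loads or
   stores, taking care over overlapping base and destination registers
   and the Cortex-M3 LDRD erratum workaround (fix_cm3_ldrd).  */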
17904 const char *
17905 output_move_double (rtx *operands, bool emit, int *count)
17906 {
17907 enum rtx_code code0 = GET_CODE (operands[0]);
17908 enum rtx_code code1 = GET_CODE (operands[1]);
17909 rtx otherops[3];
17910 if (count)
17911 *count = 1;
17912
17913 /* The only case when this might happen is when
17914 you are looking at the length of a DImode instruction
17915 that has an invalid constant in it. */
17916 if (code0 == REG && code1 != MEM)
17917 {
17918 gcc_assert (!emit);
17919 *count = 2;
17920 return "";
17921 }
17922
17923 if (code0 == REG)
17924 {
17925 unsigned int reg0 = REGNO (operands[0]);
17926
17927 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17928
17929 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17930
17931 switch (GET_CODE (XEXP (operands[1], 0)))
17932 {
17933 case REG:
17934
17935 if (emit)
17936 {
17937 if (TARGET_LDRD
17938 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17939 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17940 else
17941 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17942 }
17943 break;
17944
17945 case PRE_INC:
17946 gcc_assert (TARGET_LDRD);
17947 if (emit)
17948 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17949 break;
17950
17951 case PRE_DEC:
17952 if (emit)
17953 {
17954 if (TARGET_LDRD)
17955 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17956 else
17957 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17958 }
17959 break;
17960
17961 case POST_INC:
17962 if (emit)
17963 {
17964 if (TARGET_LDRD)
17965 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17966 else
17967 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17968 }
17969 break;
17970
17971 case POST_DEC:
17972 gcc_assert (TARGET_LDRD);
17973 if (emit)
17974 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17975 break;
17976
17977 case PRE_MODIFY:
17978 case POST_MODIFY:
17979 /* Autoincrement addressing modes should never have overlapping
17980 base and destination registers, and overlapping index registers
17981 are already prohibited, so this doesn't need to worry about
17982 fix_cm3_ldrd. */
17983 otherops[0] = operands[0];
17984 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17985 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17986
17987 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17988 {
17989 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17990 {
17991 /* Registers overlap so split out the increment. */
17992 if (emit)
17993 {
17994 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17995 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17996 }
17997 if (count)
17998 *count = 2;
17999 }
18000 else
18001 {
18002 /* Use a single insn if we can.
18003 FIXME: IWMMXT allows offsets larger than ldrd can
18004 handle, fix these up with a pair of ldr. */
18005 if (TARGET_THUMB2
18006 || !CONST_INT_P (otherops[2])
18007 || (INTVAL (otherops[2]) > -256
18008 && INTVAL (otherops[2]) < 256))
18009 {
18010 if (emit)
18011 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
18012 }
18013 else
18014 {
18015 if (emit)
18016 {
18017 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18018 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18019 }
18020 if (count)
18021 *count = 2;
18022
18023 }
18024 }
18025 }
18026 else
18027 {
18028 /* Use a single insn if we can.
18029 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18030 fix these up with a pair of ldr. */
18031 if (TARGET_THUMB2
18032 || !CONST_INT_P (otherops[2])
18033 || (INTVAL (otherops[2]) > -256
18034 && INTVAL (otherops[2]) < 256))
18035 {
18036 if (emit)
18037 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18038 }
18039 else
18040 {
18041 if (emit)
18042 {
18043 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18044 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18045 }
18046 if (count)
18047 *count = 2;
18048 }
18049 }
18050 break;
18051
18052 case LABEL_REF:
18053 case CONST:
18054 /* We might be able to use ldrd %0, %1 here. However the range is
18055 different to ldr/adr, and it is broken on some ARMv7-M
18056 implementations. */
18057 /* Use the second register of the pair to avoid problematic
18058 overlap. */
18059 otherops[1] = operands[1];
18060 if (emit)
18061 output_asm_insn ("adr%?\t%0, %1", otherops);
18062 operands[1] = otherops[0];
18063 if (emit)
18064 {
18065 if (TARGET_LDRD)
18066 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18067 else
18068 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18069 }
18070
18071 if (count)
18072 *count = 2;
18073 break;
18074
18075 /* ??? This needs checking for thumb2. */
18076 default:
18077 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18078 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18079 {
18080 otherops[0] = operands[0];
18081 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18082 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18083
18084 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18085 {
18086 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18087 {
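	      /* Without LDRD, a doubleword load at a small fixed offset
		 from the base can still be a single insn, using the
		 decrement-before, decrement-after or increment-before
		 LDM variants (the last two in ARM state only).  */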
18088 switch ((int) INTVAL (otherops[2]))
18089 {
18090 case -8:
18091 if (emit)
18092 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18093 return "";
18094 case -4:
18095 if (TARGET_THUMB2)
18096 break;
18097 if (emit)
18098 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18099 return "";
18100 case 4:
18101 if (TARGET_THUMB2)
18102 break;
18103 if (emit)
18104 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18105 return "";
18106 }
18107 }
18108 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
18109 operands[1] = otherops[0];
18110 if (TARGET_LDRD
18111 && (REG_P (otherops[2])
18112 || TARGET_THUMB2
18113 || (CONST_INT_P (otherops[2])
18114 && INTVAL (otherops[2]) > -256
18115 && INTVAL (otherops[2]) < 256)))
18116 {
18117 if (reg_overlap_mentioned_p (operands[0],
18118 otherops[2]))
18119 {
18120 /* Swap base and index registers over to
18121 avoid a conflict. */
18122 std::swap (otherops[1], otherops[2]);
18123 }
18124 /* If both registers conflict, it will usually
18125 have been fixed by a splitter. */
18126 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18127 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18128 {
18129 if (emit)
18130 {
18131 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18132 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18133 }
18134 if (count)
18135 *count = 2;
18136 }
18137 else
18138 {
18139 otherops[0] = operands[0];
18140 if (emit)
18141 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18142 }
18143 return "";
18144 }
18145
18146 if (CONST_INT_P (otherops[2]))
18147 {
18148 if (emit)
18149 {
18150 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18151 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18152 else
18153 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18154 }
18155 }
18156 else
18157 {
18158 if (emit)
18159 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18160 }
18161 }
18162 else
18163 {
18164 if (emit)
18165 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18166 }
18167
18168 if (count)
18169 *count = 2;
18170
18171 if (TARGET_LDRD)
18172 return "ldr%(d%)\t%0, [%1]";
18173
18174 return "ldm%(ia%)\t%1, %M0";
18175 }
18176 else
18177 {
18178 otherops[1] = adjust_address (operands[1], SImode, 4);
18179 /* Take care of overlapping base/data reg. */
18180 if (reg_mentioned_p (operands[0], operands[1]))
18181 {
18182 if (emit)
18183 {
18184 output_asm_insn ("ldr%?\t%0, %1", otherops);
18185 output_asm_insn ("ldr%?\t%0, %1", operands);
18186 }
18187 if (count)
18188 *count = 2;
18189
18190 }
18191 else
18192 {
18193 if (emit)
18194 {
18195 output_asm_insn ("ldr%?\t%0, %1", operands);
18196 output_asm_insn ("ldr%?\t%0, %1", otherops);
18197 }
18198 if (count)
18199 *count = 2;
18200 }
18201 }
18202 }
18203 }
18204 else
18205 {
18206 /* Constraints should ensure this. */
18207 gcc_assert (code0 == MEM && code1 == REG);
18208 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18209 || (TARGET_ARM && TARGET_LDRD));
18210
18211 switch (GET_CODE (XEXP (operands[0], 0)))
18212 {
18213 case REG:
18214 if (emit)
18215 {
18216 if (TARGET_LDRD)
18217 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18218 else
18219 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18220 }
18221 break;
18222
18223 case PRE_INC:
18224 gcc_assert (TARGET_LDRD);
18225 if (emit)
18226 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18227 break;
18228
18229 case PRE_DEC:
18230 if (emit)
18231 {
18232 if (TARGET_LDRD)
18233 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18234 else
18235 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18236 }
18237 break;
18238
18239 case POST_INC:
18240 if (emit)
18241 {
18242 if (TARGET_LDRD)
18243 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18244 else
18245 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18246 }
18247 break;
18248
18249 case POST_DEC:
18250 gcc_assert (TARGET_LDRD);
18251 if (emit)
18252 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18253 break;
18254
18255 case PRE_MODIFY:
18256 case POST_MODIFY:
18257 otherops[0] = operands[1];
18258 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18259 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18260
18261 /* IWMMXT allows offsets larger than ldrd can handle,
18262 fix these up with a pair of ldr. */
18263 if (!TARGET_THUMB2
18264 && CONST_INT_P (otherops[2])
18265 && (INTVAL(otherops[2]) <= -256
18266 || INTVAL(otherops[2]) >= 256))
18267 {
18268 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18269 {
18270 if (emit)
18271 {
18272 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18273 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18274 }
18275 if (count)
18276 *count = 2;
18277 }
18278 else
18279 {
18280 if (emit)
18281 {
18282 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18283 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18284 }
18285 if (count)
18286 *count = 2;
18287 }
18288 }
18289 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18290 {
18291 if (emit)
18292 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18293 }
18294 else
18295 {
18296 if (emit)
18297 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18298 }
18299 break;
18300
18301 case PLUS:
18302 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18303 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18304 {
18305 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18306 {
18307 case -8:
18308 if (emit)
18309 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18310 return "";
18311
18312 case -4:
18313 if (TARGET_THUMB2)
18314 break;
18315 if (emit)
18316 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18317 return "";
18318
18319 case 4:
18320 if (TARGET_THUMB2)
18321 break;
18322 if (emit)
18323 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18324 return "";
18325 }
18326 }
18327 if (TARGET_LDRD
18328 && (REG_P (otherops[2])
18329 || TARGET_THUMB2
18330 || (CONST_INT_P (otherops[2])
18331 && INTVAL (otherops[2]) > -256
18332 && INTVAL (otherops[2]) < 256)))
18333 {
18334 otherops[0] = operands[1];
18335 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18336 if (emit)
18337 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18338 return "";
18339 }
18340 /* Fall through */
18341
18342 default:
18343 otherops[0] = adjust_address (operands[0], SImode, 4);
18344 otherops[1] = operands[1];
18345 if (emit)
18346 {
18347 output_asm_insn ("str%?\t%1, %0", operands);
18348 output_asm_insn ("str%?\t%H1, %0", otherops);
18349 }
18350 if (count)
18351 *count = 2;
18352 }
18353 }
18354
18355 return "";
18356 }
18357
18358 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18359 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
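/* For example (a sketch; exact registers and labels depend on the operands),
   a quad-word load from a constant-pool address comes out as an adr followed
   by an ldmia, roughly "adr r0, .Lpool" then "ldmia r0, {r0-r3}".  */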
18360
18361 const char *
18362 output_move_quad (rtx *operands)
18363 {
18364 if (REG_P (operands[0]))
18365 {
18366 /* Load, or reg->reg move. */
18367
18368 if (MEM_P (operands[1]))
18369 {
18370 switch (GET_CODE (XEXP (operands[1], 0)))
18371 {
18372 case REG:
18373 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18374 break;
18375
18376 case LABEL_REF:
18377 case CONST:
18378 output_asm_insn ("adr%?\t%0, %1", operands);
18379 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18380 break;
18381
18382 default:
18383 gcc_unreachable ();
18384 }
18385 }
18386 else
18387 {
18388 rtx ops[2];
18389 int dest, src, i;
18390
18391 gcc_assert (REG_P (operands[1]));
18392
18393 dest = REGNO (operands[0]);
18394 src = REGNO (operands[1]);
18395
18396 /* This seems pretty dumb, but hopefully GCC won't try to do it
18397 very often. */
18398 if (dest < src)
18399 for (i = 0; i < 4; i++)
18400 {
18401 ops[0] = gen_rtx_REG (SImode, dest + i);
18402 ops[1] = gen_rtx_REG (SImode, src + i);
18403 output_asm_insn ("mov%?\t%0, %1", ops);
18404 }
18405 else
18406 for (i = 3; i >= 0; i--)
18407 {
18408 ops[0] = gen_rtx_REG (SImode, dest + i);
18409 ops[1] = gen_rtx_REG (SImode, src + i);
18410 output_asm_insn ("mov%?\t%0, %1", ops);
18411 }
18412 }
18413 }
18414 else
18415 {
18416 gcc_assert (MEM_P (operands[0]));
18417 gcc_assert (REG_P (operands[1]));
18418 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18419
18420 switch (GET_CODE (XEXP (operands[0], 0)))
18421 {
18422 case REG:
18423 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18424 break;
18425
18426 default:
18427 gcc_unreachable ();
18428 }
18429 }
18430
18431 return "";
18432 }
18433
18434 /* Output a VFP load or store instruction. */
18435
18436 const char *
18437 output_move_vfp (rtx *operands)
18438 {
18439 rtx reg, mem, addr, ops[2];
18440 int load = REG_P (operands[0]);
18441 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18442 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18443 const char *templ;
18444 char buff[50];
18445 machine_mode mode;
18446
18447 reg = operands[!load];
18448 mem = operands[load];
18449
18450 mode = GET_MODE (reg);
18451
18452 gcc_assert (REG_P (reg));
18453 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18454 gcc_assert (mode == SFmode
18455 || mode == DFmode
18456 || mode == SImode
18457 || mode == DImode
18458 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18459 gcc_assert (MEM_P (mem));
18460
18461 addr = XEXP (mem, 0);
18462
18463 switch (GET_CODE (addr))
18464 {
18465 case PRE_DEC:
18466 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18467 ops[0] = XEXP (addr, 0);
18468 ops[1] = reg;
18469 break;
18470
18471 case POST_INC:
18472 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18473 ops[0] = XEXP (addr, 0);
18474 ops[1] = reg;
18475 break;
18476
18477 default:
18478 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18479 ops[0] = reg;
18480 ops[1] = mem;
18481 break;
18482 }
18483
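  /* For example, for a double-precision load with a plain address the
     template below expands to "vldr%?.64\t%P0, %1", and for a
     single-precision store with post-increment it expands to
     "vstmia%?.32\t%0!, {%1}".  */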
18484 sprintf (buff, templ,
18485 load ? "ld" : "st",
18486 dp ? "64" : "32",
18487 dp ? "P" : "",
18488 integer_p ? "\t%@ int" : "");
18489 output_asm_insn (buff, ops);
18490
18491 return "";
18492 }
18493
18494 /* Output a Neon double-word or quad-word load or store, or a load
18495 or store for larger structure modes.
18496
18497 WARNING: The ordering of elements is weird in big-endian mode,
18498 because the EABI requires that vectors stored in memory appear
18499 as though they were stored by a VSTM instruction.
18500 GCC RTL defines element ordering based on in-memory order.
18501 This can be different from the architectural ordering of elements
18502 within a NEON register. The intrinsics defined in arm_neon.h use the
18503 NEON register element ordering, not the GCC RTL element ordering.
18504
18505 For example, the in-memory ordering of a big-endian quadword
18506 vector with 16-bit elements when stored from register pair {d0,d1}
18507 will be (lowest address first, d0[N] is NEON register element N):
18508
18509 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18510
18511 When necessary, quadword registers (dN, dN+1) are moved to ARM
18512 registers starting at rN, in the order:
18513
18514 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18515
18516 This ensures that STM/LDM can be used on vectors in ARM registers,
18517 and that the same memory layout results as if VSTM/VLDM were used.
18518
18519 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18520 possible, which allows use of appropriate alignment tags.
18521 Note that the choice of "64" is independent of the actual vector
18522 element size; this size simply ensures that the behavior is
18523 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18524
18525 Due to limitations of those instructions, use of VST1.64/VLD1.64
18526 is not possible if:
18527 - the address contains PRE_DEC, or
18528 - the mode refers to more than 4 double-word registers
18529
18530 In those cases, it would be possible to replace VSTM/VLDM by a
18531 sequence of instructions; this is not currently implemented since
18532 this is not certain to actually improve performance. */
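/* For example (a sketch; the exact output depends on the operands and on
   neon_vector_mem_operand), a post-incremented store of a quad-word vector
   held in {d0, d1} with base r0 uses the vst1 form and comes out roughly as
   "vst1.64 {d0, d1}, [r0]!", whereas a six-double-word structure mode such
   as CImode falls back to "vstmia r0!, {d0-d5}".  */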
18533
18534 const char *
18535 output_move_neon (rtx *operands)
18536 {
18537 rtx reg, mem, addr, ops[2];
18538 int regno, nregs, load = REG_P (operands[0]);
18539 const char *templ;
18540 char buff[50];
18541 machine_mode mode;
18542
18543 reg = operands[!load];
18544 mem = operands[load];
18545
18546 mode = GET_MODE (reg);
18547
18548 gcc_assert (REG_P (reg));
18549 regno = REGNO (reg);
18550 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18551 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18552 || NEON_REGNO_OK_FOR_QUAD (regno));
18553 gcc_assert (VALID_NEON_DREG_MODE (mode)
18554 || VALID_NEON_QREG_MODE (mode)
18555 || VALID_NEON_STRUCT_MODE (mode));
18556 gcc_assert (MEM_P (mem));
18557
18558 addr = XEXP (mem, 0);
18559
18560 /* Strip off const from addresses like (const (plus (...))). */
18561 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18562 addr = XEXP (addr, 0);
18563
18564 switch (GET_CODE (addr))
18565 {
18566 case POST_INC:
18567 /* We have to use vldm / vstm for too-large modes. */
18568 if (nregs > 4)
18569 {
18570 templ = "v%smia%%?\t%%0!, %%h1";
18571 ops[0] = XEXP (addr, 0);
18572 }
18573 else
18574 {
18575 templ = "v%s1.64\t%%h1, %%A0";
18576 ops[0] = mem;
18577 }
18578 ops[1] = reg;
18579 break;
18580
18581 case PRE_DEC:
18582 /* We have to use vldm / vstm in this case, since there is no
18583 pre-decrement form of the vld1 / vst1 instructions. */
18584 templ = "v%smdb%%?\t%%0!, %%h1";
18585 ops[0] = XEXP (addr, 0);
18586 ops[1] = reg;
18587 break;
18588
18589 case POST_MODIFY:
18590 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18591 gcc_unreachable ();
18592
18593 case REG:
18594 /* Multi-register modes: use vld1 / vst1 where possible, and vldm / vstm for modes too large for them. */
18595 if (nregs > 1)
18596 {
18597 if (nregs > 4)
18598 templ = "v%smia%%?\t%%m0, %%h1";
18599 else
18600 templ = "v%s1.64\t%%h1, %%A0";
18601
18602 ops[0] = mem;
18603 ops[1] = reg;
18604 break;
18605 }
18606 /* Fall through. */
18607 case LABEL_REF:
18608 case PLUS:
18609 {
18610 int i;
18611 int overlap = -1;
18612 for (i = 0; i < nregs; i++)
18613 {
18614 /* We're only using DImode here because it's a convenient size. */
18615 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18616 ops[1] = adjust_address (mem, DImode, 8 * i);
18617 if (reg_overlap_mentioned_p (ops[0], mem))
18618 {
18619 gcc_assert (overlap == -1);
18620 overlap = i;
18621 }
18622 else
18623 {
18624 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18625 output_asm_insn (buff, ops);
18626 }
18627 }
18628 if (overlap != -1)
18629 {
18630 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18631 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18632 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18633 output_asm_insn (buff, ops);
18634 }
18635
18636 return "";
18637 }
18638
18639 default:
18640 gcc_unreachable ();
18641 }
18642
18643 sprintf (buff, templ, load ? "ld" : "st");
18644 output_asm_insn (buff, ops);
18645
18646 return "";
18647 }
18648
18649 /* Compute and return the length of neon_mov<mode>, where <mode> is
18650 one of VSTRUCT modes: EI, OI, CI or XI. */
18651 int
18652 arm_attr_length_move_neon (rtx_insn *insn)
18653 {
18654 rtx reg, mem, addr;
18655 int load;
18656 machine_mode mode;
18657
18658 extract_insn_cached (insn);
18659
18660 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18661 {
18662 mode = GET_MODE (recog_data.operand[0]);
18663 switch (mode)
18664 {
18665 case EImode:
18666 case OImode:
18667 return 8;
18668 case CImode:
18669 return 12;
18670 case XImode:
18671 return 16;
18672 default:
18673 gcc_unreachable ();
18674 }
18675 }
18676
18677 load = REG_P (recog_data.operand[0]);
18678 reg = recog_data.operand[!load];
18679 mem = recog_data.operand[load];
18680
18681 gcc_assert (MEM_P (mem));
18682
18683 mode = GET_MODE (reg);
18684 addr = XEXP (mem, 0);
18685
18686 /* Strip off const from addresses like (const (plus (...))). */
18687 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18688 addr = XEXP (addr, 0);
18689
18690 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18691 {
18692 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18693 return insns * 4;
18694 }
18695 else
18696 return 4;
18697 }
18698
18699 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18700 return zero. */
18701
18702 int
18703 arm_address_offset_is_imm (rtx_insn *insn)
18704 {
18705 rtx mem, addr;
18706
18707 extract_insn_cached (insn);
18708
18709 if (REG_P (recog_data.operand[0]))
18710 return 0;
18711
18712 mem = recog_data.operand[0];
18713
18714 gcc_assert (MEM_P (mem));
18715
18716 addr = XEXP (mem, 0);
18717
18718 if (REG_P (addr)
18719 || (GET_CODE (addr) == PLUS
18720 && REG_P (XEXP (addr, 0))
18721 && CONST_INT_P (XEXP (addr, 1))))
18722 return 1;
18723 else
18724 return 0;
18725 }
18726
18727 /* Output an ADD r, s, #n where n may be too big for one instruction.
18728 If adding zero and the destination is the same as the source, output nothing. */
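/* For example, with destination r0, source r1 and the constant 257 this
   emits "add r0, r1, #1" followed by "add r0, r0, #256"; see
   output_multi_immediate below for how the value is split into
   shifter-operand sized chunks.  */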
18729 const char *
18730 output_add_immediate (rtx *operands)
18731 {
18732 HOST_WIDE_INT n = INTVAL (operands[2]);
18733
18734 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18735 {
18736 if (n < 0)
18737 output_multi_immediate (operands,
18738 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18739 -n);
18740 else
18741 output_multi_immediate (operands,
18742 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18743 n);
18744 }
18745
18746 return "";
18747 }
18748
18749 /* Output a multiple immediate operation.
18750 OPERANDS is the vector of operands referred to in the output patterns.
18751 INSTR1 is the output pattern to use for the first constant.
18752 INSTR2 is the output pattern to use for subsequent constants.
18753 IMMED_OP is the index of the constant slot in OPERANDS.
18754 N is the constant value. */
18755 static const char *
18756 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18757 int immed_op, HOST_WIDE_INT n)
18758 {
18759 #if HOST_BITS_PER_WIDE_INT > 32
18760 n &= 0xffffffff;
18761 #endif
18762
18763 if (n == 0)
18764 {
18765 /* Quick and easy output. */
18766 operands[immed_op] = const0_rtx;
18767 output_asm_insn (instr1, operands);
18768 }
18769 else
18770 {
18771 int i;
18772 const char * instr = instr1;
18773
18774 /* Note that n is never zero here (which would give no output). */
18775 for (i = 0; i < 32; i += 2)
18776 {
18777 if (n & (3 << i))
18778 {
18779 operands[immed_op] = GEN_INT (n & (255 << i));
18780 output_asm_insn (instr, operands);
18781 instr = instr2;
18782 i += 6;
18783 }
18784 }
18785 }
18786
18787 return "";
18788 }
18789
18790 /* Return the name of a shifter operation. */
18791 static const char *
18792 arm_shift_nmem(enum rtx_code code)
18793 {
18794 switch (code)
18795 {
18796 case ASHIFT:
18797 return ARM_LSL_NAME;
18798
18799 case ASHIFTRT:
18800 return "asr";
18801
18802 case LSHIFTRT:
18803 return "lsr";
18804
18805 case ROTATERT:
18806 return "ror";
18807
18808 default:
18809 abort();
18810 }
18811 }
18812
18813 /* Return the appropriate ARM instruction for the operation code.
18814 The returned result should not be overwritten. OP is the rtx of the
18815 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18816 was shifted. */
18817 const char *
18818 arithmetic_instr (rtx op, int shift_first_arg)
18819 {
18820 switch (GET_CODE (op))
18821 {
18822 case PLUS:
18823 return "add";
18824
18825 case MINUS:
18826 return shift_first_arg ? "rsb" : "sub";
18827
18828 case IOR:
18829 return "orr";
18830
18831 case XOR:
18832 return "eor";
18833
18834 case AND:
18835 return "and";
18836
18837 case ASHIFT:
18838 case ASHIFTRT:
18839 case LSHIFTRT:
18840 case ROTATERT:
18841 return arm_shift_nmem(GET_CODE(op));
18842
18843 default:
18844 gcc_unreachable ();
18845 }
18846 }
18847
18848 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18849 for the operation code. The returned result should not be overwritten.
18850 OP is the rtx of the shift.
18851 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18852 constant shift amount if the shift is by a constant. */
18853 static const char *
18854 shift_op (rtx op, HOST_WIDE_INT *amountp)
18855 {
18856 const char * mnem;
18857 enum rtx_code code = GET_CODE (op);
18858
18859 switch (code)
18860 {
18861 case ROTATE:
18862 if (!CONST_INT_P (XEXP (op, 1)))
18863 {
18864 output_operand_lossage ("invalid shift operand");
18865 return NULL;
18866 }
18867
18868 code = ROTATERT;
18869 *amountp = 32 - INTVAL (XEXP (op, 1));
18870 mnem = "ror";
18871 break;
18872
18873 case ASHIFT:
18874 case ASHIFTRT:
18875 case LSHIFTRT:
18876 case ROTATERT:
18877 mnem = arm_shift_nmem(code);
18878 if (CONST_INT_P (XEXP (op, 1)))
18879 {
18880 *amountp = INTVAL (XEXP (op, 1));
18881 }
18882 else if (REG_P (XEXP (op, 1)))
18883 {
18884 *amountp = -1;
18885 return mnem;
18886 }
18887 else
18888 {
18889 output_operand_lossage ("invalid shift operand");
18890 return NULL;
18891 }
18892 break;
18893
18894 case MULT:
18895 /* We never have to worry about the amount being other than a
18896 power of 2, since this case can never be reloaded from a reg. */
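      /* For example, (mult x 8) is emitted as an LSL by 3.  */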
18897 if (!CONST_INT_P (XEXP (op, 1)))
18898 {
18899 output_operand_lossage ("invalid shift operand");
18900 return NULL;
18901 }
18902
18903 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18904
18905 /* Amount must be a power of two. */
18906 if (*amountp & (*amountp - 1))
18907 {
18908 output_operand_lossage ("invalid shift operand");
18909 return NULL;
18910 }
18911
18912 *amountp = int_log2 (*amountp);
18913 return ARM_LSL_NAME;
18914
18915 default:
18916 output_operand_lossage ("invalid shift operand");
18917 return NULL;
18918 }
18919
18920 /* This is not 100% correct, but follows from the desire to merge
18921 multiplication by a power of 2 with the recognizer for a
18922 shift. >=32 is not a valid shift for "lsl", so we must try and
18923 output a shift that produces the correct arithmetical result.
18924 Using lsr #32 is identical except for the fact that the carry bit
18925 is not set correctly if we set the flags; but we never use the
18926 carry bit from such an operation, so we can ignore that. */
18927 if (code == ROTATERT)
18928 /* Rotate is just modulo 32. */
18929 *amountp &= 31;
18930 else if (*amountp != (*amountp & 31))
18931 {
18932 if (code == ASHIFT)
18933 mnem = "lsr";
18934 *amountp = 32;
18935 }
18936
18937 /* Shifts of 0 are no-ops. */
18938 if (*amountp == 0)
18939 return NULL;
18940
18941 return mnem;
18942 }
18943
18944 /* Obtain the shift from the POWER of two. */
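/* For example, int_log2 (8) returns 3.  */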
18945
18946 static HOST_WIDE_INT
18947 int_log2 (HOST_WIDE_INT power)
18948 {
18949 HOST_WIDE_INT shift = 0;
18950
18951 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18952 {
18953 gcc_assert (shift <= 31);
18954 shift++;
18955 }
18956
18957 return shift;
18958 }
18959
18960 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18961 because /bin/as is horribly restrictive. The judgement about
18962 whether or not each character is 'printable' (and can be output as
18963 is) or not (and must be printed with an octal escape) must be made
18964 with reference to the *host* character set -- the situation is
18965 similar to that discussed in the comments above pp_c_char in
18966 c-pretty-print.c. */
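/* For example, the input bytes 'a', '"', 'b', '\n' are emitted as
	.ascii	"a\"b\012"
   and a new .ascii directive is started once MAX_ASCII_LEN characters
   have been written.  */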
18967
18968 #define MAX_ASCII_LEN 51
18969
18970 void
18971 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18972 {
18973 int i;
18974 int len_so_far = 0;
18975
18976 fputs ("\t.ascii\t\"", stream);
18977
18978 for (i = 0; i < len; i++)
18979 {
18980 int c = p[i];
18981
18982 if (len_so_far >= MAX_ASCII_LEN)
18983 {
18984 fputs ("\"\n\t.ascii\t\"", stream);
18985 len_so_far = 0;
18986 }
18987
18988 if (ISPRINT (c))
18989 {
18990 if (c == '\\' || c == '\"')
18991 {
18992 putc ('\\', stream);
18993 len_so_far++;
18994 }
18995 putc (c, stream);
18996 len_so_far++;
18997 }
18998 else
18999 {
19000 fprintf (stream, "\\%03o", c);
19001 len_so_far += 4;
19002 }
19003 }
19004
19005 fputs ("\"\n", stream);
19006 }
19007 \f
19008 /* Whether a register is callee saved or not. This is necessary because,
19009 when optimizing for size on Thumb-1 targets, high registers are marked
19010 as caller saved despite really being callee saved, to avoid using them. */
19011 #define callee_saved_reg_p(reg) \
19012 (!call_used_regs[reg] \
19013 || (TARGET_THUMB1 && optimize_size \
19014 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19015
19016 /* Compute the register save mask for registers 0 through 12
19017 inclusive. This code is used by arm_compute_save_reg_mask. */
19018
19019 static unsigned long
19020 arm_compute_save_reg0_reg12_mask (void)
19021 {
19022 unsigned long func_type = arm_current_func_type ();
19023 unsigned long save_reg_mask = 0;
19024 unsigned int reg;
19025
19026 if (IS_INTERRUPT (func_type))
19027 {
19028 unsigned int max_reg;
19029 /* Interrupt functions must not corrupt any registers,
19030 even call clobbered ones. If this is a leaf function
19031 we can just examine the registers used by the RTL, but
19032 otherwise we have to assume that whatever function is
19033 called might clobber anything, and so we have to save
19034 all the call-clobbered registers as well. */
19035 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19036 /* FIQ handlers have registers r8 - r12 banked, so
19037 we only need to check r0 - r7. Normal ISRs only
19038 bank r14 and r15, so we must check up to r12.
19039 r13 is the stack pointer which is always preserved,
19040 so we do not need to consider it here. */
19041 max_reg = 7;
19042 else
19043 max_reg = 12;
19044
19045 for (reg = 0; reg <= max_reg; reg++)
19046 if (df_regs_ever_live_p (reg)
19047 || (! crtl->is_leaf && call_used_regs[reg]))
19048 save_reg_mask |= (1 << reg);
19049
19050 /* Also save the pic base register if necessary. */
19051 if (flag_pic
19052 && !TARGET_SINGLE_PIC_BASE
19053 && arm_pic_register != INVALID_REGNUM
19054 && crtl->uses_pic_offset_table)
19055 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19056 }
19057 else if (IS_VOLATILE(func_type))
19058 {
19059 /* For noreturn functions we historically omitted register saves
19060 altogether. However this really messes up debugging. As a
19061 compromise save just the frame pointers. Combined with the link
19062 register saved elsewhere this should be sufficient to get
19063 a backtrace. */
19064 if (frame_pointer_needed)
19065 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19066 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19067 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19068 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19069 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19070 }
19071 else
19072 {
19073 /* In the normal case we only need to save those registers
19074 which are call saved and which are used by this function. */
19075 for (reg = 0; reg <= 11; reg++)
19076 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19077 save_reg_mask |= (1 << reg);
19078
19079 /* Handle the frame pointer as a special case. */
19080 if (frame_pointer_needed)
19081 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19082
19083 /* If we aren't loading the PIC register,
19084 don't stack it even though it may be live. */
19085 if (flag_pic
19086 && !TARGET_SINGLE_PIC_BASE
19087 && arm_pic_register != INVALID_REGNUM
19088 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19089 || crtl->uses_pic_offset_table))
19090 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19091
19092 /* The prologue will copy SP into R0, so save it. */
19093 if (IS_STACKALIGN (func_type))
19094 save_reg_mask |= 1;
19095 }
19096
19097 /* Save registers so the exception handler can modify them. */
19098 if (crtl->calls_eh_return)
19099 {
19100 unsigned int i;
19101
19102 for (i = 0; ; i++)
19103 {
19104 reg = EH_RETURN_DATA_REGNO (i);
19105 if (reg == INVALID_REGNUM)
19106 break;
19107 save_reg_mask |= 1 << reg;
19108 }
19109 }
19110
19111 return save_reg_mask;
19112 }
19113
19114 /* Return true if r3 is live at the start of the function. */
19115
19116 static bool
19117 arm_r3_live_at_start_p (void)
19118 {
19119 /* Just look at cfg info, which is still close enough to correct at this
19120 point. This gives false positives for broken functions that might use
19121 uninitialized data that happens to be allocated in r3, but who cares? */
19122 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19123 }
19124
19125 /* Compute the number of bytes used to store the static chain register on the
19126 stack, above the stack frame. We need to know this accurately to get the
19127 alignment of the rest of the stack frame correct. */
19128
19129 static int
19130 arm_compute_static_chain_stack_bytes (void)
19131 {
19132 /* See the defining assertion in arm_expand_prologue. */
19133 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19134 && IS_NESTED (arm_current_func_type ())
19135 && arm_r3_live_at_start_p ()
19136 && crtl->args.pretend_args_size == 0)
19137 return 4;
19138
19139 return 0;
19140 }
19141
19142 /* Compute a bit mask of which registers need to be
19143 saved on the stack for the current function.
19144 This is used by arm_get_frame_offsets, which may add extra registers. */
19145
19146 static unsigned long
19147 arm_compute_save_reg_mask (void)
19148 {
19149 unsigned int save_reg_mask = 0;
19150 unsigned long func_type = arm_current_func_type ();
19151 unsigned int reg;
19152
19153 if (IS_NAKED (func_type))
19154 /* This should never really happen. */
19155 return 0;
19156
19157 /* If we are creating a stack frame, then we must save the frame pointer,
19158 IP (which will hold the old stack pointer), LR and the PC. */
19159 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19160 save_reg_mask |=
19161 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19162 | (1 << IP_REGNUM)
19163 | (1 << LR_REGNUM)
19164 | (1 << PC_REGNUM);
19165
19166 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19167
19168 /* Decide if we need to save the link register.
19169 Interrupt routines have their own banked link register,
19170 so they never need to save it.
19171 Otherwise if we do not use the link register we do not need to save
19172 it. If we are pushing other registers onto the stack however, we
19173 can save an instruction in the epilogue by pushing the link register
19174 now and then popping it back into the PC. This incurs extra memory
19175 accesses though, so we only do it when optimizing for size, and only
19176 if we know that we will not need a fancy return sequence. */
19177 if (df_regs_ever_live_p (LR_REGNUM)
19178 || (save_reg_mask
19179 && optimize_size
19180 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19181 && !crtl->tail_call_emit
19182 && !crtl->calls_eh_return))
19183 save_reg_mask |= 1 << LR_REGNUM;
19184
19185 if (cfun->machine->lr_save_eliminated)
19186 save_reg_mask &= ~ (1 << LR_REGNUM);
19187
19188 if (TARGET_REALLY_IWMMXT
19189 && ((bit_count (save_reg_mask)
19190 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19191 arm_compute_static_chain_stack_bytes())
19192 ) % 2) != 0)
19193 {
19194 /* The total number of registers that are going to be pushed
19195 onto the stack is odd. We need to ensure that the stack
19196 is 64-bit aligned before we start to save iWMMXt registers,
19197 and also before we start to create locals. (A local variable
19198 might be a double or long long which we will load/store using
19199 an iWMMXt instruction). Therefore we need to push another
19200 ARM register, so that the stack will be 64-bit aligned. We
19201 try to avoid using the arg registers (r0 - r3) as they might be
19202 used to pass values in a tail call. */
19203 for (reg = 4; reg <= 12; reg++)
19204 if ((save_reg_mask & (1 << reg)) == 0)
19205 break;
19206
19207 if (reg <= 12)
19208 save_reg_mask |= (1 << reg);
19209 else
19210 {
19211 cfun->machine->sibcall_blocked = 1;
19212 save_reg_mask |= (1 << 3);
19213 }
19214 }
19215
19216 /* We may need to push an additional register for use initializing the
19217 PIC base register. */
19218 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19219 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19220 {
19221 reg = thumb_find_work_register (1 << 4);
19222 if (!call_used_regs[reg])
19223 save_reg_mask |= (1 << reg);
19224 }
19225
19226 return save_reg_mask;
19227 }
19228
19229
19230 /* Compute a bit mask of which registers need to be
19231 saved on the stack for the current function. */
19232 static unsigned long
19233 thumb1_compute_save_reg_mask (void)
19234 {
19235 unsigned long mask;
19236 unsigned reg;
19237
19238 mask = 0;
19239 for (reg = 0; reg < 12; reg ++)
19240 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19241 mask |= 1 << reg;
19242
19243 if (flag_pic
19244 && !TARGET_SINGLE_PIC_BASE
19245 && arm_pic_register != INVALID_REGNUM
19246 && crtl->uses_pic_offset_table)
19247 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19248
19249 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19250 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19251 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19252
19253 /* LR will also be pushed if any lo regs are pushed. */
19254 if (mask & 0xff || thumb_force_lr_save ())
19255 mask |= (1 << LR_REGNUM);
19256
19257 /* Make sure we have a low work register if we need one.
19258 We will need one if we are going to push a high register,
19259 but we are not currently intending to push a low register. */
19260 if ((mask & 0xff) == 0
19261 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19262 {
19263 /* Use thumb_find_work_register to choose which register
19264 we will use. If the register is live then we will
19265 have to push it. Use LAST_LO_REGNUM as our fallback
19266 choice for the register to select. */
19267 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19268 /* Make sure the register returned by thumb_find_work_register is
19269 not part of the return value. */
19270 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19271 reg = LAST_LO_REGNUM;
19272
19273 if (callee_saved_reg_p (reg))
19274 mask |= 1 << reg;
19275 }
19276
19277 /* The 504 below is 8 bytes less than 512 because there are two possible
19278 alignment words. We can't tell here if they will be present or not so we
19279 have to play it safe and assume that they are. */
19280 if ((CALLER_INTERWORKING_SLOT_SIZE +
19281 ROUND_UP_WORD (get_frame_size ()) +
19282 crtl->outgoing_args_size) >= 504)
19283 {
19284 /* This is the same as the code in thumb1_expand_prologue() which
19285 determines which register to use for stack decrement. */
19286 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19287 if (mask & (1 << reg))
19288 break;
19289
19290 if (reg > LAST_LO_REGNUM)
19291 {
19292 /* Make sure we have a register available for stack decrement. */
19293 mask |= 1 << LAST_LO_REGNUM;
19294 }
19295 }
19296
19297 return mask;
19298 }
19299
19300
19301 /* Return the number of bytes required to save VFP registers. */
19302 static int
19303 arm_get_vfp_saved_size (void)
19304 {
19305 unsigned int regno;
19306 int count;
19307 int saved;
19308
19309 saved = 0;
19310 /* Space for saved VFP registers. */
19311 if (TARGET_HARD_FLOAT && TARGET_VFP)
19312 {
19313 count = 0;
19314 for (regno = FIRST_VFP_REGNUM;
19315 regno < LAST_VFP_REGNUM;
19316 regno += 2)
19317 {
19318 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19319 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19320 {
19321 if (count > 0)
19322 {
19323 /* Workaround ARM10 VFPr1 bug. */
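		  /* That is, a run of exactly two D registers is counted
		     as three (24 bytes) on pre-ARMv6 cores; the matching
		     adjustment is made in arm_emit_vfp_multi_reg_pop.  */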
19324 if (count == 2 && !arm_arch6)
19325 count++;
19326 saved += count * 8;
19327 }
19328 count = 0;
19329 }
19330 else
19331 count++;
19332 }
19333 if (count > 0)
19334 {
19335 if (count == 2 && !arm_arch6)
19336 count++;
19337 saved += count * 8;
19338 }
19339 }
19340 return saved;
19341 }
19342
19343
19344 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19345 everything bar the final return instruction. If SIMPLE_RETURN is true,
19346 then do not output the epilogue, because it has already been emitted in RTL. */
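/* For example, a normal function that saved only r4 and lr typically
   returns with a single "pop {r4, pc}" (unified syntax) or
   "ldmfd sp!, {r4, pc}" (divided syntax); the exact sequence depends on
   the saved register mask and the function type computed below.  */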
19347 const char *
19348 output_return_instruction (rtx operand, bool really_return, bool reverse,
19349 bool simple_return)
19350 {
19351 char conditional[10];
19352 char instr[100];
19353 unsigned reg;
19354 unsigned long live_regs_mask;
19355 unsigned long func_type;
19356 arm_stack_offsets *offsets;
19357
19358 func_type = arm_current_func_type ();
19359
19360 if (IS_NAKED (func_type))
19361 return "";
19362
19363 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19364 {
19365 /* If this function was declared non-returning, and we have
19366 found a tail call, then we have to trust that the called
19367 function won't return. */
19368 if (really_return)
19369 {
19370 rtx ops[2];
19371
19372 /* Otherwise, trap an attempted return by aborting. */
19373 ops[0] = operand;
19374 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19375 : "abort");
19376 assemble_external_libcall (ops[1]);
19377 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19378 }
19379
19380 return "";
19381 }
19382
19383 gcc_assert (!cfun->calls_alloca || really_return);
19384
19385 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19386
19387 cfun->machine->return_used_this_function = 1;
19388
19389 offsets = arm_get_frame_offsets ();
19390 live_regs_mask = offsets->saved_regs_mask;
19391
19392 if (!simple_return && live_regs_mask)
19393 {
19394 const char * return_reg;
19395
19396 /* If we do not have any special requirements for function exit
19397 (e.g. interworking) then we can load the return address
19398 directly into the PC. Otherwise we must load it into LR. */
19399 if (really_return
19400 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19401 return_reg = reg_names[PC_REGNUM];
19402 else
19403 return_reg = reg_names[LR_REGNUM];
19404
19405 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19406 {
19407 /* There are three possible reasons for the IP register
19408 being saved. 1) a stack frame was created, in which case
19409 IP contains the old stack pointer, or 2) an ISR routine
19410 corrupted it, or 3) it was saved to align the stack on
19411 iWMMXt. In case 1, restore IP into SP, otherwise just
19412 restore IP. */
19413 if (frame_pointer_needed)
19414 {
19415 live_regs_mask &= ~ (1 << IP_REGNUM);
19416 live_regs_mask |= (1 << SP_REGNUM);
19417 }
19418 else
19419 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19420 }
19421
19422 /* On some ARM architectures it is faster to use LDR rather than
19423 LDM to load a single register. On other architectures, the
19424 cost is the same. In 26 bit mode, or for exception handlers,
19425 we have to use LDM to load the PC so that the CPSR is also
19426 restored. */
19427 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19428 if (live_regs_mask == (1U << reg))
19429 break;
19430
19431 if (reg <= LAST_ARM_REGNUM
19432 && (reg != LR_REGNUM
19433 || ! really_return
19434 || ! IS_INTERRUPT (func_type)))
19435 {
19436 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19437 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19438 }
19439 else
19440 {
19441 char *p;
19442 int first = 1;
19443
19444 /* Generate the load multiple instruction to restore the
19445 registers. Note we can get here, even if
19446 frame_pointer_needed is true, but only if sp already
19447 points to the base of the saved core registers. */
19448 if (live_regs_mask & (1 << SP_REGNUM))
19449 {
19450 unsigned HOST_WIDE_INT stack_adjust;
19451
19452 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19453 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19454
19455 if (stack_adjust && arm_arch5 && TARGET_ARM)
19456 if (TARGET_UNIFIED_ASM)
19457 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19458 else
19459 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19460 else
19461 {
19462 /* If we can't use ldmib (SA110 bug),
19463 then try to pop r3 instead. */
19464 if (stack_adjust)
19465 live_regs_mask |= 1 << 3;
19466
19467 if (TARGET_UNIFIED_ASM)
19468 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19469 else
19470 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19471 }
19472 }
19473 else
19474 if (TARGET_UNIFIED_ASM)
19475 sprintf (instr, "pop%s\t{", conditional);
19476 else
19477 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19478
19479 p = instr + strlen (instr);
19480
19481 for (reg = 0; reg <= SP_REGNUM; reg++)
19482 if (live_regs_mask & (1 << reg))
19483 {
19484 int l = strlen (reg_names[reg]);
19485
19486 if (first)
19487 first = 0;
19488 else
19489 {
19490 memcpy (p, ", ", 2);
19491 p += 2;
19492 }
19493
19494 memcpy (p, "%|", 2);
19495 memcpy (p + 2, reg_names[reg], l);
19496 p += l + 2;
19497 }
19498
19499 if (live_regs_mask & (1 << LR_REGNUM))
19500 {
19501 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19502 /* If returning from an interrupt, restore the CPSR. */
19503 if (IS_INTERRUPT (func_type))
19504 strcat (p, "^");
19505 }
19506 else
19507 strcpy (p, "}");
19508 }
19509
19510 output_asm_insn (instr, & operand);
19511
19512 /* See if we need to generate an extra instruction to
19513 perform the actual function return. */
19514 if (really_return
19515 && func_type != ARM_FT_INTERWORKED
19516 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19517 {
19518 /* The return has already been handled
19519 by loading the LR into the PC. */
19520 return "";
19521 }
19522 }
19523
19524 if (really_return)
19525 {
19526 switch ((int) ARM_FUNC_TYPE (func_type))
19527 {
19528 case ARM_FT_ISR:
19529 case ARM_FT_FIQ:
19530 /* ??? This is wrong for unified assembly syntax. */
19531 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19532 break;
19533
19534 case ARM_FT_INTERWORKED:
19535 sprintf (instr, "bx%s\t%%|lr", conditional);
19536 break;
19537
19538 case ARM_FT_EXCEPTION:
19539 /* ??? This is wrong for unified assembly syntax. */
19540 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19541 break;
19542
19543 default:
19544 /* Use bx if it's available. */
19545 if (arm_arch5 || arm_arch4t)
19546 sprintf (instr, "bx%s\t%%|lr", conditional);
19547 else
19548 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19549 break;
19550 }
19551
19552 output_asm_insn (instr, & operand);
19553 }
19554
19555 return "";
19556 }
19557
19558 /* Write the function name into the code section, directly preceding
19559 the function prologue.
19560
19561 Code will be output similar to this:
19562 t0
19563 .ascii "arm_poke_function_name", 0
19564 .align
19565 t1
19566 .word 0xff000000 + (t1 - t0)
19567 arm_poke_function_name
19568 mov ip, sp
19569 stmfd sp!, {fp, ip, lr, pc}
19570 sub fp, ip, #4
19571
19572 When performing a stack backtrace, code can inspect the value
19573 of 'pc' stored at 'fp' + 0. If the trace function then looks
19574 at location pc - 12 and the top 8 bits are set, then we know
19575 that there is a function name embedded immediately preceding this
19576 location and has length ((pc[-3]) & 0xff000000).
19577
19578 We assume that pc is declared as a pointer to an unsigned long.
19579
19580 It is of no benefit to output the function name if we are assembling
19581 a leaf function. These function types will not contain a stack
19582 backtrace structure, therefore it is not possible to determine the
19583 function name. */
19584 void
19585 arm_poke_function_name (FILE *stream, const char *name)
19586 {
19587 unsigned long alignlength;
19588 unsigned long length;
19589 rtx x;
19590
19591 length = strlen (name) + 1;
19592 alignlength = ROUND_UP_WORD (length);
19593
19594 ASM_OUTPUT_ASCII (stream, name, length);
19595 ASM_OUTPUT_ALIGN (stream, 2);
19596 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19597 assemble_aligned_integer (UNITS_PER_WORD, x);
19598 }
19599
19600 /* Place some comments into the assembler stream
19601 describing the current function. */
19602 static void
19603 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19604 {
19605 unsigned long func_type;
19606
19607 /* ??? Do we want to print some of the below anyway? */
19608 if (TARGET_THUMB1)
19609 return;
19610
19611 /* Sanity check. */
19612 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19613
19614 func_type = arm_current_func_type ();
19615
19616 switch ((int) ARM_FUNC_TYPE (func_type))
19617 {
19618 default:
19619 case ARM_FT_NORMAL:
19620 break;
19621 case ARM_FT_INTERWORKED:
19622 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19623 break;
19624 case ARM_FT_ISR:
19625 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19626 break;
19627 case ARM_FT_FIQ:
19628 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19629 break;
19630 case ARM_FT_EXCEPTION:
19631 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19632 break;
19633 }
19634
19635 if (IS_NAKED (func_type))
19636 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19637
19638 if (IS_VOLATILE (func_type))
19639 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19640
19641 if (IS_NESTED (func_type))
19642 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19643 if (IS_STACKALIGN (func_type))
19644 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19645
19646 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19647 crtl->args.size,
19648 crtl->args.pretend_args_size, frame_size);
19649
19650 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19651 frame_pointer_needed,
19652 cfun->machine->uses_anonymous_args);
19653
19654 if (cfun->machine->lr_save_eliminated)
19655 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19656
19657 if (crtl->calls_eh_return)
19658 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19659
19660 }
19661
19662 static void
19663 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19664 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19665 {
19666 arm_stack_offsets *offsets;
19667
19668 if (TARGET_THUMB1)
19669 {
19670 int regno;
19671
19672 /* Emit any call-via-reg trampolines that are needed for v4t support
19673 of call_reg and call_value_reg type insns. */
19674 for (regno = 0; regno < LR_REGNUM; regno++)
19675 {
19676 rtx label = cfun->machine->call_via[regno];
19677
19678 if (label != NULL)
19679 {
19680 switch_to_section (function_section (current_function_decl));
19681 targetm.asm_out.internal_label (asm_out_file, "L",
19682 CODE_LABEL_NUMBER (label));
19683 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19684 }
19685 }
19686
19687 /* ??? Probably not safe to set this here, since it assumes that a
19688 function will be emitted as assembly immediately after we generate
19689 RTL for it. This does not happen for inline functions. */
19690 cfun->machine->return_used_this_function = 0;
19691 }
19692 else /* TARGET_32BIT */
19693 {
19694 /* We need to take into account any stack-frame rounding. */
19695 offsets = arm_get_frame_offsets ();
19696
19697 gcc_assert (!use_return_insn (FALSE, NULL)
19698 || (cfun->machine->return_used_this_function != 0)
19699 || offsets->saved_regs == offsets->outgoing_args
19700 || frame_pointer_needed);
19701 }
19702 }
19703
19704 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19705 STR and STRD. If an even number of registers are being pushed, an
19706 STRD pattern is created for each register pair. If an
19707 odd number of registers are pushed, emit an initial STR followed by
19708 as many STRD instructions as are needed. This works best when the
19709 stack is initially 64-bit aligned (the normal case), since it
19710 ensures that each STRD is also 64-bit aligned. */
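/* For example (a sketch; the actual assembly comes from the matching
   patterns), pushing {r4, r5, r6} first stores r4 with writeback,
   "str r4, [sp, #-12]!", and then stores the remaining pair with
   "strd r5, r6, [sp, #4]".  */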
19711 static void
19712 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19713 {
19714 int num_regs = 0;
19715 int i;
19716 int regno;
19717 rtx par = NULL_RTX;
19718 rtx dwarf = NULL_RTX;
19719 rtx tmp;
19720 bool first = true;
19721
19722 num_regs = bit_count (saved_regs_mask);
19723
19724 /* Must be at least one register to save, and can't save SP or PC. */
19725 gcc_assert (num_regs > 0 && num_regs <= 14);
19726 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19727 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19728
19729 /* Create sequence for DWARF info. All the frame-related data for
19730 debugging is held in this wrapper. */
19731 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19732
19733 /* Describe the stack adjustment. */
19734 tmp = gen_rtx_SET (stack_pointer_rtx,
19735 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19736 RTX_FRAME_RELATED_P (tmp) = 1;
19737 XVECEXP (dwarf, 0, 0) = tmp;
19738
19739 /* Find the first register. */
19740 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19741 ;
19742
19743 i = 0;
19744
19745 /* If there's an odd number of registers to push, start off by
19746 pushing a single register. This ensures that subsequent strd
19747 operations are dword aligned (assuming that SP was originally
19748 64-bit aligned). */
19749 if ((num_regs & 1) != 0)
19750 {
19751 rtx reg, mem, insn;
19752
19753 reg = gen_rtx_REG (SImode, regno);
19754 if (num_regs == 1)
19755 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19756 stack_pointer_rtx));
19757 else
19758 mem = gen_frame_mem (Pmode,
19759 gen_rtx_PRE_MODIFY
19760 (Pmode, stack_pointer_rtx,
19761 plus_constant (Pmode, stack_pointer_rtx,
19762 -4 * num_regs)));
19763
19764 tmp = gen_rtx_SET (mem, reg);
19765 RTX_FRAME_RELATED_P (tmp) = 1;
19766 insn = emit_insn (tmp);
19767 RTX_FRAME_RELATED_P (insn) = 1;
19768 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19769 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19770 RTX_FRAME_RELATED_P (tmp) = 1;
19771 i++;
19772 regno++;
19773 XVECEXP (dwarf, 0, i) = tmp;
19774 first = false;
19775 }
19776
19777 while (i < num_regs)
19778 if (saved_regs_mask & (1 << regno))
19779 {
19780 rtx reg1, reg2, mem1, mem2;
19781 rtx tmp0, tmp1, tmp2;
19782 int regno2;
19783
19784 /* Find the register to pair with this one. */
19785 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19786 regno2++)
19787 ;
19788
19789 reg1 = gen_rtx_REG (SImode, regno);
19790 reg2 = gen_rtx_REG (SImode, regno2);
19791
19792 if (first)
19793 {
19794 rtx insn;
19795
19796 first = false;
19797 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19798 stack_pointer_rtx,
19799 -4 * num_regs));
19800 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19801 stack_pointer_rtx,
19802 -4 * (num_regs - 1)));
19803 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19804 plus_constant (Pmode, stack_pointer_rtx,
19805 -4 * (num_regs)));
19806 tmp1 = gen_rtx_SET (mem1, reg1);
19807 tmp2 = gen_rtx_SET (mem2, reg2);
19808 RTX_FRAME_RELATED_P (tmp0) = 1;
19809 RTX_FRAME_RELATED_P (tmp1) = 1;
19810 RTX_FRAME_RELATED_P (tmp2) = 1;
19811 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19812 XVECEXP (par, 0, 0) = tmp0;
19813 XVECEXP (par, 0, 1) = tmp1;
19814 XVECEXP (par, 0, 2) = tmp2;
19815 insn = emit_insn (par);
19816 RTX_FRAME_RELATED_P (insn) = 1;
19817 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19818 }
19819 else
19820 {
19821 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19822 stack_pointer_rtx,
19823 4 * i));
19824 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19825 stack_pointer_rtx,
19826 4 * (i + 1)));
19827 tmp1 = gen_rtx_SET (mem1, reg1);
19828 tmp2 = gen_rtx_SET (mem2, reg2);
19829 RTX_FRAME_RELATED_P (tmp1) = 1;
19830 RTX_FRAME_RELATED_P (tmp2) = 1;
19831 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19832 XVECEXP (par, 0, 0) = tmp1;
19833 XVECEXP (par, 0, 1) = tmp2;
19834 emit_insn (par);
19835 }
19836
19837 /* Create unwind information. This is an approximation. */
19838 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19839 plus_constant (Pmode,
19840 stack_pointer_rtx,
19841 4 * i)),
19842 reg1);
19843 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19844 plus_constant (Pmode,
19845 stack_pointer_rtx,
19846 4 * (i + 1))),
19847 reg2);
19848
19849 RTX_FRAME_RELATED_P (tmp1) = 1;
19850 RTX_FRAME_RELATED_P (tmp2) = 1;
19851 XVECEXP (dwarf, 0, i + 1) = tmp1;
19852 XVECEXP (dwarf, 0, i + 2) = tmp2;
19853 i += 2;
19854 regno = regno2 + 1;
19855 }
19856 else
19857 regno++;
19858
19859 return;
19860 }
19861
19862 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19863 whenever possible, otherwise it emits single-word stores. The first store
19864 also allocates stack space for all saved registers, using pre-indexed
19865 addressing with writeback. All other stores use offset addressing. If no
19866 STRD can be emitted, this function emits a sequence of single-word stores
19867 rather than an STM as before, because single-word stores give the scheduler
19868 more freedom and can be turned into an STM by peephole optimizations. */
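/* For example (a sketch), saving {r4, r5, r7} emits
   "strd r4, r5, [sp, #-12]!" followed by "str r7, [sp, #8]"; the
   writeback on the first store allocates the whole 12-byte save area.  */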
19869 static void
19870 arm_emit_strd_push (unsigned long saved_regs_mask)
19871 {
19872 int num_regs = 0;
19873 int i, j, dwarf_index = 0;
19874 int offset = 0;
19875 rtx dwarf = NULL_RTX;
19876 rtx insn = NULL_RTX;
19877 rtx tmp, mem;
19878
19879 /* TODO: More efficient code can be emitted by changing the
19880 layout, e.g., first push all pairs that can use STRD to keep the
19881 stack aligned, and then push all other registers. */
19882 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19883 if (saved_regs_mask & (1 << i))
19884 num_regs++;
19885
19886 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19887 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19888 gcc_assert (num_regs > 0);
19889
19890 /* Create sequence for DWARF info. */
19891 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19892
19893 /* For the dwarf info, we generate an explicit stack update. */
19894 tmp = gen_rtx_SET (stack_pointer_rtx,
19895 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19896 RTX_FRAME_RELATED_P (tmp) = 1;
19897 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19898
19899 /* Save registers. */
19900 offset = - 4 * num_regs;
19901 j = 0;
19902 while (j <= LAST_ARM_REGNUM)
19903 if (saved_regs_mask & (1 << j))
19904 {
19905 if ((j % 2 == 0)
19906 && (saved_regs_mask & (1 << (j + 1))))
19907 {
19908 /* The current register and the following register form a register
19909 pair for which STRD can be generated. */
19910 if (offset < 0)
19911 {
19912 /* Allocate stack space for all saved registers. */
19913 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19914 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19915 mem = gen_frame_mem (DImode, tmp);
19916 offset = 0;
19917 }
19918 else if (offset > 0)
19919 mem = gen_frame_mem (DImode,
19920 plus_constant (Pmode,
19921 stack_pointer_rtx,
19922 offset));
19923 else
19924 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19925
19926 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19927 RTX_FRAME_RELATED_P (tmp) = 1;
19928 tmp = emit_insn (tmp);
19929
19930 /* Record the first store insn. */
19931 if (dwarf_index == 1)
19932 insn = tmp;
19933
19934 /* Generate dwarf info. */
19935 mem = gen_frame_mem (SImode,
19936 plus_constant (Pmode,
19937 stack_pointer_rtx,
19938 offset));
19939 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19940 RTX_FRAME_RELATED_P (tmp) = 1;
19941 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19942
19943 mem = gen_frame_mem (SImode,
19944 plus_constant (Pmode,
19945 stack_pointer_rtx,
19946 offset + 4));
19947 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19948 RTX_FRAME_RELATED_P (tmp) = 1;
19949 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19950
19951 offset += 8;
19952 j += 2;
19953 }
19954 else
19955 {
19956 /* Emit a single word store. */
19957 if (offset < 0)
19958 {
19959 /* Allocate stack space for all saved registers. */
19960 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19961 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19962 mem = gen_frame_mem (SImode, tmp);
19963 offset = 0;
19964 }
19965 else if (offset > 0)
19966 mem = gen_frame_mem (SImode,
19967 plus_constant (Pmode,
19968 stack_pointer_rtx,
19969 offset));
19970 else
19971 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19972
19973 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19974 RTX_FRAME_RELATED_P (tmp) = 1;
19975 tmp = emit_insn (tmp);
19976
19977 /* Record the first store insn. */
19978 if (dwarf_index == 1)
19979 insn = tmp;
19980
19981 /* Generate dwarf info. */
19982 mem = gen_frame_mem (SImode,
19983 plus_constant(Pmode,
19984 stack_pointer_rtx,
19985 offset));
19986 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19987 RTX_FRAME_RELATED_P (tmp) = 1;
19988 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19989
19990 offset += 4;
19991 j += 1;
19992 }
19993 }
19994 else
19995 j++;
19996
19997 /* Attach dwarf info to the first insn we generate. */
19998 gcc_assert (insn != NULL_RTX);
19999 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20000 RTX_FRAME_RELATED_P (insn) = 1;
20001 }
20002
20003 /* Generate and emit an insn that we will recognize as a push_multi.
20004 Unfortunately, since this insn does not reflect very well the actual
20005 semantics of the operation, we need to annotate the insn for the benefit
20006 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20007 MASK for registers that should be annotated for DWARF2 frame unwind
20008 information. */
20009 static rtx
20010 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20011 {
20012 int num_regs = 0;
20013 int num_dwarf_regs = 0;
20014 int i, j;
20015 rtx par;
20016 rtx dwarf;
20017 int dwarf_par_index;
20018 rtx tmp, reg;
20019
20020 /* We don't record the PC in the dwarf frame information. */
20021 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20022
20023 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20024 {
20025 if (mask & (1 << i))
20026 num_regs++;
20027 if (dwarf_regs_mask & (1 << i))
20028 num_dwarf_regs++;
20029 }
20030
20031 gcc_assert (num_regs && num_regs <= 16);
20032 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20033
20034 /* For the body of the insn we are going to generate an UNSPEC in
20035 parallel with several USEs. This allows the insn to be recognized
20036 by the push_multi pattern in the arm.md file.
20037
20038 The body of the insn looks something like this:
20039
20040 (parallel [
20041 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20042 (const_int:SI <num>)))
20043 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20044 (use (reg:SI XX))
20045 (use (reg:SI YY))
20046 ...
20047 ])
20048
20049 For the frame note however, we try to be more explicit and actually
20050 show each register being stored into the stack frame, plus a (single)
20051 decrement of the stack pointer. We do it this way in order to be
20052 friendly to the stack unwinding code, which only wants to see a single
20053 stack decrement per instruction. The RTL we generate for the note looks
20054 something like this:
20055
20056 (sequence [
20057 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20058 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20059 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20060 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20061 ...
20062 ])
20063
20064 FIXME: In an ideal world the PRE_MODIFY would not exist and
20065 instead we'd have a parallel expression detailing all
20066 the stores to the various memory addresses so that debug
20067 information is more up-to-date. Remember however while writing
20068 this to take care of the constraints with the push instruction.
20069
20070 Note also that this has to be taken care of for the VFP registers.
20071
20072 For more see PR43399. */
20073
20074 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20075 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20076 dwarf_par_index = 1;
20077
20078 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20079 {
20080 if (mask & (1 << i))
20081 {
20082 reg = gen_rtx_REG (SImode, i);
20083
20084 XVECEXP (par, 0, 0)
20085 = gen_rtx_SET (gen_frame_mem
20086 (BLKmode,
20087 gen_rtx_PRE_MODIFY (Pmode,
20088 stack_pointer_rtx,
20089 plus_constant
20090 (Pmode, stack_pointer_rtx,
20091 -4 * num_regs))
20092 ),
20093 gen_rtx_UNSPEC (BLKmode,
20094 gen_rtvec (1, reg),
20095 UNSPEC_PUSH_MULT));
20096
20097 if (dwarf_regs_mask & (1 << i))
20098 {
20099 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20100 reg);
20101 RTX_FRAME_RELATED_P (tmp) = 1;
20102 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20103 }
20104
20105 break;
20106 }
20107 }
20108
20109 for (j = 1, i++; j < num_regs; i++)
20110 {
20111 if (mask & (1 << i))
20112 {
20113 reg = gen_rtx_REG (SImode, i);
20114
20115 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20116
20117 if (dwarf_regs_mask & (1 << i))
20118 {
20119 tmp
20120 = gen_rtx_SET (gen_frame_mem
20121 (SImode,
20122 plus_constant (Pmode, stack_pointer_rtx,
20123 4 * j)),
20124 reg);
20125 RTX_FRAME_RELATED_P (tmp) = 1;
20126 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20127 }
20128
20129 j++;
20130 }
20131 }
20132
20133 par = emit_insn (par);
20134
20135 tmp = gen_rtx_SET (stack_pointer_rtx,
20136 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20137 RTX_FRAME_RELATED_P (tmp) = 1;
20138 XVECEXP (dwarf, 0, 0) = tmp;
20139
20140 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20141
20142 return par;
20143 }
20144
20145 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20146 SIZE is the offset to be adjusted.
20147 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20148 static void
20149 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20150 {
20151 rtx dwarf;
20152
20153 RTX_FRAME_RELATED_P (insn) = 1;
20154 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20155 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20156 }
20157
20158 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20159 SAVED_REGS_MASK shows which registers need to be restored.
20160
20161 Unfortunately, since this insn does not reflect very well the actual
20162 semantics of the operation, we need to annotate the insn for the benefit
20163 of DWARF2 frame unwind information. */
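/* For example, restoring {r4, r5, pc} generates a parallel roughly of the
   form:

     (parallel [(return)
		(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
		(set (reg:SI r4) (mem:SI (reg:SI sp)))
		(set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
		(set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))])

   together with REG_CFA_RESTORE notes for the restored core registers.  */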
20164 static void
20165 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20166 {
20167 int num_regs = 0;
20168 int i, j;
20169 rtx par;
20170 rtx dwarf = NULL_RTX;
20171 rtx tmp, reg;
20172 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20173 int offset_adj;
20174 int emit_update;
20175
20176 offset_adj = return_in_pc ? 1 : 0;
20177 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20178 if (saved_regs_mask & (1 << i))
20179 num_regs++;
20180
20181 gcc_assert (num_regs && num_regs <= 16);
20182
20183 /* If SP is in the register list, then we don't emit an SP update insn. */
20184 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20185
20186 /* The parallel needs to hold num_regs SETs
20187 and one SET for the stack update. */
20188 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20189
20190 if (return_in_pc)
20191 XVECEXP (par, 0, 0) = ret_rtx;
20192
20193 if (emit_update)
20194 {
20195 /* Increment the stack pointer, based on there being
20196 num_regs 4-byte registers to restore. */
20197 tmp = gen_rtx_SET (stack_pointer_rtx,
20198 plus_constant (Pmode,
20199 stack_pointer_rtx,
20200 4 * num_regs));
20201 RTX_FRAME_RELATED_P (tmp) = 1;
20202 XVECEXP (par, 0, offset_adj) = tmp;
20203 }
20204
20205 /* Now restore every reg, which may include PC. */
20206 for (j = 0, i = 0; j < num_regs; i++)
20207 if (saved_regs_mask & (1 << i))
20208 {
20209 reg = gen_rtx_REG (SImode, i);
20210 if ((num_regs == 1) && emit_update && !return_in_pc)
20211 {
20212 /* Emit single load with writeback. */
20213 tmp = gen_frame_mem (SImode,
20214 gen_rtx_POST_INC (Pmode,
20215 stack_pointer_rtx));
20216 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20217 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20218 return;
20219 }
20220
20221 tmp = gen_rtx_SET (reg,
20222 gen_frame_mem
20223 (SImode,
20224 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20225 RTX_FRAME_RELATED_P (tmp) = 1;
20226 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20227
20228 /* We need to maintain a sequence for DWARF info too. As the dwarf info
20229 should not contain PC, skip PC. */
20230 if (i != PC_REGNUM)
20231 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20232
20233 j++;
20234 }
20235
20236 if (return_in_pc)
20237 par = emit_jump_insn (par);
20238 else
20239 par = emit_insn (par);
20240
20241 REG_NOTES (par) = dwarf;
20242 if (!return_in_pc)
20243 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20244 stack_pointer_rtx, stack_pointer_rtx);
20245 }
20246
20247 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20248 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20249
20250 Unfortunately, since this insn does not reflect the actual semantics
20251 of the operation very well, we need to annotate the insn for the benefit
20252 of DWARF2 frame unwind information. */
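/* Illustrative sketch, not part of the original sources: popping four
   D registers starting at d8 from BASE_REG builds a PARALLEL that is
   recognized as roughly "vldm <base>!, {d8-d11}": one SET advancing
   BASE_REG by 8 * num_regs bytes, plus one DFmode load per register,
   with a REG_CFA_RESTORE note accumulated for each popped register.  */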
20253 static void
20254 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20255 {
20256 int i, j;
20257 rtx par;
20258 rtx dwarf = NULL_RTX;
20259 rtx tmp, reg;
20260
20261 gcc_assert (num_regs && num_regs <= 32);
20262
20263 /* Work around the ARM10 VFPr1 bug. */
20264 if (num_regs == 2 && !arm_arch6)
20265 {
20266 if (first_reg == 15)
20267 first_reg--;
20268
20269 num_regs++;
20270 }
20271
20272 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20273 there could be up to 32 D-registers to restore.
20274 If there are more than 16 D-registers, make two recursive calls,
20275 each of which emits one pop_multi instruction. */
20276 if (num_regs > 16)
20277 {
20278 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20279 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20280 return;
20281 }
20282
20283 /* The parallel needs to hold num_regs SETs
20284 and one SET for the stack update. */
20285 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20286
20287 /* Increment the stack pointer, based on there being
20288 num_regs 8-byte registers to restore. */
20289 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20290 RTX_FRAME_RELATED_P (tmp) = 1;
20291 XVECEXP (par, 0, 0) = tmp;
20292
20293 /* Now show every reg that will be restored, using a SET for each. */
20294 for (j = 0, i = first_reg; j < num_regs; i += 2)
20295 {
20296 reg = gen_rtx_REG (DFmode, i);
20297
20298 tmp = gen_rtx_SET (reg,
20299 gen_frame_mem
20300 (DFmode,
20301 plus_constant (Pmode, base_reg, 8 * j)));
20302 RTX_FRAME_RELATED_P (tmp) = 1;
20303 XVECEXP (par, 0, j + 1) = tmp;
20304
20305 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20306
20307 j++;
20308 }
20309
20310 par = emit_insn (par);
20311 REG_NOTES (par) = dwarf;
20312
20313 /* Make sure the CFA doesn't leave with IP_REGNUM to allow unwinding from FP. */
20314 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20315 {
20316 RTX_FRAME_RELATED_P (par) = 1;
20317 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20318 }
20319 else
20320 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20321 base_reg, base_reg);
20322 }
20323
20324 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20325 even number of registers is being popped, multiple LDRD patterns are created
20326 for all register pairs. If an odd number of registers is popped, the last
20327 register is loaded using an LDR pattern. */
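/* Illustrative sketch, not part of the original sources: for a
   SAVED_REGS_MASK covering {r4, r5, r6, pc}, the code below emits
   roughly
       ldrd  r4, r5, [sp]
       add   sp, sp, #8
       pop   {r6, pc}
   that is, LDRD for the leading pair, a single stack adjustment, and
   then the remaining register together with PC via
   arm_emit_multi_reg_pop.  */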
20328 static void
20329 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20330 {
20331 int num_regs = 0;
20332 int i, j;
20333 rtx par = NULL_RTX;
20334 rtx dwarf = NULL_RTX;
20335 rtx tmp, reg, tmp1;
20336 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20337
20338 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20339 if (saved_regs_mask & (1 << i))
20340 num_regs++;
20341
20342 gcc_assert (num_regs && num_regs <= 16);
20343
20344 /* We cannot generate an LDRD for PC, so reduce the count if PC is
20345 to be popped. If num_regs was even, it now becomes odd and we can
20346 generate a pop with PC. If num_regs was odd, it is now even and an
20347 LDR with return can be generated for PC. */
20348 if (return_in_pc)
20349 num_regs--;
20350
20351 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20352
20353 /* Var j iterates over all the registers in saved_regs_mask; var i gives
20354 the index of each saved register in the stack frame. A PARALLEL RTX of
20355 a register pair is created here, so that the pattern for LDRD can be
20356 matched. As PC is always the last register to be popped, and we have
20357 already decremented num_regs if PC is present, we don't have to worry
20358 about PC in this loop. */
20359 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20360 if (saved_regs_mask & (1 << j))
20361 {
20362 /* Create RTX for memory load. */
20363 reg = gen_rtx_REG (SImode, j);
20364 tmp = gen_rtx_SET (reg,
20365 gen_frame_mem (SImode,
20366 plus_constant (Pmode,
20367 stack_pointer_rtx, 4 * i)));
20368 RTX_FRAME_RELATED_P (tmp) = 1;
20369
20370 if (i % 2 == 0)
20371 {
20372 /* When saved-register index (i) is even, the RTX to be emitted is
20373 yet to be created. Hence create it first. The LDRD pattern we
20374 are generating is:
20375 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20376 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20377 where target registers need not be consecutive. */
20378 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20379 dwarf = NULL_RTX;
20380 }
20381
20382 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20383 added as the 0th element; if i is odd, reg_i is added as the 1st element
20384 of the LDRD pattern shown above. */
20385 XVECEXP (par, 0, (i % 2)) = tmp;
20386 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20387
20388 if ((i % 2) == 1)
20389 {
20390 /* When the saved-register index (i) is odd, RTXs for both registers
20391 of the LDRD pattern given above have been generated, and the
20392 pattern can be emitted now. */
20393 par = emit_insn (par);
20394 REG_NOTES (par) = dwarf;
20395 RTX_FRAME_RELATED_P (par) = 1;
20396 }
20397
20398 i++;
20399 }
20400
20401 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20402 the number of registers is even AND return_in_pc is true, the last register
20403 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20404 and then emit an LDR with post-increment. */
20405
20406 /* Increment the stack pointer, based on there being
20407 num_regs 4-byte registers to restore. */
20408 tmp = gen_rtx_SET (stack_pointer_rtx,
20409 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20410 RTX_FRAME_RELATED_P (tmp) = 1;
20411 tmp = emit_insn (tmp);
20412 if (!return_in_pc)
20413 {
20414 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20415 stack_pointer_rtx, stack_pointer_rtx);
20416 }
20417
20418 dwarf = NULL_RTX;
20419
20420 if (((num_regs % 2) == 1 && !return_in_pc)
20421 || ((num_regs % 2) == 0 && return_in_pc))
20422 {
20423 /* Scan for the single register to be popped. Skip until the saved
20424 register is found. */
20425 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20426
20427 /* Gen LDR with post increment here. */
20428 tmp1 = gen_rtx_MEM (SImode,
20429 gen_rtx_POST_INC (SImode,
20430 stack_pointer_rtx));
20431 set_mem_alias_set (tmp1, get_frame_alias_set ());
20432
20433 reg = gen_rtx_REG (SImode, j);
20434 tmp = gen_rtx_SET (reg, tmp1);
20435 RTX_FRAME_RELATED_P (tmp) = 1;
20436 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20437
20438 if (return_in_pc)
20439 {
20440 /* If return_in_pc, j must be PC_REGNUM. */
20441 gcc_assert (j == PC_REGNUM);
20442 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20443 XVECEXP (par, 0, 0) = ret_rtx;
20444 XVECEXP (par, 0, 1) = tmp;
20445 par = emit_jump_insn (par);
20446 }
20447 else
20448 {
20449 par = emit_insn (tmp);
20450 REG_NOTES (par) = dwarf;
20451 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20452 stack_pointer_rtx, stack_pointer_rtx);
20453 }
20454
20455 }
20456 else if ((num_regs % 2) == 1 && return_in_pc)
20457 {
20458 /* There are 2 registers left to be popped (the last saved register and
20459 PC). Generate the pattern pop_multiple_with_stack_update_and_return to pop into PC. */
20460 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20461 }
20462
20463 return;
20464 }
20465
20466 /* LDRD in ARM mode needs consecutive registers as operands. This function
20467 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20468 offset addressing and then generates one separate stack update. This provides
20469 more scheduling freedom, compared to writeback on every load. However,
20470 if the function returns by loading directly into PC
20471 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20472 before the last load. TODO: Add a peephole optimization to recognize
20473 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20474 a peephole optimization to merge the load at stack-offset zero
20475 with the stack update instruction, using a load with writeback
20476 in post-index addressing mode. */
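/* Illustrative sketch, not part of the original sources: for a
   SAVED_REGS_MASK covering {r4, r5, r6, lr}, the code below emits
   roughly
       ldrd  r4, r5, [sp]
       ldr   r6, [sp, #8]
       ldr   lr, [sp, #12]
       add   sp, sp, #16
   r6 and lr fall back to single-word loads because their would-be
   partner registers (r7 and PC) do not qualify for an LDRD pair.  */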
20477 static void
20478 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20479 {
20480 int j = 0;
20481 int offset = 0;
20482 rtx par = NULL_RTX;
20483 rtx dwarf = NULL_RTX;
20484 rtx tmp, mem;
20485
20486 /* Restore saved registers. */
20487 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20488 j = 0;
20489 while (j <= LAST_ARM_REGNUM)
20490 if (saved_regs_mask & (1 << j))
20491 {
20492 if ((j % 2) == 0
20493 && (saved_regs_mask & (1 << (j + 1)))
20494 && (j + 1) != PC_REGNUM)
20495 {
20496 /* The current register and the next register form a register pair for
20497 which LDRD can be generated. PC is always the last register popped, and
20498 we handle it separately. */
20499 if (offset > 0)
20500 mem = gen_frame_mem (DImode,
20501 plus_constant (Pmode,
20502 stack_pointer_rtx,
20503 offset));
20504 else
20505 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20506
20507 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20508 tmp = emit_insn (tmp);
20509 RTX_FRAME_RELATED_P (tmp) = 1;
20510
20511 /* Generate dwarf info. */
20512
20513 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20514 gen_rtx_REG (SImode, j),
20515 NULL_RTX);
20516 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20517 gen_rtx_REG (SImode, j + 1),
20518 dwarf);
20519
20520 REG_NOTES (tmp) = dwarf;
20521
20522 offset += 8;
20523 j += 2;
20524 }
20525 else if (j != PC_REGNUM)
20526 {
20527 /* Emit a single word load. */
20528 if (offset > 0)
20529 mem = gen_frame_mem (SImode,
20530 plus_constant (Pmode,
20531 stack_pointer_rtx,
20532 offset));
20533 else
20534 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20535
20536 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20537 tmp = emit_insn (tmp);
20538 RTX_FRAME_RELATED_P (tmp) = 1;
20539
20540 /* Generate dwarf info. */
20541 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20542 gen_rtx_REG (SImode, j),
20543 NULL_RTX);
20544
20545 offset += 4;
20546 j += 1;
20547 }
20548 else /* j == PC_REGNUM */
20549 j++;
20550 }
20551 else
20552 j++;
20553
20554 /* Update the stack. */
20555 if (offset > 0)
20556 {
20557 tmp = gen_rtx_SET (stack_pointer_rtx,
20558 plus_constant (Pmode,
20559 stack_pointer_rtx,
20560 offset));
20561 tmp = emit_insn (tmp);
20562 arm_add_cfa_adjust_cfa_note (tmp, offset,
20563 stack_pointer_rtx, stack_pointer_rtx);
20564 offset = 0;
20565 }
20566
20567 if (saved_regs_mask & (1 << PC_REGNUM))
20568 {
20569 /* Only PC is to be popped. */
20570 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20571 XVECEXP (par, 0, 0) = ret_rtx;
20572 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20573 gen_frame_mem (SImode,
20574 gen_rtx_POST_INC (SImode,
20575 stack_pointer_rtx)));
20576 RTX_FRAME_RELATED_P (tmp) = 1;
20577 XVECEXP (par, 0, 1) = tmp;
20578 par = emit_jump_insn (par);
20579
20580 /* Generate dwarf info. */
20581 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20582 gen_rtx_REG (SImode, PC_REGNUM),
20583 NULL_RTX);
20584 REG_NOTES (par) = dwarf;
20585 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20586 stack_pointer_rtx, stack_pointer_rtx);
20587 }
20588 }
20589
20590 /* Calculate the size of the return value that is passed in registers. */
20591 static unsigned
20592 arm_size_return_regs (void)
20593 {
20594 machine_mode mode;
20595
20596 if (crtl->return_rtx != 0)
20597 mode = GET_MODE (crtl->return_rtx);
20598 else
20599 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20600
20601 return GET_MODE_SIZE (mode);
20602 }
20603
20604 /* Return true if the current function needs to save/restore LR. */
20605 static bool
20606 thumb_force_lr_save (void)
20607 {
20608 return !cfun->machine->lr_save_eliminated
20609 && (!leaf_function_p ()
20610 || thumb_far_jump_used_p ()
20611 || df_regs_ever_live_p (LR_REGNUM));
20612 }
20613
20614 /* Return true if CALL is an indirect tail call. In that case we cannot
20615 tell whether r3 will be available, since the call target address may be
20616 held in any register, including r3. */
20617 static bool
20618 is_indirect_tailcall_p (rtx call)
20619 {
20620 rtx pat = PATTERN (call);
20621
20622 /* Indirect tail call. */
20623 pat = XVECEXP (pat, 0, 0);
20624 if (GET_CODE (pat) == SET)
20625 pat = SET_SRC (pat);
20626
20627 pat = XEXP (XEXP (pat, 0), 0);
20628 return REG_P (pat);
20629 }
20630
20631 /* Return true if r3 is used by any of the tail call insns in the
20632 current function. */
20633 static bool
20634 any_sibcall_could_use_r3 (void)
20635 {
20636 edge_iterator ei;
20637 edge e;
20638
20639 if (!crtl->tail_call_emit)
20640 return false;
20641 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20642 if (e->flags & EDGE_SIBCALL)
20643 {
20644 rtx call = BB_END (e->src);
20645 if (!CALL_P (call))
20646 call = prev_nonnote_nondebug_insn (call);
20647 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20648 if (find_regno_fusage (call, USE, 3)
20649 || is_indirect_tailcall_p (call))
20650 return true;
20651 }
20652 return false;
20653 }
20654
20655
20656 /* Compute the distance from register FROM to register TO.
20657 These can be the arg pointer (26), the soft frame pointer (25),
20658 the stack pointer (13) or the hard frame pointer (11).
20659 In Thumb mode r7 is used as the hard frame pointer, if needed.
20660 Typical stack layout looks like this:
20661
20662 old stack pointer -> | |
20663 ----
20664 | | \
20665 | | saved arguments for
20666 | | vararg functions
20667 | | /
20668 --
20669 hard FP & arg pointer -> | | \
20670 | | stack
20671 | | frame
20672 | | /
20673 --
20674 | | \
20675 | | call saved
20676 | | registers
20677 soft frame pointer -> | | /
20678 --
20679 | | \
20680 | | local
20681 | | variables
20682 locals base pointer -> | | /
20683 --
20684 | | \
20685 | | outgoing
20686 | | arguments
20687 current stack pointer -> | | /
20688 --
20689
20690 For a given function some or all of these stack components
20691 may not be needed, giving rise to the possibility of
20692 eliminating some of the registers.
20693
20694 The values returned by this function must reflect the behavior
20695 of arm_expand_prologue() and arm_compute_save_reg_mask().
20696
20697 The sign of the number returned reflects the direction of stack
20698 growth, so the values are positive for all eliminations except
20699 from the soft frame pointer to the hard frame pointer.
20700
20701 SFP may point just inside the local variables block to ensure correct
20702 alignment. */
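/* A rough sketch, not part of the original sources, of how the fields
   of arm_stack_offsets computed below relate to the picture above.
   Each field is a cumulative byte count of stack allocated below the
   incoming stack pointer, so for a typical frame:
       saved_args <= frame <= saved_regs <= soft_frame
                  <= locals_base <= outgoing_args
   and the elimination offsets returned further down are simply
   differences between these fields.  */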
20703
20704
20705 /* Calculate stack offsets. These are used to calculate register elimination
20706 offsets and in prologue/epilogue code. Also calculates which registers
20707 should be saved. */
20708
20709 static arm_stack_offsets *
20710 arm_get_frame_offsets (void)
20711 {
20712 struct arm_stack_offsets *offsets;
20713 unsigned long func_type;
20714 int leaf;
20715 int saved;
20716 int core_saved;
20717 HOST_WIDE_INT frame_size;
20718 int i;
20719
20720 offsets = &cfun->machine->stack_offsets;
20721
20722 /* We need to know if we are a leaf function. Unfortunately, it
20723 is possible to be called after start_sequence has been called,
20724 which causes get_insns to return the insns for the sequence,
20725 not the function, which will cause leaf_function_p to return
20726 the incorrect result.
20727
20728 To work around this, we cache the computed offsets: we only need
20729 to know about leaf functions once reload has completed, and the frame
20730 size cannot be changed after that time, so we can safely use the cached value. */
20731
20732 if (reload_completed)
20733 return offsets;
20734
20735 /* Initially this is the size of the local variables. It will be translated
20736 into an offset once we have determined the size of the preceding data. */
20737 frame_size = ROUND_UP_WORD (get_frame_size ());
20738
20739 leaf = leaf_function_p ();
20740
20741 /* Space for variadic functions. */
20742 offsets->saved_args = crtl->args.pretend_args_size;
20743
20744 /* In Thumb mode this is incorrect, but never used. */
20745 offsets->frame
20746 = (offsets->saved_args
20747 + arm_compute_static_chain_stack_bytes ()
20748 + (frame_pointer_needed ? 4 : 0));
20749
20750 if (TARGET_32BIT)
20751 {
20752 unsigned int regno;
20753
20754 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20755 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20756 saved = core_saved;
20757
20758 /* We know that SP will be doubleword aligned on entry, and we must
20759 preserve that condition at any subroutine call. We also require the
20760 soft frame pointer to be doubleword aligned. */
20761
20762 if (TARGET_REALLY_IWMMXT)
20763 {
20764 /* Check for the call-saved iWMMXt registers. */
20765 for (regno = FIRST_IWMMXT_REGNUM;
20766 regno <= LAST_IWMMXT_REGNUM;
20767 regno++)
20768 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20769 saved += 8;
20770 }
20771
20772 func_type = arm_current_func_type ();
20773 /* Space for saved VFP registers. */
20774 if (! IS_VOLATILE (func_type)
20775 && TARGET_HARD_FLOAT && TARGET_VFP)
20776 saved += arm_get_vfp_saved_size ();
20777 }
20778 else /* TARGET_THUMB1 */
20779 {
20780 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20781 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20782 saved = core_saved;
20783 if (TARGET_BACKTRACE)
20784 saved += 16;
20785 }
20786
20787 /* Saved registers include the stack frame. */
20788 offsets->saved_regs
20789 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20790 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20791
20792 /* A leaf function does not need any stack alignment if it has nothing
20793 on the stack. */
20794 if (leaf && frame_size == 0
20795 /* However if it calls alloca(), we have a dynamically allocated
20796 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20797 && ! cfun->calls_alloca)
20798 {
20799 offsets->outgoing_args = offsets->soft_frame;
20800 offsets->locals_base = offsets->soft_frame;
20801 return offsets;
20802 }
20803
20804 /* Ensure SFP has the correct alignment. */
20805 if (ARM_DOUBLEWORD_ALIGN
20806 && (offsets->soft_frame & 7))
20807 {
20808 offsets->soft_frame += 4;
20809 /* Try to align the stack by pushing an extra reg. Don't bother doing this
20810 when there is a stack frame, as the alignment will be rolled into
20811 the normal stack adjustment. */
20812 if (frame_size + crtl->outgoing_args_size == 0)
20813 {
20814 int reg = -1;
20815
20816 /* Register r3 is caller-saved. Normally it does not need to be
20817 saved on entry by the prologue. However if we choose to save
20818 it for padding then we may confuse the compiler into thinking
20819 a prologue sequence is required when in fact it is not. This
20820 will occur when shrink-wrapping if r3 is used as a scratch
20821 register and there are no other callee-saved writes.
20822
20823 This situation can be avoided when other callee-saved registers
20824 are available and r3 is not mandatory if we choose a callee-saved
20825 register for padding. */
20826 bool prefer_callee_reg_p = false;
20827
20828 /* If it is safe to use r3, then do so. This sometimes
20829 generates better code on Thumb-2 by avoiding the need to
20830 use 32-bit push/pop instructions. */
20831 if (! any_sibcall_could_use_r3 ()
20832 && arm_size_return_regs () <= 12
20833 && (offsets->saved_regs_mask & (1 << 3)) == 0
20834 && (TARGET_THUMB2
20835 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20836 {
20837 reg = 3;
20838 if (!TARGET_THUMB2)
20839 prefer_callee_reg_p = true;
20840 }
20841 if (reg == -1
20842 || prefer_callee_reg_p)
20843 {
20844 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20845 {
20846 /* Avoid fixed registers; they may be changed at
20847 arbitrary times so it's unsafe to restore them
20848 during the epilogue. */
20849 if (!fixed_regs[i]
20850 && (offsets->saved_regs_mask & (1 << i)) == 0)
20851 {
20852 reg = i;
20853 break;
20854 }
20855 }
20856 }
20857
20858 if (reg != -1)
20859 {
20860 offsets->saved_regs += 4;
20861 offsets->saved_regs_mask |= (1 << reg);
20862 }
20863 }
20864 }
20865
20866 offsets->locals_base = offsets->soft_frame + frame_size;
20867 offsets->outgoing_args = (offsets->locals_base
20868 + crtl->outgoing_args_size);
20869
20870 if (ARM_DOUBLEWORD_ALIGN)
20871 {
20872 /* Ensure SP remains doubleword aligned. */
20873 if (offsets->outgoing_args & 7)
20874 offsets->outgoing_args += 4;
20875 gcc_assert (!(offsets->outgoing_args & 7));
20876 }
20877
20878 return offsets;
20879 }
20880
20881
20882 /* Calculate the relative offsets for the different stack pointers. Positive
20883 offsets are in the direction of stack growth. */
20884
20885 HOST_WIDE_INT
20886 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20887 {
20888 arm_stack_offsets *offsets;
20889
20890 offsets = arm_get_frame_offsets ();
20891
20892 /* OK, now we have enough information to compute the distances.
20893 There must be an entry in these switch tables for each pair
20894 of registers in ELIMINABLE_REGS, even if some of the entries
20895 seem to be redundant or useless. */
20896 switch (from)
20897 {
20898 case ARG_POINTER_REGNUM:
20899 switch (to)
20900 {
20901 case THUMB_HARD_FRAME_POINTER_REGNUM:
20902 return 0;
20903
20904 case FRAME_POINTER_REGNUM:
20905 /* This is the reverse of the soft frame pointer
20906 to hard frame pointer elimination below. */
20907 return offsets->soft_frame - offsets->saved_args;
20908
20909 case ARM_HARD_FRAME_POINTER_REGNUM:
20910 /* This is only non-zero in the case where the static chain register
20911 is stored above the frame. */
20912 return offsets->frame - offsets->saved_args - 4;
20913
20914 case STACK_POINTER_REGNUM:
20915 /* If nothing has been pushed on the stack at all
20916 then this will return -4. This *is* correct! */
20917 return offsets->outgoing_args - (offsets->saved_args + 4);
20918
20919 default:
20920 gcc_unreachable ();
20921 }
20922 gcc_unreachable ();
20923
20924 case FRAME_POINTER_REGNUM:
20925 switch (to)
20926 {
20927 case THUMB_HARD_FRAME_POINTER_REGNUM:
20928 return 0;
20929
20930 case ARM_HARD_FRAME_POINTER_REGNUM:
20931 /* The hard frame pointer points to the top entry in the
20932 stack frame. The soft frame pointer points to the bottom entry
20933 in the stack frame. If there is no stack frame at all,
20934 then they are identical. */
20935
20936 return offsets->frame - offsets->soft_frame;
20937
20938 case STACK_POINTER_REGNUM:
20939 return offsets->outgoing_args - offsets->soft_frame;
20940
20941 default:
20942 gcc_unreachable ();
20943 }
20944 gcc_unreachable ();
20945
20946 default:
20947 /* You cannot eliminate from the stack pointer.
20948 In theory you could eliminate from the hard frame
20949 pointer to the stack pointer, but this will never
20950 happen, since if a stack frame is not needed the
20951 hard frame pointer will never be used. */
20952 gcc_unreachable ();
20953 }
20954 }
20955
20956 /* Given FROM and TO register numbers, say whether this elimination is
20957 allowed. Frame pointer elimination is automatically handled.
20958
20959 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20960 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20961 pointer, we must eliminate FRAME_POINTER_REGNUM into
20962 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20963 ARG_POINTER_REGNUM. */
20964
20965 bool
20966 arm_can_eliminate (const int from, const int to)
20967 {
20968 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20969 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20970 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20971 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20972 true);
20973 }
20974
20975 /* Emit RTL to save coprocessor registers on function entry. Returns the
20976 number of bytes pushed. */
20977
20978 static int
20979 arm_save_coproc_regs(void)
20980 {
20981 int saved_size = 0;
20982 unsigned reg;
20983 unsigned start_reg;
20984 rtx insn;
20985
20986 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20987 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20988 {
20989 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20990 insn = gen_rtx_MEM (V2SImode, insn);
20991 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20992 RTX_FRAME_RELATED_P (insn) = 1;
20993 saved_size += 8;
20994 }
20995
20996 if (TARGET_HARD_FLOAT && TARGET_VFP)
20997 {
20998 start_reg = FIRST_VFP_REGNUM;
20999
21000 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21001 {
21002 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21003 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21004 {
21005 if (start_reg != reg)
21006 saved_size += vfp_emit_fstmd (start_reg,
21007 (reg - start_reg) / 2);
21008 start_reg = reg + 2;
21009 }
21010 }
21011 if (start_reg != reg)
21012 saved_size += vfp_emit_fstmd (start_reg,
21013 (reg - start_reg) / 2);
21014 }
21015 return saved_size;
21016 }
21017
21018
21019 /* Set the Thumb frame pointer from the stack pointer. */
21020
21021 static void
21022 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21023 {
21024 HOST_WIDE_INT amount;
21025 rtx insn, dwarf;
21026
21027 amount = offsets->outgoing_args - offsets->locals_base;
21028 if (amount < 1024)
21029 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21030 stack_pointer_rtx, GEN_INT (amount)));
21031 else
21032 {
21033 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21034 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21035 expects the first two operands to be the same. */
21036 if (TARGET_THUMB2)
21037 {
21038 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21039 stack_pointer_rtx,
21040 hard_frame_pointer_rtx));
21041 }
21042 else
21043 {
21044 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21045 hard_frame_pointer_rtx,
21046 stack_pointer_rtx));
21047 }
21048 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21049 plus_constant (Pmode, stack_pointer_rtx, amount));
21050 RTX_FRAME_RELATED_P (dwarf) = 1;
21051 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21052 }
21053
21054 RTX_FRAME_RELATED_P (insn) = 1;
21055 }
21056
21057 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21058 function. */
21059 void
21060 arm_expand_prologue (void)
21061 {
21062 rtx amount;
21063 rtx insn;
21064 rtx ip_rtx;
21065 unsigned long live_regs_mask;
21066 unsigned long func_type;
21067 int fp_offset = 0;
21068 int saved_pretend_args = 0;
21069 int saved_regs = 0;
21070 unsigned HOST_WIDE_INT args_to_push;
21071 arm_stack_offsets *offsets;
21072
21073 func_type = arm_current_func_type ();
21074
21075 /* Naked functions don't have prologues. */
21076 if (IS_NAKED (func_type))
21077 return;
21078
21079 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21080 args_to_push = crtl->args.pretend_args_size;
21081
21082 /* Compute which registers we will have to save onto the stack. */
21083 offsets = arm_get_frame_offsets ();
21084 live_regs_mask = offsets->saved_regs_mask;
21085
21086 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21087
21088 if (IS_STACKALIGN (func_type))
21089 {
21090 rtx r0, r1;
21091
21092 /* Handle a word-aligned stack pointer. We generate the following:
21093
21094 mov r0, sp
21095 bic r1, r0, #7
21096 mov sp, r1
21097 <save and restore r0 in normal prologue/epilogue>
21098 mov sp, r0
21099 bx lr
21100
21101 The unwinder doesn't need to know about the stack realignment.
21102 Just tell it we saved SP in r0. */
21103 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21104
21105 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21106 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21107
21108 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21109 RTX_FRAME_RELATED_P (insn) = 1;
21110 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21111
21112 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21113
21114 /* ??? The CFA changes here, which may cause GDB to conclude that it
21115 has entered a different function. That said, the unwind info is
21116 correct, individually, before and after this instruction because
21117 we've described the save of SP, which will override the default
21118 handling of SP as restoring from the CFA. */
21119 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21120 }
21121
21122 /* For APCS frames, if the IP register is clobbered when
21123 creating the frame, save that register in a special
21124 way. */
21125 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21126 {
21127 if (IS_INTERRUPT (func_type))
21128 {
21129 /* Interrupt functions must not corrupt any registers.
21130 Creating a frame pointer however, corrupts the IP
21131 register, so we must push it first. */
21132 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21133
21134 /* Do not set RTX_FRAME_RELATED_P on this insn.
21135 The dwarf stack unwinding code only wants to see one
21136 stack decrement per function, and this is not it. If
21137 this instruction is labeled as being part of the frame
21138 creation sequence then dwarf2out_frame_debug_expr will
21139 die when it encounters the assignment of IP to FP
21140 later on, since the use of SP here establishes SP as
21141 the CFA register and not IP.
21142
21143 Anyway this instruction is not really part of the stack
21144 frame creation although it is part of the prologue. */
21145 }
21146 else if (IS_NESTED (func_type))
21147 {
21148 /* The static chain register is the same as the IP register
21149 used as a scratch register during stack frame creation.
21150 To get around this, we need to find somewhere to store IP
21151 whilst the frame is being created. We try the following
21152 places in order:
21153
21154 1. The last argument register r3 if it is available.
21155 2. A slot on the stack above the frame if there are no
21156 arguments to push onto the stack.
21157 3. Register r3 again, after pushing the argument registers
21158 onto the stack, if this is a varargs function.
21159 4. The last slot on the stack created for the arguments to
21160 push, if this isn't a varargs function.
21161
21162 Note - we only need to tell the dwarf2 backend about the SP
21163 adjustment in the second variant; the static chain register
21164 doesn't need to be unwound, as it doesn't contain a value
21165 inherited from the caller. */
21166
21167 if (!arm_r3_live_at_start_p ())
21168 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21169 else if (args_to_push == 0)
21170 {
21171 rtx addr, dwarf;
21172
21173 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21174 saved_regs += 4;
21175
21176 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21177 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21178 fp_offset = 4;
21179
21180 /* Just tell the dwarf backend that we adjusted SP. */
21181 dwarf = gen_rtx_SET (stack_pointer_rtx,
21182 plus_constant (Pmode, stack_pointer_rtx,
21183 -fp_offset));
21184 RTX_FRAME_RELATED_P (insn) = 1;
21185 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21186 }
21187 else
21188 {
21189 /* Store the args on the stack. */
21190 if (cfun->machine->uses_anonymous_args)
21191 {
21192 insn
21193 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21194 (0xf0 >> (args_to_push / 4)) & 0xf);
21195 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21196 saved_pretend_args = 1;
21197 }
21198 else
21199 {
21200 rtx addr, dwarf;
21201
21202 if (args_to_push == 4)
21203 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21204 else
21205 addr
21206 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21207 plus_constant (Pmode,
21208 stack_pointer_rtx,
21209 -args_to_push));
21210
21211 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21212
21213 /* Just tell the dwarf backend that we adjusted SP. */
21214 dwarf
21215 = gen_rtx_SET (stack_pointer_rtx,
21216 plus_constant (Pmode, stack_pointer_rtx,
21217 -args_to_push));
21218 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21219 }
21220
21221 RTX_FRAME_RELATED_P (insn) = 1;
21222 fp_offset = args_to_push;
21223 args_to_push = 0;
21224 }
21225 }
21226
21227 insn = emit_set_insn (ip_rtx,
21228 plus_constant (Pmode, stack_pointer_rtx,
21229 fp_offset));
21230 RTX_FRAME_RELATED_P (insn) = 1;
21231 }
21232
21233 if (args_to_push)
21234 {
21235 /* Push the argument registers, or reserve space for them. */
21236 if (cfun->machine->uses_anonymous_args)
21237 insn = emit_multi_reg_push
21238 ((0xf0 >> (args_to_push / 4)) & 0xf,
21239 (0xf0 >> (args_to_push / 4)) & 0xf);
21240 else
21241 insn = emit_insn
21242 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21243 GEN_INT (- args_to_push)));
21244 RTX_FRAME_RELATED_P (insn) = 1;
21245 }
21246
21247 /* If this is an interrupt service routine, and the link register
21248 is going to be pushed, and we're not generating an extra
21249 push of IP (needed when a frame is needed and the frame layout is APCS),
21250 then subtracting four from LR now will mean that the function return
21251 can be done with a single instruction. */
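/* (Illustrative note, not part of the original sources: the exception
   return address for IRQ/FIQ is LR - 4, so pre-adjusting LR here lets
   the epilogue restore the saved value straight into PC, for example
   as part of a single "ldmfd sp!, {..., pc}^", rather than needing a
   separate "subs pc, lr, #4".)  */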
21252 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21253 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21254 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21255 && TARGET_ARM)
21256 {
21257 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21258
21259 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21260 }
21261
21262 if (live_regs_mask)
21263 {
21264 unsigned long dwarf_regs_mask = live_regs_mask;
21265
21266 saved_regs += bit_count (live_regs_mask) * 4;
21267 if (optimize_size && !frame_pointer_needed
21268 && saved_regs == offsets->saved_regs - offsets->saved_args)
21269 {
21270 /* If no coprocessor registers are being pushed and we don't have
21271 to worry about a frame pointer then push extra registers to
21272 create the stack frame. This is done in a way that does not
21273 alter the frame layout, so is independent of the epilogue. */
21274 int n;
21275 int frame;
21276 n = 0;
21277 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21278 n++;
21279 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21280 if (frame && n * 4 >= frame)
21281 {
21282 n = frame / 4;
21283 live_regs_mask |= (1 << n) - 1;
21284 saved_regs += frame;
21285 }
21286 }
21287
21288 if (TARGET_LDRD
21289 && current_tune->prefer_ldrd_strd
21290 && !optimize_function_for_size_p (cfun))
21291 {
21292 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21293 if (TARGET_THUMB2)
21294 thumb2_emit_strd_push (live_regs_mask);
21295 else if (TARGET_ARM
21296 && !TARGET_APCS_FRAME
21297 && !IS_INTERRUPT (func_type))
21298 arm_emit_strd_push (live_regs_mask);
21299 else
21300 {
21301 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21302 RTX_FRAME_RELATED_P (insn) = 1;
21303 }
21304 }
21305 else
21306 {
21307 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21308 RTX_FRAME_RELATED_P (insn) = 1;
21309 }
21310 }
21311
21312 if (! IS_VOLATILE (func_type))
21313 saved_regs += arm_save_coproc_regs ();
21314
21315 if (frame_pointer_needed && TARGET_ARM)
21316 {
21317 /* Create the new frame pointer. */
21318 if (TARGET_APCS_FRAME)
21319 {
21320 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21321 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21322 RTX_FRAME_RELATED_P (insn) = 1;
21323
21324 if (IS_NESTED (func_type))
21325 {
21326 /* Recover the static chain register. */
21327 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21328 insn = gen_rtx_REG (SImode, 3);
21329 else
21330 {
21331 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21332 insn = gen_frame_mem (SImode, insn);
21333 }
21334 emit_set_insn (ip_rtx, insn);
21335 /* Add a USE to stop propagate_one_insn() from barfing. */
21336 emit_insn (gen_force_register_use (ip_rtx));
21337 }
21338 }
21339 else
21340 {
21341 insn = GEN_INT (saved_regs - 4);
21342 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21343 stack_pointer_rtx, insn));
21344 RTX_FRAME_RELATED_P (insn) = 1;
21345 }
21346 }
21347
21348 if (flag_stack_usage_info)
21349 current_function_static_stack_size
21350 = offsets->outgoing_args - offsets->saved_args;
21351
21352 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21353 {
21354 /* This add can produce multiple insns for a large constant, so we
21355 need to get tricky. */
21356 rtx_insn *last = get_last_insn ();
21357
21358 amount = GEN_INT (offsets->saved_args + saved_regs
21359 - offsets->outgoing_args);
21360
21361 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21362 amount));
21363 do
21364 {
21365 last = last ? NEXT_INSN (last) : get_insns ();
21366 RTX_FRAME_RELATED_P (last) = 1;
21367 }
21368 while (last != insn);
21369
21370 /* If the frame pointer is needed, emit a special barrier that
21371 will prevent the scheduler from moving stores to the frame
21372 before the stack adjustment. */
21373 if (frame_pointer_needed)
21374 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21375 hard_frame_pointer_rtx));
21376 }
21377
21378
21379 if (frame_pointer_needed && TARGET_THUMB2)
21380 thumb_set_frame_pointer (offsets);
21381
21382 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21383 {
21384 unsigned long mask;
21385
21386 mask = live_regs_mask;
21387 mask &= THUMB2_WORK_REGS;
21388 if (!IS_NESTED (func_type))
21389 mask |= (1 << IP_REGNUM);
21390 arm_load_pic_register (mask);
21391 }
21392
21393 /* If we are profiling, make sure no instructions are scheduled before
21394 the call to mcount. Similarly if the user has requested no
21395 scheduling in the prolog. Similarly if we want non-call exceptions
21396 using the EABI unwinder, to prevent faulting instructions from being
21397 swapped with a stack adjustment. */
21398 if (crtl->profile || !TARGET_SCHED_PROLOG
21399 || (arm_except_unwind_info (&global_options) == UI_TARGET
21400 && cfun->can_throw_non_call_exceptions))
21401 emit_insn (gen_blockage ());
21402
21403 /* If the link register is being kept alive, with the return address in it,
21404 then make sure that it does not get reused by the ce2 pass. */
21405 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21406 cfun->machine->lr_save_eliminated = 1;
21407 }
21408 \f
21409 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21410 static void
21411 arm_print_condition (FILE *stream)
21412 {
21413 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21414 {
21415 /* Branch conversion is not implemented for Thumb-2. */
21416 if (TARGET_THUMB)
21417 {
21418 output_operand_lossage ("predicated Thumb instruction");
21419 return;
21420 }
21421 if (current_insn_predicate != NULL)
21422 {
21423 output_operand_lossage
21424 ("predicated instruction in conditional sequence");
21425 return;
21426 }
21427
21428 fputs (arm_condition_codes[arm_current_cc], stream);
21429 }
21430 else if (current_insn_predicate)
21431 {
21432 enum arm_cond_code code;
21433
21434 if (TARGET_THUMB1)
21435 {
21436 output_operand_lossage ("predicated Thumb instruction");
21437 return;
21438 }
21439
21440 code = get_arm_condition_code (current_insn_predicate);
21441 fputs (arm_condition_codes[code], stream);
21442 }
21443 }
21444
21445
21446 /* Globally reserved letters: acln
21447 Punctuation letters currently used: @_|?().!#
21448 Lower case letters currently used: bcdefhimpqtvwxyz
21449 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21450 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21451
21452 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21453
21454 If CODE is 'd', then X is a condition operand and the instruction
21455 should only be executed if the condition is true.
21456 If CODE is 'D', then X is a condition operand and the instruction
21457 should only be executed if the condition is false: however, if the mode
21458 of the comparison is CCFPEmode, then always execute the instruction -- we
21459 do this because in these circumstances !GE does not necessarily imply LT;
21460 in these cases the instruction pattern will take care to make sure that
21461 an instruction containing %d will follow, thereby undoing the effects of
21462 doing this instruction unconditionally.
21463 If CODE is 'N' then X is a floating point operand that must be negated
21464 before output.
21465 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21466 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
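/* Hypothetical examples of the codes above, not part of the original
   sources: with operand 0 a CONST_INT of 0xf0, "%B0" in an output
   template prints the bitwise inverse -241; with operand 1 a DImode
   value held in r4, "%M1" prints "{r4-r5}".  */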
21467 static void
21468 arm_print_operand (FILE *stream, rtx x, int code)
21469 {
21470 switch (code)
21471 {
21472 case '@':
21473 fputs (ASM_COMMENT_START, stream);
21474 return;
21475
21476 case '_':
21477 fputs (user_label_prefix, stream);
21478 return;
21479
21480 case '|':
21481 fputs (REGISTER_PREFIX, stream);
21482 return;
21483
21484 case '?':
21485 arm_print_condition (stream);
21486 return;
21487
21488 case '(':
21489 /* Nothing in unified syntax, otherwise the current condition code. */
21490 if (!TARGET_UNIFIED_ASM)
21491 arm_print_condition (stream);
21492 break;
21493
21494 case ')':
21495 /* The current condition code in unified syntax, otherwise nothing. */
21496 if (TARGET_UNIFIED_ASM)
21497 arm_print_condition (stream);
21498 break;
21499
21500 case '.':
21501 /* The current condition code for a condition code setting instruction.
21502 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21503 if (TARGET_UNIFIED_ASM)
21504 {
21505 fputc('s', stream);
21506 arm_print_condition (stream);
21507 }
21508 else
21509 {
21510 arm_print_condition (stream);
21511 fputc('s', stream);
21512 }
21513 return;
21514
21515 case '!':
21516 /* If the instruction is conditionally executed then print
21517 the current condition code, otherwise print 's'. */
21518 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21519 if (current_insn_predicate)
21520 arm_print_condition (stream);
21521 else
21522 fputc('s', stream);
21523 break;
21524
21525 /* %# is a "break" sequence. It doesn't output anything, but is used to
21526 separate e.g. operand numbers from following text, if that text consists
21527 of further digits which we don't want to be part of the operand
21528 number. */
21529 case '#':
21530 return;
21531
21532 case 'N':
21533 {
21534 REAL_VALUE_TYPE r;
21535 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21536 r = real_value_negate (&r);
21537 fprintf (stream, "%s", fp_const_from_val (&r));
21538 }
21539 return;
21540
21541 /* An integer or symbol address without a preceding # sign. */
21542 case 'c':
21543 switch (GET_CODE (x))
21544 {
21545 case CONST_INT:
21546 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21547 break;
21548
21549 case SYMBOL_REF:
21550 output_addr_const (stream, x);
21551 break;
21552
21553 case CONST:
21554 if (GET_CODE (XEXP (x, 0)) == PLUS
21555 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21556 {
21557 output_addr_const (stream, x);
21558 break;
21559 }
21560 /* Fall through. */
21561
21562 default:
21563 output_operand_lossage ("Unsupported operand for code '%c'", code);
21564 }
21565 return;
21566
21567 /* An integer that we want to print in HEX. */
21568 case 'x':
21569 switch (GET_CODE (x))
21570 {
21571 case CONST_INT:
21572 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21573 break;
21574
21575 default:
21576 output_operand_lossage ("Unsupported operand for code '%c'", code);
21577 }
21578 return;
21579
21580 case 'B':
21581 if (CONST_INT_P (x))
21582 {
21583 HOST_WIDE_INT val;
21584 val = ARM_SIGN_EXTEND (~INTVAL (x));
21585 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21586 }
21587 else
21588 {
21589 putc ('~', stream);
21590 output_addr_const (stream, x);
21591 }
21592 return;
21593
21594 case 'b':
21595 /* Print the log2 of a CONST_INT. */
21596 {
21597 HOST_WIDE_INT val;
21598
21599 if (!CONST_INT_P (x)
21600 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21601 output_operand_lossage ("Unsupported operand for code '%c'", code);
21602 else
21603 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21604 }
21605 return;
21606
21607 case 'L':
21608 /* The low 16 bits of an immediate constant. */
21609 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21610 return;
21611
21612 case 'i':
21613 fprintf (stream, "%s", arithmetic_instr (x, 1));
21614 return;
21615
21616 case 'I':
21617 fprintf (stream, "%s", arithmetic_instr (x, 0));
21618 return;
21619
21620 case 'S':
21621 {
21622 HOST_WIDE_INT val;
21623 const char *shift;
21624
21625 shift = shift_op (x, &val);
21626
21627 if (shift)
21628 {
21629 fprintf (stream, ", %s ", shift);
21630 if (val == -1)
21631 arm_print_operand (stream, XEXP (x, 1), 0);
21632 else
21633 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21634 }
21635 }
21636 return;
21637
21638 /* An explanation of the 'Q', 'R' and 'H' register operands:
21639
21640 In a pair of registers containing a DI or DF value the 'Q'
21641 operand returns the register number of the register containing
21642 the least significant part of the value. The 'R' operand returns
21643 the register number of the register containing the most
21644 significant part of the value.
21645
21646 The 'H' operand returns the higher of the two register numbers.
21647 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21648 same as the 'Q' operand, since the most significant part of the
21649 value is held in the lower number register. The reverse is true
21650 on systems where WORDS_BIG_ENDIAN is false.
21651
21652 The purpose of these operands is to distinguish between cases
21653 where the endian-ness of the values is important (for example
21654 when they are added together), and cases where the endian-ness
21655 is irrelevant, but the order of register operations is important.
21656 For example when loading a value from memory into a register
21657 pair, the endian-ness does not matter. Provided that the value
21658 from the lower memory address is put into the lower numbered
21659 register, and the value from the higher address is put into the
21660 higher numbered register, the load will work regardless of whether
21661 the value being loaded is big-wordian or little-wordian. The
21662 order of the two register loads can matter however, if the address
21663 of the memory location is actually held in one of the registers
21664 being overwritten by the load.
21665
21666 The 'Q' and 'R' constraints are also available for 64-bit
21667 constants. */
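/* For instance (illustrative, not part of the original sources): with
   a DImode value held in r2/r3 on a little-endian target, '%Q' prints
   r2 (the least significant half), '%R' prints r3 (the most
   significant half) and '%H' also prints r3 (the higher register
   number); on a WORDS_BIG_ENDIAN target '%Q' and '%R' swap, while
   '%H' still prints r3.  */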
21668 case 'Q':
21669 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21670 {
21671 rtx part = gen_lowpart (SImode, x);
21672 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21673 return;
21674 }
21675
21676 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21677 {
21678 output_operand_lossage ("invalid operand for code '%c'", code);
21679 return;
21680 }
21681
21682 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21683 return;
21684
21685 case 'R':
21686 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21687 {
21688 machine_mode mode = GET_MODE (x);
21689 rtx part;
21690
21691 if (mode == VOIDmode)
21692 mode = DImode;
21693 part = gen_highpart_mode (SImode, mode, x);
21694 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21695 return;
21696 }
21697
21698 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21699 {
21700 output_operand_lossage ("invalid operand for code '%c'", code);
21701 return;
21702 }
21703
21704 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21705 return;
21706
21707 case 'H':
21708 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21709 {
21710 output_operand_lossage ("invalid operand for code '%c'", code);
21711 return;
21712 }
21713
21714 asm_fprintf (stream, "%r", REGNO (x) + 1);
21715 return;
21716
21717 case 'J':
21718 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21719 {
21720 output_operand_lossage ("invalid operand for code '%c'", code);
21721 return;
21722 }
21723
21724 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21725 return;
21726
21727 case 'K':
21728 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21729 {
21730 output_operand_lossage ("invalid operand for code '%c'", code);
21731 return;
21732 }
21733
21734 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21735 return;
21736
21737 case 'm':
21738 asm_fprintf (stream, "%r",
21739 REG_P (XEXP (x, 0))
21740 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21741 return;
21742
21743 case 'M':
21744 asm_fprintf (stream, "{%r-%r}",
21745 REGNO (x),
21746 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21747 return;
21748
21749 /* Like 'M', but writing doubleword vector registers, for use by Neon
21750 insns. */
21751 case 'h':
21752 {
21753 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21754 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21755 if (numregs == 1)
21756 asm_fprintf (stream, "{d%d}", regno);
21757 else
21758 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21759 }
21760 return;
21761
21762 case 'd':
21763 /* CONST_TRUE_RTX means always -- that's the default. */
21764 if (x == const_true_rtx)
21765 return;
21766
21767 if (!COMPARISON_P (x))
21768 {
21769 output_operand_lossage ("invalid operand for code '%c'", code);
21770 return;
21771 }
21772
21773 fputs (arm_condition_codes[get_arm_condition_code (x)],
21774 stream);
21775 return;
21776
21777 case 'D':
21778 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21779 want to do that. */
21780 if (x == const_true_rtx)
21781 {
21782 output_operand_lossage ("instruction never executed");
21783 return;
21784 }
21785 if (!COMPARISON_P (x))
21786 {
21787 output_operand_lossage ("invalid operand for code '%c'", code);
21788 return;
21789 }
21790
21791 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21792 (get_arm_condition_code (x))],
21793 stream);
21794 return;
21795
21796 case 's':
21797 case 'V':
21798 case 'W':
21799 case 'X':
21800 case 'Y':
21801 case 'Z':
21802 /* Former Maverick support, removed after GCC-4.7. */
21803 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21804 return;
21805
21806 case 'U':
21807 if (!REG_P (x)
21808 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21809 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21810 /* Bad value for wCG register number. */
21811 {
21812 output_operand_lossage ("invalid operand for code '%c'", code);
21813 return;
21814 }
21815
21816 else
21817 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21818 return;
21819
21820 /* Print an iWMMXt control register name. */
21821 case 'w':
21822 if (!CONST_INT_P (x)
21823 || INTVAL (x) < 0
21824 || INTVAL (x) >= 16)
21825 /* Bad value for wC register number. */
21826 {
21827 output_operand_lossage ("invalid operand for code '%c'", code);
21828 return;
21829 }
21830
21831 else
21832 {
21833 static const char * wc_reg_names [16] =
21834 {
21835 "wCID", "wCon", "wCSSF", "wCASF",
21836 "wC4", "wC5", "wC6", "wC7",
21837 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21838 "wC12", "wC13", "wC14", "wC15"
21839 };
21840
21841 fputs (wc_reg_names [INTVAL (x)], stream);
21842 }
21843 return;
21844
21845 /* Print the high single-precision register of a VFP double-precision
21846 register. */
21847 case 'p':
21848 {
21849 machine_mode mode = GET_MODE (x);
21850 int regno;
21851
21852 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21853 {
21854 output_operand_lossage ("invalid operand for code '%c'", code);
21855 return;
21856 }
21857
21858 regno = REGNO (x);
21859 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21860 {
21861 output_operand_lossage ("invalid operand for code '%c'", code);
21862 return;
21863 }
21864
21865 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21866 }
21867 return;
21868
21869 /* Print a VFP/Neon double precision or quad precision register name. */
21870 case 'P':
21871 case 'q':
21872 {
21873 machine_mode mode = GET_MODE (x);
21874 int is_quad = (code == 'q');
21875 int regno;
21876
21877 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21878 {
21879 output_operand_lossage ("invalid operand for code '%c'", code);
21880 return;
21881 }
21882
21883 if (!REG_P (x)
21884 || !IS_VFP_REGNUM (REGNO (x)))
21885 {
21886 output_operand_lossage ("invalid operand for code '%c'", code);
21887 return;
21888 }
21889
21890 regno = REGNO (x);
21891 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21892 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21893 {
21894 output_operand_lossage ("invalid operand for code '%c'", code);
21895 return;
21896 }
21897
21898 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21899 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21900 }
21901 return;
21902
21903 /* These two codes print the low/high doubleword register of a Neon quad
21904 register, respectively. For pair-structure types, can also print
21905 low/high quadword registers. */
21906 case 'e':
21907 case 'f':
21908 {
21909 machine_mode mode = GET_MODE (x);
21910 int regno;
21911
21912 if ((GET_MODE_SIZE (mode) != 16
21913 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21914 {
21915 output_operand_lossage ("invalid operand for code '%c'", code);
21916 return;
21917 }
21918
21919 regno = REGNO (x);
21920 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21921 {
21922 output_operand_lossage ("invalid operand for code '%c'", code);
21923 return;
21924 }
21925
21926 if (GET_MODE_SIZE (mode) == 16)
21927 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21928 + (code == 'f' ? 1 : 0));
21929 else
21930 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21931 + (code == 'f' ? 1 : 0));
21932 }
21933 return;
21934
21935 /* Print a VFPv3 floating-point constant, represented as an integer
21936 index. */
21937 case 'G':
21938 {
21939 int index = vfp3_const_double_index (x);
21940 gcc_assert (index != -1);
21941 fprintf (stream, "%d", index);
21942 }
21943 return;
21944
21945 /* Print bits representing opcode features for Neon.
21946
21947 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21948 and polynomials as unsigned.
21949
21950 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21951
21952 Bit 2 is 1 for rounding functions, 0 otherwise. */
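/* For example (illustrative, not part of the original sources): a
   bits value of 1 (signed integer) makes 'T' print 's', 'F' print 'i'
   and 't' print 's'; a value of 3 (float) prints 'f' for all three;
   and if bit 2 is also set, 'O' prints 'r'.  */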
21953
21954 /* Identify the type as 's', 'u', 'p' or 'f'. */
21955 case 'T':
21956 {
21957 HOST_WIDE_INT bits = INTVAL (x);
21958 fputc ("uspf"[bits & 3], stream);
21959 }
21960 return;
21961
21962 /* Likewise, but signed and unsigned integers are both 'i'. */
21963 case 'F':
21964 {
21965 HOST_WIDE_INT bits = INTVAL (x);
21966 fputc ("iipf"[bits & 3], stream);
21967 }
21968 return;
21969
21970 /* As for 'T', but emit 'u' instead of 'p'. */
21971 case 't':
21972 {
21973 HOST_WIDE_INT bits = INTVAL (x);
21974 fputc ("usuf"[bits & 3], stream);
21975 }
21976 return;
21977
21978 /* Bit 2: rounding (vs none). */
21979 case 'O':
21980 {
21981 HOST_WIDE_INT bits = INTVAL (x);
21982 fputs ((bits & 4) != 0 ? "r" : "", stream);
21983 }
21984 return;
21985
21986 /* Memory operand for vld1/vst1 instruction. */
21987 case 'A':
21988 {
21989 rtx addr;
21990 bool postinc = FALSE;
21991 rtx postinc_reg = NULL;
21992 unsigned align, memsize, align_bits;
21993
21994 gcc_assert (MEM_P (x));
21995 addr = XEXP (x, 0);
21996 if (GET_CODE (addr) == POST_INC)
21997 {
21998 postinc = 1;
21999 addr = XEXP (addr, 0);
22000 }
22001 if (GET_CODE (addr) == POST_MODIFY)
22002 {
22003 postinc_reg = XEXP (XEXP (addr, 1), 1);
22004 addr = XEXP (addr, 0);
22005 }
22006 asm_fprintf (stream, "[%r", REGNO (addr));
22007
22008 /* We know the alignment of this access, so we can emit a hint in the
22009 instruction (for some alignments) as an aid to the memory subsystem
22010 of the target. */
22011 align = MEM_ALIGN (x) >> 3;
22012 memsize = MEM_SIZE (x);
22013
22014 /* Only certain alignment specifiers are supported by the hardware. */
22015 if (memsize == 32 && (align % 32) == 0)
22016 align_bits = 256;
22017 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22018 align_bits = 128;
22019 else if (memsize >= 8 && (align % 8) == 0)
22020 align_bits = 64;
22021 else
22022 align_bits = 0;
22023
22024 if (align_bits != 0)
22025 asm_fprintf (stream, ":%d", align_bits);
22026
22027 asm_fprintf (stream, "]");
22028
22029 if (postinc)
22030 fputs("!", stream);
22031 if (postinc_reg)
22032 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22033 }
22034 return;
22035
22036 case 'C':
22037 {
22038 rtx addr;
22039
22040 gcc_assert (MEM_P (x));
22041 addr = XEXP (x, 0);
22042 gcc_assert (REG_P (addr));
22043 asm_fprintf (stream, "[%r]", REGNO (addr));
22044 }
22045 return;
22046
22047 /* Translate an S register number into a D register number and element index. */
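   /* For example, s3 prints as "d1[1]", since each D register overlaps
      two consecutive S registers.  */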
22048 case 'y':
22049 {
22050 machine_mode mode = GET_MODE (x);
22051 int regno;
22052
22053 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22054 {
22055 output_operand_lossage ("invalid operand for code '%c'", code);
22056 return;
22057 }
22058
22059 regno = REGNO (x);
22060 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22061 {
22062 output_operand_lossage ("invalid operand for code '%c'", code);
22063 return;
22064 }
22065
22066 regno = regno - FIRST_VFP_REGNUM;
22067 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22068 }
22069 return;
22070
22071 case 'v':
22072 gcc_assert (CONST_DOUBLE_P (x));
22073 int result;
22074 result = vfp3_const_double_for_fract_bits (x);
22075 if (result == 0)
22076 result = vfp3_const_double_for_bits (x);
22077 fprintf (stream, "#%d", result);
22078 return;
22079
22080 /* Register specifier for vld1.16/vst1.16. Translate the S register
22081 number into a D register number and element index. */
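   /* For example, s3 prints as "d1[2]": the odd S register holds the
      upper half of d1, whose first 16-bit lane is lane 2.  */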
22082 case 'z':
22083 {
22084 machine_mode mode = GET_MODE (x);
22085 int regno;
22086
22087 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22088 {
22089 output_operand_lossage ("invalid operand for code '%c'", code);
22090 return;
22091 }
22092
22093 regno = REGNO (x);
22094 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22095 {
22096 output_operand_lossage ("invalid operand for code '%c'", code);
22097 return;
22098 }
22099
22100 regno = regno - FIRST_VFP_REGNUM;
22101 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22102 }
22103 return;
22104
22105 default:
22106 if (x == 0)
22107 {
22108 output_operand_lossage ("missing operand");
22109 return;
22110 }
22111
22112 switch (GET_CODE (x))
22113 {
22114 case REG:
22115 asm_fprintf (stream, "%r", REGNO (x));
22116 break;
22117
22118 case MEM:
22119 output_memory_reference_mode = GET_MODE (x);
22120 output_address (XEXP (x, 0));
22121 break;
22122
22123 case CONST_DOUBLE:
22124 {
22125 char fpstr[20];
22126 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22127 sizeof (fpstr), 0, 1);
22128 fprintf (stream, "#%s", fpstr);
22129 }
22130 break;
22131
22132 default:
22133 gcc_assert (GET_CODE (x) != NEG);
22134 fputc ('#', stream);
22135 if (GET_CODE (x) == HIGH)
22136 {
22137 fputs (":lower16:", stream);
22138 x = XEXP (x, 0);
22139 }
22140
22141 output_addr_const (stream, x);
22142 break;
22143 }
22144 }
22145 }
22146 \f
22147 /* Target hook for printing a memory address. */
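/* In 32-bit (ARM/Thumb-2) state typical outputs include "[r0]",
   "[r1, #8]", "[r2, -r3]", "[r4, r5, lsl #2]" (the shift being printed
   via the 'S' operand code), and the pre/post increment forms such as
   "[r6, #4]!" and "[r7], #4".  */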
22148 static void
22149 arm_print_operand_address (FILE *stream, rtx x)
22150 {
22151 if (TARGET_32BIT)
22152 {
22153 int is_minus = GET_CODE (x) == MINUS;
22154
22155 if (REG_P (x))
22156 asm_fprintf (stream, "[%r]", REGNO (x));
22157 else if (GET_CODE (x) == PLUS || is_minus)
22158 {
22159 rtx base = XEXP (x, 0);
22160 rtx index = XEXP (x, 1);
22161 HOST_WIDE_INT offset = 0;
22162 if (!REG_P (base)
22163 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22164 {
22165 /* Ensure that BASE is a register (one of them must be).
22166    Also ensure that the SP is not used as an index
22167    register.  */
22168 std::swap (base, index);
22169 }
22170 switch (GET_CODE (index))
22171 {
22172 case CONST_INT:
22173 offset = INTVAL (index);
22174 if (is_minus)
22175 offset = -offset;
22176 asm_fprintf (stream, "[%r, #%wd]",
22177 REGNO (base), offset);
22178 break;
22179
22180 case REG:
22181 asm_fprintf (stream, "[%r, %s%r]",
22182 REGNO (base), is_minus ? "-" : "",
22183 REGNO (index));
22184 break;
22185
22186 case MULT:
22187 case ASHIFTRT:
22188 case LSHIFTRT:
22189 case ASHIFT:
22190 case ROTATERT:
22191 {
22192 asm_fprintf (stream, "[%r, %s%r",
22193 REGNO (base), is_minus ? "-" : "",
22194 REGNO (XEXP (index, 0)));
22195 arm_print_operand (stream, index, 'S');
22196 fputs ("]", stream);
22197 break;
22198 }
22199
22200 default:
22201 gcc_unreachable ();
22202 }
22203 }
22204 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22205 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22206 {
22207 extern machine_mode output_memory_reference_mode;
22208
22209 gcc_assert (REG_P (XEXP (x, 0)));
22210
22211 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22212 asm_fprintf (stream, "[%r, #%s%d]!",
22213 REGNO (XEXP (x, 0)),
22214 GET_CODE (x) == PRE_DEC ? "-" : "",
22215 GET_MODE_SIZE (output_memory_reference_mode));
22216 else
22217 asm_fprintf (stream, "[%r], #%s%d",
22218 REGNO (XEXP (x, 0)),
22219 GET_CODE (x) == POST_DEC ? "-" : "",
22220 GET_MODE_SIZE (output_memory_reference_mode));
22221 }
22222 else if (GET_CODE (x) == PRE_MODIFY)
22223 {
22224 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22225 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22226 asm_fprintf (stream, "#%wd]!",
22227 INTVAL (XEXP (XEXP (x, 1), 1)));
22228 else
22229 asm_fprintf (stream, "%r]!",
22230 REGNO (XEXP (XEXP (x, 1), 1)));
22231 }
22232 else if (GET_CODE (x) == POST_MODIFY)
22233 {
22234 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22235 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22236 asm_fprintf (stream, "#%wd",
22237 INTVAL (XEXP (XEXP (x, 1), 1)));
22238 else
22239 asm_fprintf (stream, "%r",
22240 REGNO (XEXP (XEXP (x, 1), 1)));
22241 }
22242 else output_addr_const (stream, x);
22243 }
22244 else
22245 {
22246 if (REG_P (x))
22247 asm_fprintf (stream, "[%r]", REGNO (x));
22248 else if (GET_CODE (x) == POST_INC)
22249 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22250 else if (GET_CODE (x) == PLUS)
22251 {
22252 gcc_assert (REG_P (XEXP (x, 0)));
22253 if (CONST_INT_P (XEXP (x, 1)))
22254 asm_fprintf (stream, "[%r, #%wd]",
22255 REGNO (XEXP (x, 0)),
22256 INTVAL (XEXP (x, 1)));
22257 else
22258 asm_fprintf (stream, "[%r, %r]",
22259 REGNO (XEXP (x, 0)),
22260 REGNO (XEXP (x, 1)));
22261 }
22262 else
22263 output_addr_const (stream, x);
22264 }
22265 }
22266 \f
22267 /* Target hook for indicating whether a punctuation character for
22268 TARGET_PRINT_OPERAND is valid. */
22269 static bool
22270 arm_print_operand_punct_valid_p (unsigned char code)
22271 {
22272 return (code == '@' || code == '|' || code == '.'
22273 || code == '(' || code == ')' || code == '#'
22274 || (TARGET_32BIT && (code == '?'))
22275 || (TARGET_THUMB2 && (code == '!'))
22276 || (TARGET_THUMB && (code == '_')));
22277 }
22278 \f
22279 /* Target hook for assembling integer objects. The ARM version needs to
22280 handle word-sized values specially. */
22281 static bool
22282 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22283 {
22284 machine_mode mode;
22285
22286 if (size == UNITS_PER_WORD && aligned_p)
22287 {
22288 fputs ("\t.word\t", asm_out_file);
22289 output_addr_const (asm_out_file, x);
22290
22291 /* Mark symbols as position independent. We only do this in the
22292 .text segment, not in the .data segment. */
22293 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22294 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22295 {
22296 /* See legitimize_pic_address for an explanation of the
22297 TARGET_VXWORKS_RTP check. */
22298 if (!arm_pic_data_is_text_relative
22299 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22300 fputs ("(GOT)", asm_out_file);
22301 else
22302 fputs ("(GOTOFF)", asm_out_file);
22303 }
22304 fputc ('\n', asm_out_file);
22305 return true;
22306 }
22307
22308 mode = GET_MODE (x);
22309
22310 if (arm_vector_mode_supported_p (mode))
22311 {
22312 int i, units;
22313
22314 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22315
22316 units = CONST_VECTOR_NUNITS (x);
22317 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22318
22319 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22320 for (i = 0; i < units; i++)
22321 {
22322 rtx elt = CONST_VECTOR_ELT (x, i);
22323 assemble_integer
22324 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22325 }
22326 else
22327 for (i = 0; i < units; i++)
22328 {
22329 rtx elt = CONST_VECTOR_ELT (x, i);
22330 REAL_VALUE_TYPE rval;
22331
22332 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22333
22334 assemble_real
22335 (rval, GET_MODE_INNER (mode),
22336 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22337 }
22338
22339 return true;
22340 }
22341
22342 return default_assemble_integer (x, size, aligned_p);
22343 }
22344
22345 static void
22346 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22347 {
22348 section *s;
22349
22350 if (!TARGET_AAPCS_BASED)
22351 {
22352 (is_ctor ?
22353 default_named_section_asm_out_constructor
22354 : default_named_section_asm_out_destructor) (symbol, priority);
22355 return;
22356 }
22357
22358 /* Put these in the .init_array section, using a special relocation. */
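   /* For example, a constructor with priority 100 is placed in the
      section ".init_array.00100"; the "%.5u" zero-padding keeps the
      sections in numeric order when the linker sorts them by name.  */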
22359 if (priority != DEFAULT_INIT_PRIORITY)
22360 {
22361 char buf[18];
22362 sprintf (buf, "%s.%.5u",
22363 is_ctor ? ".init_array" : ".fini_array",
22364 priority);
22365 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22366 }
22367 else if (is_ctor)
22368 s = ctors_section;
22369 else
22370 s = dtors_section;
22371
22372 switch_to_section (s);
22373 assemble_align (POINTER_SIZE);
22374 fputs ("\t.word\t", asm_out_file);
22375 output_addr_const (asm_out_file, symbol);
22376 fputs ("(target1)\n", asm_out_file);
22377 }
22378
22379 /* Add a function to the list of static constructors. */
22380
22381 static void
22382 arm_elf_asm_constructor (rtx symbol, int priority)
22383 {
22384 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22385 }
22386
22387 /* Add a function to the list of static destructors. */
22388
22389 static void
22390 arm_elf_asm_destructor (rtx symbol, int priority)
22391 {
22392 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22393 }
22394 \f
22395 /* A finite state machine takes care of noticing whether or not instructions
22396 can be conditionally executed, and thus decrease execution time and code
22397 size by deleting branch instructions. The fsm is controlled by
22398 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22399
22400 /* The states of the fsm controlling condition codes are:
22401 0: normal, do nothing special
22402 1: make ASM_OUTPUT_OPCODE not output this instruction
22403 2: make ASM_OUTPUT_OPCODE not output this instruction
22404 3: make instructions conditional
22405 4: make instructions conditional
22406
22407 State transitions (state->state by whom under condition):
22408 0 -> 1 final_prescan_insn if the `target' is a label
22409 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22410 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22411 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22412 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22413 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22414 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22415 (the target insn is arm_target_insn).
22416
22417 If the jump clobbers the conditions then we use states 2 and 4.
22418
22419 A similar thing can be done with conditional return insns.
22420
22421 XXX In case the `target' is an unconditional branch, this conditionalising
22422 of the instructions always reduces code size, but not always execution
22423 time. But then, I want to reduce the code size to somewhere near what
22424 /bin/cc produces. */
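/* For example, the sequence

	bne	.L1
	mov	r0, #0
   .L1:

   can be output as the single conditional instruction

	moveq	r0, #0

   since the skipped instruction should execute exactly when the branch
   is not taken.  */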
22425
22426 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22427 instructions. When a COND_EXEC instruction is seen the subsequent
22428 instructions are scanned so that multiple conditional instructions can be
22429 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22430 specify the length and true/false mask for the IT block. These will be
22431 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
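/* For example, two adjacent COND_EXEC insns predicated on EQ and NE
   respectively are emitted under a single "ite eq" IT instruction, the
   first insn taking the 't' slot and the second the 'e' slot.  */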
22432
22433 /* Returns the index of the ARM condition code string in
22434 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22435 COMPARISON should be an rtx like `(eq (...) (...))'. */
22436
22437 enum arm_cond_code
22438 maybe_get_arm_condition_code (rtx comparison)
22439 {
22440 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22441 enum arm_cond_code code;
22442 enum rtx_code comp_code = GET_CODE (comparison);
22443
22444 if (GET_MODE_CLASS (mode) != MODE_CC)
22445 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22446 XEXP (comparison, 1));
22447
22448 switch (mode)
22449 {
22450 case CC_DNEmode: code = ARM_NE; goto dominance;
22451 case CC_DEQmode: code = ARM_EQ; goto dominance;
22452 case CC_DGEmode: code = ARM_GE; goto dominance;
22453 case CC_DGTmode: code = ARM_GT; goto dominance;
22454 case CC_DLEmode: code = ARM_LE; goto dominance;
22455 case CC_DLTmode: code = ARM_LT; goto dominance;
22456 case CC_DGEUmode: code = ARM_CS; goto dominance;
22457 case CC_DGTUmode: code = ARM_HI; goto dominance;
22458 case CC_DLEUmode: code = ARM_LS; goto dominance;
22459 case CC_DLTUmode: code = ARM_CC;
22460
22461 dominance:
22462 if (comp_code == EQ)
22463 return ARM_INVERSE_CONDITION_CODE (code);
22464 if (comp_code == NE)
22465 return code;
22466 return ARM_NV;
22467
22468 case CC_NOOVmode:
22469 switch (comp_code)
22470 {
22471 case NE: return ARM_NE;
22472 case EQ: return ARM_EQ;
22473 case GE: return ARM_PL;
22474 case LT: return ARM_MI;
22475 default: return ARM_NV;
22476 }
22477
22478 case CC_Zmode:
22479 switch (comp_code)
22480 {
22481 case NE: return ARM_NE;
22482 case EQ: return ARM_EQ;
22483 default: return ARM_NV;
22484 }
22485
22486 case CC_Nmode:
22487 switch (comp_code)
22488 {
22489 case NE: return ARM_MI;
22490 case EQ: return ARM_PL;
22491 default: return ARM_NV;
22492 }
22493
22494 case CCFPEmode:
22495 case CCFPmode:
22496 /* We can handle all cases except UNEQ and LTGT. */
22497 switch (comp_code)
22498 {
22499 case GE: return ARM_GE;
22500 case GT: return ARM_GT;
22501 case LE: return ARM_LS;
22502 case LT: return ARM_MI;
22503 case NE: return ARM_NE;
22504 case EQ: return ARM_EQ;
22505 case ORDERED: return ARM_VC;
22506 case UNORDERED: return ARM_VS;
22507 case UNLT: return ARM_LT;
22508 case UNLE: return ARM_LE;
22509 case UNGT: return ARM_HI;
22510 case UNGE: return ARM_PL;
22511 /* UNEQ and LTGT do not have a representation. */
22512 case UNEQ: /* Fall through. */
22513 case LTGT: /* Fall through. */
22514 default: return ARM_NV;
22515 }
22516
22517 case CC_SWPmode:
22518 switch (comp_code)
22519 {
22520 case NE: return ARM_NE;
22521 case EQ: return ARM_EQ;
22522 case GE: return ARM_LE;
22523 case GT: return ARM_LT;
22524 case LE: return ARM_GE;
22525 case LT: return ARM_GT;
22526 case GEU: return ARM_LS;
22527 case GTU: return ARM_CC;
22528 case LEU: return ARM_CS;
22529 case LTU: return ARM_HI;
22530 default: return ARM_NV;
22531 }
22532
22533 case CC_Cmode:
22534 switch (comp_code)
22535 {
22536 case LTU: return ARM_CS;
22537 case GEU: return ARM_CC;
22538 default: return ARM_NV;
22539 }
22540
22541 case CC_CZmode:
22542 switch (comp_code)
22543 {
22544 case NE: return ARM_NE;
22545 case EQ: return ARM_EQ;
22546 case GEU: return ARM_CS;
22547 case GTU: return ARM_HI;
22548 case LEU: return ARM_LS;
22549 case LTU: return ARM_CC;
22550 default: return ARM_NV;
22551 }
22552
22553 case CC_NCVmode:
22554 switch (comp_code)
22555 {
22556 case GE: return ARM_GE;
22557 case LT: return ARM_LT;
22558 case GEU: return ARM_CS;
22559 case LTU: return ARM_CC;
22560 default: return ARM_NV;
22561 }
22562
22563 case CCmode:
22564 switch (comp_code)
22565 {
22566 case NE: return ARM_NE;
22567 case EQ: return ARM_EQ;
22568 case GE: return ARM_GE;
22569 case GT: return ARM_GT;
22570 case LE: return ARM_LE;
22571 case LT: return ARM_LT;
22572 case GEU: return ARM_CS;
22573 case GTU: return ARM_HI;
22574 case LEU: return ARM_LS;
22575 case LTU: return ARM_CC;
22576 default: return ARM_NV;
22577 }
22578
22579 default: gcc_unreachable ();
22580 }
22581 }
22582
22583 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22584 static enum arm_cond_code
22585 get_arm_condition_code (rtx comparison)
22586 {
22587 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22588 gcc_assert (code != ARM_NV);
22589 return code;
22590 }
22591
22592 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22593 instructions. */
22594 void
22595 thumb2_final_prescan_insn (rtx_insn *insn)
22596 {
22597 rtx_insn *first_insn = insn;
22598 rtx body = PATTERN (insn);
22599 rtx predicate;
22600 enum arm_cond_code code;
22601 int n;
22602 int mask;
22603 int max;
22604
22605 /* max_insns_skipped in the tune was already taken into account in the
22606 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22607 just emit the IT blocks as we can. It does not make sense to split
22608 the IT blocks. */
22609 max = MAX_INSN_PER_IT_BLOCK;
22610
22611 /* Remove the previous insn from the count of insns to be output. */
22612 if (arm_condexec_count)
22613 arm_condexec_count--;
22614
22615 /* Nothing to do if we are already inside a conditional block. */
22616 if (arm_condexec_count)
22617 return;
22618
22619 if (GET_CODE (body) != COND_EXEC)
22620 return;
22621
22622 /* Conditional jumps are implemented directly. */
22623 if (JUMP_P (insn))
22624 return;
22625
22626 predicate = COND_EXEC_TEST (body);
22627 arm_current_cc = get_arm_condition_code (predicate);
22628
22629 n = get_attr_ce_count (insn);
22630 arm_condexec_count = 1;
22631 arm_condexec_mask = (1 << n) - 1;
22632 arm_condexec_masklen = n;
22633 /* See if subsequent instructions can be combined into the same block. */
22634 for (;;)
22635 {
22636 insn = next_nonnote_insn (insn);
22637
22638 /* Jumping into the middle of an IT block is illegal, so a label or
22639 barrier terminates the block. */
22640 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22641 break;
22642
22643 body = PATTERN (insn);
22644 /* USE and CLOBBER aren't really insns, so just skip them. */
22645 if (GET_CODE (body) == USE
22646 || GET_CODE (body) == CLOBBER)
22647 continue;
22648
22649 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22650 if (GET_CODE (body) != COND_EXEC)
22651 break;
22652 /* Maximum number of conditionally executed instructions in a block. */
22653 n = get_attr_ce_count (insn);
22654 if (arm_condexec_masklen + n > max)
22655 break;
22656
22657 predicate = COND_EXEC_TEST (body);
22658 code = get_arm_condition_code (predicate);
22659 mask = (1 << n) - 1;
22660 if (arm_current_cc == code)
22661 arm_condexec_mask |= (mask << arm_condexec_masklen);
22662 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22663 break;
22664
22665 arm_condexec_count++;
22666 arm_condexec_masklen += n;
22667
22668 /* A jump must be the last instruction in a conditional block. */
22669 if (JUMP_P (insn))
22670 break;
22671 }
22672 /* Restore recog_data (getting the attributes of other insns can
22673 destroy this array, but final.c assumes that it remains intact
22674 across this call). */
22675 extract_constrain_insn_cached (first_insn);
22676 }
22677
22678 void
22679 arm_final_prescan_insn (rtx_insn *insn)
22680 {
22681 /* BODY will hold the body of INSN. */
22682 rtx body = PATTERN (insn);
22683
22684 /* This will be 1 if trying to repeat the trick, and things need to be
22685 reversed if it appears to fail. */
22686 int reverse = 0;
22687
22688 /* If we start with a return insn, we only succeed if we find another one. */
22689 int seeking_return = 0;
22690 enum rtx_code return_code = UNKNOWN;
22691
22692 /* START_INSN will hold the insn from where we start looking. This is the
22693 first insn after the following code_label if REVERSE is true. */
22694 rtx_insn *start_insn = insn;
22695
22696 /* If in state 4, check if the target branch is reached, in order to
22697 change back to state 0. */
22698 if (arm_ccfsm_state == 4)
22699 {
22700 if (insn == arm_target_insn)
22701 {
22702 arm_target_insn = NULL;
22703 arm_ccfsm_state = 0;
22704 }
22705 return;
22706 }
22707
22708 /* If in state 3, it is possible to repeat the trick, if this insn is an
22709 unconditional branch to a label, and immediately following this branch
22710 is the previous target label which is only used once, and the label this
22711 branch jumps to is not too far off. */
22712 if (arm_ccfsm_state == 3)
22713 {
22714 if (simplejump_p (insn))
22715 {
22716 start_insn = next_nonnote_insn (start_insn);
22717 if (BARRIER_P (start_insn))
22718 {
22719 /* XXX Isn't this always a barrier? */
22720 start_insn = next_nonnote_insn (start_insn);
22721 }
22722 if (LABEL_P (start_insn)
22723 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22724 && LABEL_NUSES (start_insn) == 1)
22725 reverse = TRUE;
22726 else
22727 return;
22728 }
22729 else if (ANY_RETURN_P (body))
22730 {
22731 start_insn = next_nonnote_insn (start_insn);
22732 if (BARRIER_P (start_insn))
22733 start_insn = next_nonnote_insn (start_insn);
22734 if (LABEL_P (start_insn)
22735 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22736 && LABEL_NUSES (start_insn) == 1)
22737 {
22738 reverse = TRUE;
22739 seeking_return = 1;
22740 return_code = GET_CODE (body);
22741 }
22742 else
22743 return;
22744 }
22745 else
22746 return;
22747 }
22748
22749 gcc_assert (!arm_ccfsm_state || reverse);
22750 if (!JUMP_P (insn))
22751 return;
22752
22753 /* This jump might be paralleled with a clobber of the condition codes;
22754 the jump should always come first.  */
22755 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22756 body = XVECEXP (body, 0, 0);
22757
22758 if (reverse
22759 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22760 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22761 {
22762 int insns_skipped;
22763 int fail = FALSE, succeed = FALSE;
22764 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22765 int then_not_else = TRUE;
22766 rtx_insn *this_insn = start_insn;
22767 rtx label = 0;
22768
22769 /* Register the insn jumped to. */
22770 if (reverse)
22771 {
22772 if (!seeking_return)
22773 label = XEXP (SET_SRC (body), 0);
22774 }
22775 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22776 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22777 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22778 {
22779 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22780 then_not_else = FALSE;
22781 }
22782 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22783 {
22784 seeking_return = 1;
22785 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22786 }
22787 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22788 {
22789 seeking_return = 1;
22790 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22791 then_not_else = FALSE;
22792 }
22793 else
22794 gcc_unreachable ();
22795
22796 /* See how many insns this branch skips, and what kind of insns. If all
22797 insns are okay, and the label or unconditional branch to the same
22798 label is not too far away, succeed. */
22799 for (insns_skipped = 0;
22800 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22801 {
22802 rtx scanbody;
22803
22804 this_insn = next_nonnote_insn (this_insn);
22805 if (!this_insn)
22806 break;
22807
22808 switch (GET_CODE (this_insn))
22809 {
22810 case CODE_LABEL:
22811 /* Succeed if it is the target label, otherwise fail since
22812 control falls in from somewhere else. */
22813 if (this_insn == label)
22814 {
22815 arm_ccfsm_state = 1;
22816 succeed = TRUE;
22817 }
22818 else
22819 fail = TRUE;
22820 break;
22821
22822 case BARRIER:
22823 /* Succeed if the following insn is the target label.
22824 Otherwise fail.
22825 If return insns are used then the last insn in a function
22826 will be a barrier. */
22827 this_insn = next_nonnote_insn (this_insn);
22828 if (this_insn && this_insn == label)
22829 {
22830 arm_ccfsm_state = 1;
22831 succeed = TRUE;
22832 }
22833 else
22834 fail = TRUE;
22835 break;
22836
22837 case CALL_INSN:
22838 /* The AAPCS says that conditional calls should not be
22839 used since they make interworking inefficient (the
22840 linker can't transform BL<cond> into BLX). That's
22841 only a problem if the machine has BLX. */
22842 if (arm_arch5)
22843 {
22844 fail = TRUE;
22845 break;
22846 }
22847
22848 /* Succeed if the following insn is the target label, or
22849 if the following two insns are a barrier and the
22850 target label. */
22851 this_insn = next_nonnote_insn (this_insn);
22852 if (this_insn && BARRIER_P (this_insn))
22853 this_insn = next_nonnote_insn (this_insn);
22854
22855 if (this_insn && this_insn == label
22856 && insns_skipped < max_insns_skipped)
22857 {
22858 arm_ccfsm_state = 1;
22859 succeed = TRUE;
22860 }
22861 else
22862 fail = TRUE;
22863 break;
22864
22865 case JUMP_INSN:
22866 /* If this is an unconditional branch to the same label, succeed.
22867 If it is to another label, do nothing. If it is conditional,
22868 fail. */
22869 /* XXX Probably, the tests for SET and the PC are
22870 unnecessary. */
22871
22872 scanbody = PATTERN (this_insn);
22873 if (GET_CODE (scanbody) == SET
22874 && GET_CODE (SET_DEST (scanbody)) == PC)
22875 {
22876 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22877 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22878 {
22879 arm_ccfsm_state = 2;
22880 succeed = TRUE;
22881 }
22882 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22883 fail = TRUE;
22884 }
22885 /* Fail if a conditional return is undesirable (e.g. on a
22886 StrongARM), but still allow this if optimizing for size. */
22887 else if (GET_CODE (scanbody) == return_code
22888 && !use_return_insn (TRUE, NULL)
22889 && !optimize_size)
22890 fail = TRUE;
22891 else if (GET_CODE (scanbody) == return_code)
22892 {
22893 arm_ccfsm_state = 2;
22894 succeed = TRUE;
22895 }
22896 else if (GET_CODE (scanbody) == PARALLEL)
22897 {
22898 switch (get_attr_conds (this_insn))
22899 {
22900 case CONDS_NOCOND:
22901 break;
22902 default:
22903 fail = TRUE;
22904 break;
22905 }
22906 }
22907 else
22908 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22909
22910 break;
22911
22912 case INSN:
22913 /* Instructions using or affecting the condition codes make it
22914 fail. */
22915 scanbody = PATTERN (this_insn);
22916 if (!(GET_CODE (scanbody) == SET
22917 || GET_CODE (scanbody) == PARALLEL)
22918 || get_attr_conds (this_insn) != CONDS_NOCOND)
22919 fail = TRUE;
22920 break;
22921
22922 default:
22923 break;
22924 }
22925 }
22926 if (succeed)
22927 {
22928 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22929 arm_target_label = CODE_LABEL_NUMBER (label);
22930 else
22931 {
22932 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22933
22934 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22935 {
22936 this_insn = next_nonnote_insn (this_insn);
22937 gcc_assert (!this_insn
22938 || (!BARRIER_P (this_insn)
22939 && !LABEL_P (this_insn)));
22940 }
22941 if (!this_insn)
22942 {
22943 /* Oh dear!  We ran off the end; give up.  */
22944 extract_constrain_insn_cached (insn);
22945 arm_ccfsm_state = 0;
22946 arm_target_insn = NULL;
22947 return;
22948 }
22949 arm_target_insn = this_insn;
22950 }
22951
22952 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22953 what it was. */
22954 if (!reverse)
22955 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22956
22957 if (reverse || then_not_else)
22958 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22959 }
22960
22961 /* Restore recog_data (getting the attributes of other insns can
22962 destroy this array, but final.c assumes that it remains intact
22963 across this call).  */
22964 extract_constrain_insn_cached (insn);
22965 }
22966 }
22967
22968 /* Output IT instructions. */
22969 void
22970 thumb2_asm_output_opcode (FILE * stream)
22971 {
22972 char buff[5];
22973 int n;
22974
22975 if (arm_condexec_mask)
22976 {
22977 for (n = 0; n < arm_condexec_masklen; n++)
22978 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22979 buff[n] = 0;
22980 asm_fprintf (stream, "i%s\t%s\n\t", buff,
22981 arm_condition_codes[arm_current_cc]);
22982 arm_condexec_mask = 0;
22983 }
22984 }
22985
22986 /* Returns true if REGNO is a valid register
22987 for holding a quantity of type MODE. */
22988 int
22989 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22990 {
22991 if (GET_MODE_CLASS (mode) == MODE_CC)
22992 return (regno == CC_REGNUM
22993 || (TARGET_HARD_FLOAT && TARGET_VFP
22994 && regno == VFPCC_REGNUM));
22995
22996 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
22997 return false;
22998
22999 if (TARGET_THUMB1)
23000 /* For the Thumb we only allow values bigger than SImode in
23001 registers 0 - 6, so that there is always a second low
23002 register available to hold the upper part of the value.
23003 We probably ought to ensure that the register is the
23004 start of an even-numbered register pair.  */
23005 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23006
23007 if (TARGET_HARD_FLOAT && TARGET_VFP
23008 && IS_VFP_REGNUM (regno))
23009 {
23010 if (mode == SFmode || mode == SImode)
23011 return VFP_REGNO_OK_FOR_SINGLE (regno);
23012
23013 if (mode == DFmode)
23014 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23015
23016 /* VFP registers can hold HFmode values, but there is no point in
23017 putting them there unless we have hardware conversion insns. */
23018 if (mode == HFmode)
23019 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23020
23021 if (TARGET_NEON)
23022 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23023 || (VALID_NEON_QREG_MODE (mode)
23024 && NEON_REGNO_OK_FOR_QUAD (regno))
23025 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23026 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23027 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23028 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23029 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23030
23031 return FALSE;
23032 }
23033
23034 if (TARGET_REALLY_IWMMXT)
23035 {
23036 if (IS_IWMMXT_GR_REGNUM (regno))
23037 return mode == SImode;
23038
23039 if (IS_IWMMXT_REGNUM (regno))
23040 return VALID_IWMMXT_REG_MODE (mode);
23041 }
23042
23043 /* We allow almost any value to be stored in the general registers.
23044 Restrict doubleword quantities to even register pairs in ARM state
23045 so that we can use ldrd. Do not allow very large Neon structure
23046 opaque modes in general registers; they would use too many. */
23047 if (regno <= LAST_ARM_REGNUM)
23048 {
23049 if (ARM_NUM_REGS (mode) > 4)
23050 return FALSE;
23051
23052 if (TARGET_THUMB2)
23053 return TRUE;
23054
23055 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23056 }
23057
23058 if (regno == FRAME_POINTER_REGNUM
23059 || regno == ARG_POINTER_REGNUM)
23060 /* We only allow integers in the fake hard registers. */
23061 return GET_MODE_CLASS (mode) == MODE_INT;
23062
23063 return FALSE;
23064 }
23065
23066 /* Implement MODES_TIEABLE_P. */
23067
23068 bool
23069 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23070 {
23071 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23072 return true;
23073
23074 /* We specifically want to allow elements of "structure" modes to
23075 be tieable to the structure. This more general condition allows
23076 other rarer situations too. */
23077 if (TARGET_NEON
23078 && (VALID_NEON_DREG_MODE (mode1)
23079 || VALID_NEON_QREG_MODE (mode1)
23080 || VALID_NEON_STRUCT_MODE (mode1))
23081 && (VALID_NEON_DREG_MODE (mode2)
23082 || VALID_NEON_QREG_MODE (mode2)
23083 || VALID_NEON_STRUCT_MODE (mode2)))
23084 return true;
23085
23086 return false;
23087 }
23088
23089 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23090 not used in arm mode. */
23091
23092 enum reg_class
23093 arm_regno_class (int regno)
23094 {
23095 if (regno == PC_REGNUM)
23096 return NO_REGS;
23097
23098 if (TARGET_THUMB1)
23099 {
23100 if (regno == STACK_POINTER_REGNUM)
23101 return STACK_REG;
23102 if (regno == CC_REGNUM)
23103 return CC_REG;
23104 if (regno < 8)
23105 return LO_REGS;
23106 return HI_REGS;
23107 }
23108
23109 if (TARGET_THUMB2 && regno < 8)
23110 return LO_REGS;
23111
23112 if ( regno <= LAST_ARM_REGNUM
23113 || regno == FRAME_POINTER_REGNUM
23114 || regno == ARG_POINTER_REGNUM)
23115 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23116
23117 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23118 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23119
23120 if (IS_VFP_REGNUM (regno))
23121 {
23122 if (regno <= D7_VFP_REGNUM)
23123 return VFP_D0_D7_REGS;
23124 else if (regno <= LAST_LO_VFP_REGNUM)
23125 return VFP_LO_REGS;
23126 else
23127 return VFP_HI_REGS;
23128 }
23129
23130 if (IS_IWMMXT_REGNUM (regno))
23131 return IWMMXT_REGS;
23132
23133 if (IS_IWMMXT_GR_REGNUM (regno))
23134 return IWMMXT_GR_REGS;
23135
23136 return NO_REGS;
23137 }
23138
23139 /* Handle a special case when computing the offset
23140 of an argument from the frame pointer. */
23141 int
23142 arm_debugger_arg_offset (int value, rtx addr)
23143 {
23144 rtx_insn *insn;
23145
23146 /* We are only interested if dbxout_parms() failed to compute the offset. */
23147 if (value != 0)
23148 return 0;
23149
23150 /* We can only cope with the case where the address is held in a register. */
23151 if (!REG_P (addr))
23152 return 0;
23153
23154 /* If we are using the frame pointer to point at the argument, then
23155 an offset of 0 is correct. */
23156 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23157 return 0;
23158
23159 /* If we are using the stack pointer to point at the
23160 argument, then an offset of 0 is correct. */
23161 /* ??? Check this is consistent with thumb2 frame layout. */
23162 if ((TARGET_THUMB || !frame_pointer_needed)
23163 && REGNO (addr) == SP_REGNUM)
23164 return 0;
23165
23166 /* Oh dear. The argument is pointed to by a register rather
23167 than being held in a register, or being stored at a known
23168 offset from the frame pointer. Since GDB only understands
23169 those two kinds of argument we must translate the address
23170 held in the register into an offset from the frame pointer.
23171 We do this by searching through the insns for the function
23172 looking to see where this register gets its value. If the
23173 register is initialized from the frame pointer plus an offset
23174 then we are in luck and we can continue, otherwise we give up.
23175
23176 This code is exercised by producing debugging information
23177 for a function with arguments like this:
23178
23179 double func (double a, double b, int c, double d) {return d;}
23180
23181 Without this code the stab for parameter 'd' will be set to
23182 an offset of 0 from the frame pointer, rather than 8. */
23183
23184 /* The if() statement says:
23185
23186 If the insn is a normal instruction
23187 and if the insn is setting the value in a register
23188 and if the register being set is the register holding the address of the argument
23189 and if the address is computed by an addition
23190 that involves adding to a register
23191 which is the frame pointer
23192 a constant integer
23193
23194 then... */
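   /* In other words, we are looking for an insn of the form

	(set (reg Rn) (plus (reg hard-frame-pointer) (const_int OFFSET)))

      where Rn is the register holding the address of the argument;
      OFFSET is then the value reported to the debugger.  */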
23195
23196 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23197 {
23198 if ( NONJUMP_INSN_P (insn)
23199 && GET_CODE (PATTERN (insn)) == SET
23200 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23201 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23202 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23203 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23204 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23205 )
23206 {
23207 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23208
23209 break;
23210 }
23211 }
23212
23213 if (value == 0)
23214 {
23215 debug_rtx (addr);
23216 warning (0, "unable to compute real location of stacked parameter");
23217 value = 8; /* XXX magic hack */
23218 }
23219
23220 return value;
23221 }
23222 \f
23223 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23224
23225 static const char *
23226 arm_invalid_parameter_type (const_tree t)
23227 {
23228 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23229 return N_("function parameters cannot have __fp16 type");
23230 return NULL;
23231 }
23232
23233 /* Implement TARGET_INVALID_RETURN_TYPE. */
23234
23235 static const char *
23236 arm_invalid_return_type (const_tree t)
23237 {
23238 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23239 return N_("functions cannot return __fp16 type");
23240 return NULL;
23241 }
23242
23243 /* Implement TARGET_PROMOTED_TYPE. */
23244
23245 static tree
23246 arm_promoted_type (const_tree t)
23247 {
23248 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23249 return float_type_node;
23250 return NULL_TREE;
23251 }
23252
23253 /* Implement TARGET_CONVERT_TO_TYPE.
23254 Specifically, this hook implements the peculiarity of the ARM
23255 half-precision floating-point C semantics that requires conversions between
23256 __fp16 to or from double to do an intermediate conversion to float. */
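   For example, a conversion of an __fp16 value X to double is expanded
   as (double) (float) X, and a conversion from double to __fp16 goes
   through float in the same way.  */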
23257
23258 static tree
23259 arm_convert_to_type (tree type, tree expr)
23260 {
23261 tree fromtype = TREE_TYPE (expr);
23262 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23263 return NULL_TREE;
23264 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23265 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23266 return convert (type, convert (float_type_node, expr));
23267 return NULL_TREE;
23268 }
23269
23270 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23271 This simply adds HFmode as a supported mode; even though we don't
23272 implement arithmetic on this type directly, it's supported by
23273 optabs conversions, much the way the double-word arithmetic is
23274 special-cased in the default hook. */
23275
23276 static bool
23277 arm_scalar_mode_supported_p (machine_mode mode)
23278 {
23279 if (mode == HFmode)
23280 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23281 else if (ALL_FIXED_POINT_MODE_P (mode))
23282 return true;
23283 else
23284 return default_scalar_mode_supported_p (mode);
23285 }
23286
23287 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23288 void
23289 neon_reinterpret (rtx dest, rtx src)
23290 {
23291 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23292 }
23293
23294 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23295 not to early-clobber SRC registers in the process.
23296
23297 We assume that the operands described by SRC and DEST represent a
23298 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23299 number of components into which the copy has been decomposed. */
23300 void
23301 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23302 {
23303 unsigned int i;
23304
23305 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23306 || REGNO (operands[0]) < REGNO (operands[1]))
23307 {
23308 for (i = 0; i < count; i++)
23309 {
23310 operands[2 * i] = dest[i];
23311 operands[2 * i + 1] = src[i];
23312 }
23313 }
23314 else
23315 {
23316 for (i = 0; i < count; i++)
23317 {
23318 operands[2 * i] = dest[count - i - 1];
23319 operands[2 * i + 1] = src[count - i - 1];
23320 }
23321 }
23322 }
23323
23324 /* Split operands into moves from op[1] + op[2] into op[0]. */
23325
23326 void
23327 neon_split_vcombine (rtx operands[3])
23328 {
23329 unsigned int dest = REGNO (operands[0]);
23330 unsigned int src1 = REGNO (operands[1]);
23331 unsigned int src2 = REGNO (operands[2]);
23332 machine_mode halfmode = GET_MODE (operands[1]);
23333 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23334 rtx destlo, desthi;
23335
23336 if (src1 == dest && src2 == dest + halfregs)
23337 {
23338 /* No-op move. Can't split to nothing; emit something. */
23339 emit_note (NOTE_INSN_DELETED);
23340 return;
23341 }
23342
23343 /* Preserve register attributes for variable tracking. */
23344 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23345 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23346 GET_MODE_SIZE (halfmode));
23347
23348 /* Special case of reversed high/low parts. Use VSWP. */
23349 if (src2 == dest && src1 == dest + halfregs)
23350 {
23351 rtx x = gen_rtx_SET (destlo, operands[1]);
23352 rtx y = gen_rtx_SET (desthi, operands[2]);
23353 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23354 return;
23355 }
23356
23357 if (!reg_overlap_mentioned_p (operands[2], destlo))
23358 {
23359 /* Try to avoid unnecessary moves if part of the result
23360 is in the right place already. */
23361 if (src1 != dest)
23362 emit_move_insn (destlo, operands[1]);
23363 if (src2 != dest + halfregs)
23364 emit_move_insn (desthi, operands[2]);
23365 }
23366 else
23367 {
23368 if (src2 != dest + halfregs)
23369 emit_move_insn (desthi, operands[2]);
23370 if (src1 != dest)
23371 emit_move_insn (destlo, operands[1]);
23372 }
23373 }
23374 \f
23375 /* Return the number (counting from 0) of
23376 the least significant set bit in MASK. */
23377
23378 inline static int
23379 number_of_first_bit_set (unsigned mask)
23380 {
23381 return ctz_hwi (mask);
23382 }
23383
23384 /* Like emit_multi_reg_push, but allowing for a different set of
23385 registers to be described as saved. MASK is the set of registers
23386 to be saved; REAL_REGS is the set of registers to be described as
23387 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23388
23389 static rtx_insn *
23390 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23391 {
23392 unsigned long regno;
23393 rtx par[10], tmp, reg;
23394 rtx_insn *insn;
23395 int i, j;
23396
23397 /* Build the parallel of the registers actually being stored. */
23398 for (i = 0; mask; ++i, mask &= mask - 1)
23399 {
23400 regno = ctz_hwi (mask);
23401 reg = gen_rtx_REG (SImode, regno);
23402
23403 if (i == 0)
23404 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23405 else
23406 tmp = gen_rtx_USE (VOIDmode, reg);
23407
23408 par[i] = tmp;
23409 }
23410
23411 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23412 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23413 tmp = gen_frame_mem (BLKmode, tmp);
23414 tmp = gen_rtx_SET (tmp, par[0]);
23415 par[0] = tmp;
23416
23417 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23418 insn = emit_insn (tmp);
23419
23420 /* Always build the stack adjustment note for unwind info. */
23421 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23422 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23423 par[0] = tmp;
23424
23425 /* Build the parallel of the registers recorded as saved for unwind. */
23426 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23427 {
23428 regno = ctz_hwi (real_regs);
23429 reg = gen_rtx_REG (SImode, regno);
23430
23431 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23432 tmp = gen_frame_mem (SImode, tmp);
23433 tmp = gen_rtx_SET (tmp, reg);
23434 RTX_FRAME_RELATED_P (tmp) = 1;
23435 par[j + 1] = tmp;
23436 }
23437
23438 if (j == 0)
23439 tmp = par[0];
23440 else
23441 {
23442 RTX_FRAME_RELATED_P (par[0]) = 1;
23443 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23444 }
23445
23446 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23447
23448 return insn;
23449 }
23450
23451 /* Emit code to push or pop registers to or from the stack. F is the
23452 assembly file. MASK is the registers to pop. */
23453 static void
23454 thumb_pop (FILE *f, unsigned long mask)
23455 {
23456 int regno;
23457 int lo_mask = mask & 0xFF;
23458 int pushed_words = 0;
23459
23460 gcc_assert (mask);
23461
23462 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23463 {
23464 /* Special case.  Do not generate a POP PC statement here; do it in
23465 thumb_exit ().  */
23466 thumb_exit (f, -1);
23467 return;
23468 }
23469
23470 fprintf (f, "\tpop\t{");
23471
23472 /* Look at the low registers first. */
23473 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23474 {
23475 if (lo_mask & 1)
23476 {
23477 asm_fprintf (f, "%r", regno);
23478
23479 if ((lo_mask & ~1) != 0)
23480 fprintf (f, ", ");
23481
23482 pushed_words++;
23483 }
23484 }
23485
23486 if (mask & (1 << PC_REGNUM))
23487 {
23488 /* Catch popping the PC. */
23489 if (TARGET_INTERWORK || TARGET_BACKTRACE
23490 || crtl->calls_eh_return)
23491 {
23492 /* The PC is never popped directly; instead
23493 it is popped into r3 and then BX is used.  */
23494 fprintf (f, "}\n");
23495
23496 thumb_exit (f, -1);
23497
23498 return;
23499 }
23500 else
23501 {
23502 if (mask & 0xFF)
23503 fprintf (f, ", ");
23504
23505 asm_fprintf (f, "%r", PC_REGNUM);
23506 }
23507 }
23508
23509 fprintf (f, "}\n");
23510 }
23511
23512 /* Generate code to return from a thumb function.
23513 If 'reg_containing_return_addr' is -1, then the return address is
23514 actually on the stack, at the stack pointer. */
23515 static void
23516 thumb_exit (FILE *f, int reg_containing_return_addr)
23517 {
23518 unsigned regs_available_for_popping;
23519 unsigned regs_to_pop;
23520 int pops_needed;
23521 unsigned available;
23522 unsigned required;
23523 machine_mode mode;
23524 int size;
23525 int restore_a4 = FALSE;
23526
23527 /* Compute the registers we need to pop. */
23528 regs_to_pop = 0;
23529 pops_needed = 0;
23530
23531 if (reg_containing_return_addr == -1)
23532 {
23533 regs_to_pop |= 1 << LR_REGNUM;
23534 ++pops_needed;
23535 }
23536
23537 if (TARGET_BACKTRACE)
23538 {
23539 /* Restore the (ARM) frame pointer and stack pointer. */
23540 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23541 pops_needed += 2;
23542 }
23543
23544 /* If there is nothing to pop then just emit the BX instruction and
23545 return. */
23546 if (pops_needed == 0)
23547 {
23548 if (crtl->calls_eh_return)
23549 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23550
23551 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23552 return;
23553 }
23554 /* Otherwise if we are not supporting interworking and we have not created
23555 a backtrace structure and the function was not entered in ARM mode then
23556 just pop the return address straight into the PC. */
23557 else if (!TARGET_INTERWORK
23558 && !TARGET_BACKTRACE
23559 && !is_called_in_ARM_mode (current_function_decl)
23560 && !crtl->calls_eh_return)
23561 {
23562 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23563 return;
23564 }
23565
23566 /* Find out how many of the (return) argument registers we can corrupt. */
23567 regs_available_for_popping = 0;
23568
23569 /* If returning via __builtin_eh_return, the bottom three registers
23570 all contain information needed for the return. */
23571 if (crtl->calls_eh_return)
23572 size = 12;
23573 else
23574 {
23575 /* We can deduce the registers used from the function's
23576 return value.  This is more reliable than examining
23577 df_regs_ever_live_p () because that will be set if the register is
23578 ever used in the function, not just if the register is used
23579 to hold a return value. */
23580
23581 if (crtl->return_rtx != 0)
23582 mode = GET_MODE (crtl->return_rtx);
23583 else
23584 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23585
23586 size = GET_MODE_SIZE (mode);
23587
23588 if (size == 0)
23589 {
23590 /* In a void function we can use any argument register.
23591 In a function that returns a structure on the stack
23592 we can use the second and third argument registers. */
23593 if (mode == VOIDmode)
23594 regs_available_for_popping =
23595 (1 << ARG_REGISTER (1))
23596 | (1 << ARG_REGISTER (2))
23597 | (1 << ARG_REGISTER (3));
23598 else
23599 regs_available_for_popping =
23600 (1 << ARG_REGISTER (2))
23601 | (1 << ARG_REGISTER (3));
23602 }
23603 else if (size <= 4)
23604 regs_available_for_popping =
23605 (1 << ARG_REGISTER (2))
23606 | (1 << ARG_REGISTER (3));
23607 else if (size <= 8)
23608 regs_available_for_popping =
23609 (1 << ARG_REGISTER (3));
23610 }
23611
23612 /* Match registers to be popped with registers into which we pop them. */
23613 for (available = regs_available_for_popping,
23614 required = regs_to_pop;
23615 required != 0 && available != 0;
23616 available &= ~(available & - available),
23617 required &= ~(required & - required))
23618 -- pops_needed;
23619
23620 /* If we have any popping registers left over, remove them. */
23621 if (available > 0)
23622 regs_available_for_popping &= ~available;
23623
23624 /* Otherwise if we need another popping register we can use
23625 the fourth argument register. */
23626 else if (pops_needed)
23627 {
23628 /* If we have not found any free argument registers and
23629 reg a4 contains the return address, we must move it. */
23630 if (regs_available_for_popping == 0
23631 && reg_containing_return_addr == LAST_ARG_REGNUM)
23632 {
23633 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23634 reg_containing_return_addr = LR_REGNUM;
23635 }
23636 else if (size > 12)
23637 {
23638 /* Register a4 is being used to hold part of the return value,
23639 but we have dire need of a free, low register. */
23640 restore_a4 = TRUE;
23641
23642 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23643 }
23644
23645 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23646 {
23647 /* The fourth argument register is available. */
23648 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23649
23650 --pops_needed;
23651 }
23652 }
23653
23654 /* Pop as many registers as we can. */
23655 thumb_pop (f, regs_available_for_popping);
23656
23657 /* Process the registers we popped. */
23658 if (reg_containing_return_addr == -1)
23659 {
23660 /* The return address was popped into the lowest numbered register. */
23661 regs_to_pop &= ~(1 << LR_REGNUM);
23662
23663 reg_containing_return_addr =
23664 number_of_first_bit_set (regs_available_for_popping);
23665
23666 Remove this register from the mask of available registers, so that
23667 the return address will not be corrupted by further pops. */
23668 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23669 }
23670
23671 /* If we popped other registers then handle them here. */
23672 if (regs_available_for_popping)
23673 {
23674 int frame_pointer;
23675
23676 /* Work out which register currently contains the frame pointer. */
23677 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23678
23679 /* Move it into the correct place. */
23680 asm_fprintf (f, "\tmov\t%r, %r\n",
23681 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23682
23683 /* (Temporarily) remove it from the mask of popped registers. */
23684 regs_available_for_popping &= ~(1 << frame_pointer);
23685 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23686
23687 if (regs_available_for_popping)
23688 {
23689 int stack_pointer;
23690
23691 /* We popped the stack pointer as well,
23692 find the register that contains it. */
23693 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23694
23695 /* Move it into the stack register. */
23696 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23697
23698 /* At this point we have popped all necessary registers, so
23699 do not worry about restoring regs_available_for_popping
23700 to its correct value:
23701
23702 assert (pops_needed == 0)
23703 assert (regs_available_for_popping == (1 << frame_pointer))
23704 assert (regs_to_pop == (1 << STACK_POINTER)) */
23705 }
23706 else
23707 {
23708 /* Since we have just moved the popped value into the frame
23709 pointer, the popping register is available for reuse, and
23710 we know that we still have the stack pointer left to pop. */
23711 regs_available_for_popping |= (1 << frame_pointer);
23712 }
23713 }
23714
23715 /* If we still have registers left on the stack, but we no longer have
23716 any registers into which we can pop them, then we must move the return
23717 address into the link register and make available the register that
23718 contained it. */
23719 if (regs_available_for_popping == 0 && pops_needed > 0)
23720 {
23721 regs_available_for_popping |= 1 << reg_containing_return_addr;
23722
23723 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23724 reg_containing_return_addr);
23725
23726 reg_containing_return_addr = LR_REGNUM;
23727 }
23728
23729 /* If we have registers left on the stack then pop some more.
23730 We know that at most we will want to pop FP and SP. */
23731 if (pops_needed > 0)
23732 {
23733 int popped_into;
23734 int move_to;
23735
23736 thumb_pop (f, regs_available_for_popping);
23737
23738 /* We have popped either FP or SP.
23739 Move whichever one it is into the correct register. */
23740 popped_into = number_of_first_bit_set (regs_available_for_popping);
23741 move_to = number_of_first_bit_set (regs_to_pop);
23742
23743 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23744
23745 regs_to_pop &= ~(1 << move_to);
23746
23747 --pops_needed;
23748 }
23749
23750 /* If we still have not popped everything then we must have only
23751 had one register available to us and we are now popping the SP. */
23752 if (pops_needed > 0)
23753 {
23754 int popped_into;
23755
23756 thumb_pop (f, regs_available_for_popping);
23757
23758 popped_into = number_of_first_bit_set (regs_available_for_popping);
23759
23760 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23761 /*
23762 assert (regs_to_pop == (1 << STACK_POINTER))
23763 assert (pops_needed == 1)
23764 */
23765 }
23766
23767 /* If necessary restore the a4 register. */
23768 if (restore_a4)
23769 {
23770 if (reg_containing_return_addr != LR_REGNUM)
23771 {
23772 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23773 reg_containing_return_addr = LR_REGNUM;
23774 }
23775
23776 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23777 }
23778
23779 if (crtl->calls_eh_return)
23780 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23781
23782 /* Return to caller. */
23783 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23784 }
23785 \f
23786 /* Scan INSN just before assembler is output for it.
23787 For Thumb-1, we track the status of the condition codes; this
23788 information is used in the cbranchsi4_insn pattern. */
23789 void
23790 thumb1_final_prescan_insn (rtx_insn *insn)
23791 {
23792 if (flag_print_asm_name)
23793 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23794 INSN_ADDRESSES (INSN_UID (insn)));
23795 /* Don't overwrite the previous setter when we get to a cbranch. */
23796 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23797 {
23798 enum attr_conds conds;
23799
23800 if (cfun->machine->thumb1_cc_insn)
23801 {
23802 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23803 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23804 CC_STATUS_INIT;
23805 }
23806 conds = get_attr_conds (insn);
23807 if (conds == CONDS_SET)
23808 {
23809 rtx set = single_set (insn);
23810 cfun->machine->thumb1_cc_insn = insn;
23811 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23812 cfun->machine->thumb1_cc_op1 = const0_rtx;
23813 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23814 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23815 {
23816 rtx src1 = XEXP (SET_SRC (set), 1);
23817 if (src1 == const0_rtx)
23818 cfun->machine->thumb1_cc_mode = CCmode;
23819 }
23820 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23821 {
23822 /* Record the src register operand instead of dest because
23823 cprop_hardreg pass propagates src. */
23824 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23825 }
23826 }
23827 else if (conds != CONDS_NOCOND)
23828 cfun->machine->thumb1_cc_insn = NULL_RTX;
23829 }
23830
23831 /* Check if unexpected far jump is used. */
23832 if (cfun->machine->lr_save_eliminated
23833 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23834 internal_error("Unexpected thumb1 far jump");
23835 }
23836
23837 int
23838 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23839 {
23840 unsigned HOST_WIDE_INT mask = 0xff;
23841 int i;
23842
23843 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23844 if (val == 0) /* XXX */
23845 return 0;
23846
23847 for (i = 0; i < 25; i++)
23848 if ((val & (mask << i)) == val)
23849 return 1;
23850
23851 return 0;
23852 }
23853
23854 /* Returns nonzero if the current function contains,
23855 or might contain a far jump. */
23856 static int
23857 thumb_far_jump_used_p (void)
23858 {
23859 rtx_insn *insn;
23860 bool far_jump = false;
23861 unsigned int func_size = 0;
23862
23863 /* This test is only important for leaf functions. */
23864 /* assert (!leaf_function_p ()); */
23865
23866 /* If we have already decided that far jumps may be used,
23867 do not bother checking again, and always return true even if
23868 it turns out that they are not being used. Once we have made
23869 the decision that far jumps are present (and that hence the link
23870 register will be pushed onto the stack) we cannot go back on it. */
23871 if (cfun->machine->far_jump_used)
23872 return 1;
23873
23874 /* If this function is not being called from the prologue/epilogue
23875 generation code then it must be being called from the
23876 INITIAL_ELIMINATION_OFFSET macro. */
23877 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23878 {
23879 /* In this case we know that we are being asked about the elimination
23880 of the arg pointer register. If that register is not being used,
23881 then there are no arguments on the stack, and we do not have to
23882 worry that a far jump might force the prologue to push the link
23883 register, changing the stack offsets. In this case we can just
23884 return false, since the presence of far jumps in the function will
23885 not affect stack offsets.
23886
23887 If the arg pointer is live (or if it was live, but has now been
23888 eliminated and so set to dead) then we do have to test to see if
23889 the function might contain a far jump. This test can lead to some
23890 false negatives, since before reload is completed, the length of
23891 branch instructions is not known, so gcc defaults to returning their
23892 longest length, which in turn sets the far jump attribute to true.
23893
23894 A false negative will not result in bad code being generated, but it
23895 will result in a needless push and pop of the link register. We
23896 hope that this does not occur too often.
23897
23898 If we need doubleword stack alignment this could affect the other
23899 elimination offsets so we can't risk getting it wrong. */
23900 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23901 cfun->machine->arg_pointer_live = 1;
23902 else if (!cfun->machine->arg_pointer_live)
23903 return 0;
23904 }
23905
23906 /* We should not change far_jump_used during or after reload, as there is
23907 no chance to change the stack frame layout. */
23908 if (reload_in_progress || reload_completed)
23909 return 0;
23910
23911 /* Check to see if the function contains a branch
23912 insn with the far jump attribute set. */
23913 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23914 {
23915 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23916 {
23917 far_jump = true;
23918 }
23919 func_size += get_attr_length (insn);
23920 }
23921
23922 /* The far_jump attribute will always be true for thumb1 before the
23923 shorten_branch pass, so checking the far_jump attribute before
23924 shorten_branch isn't very useful.
23925
23926 The following heuristic tries to estimate more accurately whether a
23927 far jump will finally be used. The heuristic is very conservative, as
23928 there is no chance to roll back a decision not to use far jumps.
23929
23930 Thumb1 long branch offset is -2048 to 2046. The worst case is that
23931 each 2-byte insn is associated with a 4-byte constant pool entry.
23932 Using function size 2048/3 as the threshold is conservative enough. */
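/* As a worked example of that arithmetic (the numbers are hypothetical):
for func_size == 700 bytes of instructions the assumed worst-case span,
including constant pools, is 3 * 700 = 2100 bytes, which exceeds the
2048-byte branch reach, so we commit to far jumps; a 600-byte function
gives 1800 bytes and keeps short branches. */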
23933 if (far_jump)
23934 {
23935 if ((func_size * 3) >= 2048)
23936 {
23937 /* Record the fact that we have decided that
23938 the function does use far jumps. */
23939 cfun->machine->far_jump_used = 1;
23940 return 1;
23941 }
23942 }
23943
23944 return 0;
23945 }
23946
23947 /* Return nonzero if FUNC must be entered in ARM mode. */
23948 static bool
23949 is_called_in_ARM_mode (tree func)
23950 {
23951 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23952
23953 /* Ignore the problem about functions whose address is taken. */
23954 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23955 return true;
23956
23957 #ifdef ARM_PE
23958 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23959 #else
23960 return false;
23961 #endif
23962 }
23963
23964 /* Given the stack offsets and register mask in OFFSETS, decide how
23965 many additional registers to push instead of subtracting a constant
23966 from SP. For epilogues the principle is the same except we use pop.
23967 FOR_PROLOGUE indicates which we're generating. */
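/* As an illustration (assumed frame layout, not a real testcase): when
this function decides the transformation is profitable for a prologue
that needs an extra 8 bytes of stack while saving r4 and lr,
	push	{r4, lr}
	sub	sp, sp, #8
can instead be emitted as
	push	{r0, r1, r4, lr}
pushing two extra low registers purely for their effect on SP. In the
epilogue a matching number of extra call-clobbered registers are popped
instead of adding to SP. */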
23968 static int
23969 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23970 {
23971 HOST_WIDE_INT amount;
23972 unsigned long live_regs_mask = offsets->saved_regs_mask;
23973 /* Extract a mask of the ones we can give to the Thumb's push/pop
23974 instruction. */
23975 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23976 /* Then count how many other high registers will need to be pushed. */
23977 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23978 int n_free, reg_base, size;
23979
23980 if (!for_prologue && frame_pointer_needed)
23981 amount = offsets->locals_base - offsets->saved_regs;
23982 else
23983 amount = offsets->outgoing_args - offsets->saved_regs;
23984
23985 /* If the stack frame size is 512 exactly, we can save one load
23986 instruction, which should make this a win even when optimizing
23987 for speed. */
23988 if (!optimize_size && amount != 512)
23989 return 0;
23990
23991 /* Can't do this if there are high registers to push. */
23992 if (high_regs_pushed != 0)
23993 return 0;
23994
23995 /* Shouldn't do it in the prologue if no registers would normally
23996 be pushed at all. In the epilogue, also allow it if we'll have
23997 a pop insn for the PC. */
23998 if (l_mask == 0
23999 && (for_prologue
24000 || TARGET_BACKTRACE
24001 || (live_regs_mask & 1 << LR_REGNUM) == 0
24002 || TARGET_INTERWORK
24003 || crtl->args.pretend_args_size != 0))
24004 return 0;
24005
24006 /* Don't do this if thumb_expand_prologue wants to emit instructions
24007 between the push and the stack frame allocation. */
24008 if (for_prologue
24009 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24010 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24011 return 0;
24012
24013 reg_base = 0;
24014 n_free = 0;
24015 if (!for_prologue)
24016 {
24017 size = arm_size_return_regs ();
24018 reg_base = ARM_NUM_INTS (size);
24019 live_regs_mask >>= reg_base;
24020 }
24021
24022 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24023 && (for_prologue || call_used_regs[reg_base + n_free]))
24024 {
24025 live_regs_mask >>= 1;
24026 n_free++;
24027 }
24028
24029 if (n_free == 0)
24030 return 0;
24031 gcc_assert (amount / 4 * 4 == amount);
24032
24033 if (amount >= 512 && (amount - n_free * 4) < 512)
24034 return (amount - 508) / 4;
24035 if (amount <= n_free * 4)
24036 return amount / 4;
24037 return 0;
24038 }
24039
24040 /* The bits of the Thumb-1 epilogue which aren't usefully expanded as RTL. */
24041 const char *
24042 thumb1_unexpanded_epilogue (void)
24043 {
24044 arm_stack_offsets *offsets;
24045 int regno;
24046 unsigned long live_regs_mask = 0;
24047 int high_regs_pushed = 0;
24048 int extra_pop;
24049 int had_to_push_lr;
24050 int size;
24051
24052 if (cfun->machine->return_used_this_function != 0)
24053 return "";
24054
24055 if (IS_NAKED (arm_current_func_type ()))
24056 return "";
24057
24058 offsets = arm_get_frame_offsets ();
24059 live_regs_mask = offsets->saved_regs_mask;
24060 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24061
24062 /* We can deduce the registers used from the function's return value.
24063 This is more reliable than examining df_regs_ever_live_p () because that
24064 will be set if the register is ever used in the function, not just if
24065 the register is used to hold a return value. */
24066 size = arm_size_return_regs ();
24067
24068 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24069 if (extra_pop > 0)
24070 {
24071 unsigned long extra_mask = (1 << extra_pop) - 1;
24072 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24073 }
24074
24075 /* The prologue may have pushed some high registers to use as
24076 work registers. E.g. the testsuite file:
24077 gcc/testsuite/gcc.c-torture/execute/complex-2.c
24078 compiles to produce:
24079 push {r4, r5, r6, r7, lr}
24080 mov r7, r9
24081 mov r6, r8
24082 push {r6, r7}
24083 as part of the prologue. We have to undo that pushing here. */
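/* For that example the undo sequence is roughly (assuming r2 and r3 are
free low registers here, which depends on the size of the return value):
	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3
i.e. pop into low registers and then move the values back up into the
high registers. */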
24084
24085 if (high_regs_pushed)
24086 {
24087 unsigned long mask = live_regs_mask & 0xff;
24088 int next_hi_reg;
24089
24090 /* The available low registers depend on the size of the value we are
24091 returning. */
24092 if (size <= 12)
24093 mask |= 1 << 3;
24094 if (size <= 8)
24095 mask |= 1 << 2;
24096
24097 if (mask == 0)
24098 /* Oh dear! We have no low registers into which we can pop
24099 high registers! */
24100 internal_error
24101 ("no low registers available for popping high registers");
24102
24103 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24104 if (live_regs_mask & (1 << next_hi_reg))
24105 break;
24106
24107 while (high_regs_pushed)
24108 {
24109 /* Find lo register(s) into which the high register(s) can
24110 be popped. */
24111 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24112 {
24113 if (mask & (1 << regno))
24114 high_regs_pushed--;
24115 if (high_regs_pushed == 0)
24116 break;
24117 }
24118
24119 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24120
24121 /* Pop the values into the low register(s). */
24122 thumb_pop (asm_out_file, mask);
24123
24124 /* Move the value(s) into the high registers. */
24125 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24126 {
24127 if (mask & (1 << regno))
24128 {
24129 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24130 regno);
24131
24132 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24133 if (live_regs_mask & (1 << next_hi_reg))
24134 break;
24135 }
24136 }
24137 }
24138 live_regs_mask &= ~0x0f00;
24139 }
24140
24141 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24142 live_regs_mask &= 0xff;
24143
24144 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24145 {
24146 /* Pop the return address into the PC. */
24147 if (had_to_push_lr)
24148 live_regs_mask |= 1 << PC_REGNUM;
24149
24150 /* Either no argument registers were pushed or a backtrace
24151 structure was created which includes an adjusted stack
24152 pointer, so just pop everything. */
24153 if (live_regs_mask)
24154 thumb_pop (asm_out_file, live_regs_mask);
24155
24156 /* We have either just popped the return address into the
24157 PC or it was kept in LR for the entire function.
24158 Note that thumb_pop has already called thumb_exit if the
24159 PC was in the list. */
24160 if (!had_to_push_lr)
24161 thumb_exit (asm_out_file, LR_REGNUM);
24162 }
24163 else
24164 {
24165 /* Pop everything but the return address. */
24166 if (live_regs_mask)
24167 thumb_pop (asm_out_file, live_regs_mask);
24168
24169 if (had_to_push_lr)
24170 {
24171 if (size > 12)
24172 {
24173 /* We have no free low regs, so save one. */
24174 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24175 LAST_ARG_REGNUM);
24176 }
24177
24178 /* Get the return address into a temporary register. */
24179 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24180
24181 if (size > 12)
24182 {
24183 /* Move the return address to lr. */
24184 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24185 LAST_ARG_REGNUM);
24186 /* Restore the low register. */
24187 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24188 IP_REGNUM);
24189 regno = LR_REGNUM;
24190 }
24191 else
24192 regno = LAST_ARG_REGNUM;
24193 }
24194 else
24195 regno = LR_REGNUM;
24196
24197 /* Remove the argument registers that were pushed onto the stack. */
24198 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24199 SP_REGNUM, SP_REGNUM,
24200 crtl->args.pretend_args_size);
24201
24202 thumb_exit (asm_out_file, regno);
24203 }
24204
24205 return "";
24206 }
24207
24208 /* Functions to save and restore machine-specific function data. */
24209 static struct machine_function *
24210 arm_init_machine_status (void)
24211 {
24212 struct machine_function *machine;
24213 machine = ggc_cleared_alloc<machine_function> ();
24214
24215 #if ARM_FT_UNKNOWN != 0
24216 machine->func_type = ARM_FT_UNKNOWN;
24217 #endif
24218 return machine;
24219 }
24220
24221 /* Return an RTX indicating where the return address to the
24222 calling function can be found. */
24223 rtx
24224 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24225 {
24226 if (count != 0)
24227 return NULL_RTX;
24228
24229 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24230 }
24231
24232 /* Do anything needed before RTL is emitted for each function. */
24233 void
24234 arm_init_expanders (void)
24235 {
24236 /* Arrange to initialize and mark the machine per-function status. */
24237 init_machine_status = arm_init_machine_status;
24238
24239 /* This is to stop the combine pass optimizing away the alignment
24240 adjustment of va_arg. */
24241 /* ??? It is claimed that this should not be necessary. */
24242 if (cfun)
24243 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24244 }
24245
24246 /* Return true if FUNC is to be compiled in a different ARM/Thumb mode from the current one. */
24247
24248 bool
24249 arm_change_mode_p (tree func)
24250 {
24251 if (TREE_CODE (func) != FUNCTION_DECL)
24252 return false;
24253
24254 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24255
24256 if (!callee_tree)
24257 callee_tree = target_option_default_node;
24258
24259 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24260 int flags = callee_opts->x_target_flags;
24261
24262 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24263 }
24264
24265 /* Like arm_compute_initial_elimination_offset. Simpler because there
24266 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
24267 to point at the base of the local variables after static stack
24268 space for a function has been allocated. */
24269
24270 HOST_WIDE_INT
24271 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24272 {
24273 arm_stack_offsets *offsets;
24274
24275 offsets = arm_get_frame_offsets ();
24276
24277 switch (from)
24278 {
24279 case ARG_POINTER_REGNUM:
24280 switch (to)
24281 {
24282 case STACK_POINTER_REGNUM:
24283 return offsets->outgoing_args - offsets->saved_args;
24284
24285 case FRAME_POINTER_REGNUM:
24286 return offsets->soft_frame - offsets->saved_args;
24287
24288 case ARM_HARD_FRAME_POINTER_REGNUM:
24289 return offsets->saved_regs - offsets->saved_args;
24290
24291 case THUMB_HARD_FRAME_POINTER_REGNUM:
24292 return offsets->locals_base - offsets->saved_args;
24293
24294 default:
24295 gcc_unreachable ();
24296 }
24297 break;
24298
24299 case FRAME_POINTER_REGNUM:
24300 switch (to)
24301 {
24302 case STACK_POINTER_REGNUM:
24303 return offsets->outgoing_args - offsets->soft_frame;
24304
24305 case ARM_HARD_FRAME_POINTER_REGNUM:
24306 return offsets->saved_regs - offsets->soft_frame;
24307
24308 case THUMB_HARD_FRAME_POINTER_REGNUM:
24309 return offsets->locals_base - offsets->soft_frame;
24310
24311 default:
24312 gcc_unreachable ();
24313 }
24314 break;
24315
24316 default:
24317 gcc_unreachable ();
24318 }
24319 }
24320
24321 /* Generate the function's prologue. */
24322
24323 void
24324 thumb1_expand_prologue (void)
24325 {
24326 rtx_insn *insn;
24327
24328 HOST_WIDE_INT amount;
24329 arm_stack_offsets *offsets;
24330 unsigned long func_type;
24331 int regno;
24332 unsigned long live_regs_mask;
24333 unsigned long l_mask;
24334 unsigned high_regs_pushed = 0;
24335
24336 func_type = arm_current_func_type ();
24337
24338 /* Naked functions don't have prologues. */
24339 if (IS_NAKED (func_type))
24340 return;
24341
24342 if (IS_INTERRUPT (func_type))
24343 {
24344 error ("interrupt Service Routines cannot be coded in Thumb mode");
24345 return;
24346 }
24347
24348 if (is_called_in_ARM_mode (current_function_decl))
24349 emit_insn (gen_prologue_thumb1_interwork ());
24350
24351 offsets = arm_get_frame_offsets ();
24352 live_regs_mask = offsets->saved_regs_mask;
24353
24354 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24355 l_mask = live_regs_mask & 0x40ff;
24356 /* Then count how many other high registers will need to be pushed. */
24357 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24358
24359 if (crtl->args.pretend_args_size)
24360 {
24361 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24362
24363 if (cfun->machine->uses_anonymous_args)
24364 {
24365 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24366 unsigned long mask;
24367
24368 mask = 1ul << (LAST_ARG_REGNUM + 1);
24369 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24370
24371 insn = thumb1_emit_multi_reg_push (mask, 0);
24372 }
24373 else
24374 {
24375 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24376 stack_pointer_rtx, x));
24377 }
24378 RTX_FRAME_RELATED_P (insn) = 1;
24379 }
24380
24381 if (TARGET_BACKTRACE)
24382 {
24383 HOST_WIDE_INT offset = 0;
24384 unsigned work_register;
24385 rtx work_reg, x, arm_hfp_rtx;
24386
24387 /* We have been asked to create a stack backtrace structure.
24388 The code looks like this:
24389
24390 0 .align 2
24391 0 func:
24392 0 sub SP, #16 Reserve space for 4 registers.
24393 2 push {R7} Push low registers.
24394 4 add R7, SP, #20 Get the stack pointer before the push.
24395 6 str R7, [SP, #8] Store the stack pointer
24396 (before reserving the space).
24397 8 mov R7, PC Get hold of the start of this code + 12.
24398 10 str R7, [SP, #16] Store it.
24399 12 mov R7, FP Get hold of the current frame pointer.
24400 14 str R7, [SP, #4] Store it.
24401 16 mov R7, LR Get hold of the current return address.
24402 18 str R7, [SP, #12] Store it.
24403 20 add R7, SP, #16 Point at the start of the
24404 backtrace structure.
24405 22 mov FP, R7 Put this value into the frame pointer. */
24406
24407 work_register = thumb_find_work_register (live_regs_mask);
24408 work_reg = gen_rtx_REG (SImode, work_register);
24409 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24410
24411 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24412 stack_pointer_rtx, GEN_INT (-16)));
24413 RTX_FRAME_RELATED_P (insn) = 1;
24414
24415 if (l_mask)
24416 {
24417 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24418 RTX_FRAME_RELATED_P (insn) = 1;
24419
24420 offset = bit_count (l_mask) * UNITS_PER_WORD;
24421 }
24422
24423 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24424 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24425
24426 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24427 x = gen_frame_mem (SImode, x);
24428 emit_move_insn (x, work_reg);
24429
24430 /* Make sure that the instruction fetching the PC is in the right place
24431 to calculate "start of backtrace creation code + 12". */
24432 /* ??? The stores using the common WORK_REG ought to be enough to
24433 prevent the scheduler from doing anything weird. Failing that
24434 we could always move all of the following into an UNSPEC_VOLATILE. */
24435 if (l_mask)
24436 {
24437 x = gen_rtx_REG (SImode, PC_REGNUM);
24438 emit_move_insn (work_reg, x);
24439
24440 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24441 x = gen_frame_mem (SImode, x);
24442 emit_move_insn (x, work_reg);
24443
24444 emit_move_insn (work_reg, arm_hfp_rtx);
24445
24446 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24447 x = gen_frame_mem (SImode, x);
24448 emit_move_insn (x, work_reg);
24449 }
24450 else
24451 {
24452 emit_move_insn (work_reg, arm_hfp_rtx);
24453
24454 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24455 x = gen_frame_mem (SImode, x);
24456 emit_move_insn (x, work_reg);
24457
24458 x = gen_rtx_REG (SImode, PC_REGNUM);
24459 emit_move_insn (work_reg, x);
24460
24461 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24462 x = gen_frame_mem (SImode, x);
24463 emit_move_insn (x, work_reg);
24464 }
24465
24466 x = gen_rtx_REG (SImode, LR_REGNUM);
24467 emit_move_insn (work_reg, x);
24468
24469 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24470 x = gen_frame_mem (SImode, x);
24471 emit_move_insn (x, work_reg);
24472
24473 x = GEN_INT (offset + 12);
24474 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24475
24476 emit_move_insn (arm_hfp_rtx, work_reg);
24477 }
24478 /* Optimization: If we are not pushing any low registers but we are going
24479 to push some high registers then delay our first push. This will just
24480 be a push of LR and we can combine it with the push of the first high
24481 register. */
24482 else if ((l_mask & 0xff) != 0
24483 || (high_regs_pushed == 0 && l_mask))
24484 {
24485 unsigned long mask = l_mask;
24486 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24487 insn = thumb1_emit_multi_reg_push (mask, mask);
24488 RTX_FRAME_RELATED_P (insn) = 1;
24489 }
24490
24491 if (high_regs_pushed)
24492 {
24493 unsigned pushable_regs;
24494 unsigned next_hi_reg;
24495 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24496 : crtl->args.info.nregs;
24497 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24498
24499 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24500 if (live_regs_mask & (1 << next_hi_reg))
24501 break;
24502
24503 /* Here we need to mask out registers used for passing arguments,
24504 even if they could otherwise be pushed. This avoids using them to
24505 stash the high registers, which would clobber the argument values. */
24506 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24507
24508 if (pushable_regs == 0)
24509 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24510
24511 while (high_regs_pushed > 0)
24512 {
24513 unsigned long real_regs_mask = 0;
24514
24515 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24516 {
24517 if (pushable_regs & (1 << regno))
24518 {
24519 emit_move_insn (gen_rtx_REG (SImode, regno),
24520 gen_rtx_REG (SImode, next_hi_reg));
24521
24522 high_regs_pushed --;
24523 real_regs_mask |= (1 << next_hi_reg);
24524
24525 if (high_regs_pushed)
24526 {
24527 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24528 next_hi_reg --)
24529 if (live_regs_mask & (1 << next_hi_reg))
24530 break;
24531 }
24532 else
24533 {
24534 pushable_regs &= ~((1 << regno) - 1);
24535 break;
24536 }
24537 }
24538 }
24539
24540 /* If we had to find a work register and we have not yet
24541 saved the LR then add it to the list of regs to push. */
24542 if (l_mask == (1 << LR_REGNUM))
24543 {
24544 pushable_regs |= l_mask;
24545 real_regs_mask |= l_mask;
24546 l_mask = 0;
24547 }
24548
24549 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24550 RTX_FRAME_RELATED_P (insn) = 1;
24551 }
24552 }
24553
24554 /* Load the pic register before setting the frame pointer,
24555 so we can use r7 as a temporary work register. */
24556 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24557 arm_load_pic_register (live_regs_mask);
24558
24559 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24560 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24561 stack_pointer_rtx);
24562
24563 if (flag_stack_usage_info)
24564 current_function_static_stack_size
24565 = offsets->outgoing_args - offsets->saved_args;
24566
24567 amount = offsets->outgoing_args - offsets->saved_regs;
24568 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24569 if (amount)
24570 {
24571 if (amount < 512)
24572 {
24573 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24574 GEN_INT (- amount)));
24575 RTX_FRAME_RELATED_P (insn) = 1;
24576 }
24577 else
24578 {
24579 rtx reg, dwarf;
24580
24581 /* The stack decrement is too big for an immediate value in a single
24582 insn. In theory we could issue multiple subtracts, but after
24583 three of them it becomes more space-efficient to place the full
24584 value in the constant pool and load it into a register. (Also the
24585 ARM debugger really likes to see only one stack decrement per
24586 function.) So instead we look for a scratch register into which
24587 we can load the decrement, and then we subtract this from the
24588 stack pointer. Unfortunately on Thumb the only available
24589 scratch registers are the argument registers, and we cannot use
24590 these as they may hold arguments to the function. Instead we
24591 attempt to locate a call-preserved register which is used by this
24592 function. If we can find one, then we know that it will have
24593 been pushed at the start of the prologue and so we can corrupt
24594 it now. */
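/* For instance (illustrative numbers and label name), with a 1024-byte
frame and r4 saved by this function, the sequence looks roughly like:
	ldr	r4, .Lnnn	@ .Lnnn: .word -1024
	add	sp, sp, r4
with r4 having been pushed by the prologue and restored later. */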
24595 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24596 if (live_regs_mask & (1 << regno))
24597 break;
24598
24599 gcc_assert (regno <= LAST_LO_REGNUM);
24600
24601 reg = gen_rtx_REG (SImode, regno);
24602
24603 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24604
24605 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24606 stack_pointer_rtx, reg));
24607
24608 dwarf = gen_rtx_SET (stack_pointer_rtx,
24609 plus_constant (Pmode, stack_pointer_rtx,
24610 -amount));
24611 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24612 RTX_FRAME_RELATED_P (insn) = 1;
24613 }
24614 }
24615
24616 if (frame_pointer_needed)
24617 thumb_set_frame_pointer (offsets);
24618
24619 /* If we are profiling, make sure no instructions are scheduled before
24620 the call to mcount. Similarly if the user has requested no
24621 scheduling in the prologue. Similarly if we want non-call exceptions
24622 using the EABI unwinder, to prevent faulting instructions from being
24623 swapped with a stack adjustment. */
24624 if (crtl->profile || !TARGET_SCHED_PROLOG
24625 || (arm_except_unwind_info (&global_options) == UI_TARGET
24626 && cfun->can_throw_non_call_exceptions))
24627 emit_insn (gen_blockage ());
24628
24629 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24630 if (live_regs_mask & 0xff)
24631 cfun->machine->lr_save_eliminated = 0;
24632 }
24633
24634 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
24635 single POP instruction can be generated. LR should be replaced by PC.
24636 All the checks required are already done by USE_RETURN_INSN (). Hence,
24637 all we really need to check here is whether a single register or
24638 multiple registers are to be popped. */
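/* For example (hypothetical register set): if {r4, r5, lr} were saved,
the return collapses to a single "pop {r4, r5, pc}"; if only lr was
saved, it becomes a single post-increment load of the return address
into the PC. */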
24639 void
24640 thumb2_expand_return (bool simple_return)
24641 {
24642 int i, num_regs;
24643 unsigned long saved_regs_mask;
24644 arm_stack_offsets *offsets;
24645
24646 offsets = arm_get_frame_offsets ();
24647 saved_regs_mask = offsets->saved_regs_mask;
24648
24649 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24650 if (saved_regs_mask & (1 << i))
24651 num_regs++;
24652
24653 if (!simple_return && saved_regs_mask)
24654 {
24655 if (num_regs == 1)
24656 {
24657 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24658 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24659 rtx addr = gen_rtx_MEM (SImode,
24660 gen_rtx_POST_INC (SImode,
24661 stack_pointer_rtx));
24662 set_mem_alias_set (addr, get_frame_alias_set ());
24663 XVECEXP (par, 0, 0) = ret_rtx;
24664 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
24665 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24666 emit_jump_insn (par);
24667 }
24668 else
24669 {
24670 saved_regs_mask &= ~ (1 << LR_REGNUM);
24671 saved_regs_mask |= (1 << PC_REGNUM);
24672 arm_emit_multi_reg_pop (saved_regs_mask);
24673 }
24674 }
24675 else
24676 {
24677 emit_jump_insn (simple_return_rtx);
24678 }
24679 }
24680
24681 void
24682 thumb1_expand_epilogue (void)
24683 {
24684 HOST_WIDE_INT amount;
24685 arm_stack_offsets *offsets;
24686 int regno;
24687
24688 /* Naked functions don't have epilogues. */
24689 if (IS_NAKED (arm_current_func_type ()))
24690 return;
24691
24692 offsets = arm_get_frame_offsets ();
24693 amount = offsets->outgoing_args - offsets->saved_regs;
24694
24695 if (frame_pointer_needed)
24696 {
24697 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24698 amount = offsets->locals_base - offsets->saved_regs;
24699 }
24700 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24701
24702 gcc_assert (amount >= 0);
24703 if (amount)
24704 {
24705 emit_insn (gen_blockage ());
24706
24707 if (amount < 512)
24708 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24709 GEN_INT (amount)));
24710 else
24711 {
24712 /* r3 is always free in the epilogue. */
24713 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24714
24715 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24716 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24717 }
24718 }
24719
24720 /* Emit a USE (stack_pointer_rtx), so that
24721 the stack adjustment will not be deleted. */
24722 emit_insn (gen_force_register_use (stack_pointer_rtx));
24723
24724 if (crtl->profile || !TARGET_SCHED_PROLOG)
24725 emit_insn (gen_blockage ());
24726
24727 /* Emit a clobber for each register that will be restored in the epilogue,
24728 so that flow2 will get the register lifetimes correct. */
24729 for (regno = 0; regno < 13; regno++)
24730 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24731 emit_clobber (gen_rtx_REG (SImode, regno));
24732
24733 if (! df_regs_ever_live_p (LR_REGNUM))
24734 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24735 }
24736
24737 /* Epilogue code for APCS frame. */
24738 static void
24739 arm_expand_epilogue_apcs_frame (bool really_return)
24740 {
24741 unsigned long func_type;
24742 unsigned long saved_regs_mask;
24743 int num_regs = 0;
24744 int i;
24745 int floats_from_frame = 0;
24746 arm_stack_offsets *offsets;
24747
24748 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24749 func_type = arm_current_func_type ();
24750
24751 /* Get frame offsets for ARM. */
24752 offsets = arm_get_frame_offsets ();
24753 saved_regs_mask = offsets->saved_regs_mask;
24754
24755 /* Find the offset of the floating-point save area in the frame. */
24756 floats_from_frame
24757 = (offsets->saved_args
24758 + arm_compute_static_chain_stack_bytes ()
24759 - offsets->frame);
24760
24761 /* Compute how many core registers are saved and how far away the floats are. */
24762 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24763 if (saved_regs_mask & (1 << i))
24764 {
24765 num_regs++;
24766 floats_from_frame += 4;
24767 }
24768
24769 if (TARGET_HARD_FLOAT && TARGET_VFP)
24770 {
24771 int start_reg;
24772 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24773
24774 /* The offset is from IP_REGNUM. */
24775 int saved_size = arm_get_vfp_saved_size ();
24776 if (saved_size > 0)
24777 {
24778 rtx_insn *insn;
24779 floats_from_frame += saved_size;
24780 insn = emit_insn (gen_addsi3 (ip_rtx,
24781 hard_frame_pointer_rtx,
24782 GEN_INT (-floats_from_frame)));
24783 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24784 ip_rtx, hard_frame_pointer_rtx);
24785 }
24786
24787 /* Generate VFP register multi-pop. */
24788 start_reg = FIRST_VFP_REGNUM;
24789
24790 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24791 /* Look for a case where a reg does not need restoring. */
24792 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24793 && (!df_regs_ever_live_p (i + 1)
24794 || call_used_regs[i + 1]))
24795 {
24796 if (start_reg != i)
24797 arm_emit_vfp_multi_reg_pop (start_reg,
24798 (i - start_reg) / 2,
24799 gen_rtx_REG (SImode,
24800 IP_REGNUM));
24801 start_reg = i + 2;
24802 }
24803
24804 /* Restore the remaining regs that we have discovered (or possibly
24805 even all of them, if the conditional in the for loop never
24806 fired). */
24807 if (start_reg != i)
24808 arm_emit_vfp_multi_reg_pop (start_reg,
24809 (i - start_reg) / 2,
24810 gen_rtx_REG (SImode, IP_REGNUM));
24811 }
24812
24813 if (TARGET_IWMMXT)
24814 {
24815 /* The frame pointer is guaranteed to be non-double-word aligned, as
24816 it is set to double-word-aligned old_stack_pointer - 4. */
24817 rtx_insn *insn;
24818 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24819
24820 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24821 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24822 {
24823 rtx addr = gen_frame_mem (V2SImode,
24824 plus_constant (Pmode, hard_frame_pointer_rtx,
24825 - lrm_count * 4));
24826 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24827 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24828 gen_rtx_REG (V2SImode, i),
24829 NULL_RTX);
24830 lrm_count += 2;
24831 }
24832 }
24833
24834 /* saved_regs_mask should contain IP, which holds the old stack pointer
24835 at the time the activation record was created. Since SP and IP are
24836 adjacent registers, we can restore the value directly into SP. */
24837 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24838 saved_regs_mask &= ~(1 << IP_REGNUM);
24839 saved_regs_mask |= (1 << SP_REGNUM);
24840
24841 /* There are two registers left in saved_regs_mask - LR and PC. We
24842 only need to restore LR (the return address), but to
24843 save time we can load it directly into PC, unless we need a
24844 special function exit sequence, or we are not really returning. */
24845 if (really_return
24846 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24847 && !crtl->calls_eh_return)
24848 /* Delete LR from the register mask, so that LR on
24849 the stack is loaded into the PC in the register mask. */
24850 saved_regs_mask &= ~(1 << LR_REGNUM);
24851 else
24852 saved_regs_mask &= ~(1 << PC_REGNUM);
24853
24854 num_regs = bit_count (saved_regs_mask);
24855 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24856 {
24857 rtx_insn *insn;
24858 emit_insn (gen_blockage ());
24859 /* Unwind the stack to just below the saved registers. */
24860 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24861 hard_frame_pointer_rtx,
24862 GEN_INT (- 4 * num_regs)));
24863
24864 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24865 stack_pointer_rtx, hard_frame_pointer_rtx);
24866 }
24867
24868 arm_emit_multi_reg_pop (saved_regs_mask);
24869
24870 if (IS_INTERRUPT (func_type))
24871 {
24872 /* Interrupt handlers will have pushed the
24873 IP onto the stack, so restore it now. */
24874 rtx_insn *insn;
24875 rtx addr = gen_rtx_MEM (SImode,
24876 gen_rtx_POST_INC (SImode,
24877 stack_pointer_rtx));
24878 set_mem_alias_set (addr, get_frame_alias_set ());
24879 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24880 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24881 gen_rtx_REG (SImode, IP_REGNUM),
24882 NULL_RTX);
24883 }
24884
24885 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24886 return;
24887
24888 if (crtl->calls_eh_return)
24889 emit_insn (gen_addsi3 (stack_pointer_rtx,
24890 stack_pointer_rtx,
24891 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24892
24893 if (IS_STACKALIGN (func_type))
24894 /* Restore the original stack pointer. Before prologue, the stack was
24895 realigned and the original stack pointer saved in r0. For details,
24896 see comment in arm_expand_prologue. */
24897 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24898
24899 emit_jump_insn (simple_return_rtx);
24900 }
24901
24902 /* Generate RTL to represent an ARM epilogue. REALLY_RETURN is true if the
24903 function is not a sibcall. */
24904 void
24905 arm_expand_epilogue (bool really_return)
24906 {
24907 unsigned long func_type;
24908 unsigned long saved_regs_mask;
24909 int num_regs = 0;
24910 int i;
24911 int amount;
24912 arm_stack_offsets *offsets;
24913
24914 func_type = arm_current_func_type ();
24915
24916 /* Naked functions don't have epilogues. Hence, generate a return pattern and
24917 let output_return_instruction take care of any instruction emission. */
24918 if (IS_NAKED (func_type)
24919 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24920 {
24921 if (really_return)
24922 emit_jump_insn (simple_return_rtx);
24923 return;
24924 }
24925
24926 /* If we are throwing an exception, then we really must be doing a
24927 return, so we can't tail-call. */
24928 gcc_assert (!crtl->calls_eh_return || really_return);
24929
24930 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24931 {
24932 arm_expand_epilogue_apcs_frame (really_return);
24933 return;
24934 }
24935
24936 /* Get frame offsets for ARM. */
24937 offsets = arm_get_frame_offsets ();
24938 saved_regs_mask = offsets->saved_regs_mask;
24939 num_regs = bit_count (saved_regs_mask);
24940
24941 if (frame_pointer_needed)
24942 {
24943 rtx_insn *insn;
24944 /* Restore stack pointer if necessary. */
24945 if (TARGET_ARM)
24946 {
24947 /* In ARM mode, the frame pointer points to the first saved register.
24948 Restore the stack pointer to the last saved register. */
24949 amount = offsets->frame - offsets->saved_regs;
24950
24951 /* Force out any pending memory operations that reference stacked data
24952 before stack de-allocation occurs. */
24953 emit_insn (gen_blockage ());
24954 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24955 hard_frame_pointer_rtx,
24956 GEN_INT (amount)));
24957 arm_add_cfa_adjust_cfa_note (insn, amount,
24958 stack_pointer_rtx,
24959 hard_frame_pointer_rtx);
24960
24961 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24962 deleted. */
24963 emit_insn (gen_force_register_use (stack_pointer_rtx));
24964 }
24965 else
24966 {
24967 /* In Thumb-2 mode, the frame pointer points to the last saved
24968 register. */
24969 amount = offsets->locals_base - offsets->saved_regs;
24970 if (amount)
24971 {
24972 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24973 hard_frame_pointer_rtx,
24974 GEN_INT (amount)));
24975 arm_add_cfa_adjust_cfa_note (insn, amount,
24976 hard_frame_pointer_rtx,
24977 hard_frame_pointer_rtx);
24978 }
24979
24980 /* Force out any pending memory operations that reference stacked data
24981 before stack de-allocation occurs. */
24982 emit_insn (gen_blockage ());
24983 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24984 hard_frame_pointer_rtx));
24985 arm_add_cfa_adjust_cfa_note (insn, 0,
24986 stack_pointer_rtx,
24987 hard_frame_pointer_rtx);
24988 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24989 deleted. */
24990 emit_insn (gen_force_register_use (stack_pointer_rtx));
24991 }
24992 }
24993 else
24994 {
24995 /* Pop off outgoing args and local frame to adjust stack pointer to
24996 last saved register. */
24997 amount = offsets->outgoing_args - offsets->saved_regs;
24998 if (amount)
24999 {
25000 rtx_insn *tmp;
25001 /* Force out any pending memory operations that reference stacked data
25002 before stack de-allocation occurs. */
25003 emit_insn (gen_blockage ());
25004 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25005 stack_pointer_rtx,
25006 GEN_INT (amount)));
25007 arm_add_cfa_adjust_cfa_note (tmp, amount,
25008 stack_pointer_rtx, stack_pointer_rtx);
25009 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25010 not deleted. */
25011 emit_insn (gen_force_register_use (stack_pointer_rtx));
25012 }
25013 }
25014
25015 if (TARGET_HARD_FLOAT && TARGET_VFP)
25016 {
25017 /* Generate VFP register multi-pop. */
25018 int end_reg = LAST_VFP_REGNUM + 1;
25019
25020 /* Scan the registers in reverse order. We need to match
25021 any groupings made in the prologue and generate matching
25022 vldm operations. We need to match the groups because,
25023 unlike pop, vldm can only restore consecutive registers. */
25024 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25025 /* Look for a case where a reg does not need restoring. */
25026 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25027 && (!df_regs_ever_live_p (i + 1)
25028 || call_used_regs[i + 1]))
25029 {
25030 /* Restore the regs discovered so far (from reg+2 to
25031 end_reg). */
25032 if (end_reg > i + 2)
25033 arm_emit_vfp_multi_reg_pop (i + 2,
25034 (end_reg - (i + 2)) / 2,
25035 stack_pointer_rtx);
25036 end_reg = i;
25037 }
25038
25039 /* Restore the remaining regs that we have discovered (or possibly
25040 even all of them, if the conditional in the for loop never
25041 fired). */
25042 if (end_reg > i + 2)
25043 arm_emit_vfp_multi_reg_pop (i + 2,
25044 (end_reg - (i + 2)) / 2,
25045 stack_pointer_rtx);
25046 }
25047
25048 if (TARGET_IWMMXT)
25049 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25050 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25051 {
25052 rtx_insn *insn;
25053 rtx addr = gen_rtx_MEM (V2SImode,
25054 gen_rtx_POST_INC (SImode,
25055 stack_pointer_rtx));
25056 set_mem_alias_set (addr, get_frame_alias_set ());
25057 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25058 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25059 gen_rtx_REG (V2SImode, i),
25060 NULL_RTX);
25061 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25062 stack_pointer_rtx, stack_pointer_rtx);
25063 }
25064
25065 if (saved_regs_mask)
25066 {
25067 rtx insn;
25068 bool return_in_pc = false;
25069
25070 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25071 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25072 && !IS_STACKALIGN (func_type)
25073 && really_return
25074 && crtl->args.pretend_args_size == 0
25075 && saved_regs_mask & (1 << LR_REGNUM)
25076 && !crtl->calls_eh_return)
25077 {
25078 saved_regs_mask &= ~(1 << LR_REGNUM);
25079 saved_regs_mask |= (1 << PC_REGNUM);
25080 return_in_pc = true;
25081 }
25082
25083 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25084 {
25085 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25086 if (saved_regs_mask & (1 << i))
25087 {
25088 rtx addr = gen_rtx_MEM (SImode,
25089 gen_rtx_POST_INC (SImode,
25090 stack_pointer_rtx));
25091 set_mem_alias_set (addr, get_frame_alias_set ());
25092
25093 if (i == PC_REGNUM)
25094 {
25095 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25096 XVECEXP (insn, 0, 0) = ret_rtx;
25097 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25098 addr);
25099 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25100 insn = emit_jump_insn (insn);
25101 }
25102 else
25103 {
25104 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25105 addr));
25106 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25107 gen_rtx_REG (SImode, i),
25108 NULL_RTX);
25109 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25110 stack_pointer_rtx,
25111 stack_pointer_rtx);
25112 }
25113 }
25114 }
25115 else
25116 {
25117 if (TARGET_LDRD
25118 && current_tune->prefer_ldrd_strd
25119 && !optimize_function_for_size_p (cfun))
25120 {
25121 if (TARGET_THUMB2)
25122 thumb2_emit_ldrd_pop (saved_regs_mask);
25123 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25124 arm_emit_ldrd_pop (saved_regs_mask);
25125 else
25126 arm_emit_multi_reg_pop (saved_regs_mask);
25127 }
25128 else
25129 arm_emit_multi_reg_pop (saved_regs_mask);
25130 }
25131
25132 if (return_in_pc)
25133 return;
25134 }
25135
25136 if (crtl->args.pretend_args_size)
25137 {
25138 int i, j;
25139 rtx dwarf = NULL_RTX;
25140 rtx_insn *tmp =
25141 emit_insn (gen_addsi3 (stack_pointer_rtx,
25142 stack_pointer_rtx,
25143 GEN_INT (crtl->args.pretend_args_size)));
25144
25145 RTX_FRAME_RELATED_P (tmp) = 1;
25146
25147 if (cfun->machine->uses_anonymous_args)
25148 {
25149 /* Restore the pretend args. See arm_expand_prologue for how the
25150 pretend args are saved on the stack. */
25151 int num_regs = crtl->args.pretend_args_size / 4;
25152 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25153 for (j = 0, i = 0; j < num_regs; i++)
25154 if (saved_regs_mask & (1 << i))
25155 {
25156 rtx reg = gen_rtx_REG (SImode, i);
25157 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25158 j++;
25159 }
25160 REG_NOTES (tmp) = dwarf;
25161 }
25162 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25163 stack_pointer_rtx, stack_pointer_rtx);
25164 }
25165
25166 if (!really_return)
25167 return;
25168
25169 if (crtl->calls_eh_return)
25170 emit_insn (gen_addsi3 (stack_pointer_rtx,
25171 stack_pointer_rtx,
25172 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25173
25174 if (IS_STACKALIGN (func_type))
25175 /* Restore the original stack pointer. Before prologue, the stack was
25176 realigned and the original stack pointer saved in r0. For details,
25177 see comment in arm_expand_prologue. */
25178 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25179
25180 emit_jump_insn (simple_return_rtx);
25181 }
25182
25183 /* Implementation of insn prologue_thumb1_interwork. This is the first
25184 "instruction" of a function called in ARM mode. Switch to Thumb mode. */
25185
25186 const char *
25187 thumb1_output_interwork (void)
25188 {
25189 const char * name;
25190 FILE *f = asm_out_file;
25191
25192 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25193 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25194 == SYMBOL_REF);
25195 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25196
25197 /* Generate code sequence to switch us into Thumb mode. */
25198 /* The .code 32 directive has already been emitted by
25199 ASM_DECLARE_FUNCTION_NAME. */
25200 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25201 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25202
25203 /* Generate a label, so that the debugger will notice the
25204 change in instruction sets. This label is also used by
25205 the assembler to bypass the ARM code when this function
25206 is called from a Thumb encoded function elsewhere in the
25207 same file. Hence the definition of STUB_NAME here must
25208 agree with the definition in gas/config/tc-arm.c. */
25209
25210 #define STUB_NAME ".real_start_of"
25211
25212 fprintf (f, "\t.code\t16\n");
25213 #ifdef ARM_PE
25214 if (arm_dllexport_name_p (name))
25215 name = arm_strip_name_encoding (name);
25216 #endif
25217 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25218 fprintf (f, "\t.thumb_func\n");
25219 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25220
25221 return "";
25222 }
25223
25224 /* Handle the case of a double word load into a low register from
25225 a computed memory address. The computed address may involve a
25226 register which is overwritten by the load. */
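/* For example (the register numbers are illustrative): for a DImode load
into r2/r3 from the address held in r2, the high word must be loaded
first so that the base register is not clobbered:
	ldr	r3, [r2, #4]
	ldr	r2, [r2]
whereas with a distinct base register the low word is loaded first. */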
25227 const char *
25228 thumb_load_double_from_address (rtx *operands)
25229 {
25230 rtx addr;
25231 rtx base;
25232 rtx offset;
25233 rtx arg1;
25234 rtx arg2;
25235
25236 gcc_assert (REG_P (operands[0]));
25237 gcc_assert (MEM_P (operands[1]));
25238
25239 /* Get the memory address. */
25240 addr = XEXP (operands[1], 0);
25241
25242 /* Work out how the memory address is computed. */
25243 switch (GET_CODE (addr))
25244 {
25245 case REG:
25246 operands[2] = adjust_address (operands[1], SImode, 4);
25247
25248 if (REGNO (operands[0]) == REGNO (addr))
25249 {
25250 output_asm_insn ("ldr\t%H0, %2", operands);
25251 output_asm_insn ("ldr\t%0, %1", operands);
25252 }
25253 else
25254 {
25255 output_asm_insn ("ldr\t%0, %1", operands);
25256 output_asm_insn ("ldr\t%H0, %2", operands);
25257 }
25258 break;
25259
25260 case CONST:
25261 /* Compute <address> + 4 for the high order load. */
25262 operands[2] = adjust_address (operands[1], SImode, 4);
25263
25264 output_asm_insn ("ldr\t%0, %1", operands);
25265 output_asm_insn ("ldr\t%H0, %2", operands);
25266 break;
25267
25268 case PLUS:
25269 arg1 = XEXP (addr, 0);
25270 arg2 = XEXP (addr, 1);
25271
25272 if (CONSTANT_P (arg1))
25273 base = arg2, offset = arg1;
25274 else
25275 base = arg1, offset = arg2;
25276
25277 gcc_assert (REG_P (base));
25278
25279 /* Catch the case of <address> = <reg> + <reg> */
25280 if (REG_P (offset))
25281 {
25282 int reg_offset = REGNO (offset);
25283 int reg_base = REGNO (base);
25284 int reg_dest = REGNO (operands[0]);
25285
25286 /* Add the base and offset registers together into the
25287 higher destination register. */
25288 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25289 reg_dest + 1, reg_base, reg_offset);
25290
25291 /* Load the lower destination register from the address in
25292 the higher destination register. */
25293 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25294 reg_dest, reg_dest + 1);
25295
25296 /* Load the higher destination register from its own address
25297 plus 4. */
25298 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25299 reg_dest + 1, reg_dest + 1);
25300 }
25301 else
25302 {
25303 /* Compute <address> + 4 for the high order load. */
25304 operands[2] = adjust_address (operands[1], SImode, 4);
25305
25306 /* If the computed address is held in the low order register
25307 then load the high order register first, otherwise always
25308 load the low order register first. */
25309 if (REGNO (operands[0]) == REGNO (base))
25310 {
25311 output_asm_insn ("ldr\t%H0, %2", operands);
25312 output_asm_insn ("ldr\t%0, %1", operands);
25313 }
25314 else
25315 {
25316 output_asm_insn ("ldr\t%0, %1", operands);
25317 output_asm_insn ("ldr\t%H0, %2", operands);
25318 }
25319 }
25320 break;
25321
25322 case LABEL_REF:
25323 /* With no registers to worry about we can just load the value
25324 directly. */
25325 operands[2] = adjust_address (operands[1], SImode, 4);
25326
25327 output_asm_insn ("ldr\t%H0, %2", operands);
25328 output_asm_insn ("ldr\t%0, %1", operands);
25329 break;
25330
25331 default:
25332 gcc_unreachable ();
25333 }
25334
25335 return "";
25336 }
25337
25338 const char *
25339 thumb_output_move_mem_multiple (int n, rtx *operands)
25340 {
25341 rtx tmp;
25342
25343 switch (n)
25344 {
25345 case 2:
25346 if (REGNO (operands[4]) > REGNO (operands[5]))
25347 {
25348 tmp = operands[4];
25349 operands[4] = operands[5];
25350 operands[5] = tmp;
25351 }
25352 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25353 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25354 break;
25355
25356 case 3:
25357 if (REGNO (operands[4]) > REGNO (operands[5]))
25358 std::swap (operands[4], operands[5]);
25359 if (REGNO (operands[5]) > REGNO (operands[6]))
25360 std::swap (operands[5], operands[6]);
25361 if (REGNO (operands[4]) > REGNO (operands[5]))
25362 std::swap (operands[4], operands[5]);
25363
25364 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25365 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25366 break;
25367
25368 default:
25369 gcc_unreachable ();
25370 }
25371
25372 return "";
25373 }
25374
25375 /* Output a call-via instruction for thumb state. */
25376 const char *
25377 thumb_call_via_reg (rtx reg)
25378 {
25379 int regno = REGNO (reg);
25380 rtx *labelp;
25381
25382 gcc_assert (regno < LR_REGNUM);
25383
25384 /* If we are in the normal text section we can use a single instance
25385 per compilation unit. If we are doing function sections, then we need
25386 an entry per section, since we can't rely on reachability. */
25387 if (in_section == text_section)
25388 {
25389 thumb_call_reg_needed = 1;
25390
25391 if (thumb_call_via_label[regno] == NULL)
25392 thumb_call_via_label[regno] = gen_label_rtx ();
25393 labelp = thumb_call_via_label + regno;
25394 }
25395 else
25396 {
25397 if (cfun->machine->call_via[regno] == NULL)
25398 cfun->machine->call_via[regno] = gen_label_rtx ();
25399 labelp = cfun->machine->call_via + regno;
25400 }
25401
25402 output_asm_insn ("bl\t%a0", labelp);
25403 return "";
25404 }
25405
25406 /* Routines for generating rtl. */
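/* Expand a constant-length block copy. As a worked example (the length
is illustrative), a 23-byte copy is emitted as a 12-byte block move
(movmem12b), an 8-byte block move (movmem8b), then a halfword and a
byte copy for the remaining 2 + 1 bytes. */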
25407 void
25408 thumb_expand_movmemqi (rtx *operands)
25409 {
25410 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25411 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25412 HOST_WIDE_INT len = INTVAL (operands[2]);
25413 HOST_WIDE_INT offset = 0;
25414
25415 while (len >= 12)
25416 {
25417 emit_insn (gen_movmem12b (out, in, out, in));
25418 len -= 12;
25419 }
25420
25421 if (len >= 8)
25422 {
25423 emit_insn (gen_movmem8b (out, in, out, in));
25424 len -= 8;
25425 }
25426
25427 if (len >= 4)
25428 {
25429 rtx reg = gen_reg_rtx (SImode);
25430 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25431 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25432 len -= 4;
25433 offset += 4;
25434 }
25435
25436 if (len >= 2)
25437 {
25438 rtx reg = gen_reg_rtx (HImode);
25439 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25440 plus_constant (Pmode, in,
25441 offset))));
25442 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25443 offset)),
25444 reg));
25445 len -= 2;
25446 offset += 2;
25447 }
25448
25449 if (len)
25450 {
25451 rtx reg = gen_reg_rtx (QImode);
25452 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25453 plus_constant (Pmode, in,
25454 offset))));
25455 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25456 offset)),
25457 reg));
25458 }
25459 }
25460
25461 void
25462 thumb_reload_out_hi (rtx *operands)
25463 {
25464 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25465 }
25466
25467 /* Handle reading a half-word from memory during reload. */
25468 void
25469 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25470 {
25471 gcc_unreachable ();
25472 }
25473
25474 /* Return the length of a function name prefix
25475 that starts with the character C. */
25476 static int
25477 arm_get_strip_length (int c)
25478 {
25479 switch (c)
25480 {
25481 ARM_NAME_ENCODING_LENGTHS
25482 default: return 0;
25483 }
25484 }
25485
25486 /* Return a pointer to a function's name with any
25487 and all prefix encodings stripped from it. */
25488 const char *
25489 arm_strip_name_encoding (const char *name)
25490 {
25491 int skip;
25492
25493 while ((skip = arm_get_strip_length (* name)))
25494 name += skip;
25495
25496 return name;
25497 }
25498
25499 /* If there is a '*' anywhere in the name's prefix, then
25500 emit the stripped name verbatim, otherwise prepend an
25501 underscore if leading underscores are being used. */
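/* For example (the "_" user label prefix is hypothetical): an encoded
name "*foo" is emitted verbatim as "foo", while a plain "foo" is
emitted as "_foo". The prefix characters that get stripped are the ones
listed in ARM_NAME_ENCODING_LENGTHS, via arm_get_strip_length. */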
25502 void
25503 arm_asm_output_labelref (FILE *stream, const char *name)
25504 {
25505 int skip;
25506 int verbatim = 0;
25507
25508 while ((skip = arm_get_strip_length (* name)))
25509 {
25510 verbatim |= (*name == '*');
25511 name += skip;
25512 }
25513
25514 if (verbatim)
25515 fputs (name, stream);
25516 else
25517 asm_fprintf (stream, "%U%s", name);
25518 }
25519
25520 /* This function is used to emit an EABI tag and its associated value.
25521 We emit the numerical value of the tag in case the assembler does not
25522 support textual tags (e.g. gas prior to 2.20). If requested we include
25523 the tag name in a comment so that anyone reading the assembler output
25524 will know which tag is being set.
25525
25526 This function is not static because arm-c.c needs it too. */
25527
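/* For instance, arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1)
emits ".eabi_attribute 20, 1", followed under -fverbose-asm by an
"@ Tag_ABI_FP_denormal" comment (assuming ASM_COMMENT_START is "@", as
on ARM ELF targets). */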
25528 void
25529 arm_emit_eabi_attribute (const char *name, int num, int val)
25530 {
25531 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25532 if (flag_verbose_asm || flag_debug_asm)
25533 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25534 asm_fprintf (asm_out_file, "\n");
25535 }
25536
25537 /* This function is used to print CPU tuning information as a comment
25538 in the assembler file. Pointers are not printed for now. */
25539
25540 void
25541 arm_print_tune_info (void)
25542 {
25543 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25544 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25545 current_tune->constant_limit);
25546 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25547 current_tune->max_insns_skipped);
25548 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
25549 current_tune->prefetch.num_slots);
25550 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
25551 current_tune->prefetch.l1_cache_size);
25552 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25553 current_tune->prefetch.l1_cache_line_size);
25554 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25555 (int) current_tune->prefer_constant_pool);
25556 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25557 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25558 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25559 current_tune->branch_cost (false, false));
25560 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25561 current_tune->branch_cost (false, true));
25562 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25563 current_tune->branch_cost (true, false));
25564 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25565 current_tune->branch_cost (true, true));
25566 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25567 (int) current_tune->prefer_ldrd_strd);
25568 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25569 (int) current_tune->logical_op_non_short_circuit_thumb,
25570 (int) current_tune->logical_op_non_short_circuit_arm);
25571 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25572 (int) current_tune->prefer_neon_for_64bits);
25573 asm_fprintf (asm_out_file,
25574 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25575 (int) current_tune->disparage_flag_setting_t16_encodings);
25576 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25577 (int) current_tune->string_ops_prefer_neon);
25578 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25579 current_tune->max_insns_inline_memset);
25580 asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n",
25581 current_tune->fusible_ops);
25582 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25583 (int) current_tune->sched_autopref);
25584 }
25585
25586 static void
25587 arm_file_start (void)
25588 {
25589 int val;
25590
25591 if (TARGET_BPABI)
25592 {
25593 const char *fpu_name;
25594 if (arm_selected_arch)
25595 {
25596 /* armv7ve doesn't support any extensions. */
25597 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25598 {
25599 /* Keep backward compatibility for assemblers
25600 which don't support armv7ve. */
25601 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25602 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25603 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25604 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25605 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25606 }
25607 else
25608 {
25609 const char* pos = strchr (arm_selected_arch->name, '+');
25610 if (pos)
25611 {
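	      /* The architecture name has the form "<arch>+<ext>": split it so
		 the base architecture and the extension can be emitted as
		 separate .arch and .arch_extension directives.  */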
25612 char buf[15];
25613 gcc_assert (strlen (arm_selected_arch->name)
25614 <= sizeof (buf) / sizeof (*pos));
25615 strncpy (buf, arm_selected_arch->name,
25616 (pos - arm_selected_arch->name) * sizeof (*pos));
25617 buf[pos - arm_selected_arch->name] = '\0';
25618 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25619 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25620 }
25621 else
25622 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25623 }
25624 }
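	  /* Generic CPU names have the form "generic-<arch>"; emit only the
	     architecture part.  */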
25625 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25626 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25627 else
25628 {
25629 const char* truncated_name
25630 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25631 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25632 }
25633
25634 if (print_tune_info)
25635 arm_print_tune_info ();
25636
25637 if (TARGET_SOFT_FLOAT)
25638 {
25639 fpu_name = "softvfp";
25640 }
25641 else
25642 {
25643 fpu_name = arm_fpu_desc->name;
25644 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25645 {
25646 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25647 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25648
25649 if (TARGET_HARD_FLOAT_ABI)
25650 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25651 }
25652 }
25653 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25654
25655 /* Some of these attributes only apply when the corresponding features
25656 are used. However we don't have any easy way of figuring this out.
25657 Conservatively record the setting that would have been used. */
25658
25659 if (flag_rounding_math)
25660 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25661
25662 if (!flag_unsafe_math_optimizations)
25663 {
25664 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25665 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25666 }
25667 if (flag_signaling_nans)
25668 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25669
25670 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25671 flag_finite_math_only ? 1 : 3);
25672
25673 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25674 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25675 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25676 flag_short_enums ? 1 : 2);
25677
25678 /* Tag_ABI_optimization_goals. */
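      /* Values follow the ABI addenda: 1 and 2 prefer speed (2 aggressively),
	 4 aggressively prefers size, and 6 (used here when not optimizing)
	 prefers the best debugging experience.  */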
25679 if (optimize_size)
25680 val = 4;
25681 else if (optimize >= 2)
25682 val = 2;
25683 else if (optimize)
25684 val = 1;
25685 else
25686 val = 6;
25687 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25688
25689 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25690 unaligned_access);
25691
25692 if (arm_fp16_format)
25693 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25694 (int) arm_fp16_format);
25695
25696 if (arm_lang_output_object_attributes_hook)
25697 arm_lang_output_object_attributes_hook();
25698 }
25699
25700 default_file_start ();
25701 }
25702
25703 static void
25704 arm_file_end (void)
25705 {
25706 int regno;
25707
25708 if (NEED_INDICATE_EXEC_STACK)
25709 /* Add .note.GNU-stack. */
25710 file_end_indicate_exec_stack ();
25711
25712 if (! thumb_call_reg_needed)
25713 return;
25714
25715 switch_to_section (text_section);
25716 asm_fprintf (asm_out_file, "\t.code 16\n");
25717 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25718
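  /* Emit the Thumb call-via helpers recorded in thumb_call_via_label: each is
     an internal label followed by a 'bx' through the corresponding register.  */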
25719 for (regno = 0; regno < LR_REGNUM; regno++)
25720 {
25721 rtx label = thumb_call_via_label[regno];
25722
25723 if (label != 0)
25724 {
25725 targetm.asm_out.internal_label (asm_out_file, "L",
25726 CODE_LABEL_NUMBER (label));
25727 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25728 }
25729 }
25730 }
25731
25732 #ifndef ARM_PE
25733 /* Symbols in the text segment can be accessed without indirecting via the
25734 constant pool; it may take an extra binary operation, but this is still
25735 faster than indirecting via memory. Don't do this when not optimizing,
25736 since we won't be calculating all of the offsets necessary to do this
25737 simplification. */
25738
25739 static void
25740 arm_encode_section_info (tree decl, rtx rtl, int first)
25741 {
25742 if (optimize > 0 && TREE_CONSTANT (decl))
25743 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25744
25745 default_encode_section_info (decl, rtl, first);
25746 }
25747 #endif /* !ARM_PE */
25748
25749 static void
25750 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25751 {
25752 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25753 && !strcmp (prefix, "L"))
25754 {
25755 arm_ccfsm_state = 0;
25756 arm_target_insn = NULL;
25757 }
25758 default_internal_label (stream, prefix, labelno);
25759 }
25760
25761 /* Output code to add DELTA to the first argument, and then jump
25762 to FUNCTION. Used for C++ multiple inheritance. */
25763 static void
25764 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25765 HOST_WIDE_INT delta,
25766 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25767 tree function)
25768 {
25769 static int thunk_label = 0;
25770 char label[256];
25771 char labelpc[256];
25772 int mi_delta = delta;
25773 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25774 int shift = 0;
25775 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25776 ? 1 : 0);
25777 if (mi_delta < 0)
25778 mi_delta = - mi_delta;
25779
25780 final_start_function (emit_barrier (), file, 1);
25781
25782 if (TARGET_THUMB1)
25783 {
25784 int labelno = thunk_label++;
25785 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25786 /* Thunks are entered in ARM mode when available.  */
25787 if (TARGET_THUMB1_ONLY)
25788 {
25789 /* push r3 so we can use it as a temporary. */
25790 /* TODO: Omit this save if r3 is not used. */
25791 fputs ("\tpush {r3}\n", file);
25792 fputs ("\tldr\tr3, ", file);
25793 }
25794 else
25795 {
25796 fputs ("\tldr\tr12, ", file);
25797 }
25798 assemble_name (file, label);
25799 fputc ('\n', file);
25800 if (flag_pic)
25801 {
25802 /* If we are generating PIC, the ldr instruction below loads
25803 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25804 the address of the add + 8, so we have:
25805
25806 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25807 = target + 1.
25808
25809 Note that we have "+ 1" because some versions of GNU ld
25810 don't set the low bit of the result for R_ARM_REL32
25811 relocations against thumb function symbols.
25812 On ARMv6M this is +4, not +8. */
25813 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25814 assemble_name (file, labelpc);
25815 fputs (":\n", file);
25816 if (TARGET_THUMB1_ONLY)
25817 {
25818 /* This is 2 insns after the start of the thunk, so we know it
25819 is 4-byte aligned. */
25820 fputs ("\tadd\tr3, pc, r3\n", file);
25821 fputs ("\tmov r12, r3\n", file);
25822 }
25823 else
25824 fputs ("\tadd\tr12, pc, r12\n", file);
25825 }
25826 else if (TARGET_THUMB1_ONLY)
25827 fputs ("\tmov r12, r3\n", file);
25828 }
25829 if (TARGET_THUMB1_ONLY)
25830 {
25831 if (mi_delta > 255)
25832 {
25833 fputs ("\tldr\tr3, ", file);
25834 assemble_name (file, label);
25835 fputs ("+4\n", file);
25836 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25837 mi_op, this_regno, this_regno);
25838 }
25839 else if (mi_delta != 0)
25840 {
25841 /* Thumb-1 unified syntax requires an 's' suffix on the instruction name
25842    when one of the operands is an immediate.  */
25843 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25844 mi_op, this_regno, this_regno,
25845 mi_delta);
25846 }
25847 }
25848 else
25849 {
25850 /* TODO: Use movw/movt for large constants when available. */
25851 while (mi_delta != 0)
25852 {
25853 if ((mi_delta & (3 << shift)) == 0)
25854 shift += 2;
25855 else
25856 {
25857 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25858 mi_op, this_regno, this_regno,
25859 mi_delta & (0xff << shift));
25860 mi_delta &= ~(0xff << shift);
25861 shift += 8;
25862 }
25863 }
25864 }
25865 if (TARGET_THUMB1)
25866 {
25867 if (TARGET_THUMB1_ONLY)
25868 fputs ("\tpop\t{r3}\n", file);
25869
25870 fprintf (file, "\tbx\tr12\n");
25871 ASM_OUTPUT_ALIGN (file, 2);
25872 assemble_name (file, label);
25873 fputs (":\n", file);
25874 if (flag_pic)
25875 {
25876 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25877 rtx tem = XEXP (DECL_RTL (function), 0);
25878 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25879 pipeline offset is four rather than eight. Adjust the offset
25880 accordingly. */
25881 tem = plus_constant (GET_MODE (tem), tem,
25882 TARGET_THUMB1_ONLY ? -3 : -7);
25883 tem = gen_rtx_MINUS (GET_MODE (tem),
25884 tem,
25885 gen_rtx_SYMBOL_REF (Pmode,
25886 ggc_strdup (labelpc)));
25887 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25888 }
25889 else
25890 /* Output ".word .LTHUNKn". */
25891 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25892
25893 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25894 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25895 }
25896 else
25897 {
25898 fputs ("\tb\t", file);
25899 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25900 if (NEED_PLT_RELOC)
25901 fputs ("(PLT)", file);
25902 fputc ('\n', file);
25903 }
25904
25905 final_end_function ();
25906 }
25907
25908 int
25909 arm_emit_vector_const (FILE *file, rtx x)
25910 {
25911 int i;
25912 const char * pattern;
25913
25914 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25915
25916 switch (GET_MODE (x))
25917 {
25918 case V2SImode: pattern = "%08x"; break;
25919 case V4HImode: pattern = "%04x"; break;
25920 case V8QImode: pattern = "%02x"; break;
25921 default: gcc_unreachable ();
25922 }
25923
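  /* Emit the vector as a single hexadecimal constant, highest-numbered
     element first.  */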
25924 fprintf (file, "0x");
25925 for (i = CONST_VECTOR_NUNITS (x); i--;)
25926 {
25927 rtx element;
25928
25929 element = CONST_VECTOR_ELT (x, i);
25930 fprintf (file, pattern, INTVAL (element));
25931 }
25932
25933 return 1;
25934 }
25935
25936 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25937 HFmode constant pool entries are actually loaded with ldr. */
25938 void
25939 arm_emit_fp16_const (rtx c)
25940 {
25941 REAL_VALUE_TYPE r;
25942 long bits;
25943
25944 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25945 bits = real_to_target (NULL, &r, HFmode);
25946 if (WORDS_BIG_ENDIAN)
25947 assemble_zeros (2);
25948 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25949 if (!WORDS_BIG_ENDIAN)
25950 assemble_zeros (2);
25951 }
25952
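/* Output a load into an iWMMXt GR register.  When the address is a base
   register plus an out-of-range constant, the value is bounced through a core
   register saved on the stack; otherwise a plain WLDRW suffices.  */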
25953 const char *
25954 arm_output_load_gr (rtx *operands)
25955 {
25956 rtx reg;
25957 rtx offset;
25958 rtx wcgr;
25959 rtx sum;
25960
25961 if (!MEM_P (operands [1])
25962 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25963 || !REG_P (reg = XEXP (sum, 0))
25964 || !CONST_INT_P (offset = XEXP (sum, 1))
25965 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25966 return "wldrw%?\t%0, %1";
25967
25968 /* Fix up an out-of-range load of a GR register. */
25969 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25970 wcgr = operands[0];
25971 operands[0] = reg;
25972 output_asm_insn ("ldr%?\t%0, %1", operands);
25973
25974 operands[0] = wcgr;
25975 operands[1] = reg;
25976 output_asm_insn ("tmcr%?\t%0, %1", operands);
25977 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25978
25979 return "";
25980 }
25981
25982 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25983
25984 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25985 named arg and all anonymous args onto the stack.
25986 XXX I know the prologue shouldn't be pushing registers, but it is faster
25987 that way. */
25988
25989 static void
25990 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25991 machine_mode mode,
25992 tree type,
25993 int *pretend_size,
25994 int second_time ATTRIBUTE_UNUSED)
25995 {
25996 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25997 int nregs;
25998
25999 cfun->machine->uses_anonymous_args = 1;
26000 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26001 {
26002 nregs = pcum->aapcs_ncrn;
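      /* If the first anonymous argument needs doubleword alignment, skip the
	 odd-numbered core register so the register save starts on an even
	 register.  */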
26003 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
26004 nregs++;
26005 }
26006 else
26007 nregs = pcum->nregs;
26008
26009 if (nregs < NUM_ARG_REGS)
26010 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26011 }
26012
26013 /* We can't rely on the caller doing the proper promotion when
26014 using APCS or ATPCS. */
26015
26016 static bool
26017 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26018 {
26019 return !TARGET_AAPCS_BASED;
26020 }
26021
26022 static machine_mode
26023 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26024 machine_mode mode,
26025 int *punsignedp ATTRIBUTE_UNUSED,
26026 const_tree fntype ATTRIBUTE_UNUSED,
26027 int for_return ATTRIBUTE_UNUSED)
26028 {
26029 if (GET_MODE_CLASS (mode) == MODE_INT
26030 && GET_MODE_SIZE (mode) < 4)
26031 return SImode;
26032
26033 return mode;
26034 }
26035
26036 /* AAPCS based ABIs use short enums by default. */
26037
26038 static bool
26039 arm_default_short_enums (void)
26040 {
26041 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
26042 }
26043
26044
26045 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26046
26047 static bool
26048 arm_align_anon_bitfield (void)
26049 {
26050 return TARGET_AAPCS_BASED;
26051 }
26052
26053
26054 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26055
26056 static tree
26057 arm_cxx_guard_type (void)
26058 {
26059 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26060 }
26061
26062
26063 /* The EABI says test the least significant bit of a guard variable. */
26064
26065 static bool
26066 arm_cxx_guard_mask_bit (void)
26067 {
26068 return TARGET_AAPCS_BASED;
26069 }
26070
26071
26072 /* The EABI specifies that all array cookies are 8 bytes long. */
26073
26074 static tree
26075 arm_get_cookie_size (tree type)
26076 {
26077 tree size;
26078
26079 if (!TARGET_AAPCS_BASED)
26080 return default_cxx_get_cookie_size (type);
26081
26082 size = build_int_cst (sizetype, 8);
26083 return size;
26084 }
26085
26086
26087 /* The EABI says that array cookies should also contain the element size. */
26088
26089 static bool
26090 arm_cookie_has_size (void)
26091 {
26092 return TARGET_AAPCS_BASED;
26093 }
26094
26095
26096 /* The EABI says constructors and destructors should return a pointer to
26097 the object constructed/destroyed. */
26098
26099 static bool
26100 arm_cxx_cdtor_returns_this (void)
26101 {
26102 return TARGET_AAPCS_BASED;
26103 }
26104
26105 /* The EABI says that an inline function may never be the key
26106 method. */
26107
26108 static bool
26109 arm_cxx_key_method_may_be_inline (void)
26110 {
26111 return !TARGET_AAPCS_BASED;
26112 }
26113
26114 static void
26115 arm_cxx_determine_class_data_visibility (tree decl)
26116 {
26117 if (!TARGET_AAPCS_BASED
26118 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26119 return;
26120
26121 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26122 is exported. However, on systems without dynamic vague linkage,
26123 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26124 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26125 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26126 else
26127 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26128 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26129 }
26130
26131 static bool
26132 arm_cxx_class_data_always_comdat (void)
26133 {
26134 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26135 vague linkage if the class has no key function. */
26136 return !TARGET_AAPCS_BASED;
26137 }
26138
26139
26140 /* The EABI says __aeabi_atexit should be used to register static
26141 destructors. */
26142
26143 static bool
26144 arm_cxx_use_aeabi_atexit (void)
26145 {
26146 return TARGET_AAPCS_BASED;
26147 }
26148
26149
26150 void
26151 arm_set_return_address (rtx source, rtx scratch)
26152 {
26153 arm_stack_offsets *offsets;
26154 HOST_WIDE_INT delta;
26155 rtx addr;
26156 unsigned long saved_regs;
26157
26158 offsets = arm_get_frame_offsets ();
26159 saved_regs = offsets->saved_regs_mask;
26160
26161 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26162 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26163 else
26164 {
26165 if (frame_pointer_needed)
26166 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26167 else
26168 {
26169 /* LR will be the first saved register. */
26170 delta = offsets->outgoing_args - (offsets->frame + 4);
26171
26172
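	    /* A word load/store can only encode an immediate offset up to
	       4095, so add the high bits of DELTA into SCRATCH first and keep
	       only the low 12 bits.  */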
26173 if (delta >= 4096)
26174 {
26175 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26176 GEN_INT (delta & ~4095)));
26177 addr = scratch;
26178 delta &= 4095;
26179 }
26180 else
26181 addr = stack_pointer_rtx;
26182
26183 addr = plus_constant (Pmode, addr, delta);
26184 }
26185 /* The store needs to be marked as frame related in order to prevent
26186 DSE from deleting it as dead if it is based on fp. */
26187 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26188 RTX_FRAME_RELATED_P (insn) = 1;
26189 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26190 }
26191 }
26192
26193
26194 void
26195 thumb_set_return_address (rtx source, rtx scratch)
26196 {
26197 arm_stack_offsets *offsets;
26198 HOST_WIDE_INT delta;
26199 HOST_WIDE_INT limit;
26200 int reg;
26201 rtx addr;
26202 unsigned long mask;
26203
26204 emit_use (source);
26205
26206 offsets = arm_get_frame_offsets ();
26207 mask = offsets->saved_regs_mask;
26208 if (mask & (1 << LR_REGNUM))
26209 {
26210 limit = 1024;
26211 /* Find the saved regs. */
26212 if (frame_pointer_needed)
26213 {
26214 delta = offsets->soft_frame - offsets->saved_args;
26215 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26216 if (TARGET_THUMB1)
26217 limit = 128;
26218 }
26219 else
26220 {
26221 delta = offsets->outgoing_args - offsets->saved_args;
26222 reg = SP_REGNUM;
26223 }
26224 /* Allow for the stack frame. */
26225 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26226 delta -= 16;
26227 /* The link register is always the first saved register. */
26228 delta -= 4;
26229
26230 /* Construct the address. */
26231 addr = gen_rtx_REG (SImode, reg);
26232 if (delta > limit)
26233 {
26234 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26235 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26236 addr = scratch;
26237 }
26238 else
26239 addr = plus_constant (Pmode, addr, delta);
26240
26241 /* The store needs to be marked as frame related in order to prevent
26242 DSE from deleting it as dead if it is based on fp. */
26243 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26244 RTX_FRAME_RELATED_P (insn) = 1;
26245 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26246 }
26247 else
26248 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26249 }
26250
26251 /* Implements target hook vector_mode_supported_p. */
26252 bool
26253 arm_vector_mode_supported_p (machine_mode mode)
26254 {
26255 /* Neon also supports V2SImode, etc. listed in the clause below. */
26256 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26257 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26258 return true;
26259
26260 if ((TARGET_NEON || TARGET_IWMMXT)
26261 && ((mode == V2SImode)
26262 || (mode == V4HImode)
26263 || (mode == V8QImode)))
26264 return true;
26265
26266 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26267 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26268 || mode == V2HAmode))
26269 return true;
26270
26271 return false;
26272 }
26273
26274 /* Implements target hook array_mode_supported_p. */
26275
26276 static bool
26277 arm_array_mode_supported_p (machine_mode mode,
26278 unsigned HOST_WIDE_INT nelems)
26279 {
26280 if (TARGET_NEON
26281 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26282 && (nelems >= 2 && nelems <= 4))
26283 return true;
26284
26285 return false;
26286 }
26287
26288 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26289 registers when autovectorizing for Neon, at least until multiple vector
26290 widths are supported properly by the middle-end. */
26291
26292 static machine_mode
26293 arm_preferred_simd_mode (machine_mode mode)
26294 {
26295 if (TARGET_NEON)
26296 switch (mode)
26297 {
26298 case SFmode:
26299 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26300 case SImode:
26301 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26302 case HImode:
26303 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26304 case QImode:
26305 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26306 case DImode:
26307 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26308 return V2DImode;
26309 break;
26310
26311 default:;
26312 }
26313
26314 if (TARGET_REALLY_IWMMXT)
26315 switch (mode)
26316 {
26317 case SImode:
26318 return V2SImode;
26319 case HImode:
26320 return V4HImode;
26321 case QImode:
26322 return V8QImode;
26323
26324 default:;
26325 }
26326
26327 return word_mode;
26328 }
26329
26330 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26331
26332 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26333 using r0-r4 for function arguments, r7 for the stack frame and not have
26334 enough left over to do doubleword arithmetic. For Thumb-2 all the
26335 potentially problematic instructions accept high registers so this is not
26336 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26337 that require many low registers. */
26338 static bool
26339 arm_class_likely_spilled_p (reg_class_t rclass)
26340 {
26341 if ((TARGET_THUMB1 && rclass == LO_REGS)
26342 || rclass == CC_REG)
26343 return true;
26344
26345 return false;
26346 }
26347
26348 /* Implements target hook small_register_classes_for_mode_p. */
26349 bool
26350 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26351 {
26352 return TARGET_THUMB1;
26353 }
26354
26355 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26356 ARM insns and therefore guarantee that the shift count is modulo 256.
26357 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26358 guarantee no particular behavior for out-of-range counts. */
26359
26360 static unsigned HOST_WIDE_INT
26361 arm_shift_truncation_mask (machine_mode mode)
26362 {
26363 return mode == SImode ? 255 : 0;
26364 }
26365
26366
26367 /* Map internal gcc register numbers to DWARF2 register numbers. */
26368
26369 unsigned int
26370 arm_dbx_register_number (unsigned int regno)
26371 {
26372 if (regno < 16)
26373 return regno;
26374
26375 if (IS_VFP_REGNUM (regno))
26376 {
26377 /* See comment in arm_dwarf_register_span. */
26378 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26379 return 64 + regno - FIRST_VFP_REGNUM;
26380 else
26381 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26382 }
26383
26384 if (IS_IWMMXT_GR_REGNUM (regno))
26385 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26386
26387 if (IS_IWMMXT_REGNUM (regno))
26388 return 112 + regno - FIRST_IWMMXT_REGNUM;
26389
26390 return DWARF_FRAME_REGISTERS;
26391 }
26392
26393 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26394 GCC models them as 64 32-bit registers, so we need to describe this to
26395 the DWARF generation code. Other registers can use the default. */
26396 static rtx
26397 arm_dwarf_register_span (rtx rtl)
26398 {
26399 machine_mode mode;
26400 unsigned regno;
26401 rtx parts[16];
26402 int nregs;
26403 int i;
26404
26405 regno = REGNO (rtl);
26406 if (!IS_VFP_REGNUM (regno))
26407 return NULL_RTX;
26408
26409 /* XXX FIXME: The EABI defines two VFP register ranges:
26410 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26411 256-287: D0-D31
26412 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26413 corresponding D register. Until GDB supports this, we shall use the
26414 legacy encodings. We also use these encodings for D0-D15 for
26415 compatibility with older debuggers. */
26416 mode = GET_MODE (rtl);
26417 if (GET_MODE_SIZE (mode) < 8)
26418 return NULL_RTX;
26419
26420 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26421 {
26422 nregs = GET_MODE_SIZE (mode) / 4;
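      /* Describe the value as pairs of SImode registers; on big-endian
	 targets the two halves of each pair are listed in swapped order.  */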
26423 for (i = 0; i < nregs; i += 2)
26424 if (TARGET_BIG_END)
26425 {
26426 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26427 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26428 }
26429 else
26430 {
26431 parts[i] = gen_rtx_REG (SImode, regno + i);
26432 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26433 }
26434 }
26435 else
26436 {
26437 nregs = GET_MODE_SIZE (mode) / 8;
26438 for (i = 0; i < nregs; i++)
26439 parts[i] = gen_rtx_REG (DImode, regno + i);
26440 }
26441
26442 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26443 }
26444
26445 #if ARM_UNWIND_INFO
26446 /* Emit unwind directives for a store-multiple instruction or stack pointer
26447 push during alignment.
26448 These should only ever be generated by the function prologue code, so
26449 expect them to have a particular form.
26450 The store-multiple instruction sometimes pushes pc as the last register,
26451 although it should not be tracked into unwind information, or for -Os
26452 sometimes pushes some dummy registers before the first register that needs
26453 to be tracked in unwind information; such dummy registers are there just
26454 to avoid separate stack adjustment, and will not be restored in the
26455 epilogue. */
26456
26457 static void
26458 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26459 {
26460 int i;
26461 HOST_WIDE_INT offset;
26462 HOST_WIDE_INT nregs;
26463 int reg_size;
26464 unsigned reg;
26465 unsigned lastreg;
26466 unsigned padfirst = 0, padlast = 0;
26467 rtx e;
26468
26469 e = XVECEXP (p, 0, 0);
26470 gcc_assert (GET_CODE (e) == SET);
26471
26472 /* First insn will adjust the stack pointer. */
26473 gcc_assert (GET_CODE (e) == SET
26474 && REG_P (SET_DEST (e))
26475 && REGNO (SET_DEST (e)) == SP_REGNUM
26476 && GET_CODE (SET_SRC (e)) == PLUS);
26477
26478 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26479 nregs = XVECLEN (p, 0) - 1;
26480 gcc_assert (nregs);
26481
26482 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26483 if (reg < 16)
26484 {
26485 /* For -Os dummy registers can be pushed at the beginning to
26486 avoid separate stack pointer adjustment. */
26487 e = XVECEXP (p, 0, 1);
26488 e = XEXP (SET_DEST (e), 0);
26489 if (GET_CODE (e) == PLUS)
26490 padfirst = INTVAL (XEXP (e, 1));
26491 gcc_assert (padfirst == 0 || optimize_size);
26492 /* The function prologue may also push pc, but not annotate it as it is
26493 never restored. We turn this into a stack pointer adjustment. */
26494 e = XVECEXP (p, 0, nregs);
26495 e = XEXP (SET_DEST (e), 0);
26496 if (GET_CODE (e) == PLUS)
26497 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26498 else
26499 padlast = offset - 4;
26500 gcc_assert (padlast == 0 || padlast == 4);
26501 if (padlast == 4)
26502 fprintf (asm_out_file, "\t.pad #4\n");
26503 reg_size = 4;
26504 fprintf (asm_out_file, "\t.save {");
26505 }
26506 else if (IS_VFP_REGNUM (reg))
26507 {
26508 reg_size = 8;
26509 fprintf (asm_out_file, "\t.vsave {");
26510 }
26511 else
26512 /* Unknown register type. */
26513 gcc_unreachable ();
26514
26515 /* If the stack increment doesn't match the size of the saved registers,
26516 something has gone horribly wrong. */
26517 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26518
26519 offset = padfirst;
26520 lastreg = 0;
26521 /* The remaining insns will describe the stores. */
26522 for (i = 1; i <= nregs; i++)
26523 {
26524 /* Expect (set (mem <addr>) (reg)).
26525 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26526 e = XVECEXP (p, 0, i);
26527 gcc_assert (GET_CODE (e) == SET
26528 && MEM_P (SET_DEST (e))
26529 && REG_P (SET_SRC (e)));
26530
26531 reg = REGNO (SET_SRC (e));
26532 gcc_assert (reg >= lastreg);
26533
26534 if (i != 1)
26535 fprintf (asm_out_file, ", ");
26536 /* We can't use %r for vfp because we need to use the
26537 double precision register names. */
26538 if (IS_VFP_REGNUM (reg))
26539 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26540 else
26541 asm_fprintf (asm_out_file, "%r", reg);
26542
26543 #ifdef ENABLE_CHECKING
26544 /* Check that the addresses are consecutive. */
26545 e = XEXP (SET_DEST (e), 0);
26546 if (GET_CODE (e) == PLUS)
26547 gcc_assert (REG_P (XEXP (e, 0))
26548 && REGNO (XEXP (e, 0)) == SP_REGNUM
26549 && CONST_INT_P (XEXP (e, 1))
26550 && offset == INTVAL (XEXP (e, 1)));
26551 else
26552 gcc_assert (i == 1
26553 && REG_P (e)
26554 && REGNO (e) == SP_REGNUM);
26555 offset += reg_size;
26556 #endif
26557 }
26558 fprintf (asm_out_file, "}\n");
26559 if (padfirst)
26560 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26561 }
26562
26563 /* Emit unwind directives for a SET. */
26564
26565 static void
26566 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26567 {
26568 rtx e0;
26569 rtx e1;
26570 unsigned reg;
26571
26572 e0 = XEXP (p, 0);
26573 e1 = XEXP (p, 1);
26574 switch (GET_CODE (e0))
26575 {
26576 case MEM:
26577 /* Pushing a single register. */
26578 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26579 || !REG_P (XEXP (XEXP (e0, 0), 0))
26580 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26581 abort ();
26582
26583 asm_fprintf (asm_out_file, "\t.save ");
26584 if (IS_VFP_REGNUM (REGNO (e1)))
26585 asm_fprintf(asm_out_file, "{d%d}\n",
26586 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26587 else
26588 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26589 break;
26590
26591 case REG:
26592 if (REGNO (e0) == SP_REGNUM)
26593 {
26594 /* A stack increment. */
26595 if (GET_CODE (e1) != PLUS
26596 || !REG_P (XEXP (e1, 0))
26597 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26598 || !CONST_INT_P (XEXP (e1, 1)))
26599 abort ();
26600
26601 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26602 -INTVAL (XEXP (e1, 1)));
26603 }
26604 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26605 {
26606 HOST_WIDE_INT offset;
26607
26608 if (GET_CODE (e1) == PLUS)
26609 {
26610 if (!REG_P (XEXP (e1, 0))
26611 || !CONST_INT_P (XEXP (e1, 1)))
26612 abort ();
26613 reg = REGNO (XEXP (e1, 0));
26614 offset = INTVAL (XEXP (e1, 1));
26615 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26616 HARD_FRAME_POINTER_REGNUM, reg,
26617 offset);
26618 }
26619 else if (REG_P (e1))
26620 {
26621 reg = REGNO (e1);
26622 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26623 HARD_FRAME_POINTER_REGNUM, reg);
26624 }
26625 else
26626 abort ();
26627 }
26628 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26629 {
26630 /* Move from sp to reg. */
26631 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26632 }
26633 else if (GET_CODE (e1) == PLUS
26634 && REG_P (XEXP (e1, 0))
26635 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26636 && CONST_INT_P (XEXP (e1, 1)))
26637 {
26638 /* Set reg to offset from sp. */
26639 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26640 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26641 }
26642 else
26643 abort ();
26644 break;
26645
26646 default:
26647 abort ();
26648 }
26649 }
26650
26651
26652 /* Emit unwind directives for the given insn. */
26653
26654 static void
26655 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26656 {
26657 rtx note, pat;
26658 bool handled_one = false;
26659
26660 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26661 return;
26662
26663 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26664 && (TREE_NOTHROW (current_function_decl)
26665 || crtl->all_throwers_are_sibcalls))
26666 return;
26667
26668 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26669 return;
26670
26671 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26672 {
26673 switch (REG_NOTE_KIND (note))
26674 {
26675 case REG_FRAME_RELATED_EXPR:
26676 pat = XEXP (note, 0);
26677 goto found;
26678
26679 case REG_CFA_REGISTER:
26680 pat = XEXP (note, 0);
26681 if (pat == NULL)
26682 {
26683 pat = PATTERN (insn);
26684 if (GET_CODE (pat) == PARALLEL)
26685 pat = XVECEXP (pat, 0, 0);
26686 }
26687
26688 /* Only emitted for IS_STACKALIGN re-alignment. */
26689 {
26690 rtx dest, src;
26691 unsigned reg;
26692
26693 src = SET_SRC (pat);
26694 dest = SET_DEST (pat);
26695
26696 gcc_assert (src == stack_pointer_rtx);
26697 reg = REGNO (dest);
26698 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26699 reg + 0x90, reg);
26700 }
26701 handled_one = true;
26702 break;
26703
26704 /* The INSN is generated in epilogue. It is set as RTX_FRAME_RELATED_P
26705 to get correct dwarf information for shrink-wrap. We should not
26706 emit unwind information for it because these are used either for
26707 pretend arguments or notes to adjust sp and restore registers from
26708 stack. */
26709 case REG_CFA_DEF_CFA:
26710 case REG_CFA_ADJUST_CFA:
26711 case REG_CFA_RESTORE:
26712 return;
26713
26714 case REG_CFA_EXPRESSION:
26715 case REG_CFA_OFFSET:
26716 /* ??? Only handling here what we actually emit. */
26717 gcc_unreachable ();
26718
26719 default:
26720 break;
26721 }
26722 }
26723 if (handled_one)
26724 return;
26725 pat = PATTERN (insn);
26726 found:
26727
26728 switch (GET_CODE (pat))
26729 {
26730 case SET:
26731 arm_unwind_emit_set (asm_out_file, pat);
26732 break;
26733
26734 case SEQUENCE:
26735 /* Store multiple. */
26736 arm_unwind_emit_sequence (asm_out_file, pat);
26737 break;
26738
26739 default:
26740 abort();
26741 }
26742 }
26743
26744
26745 /* Output a reference from a function exception table to the type_info
26746 object X. The EABI specifies that the symbol should be relocated by
26747 an R_ARM_TARGET2 relocation. */
26748
26749 static bool
26750 arm_output_ttype (rtx x)
26751 {
26752 fputs ("\t.word\t", asm_out_file);
26753 output_addr_const (asm_out_file, x);
26754 /* Use special relocations for symbol references. */
26755 if (!CONST_INT_P (x))
26756 fputs ("(TARGET2)", asm_out_file);
26757 fputc ('\n', asm_out_file);
26758
26759 return TRUE;
26760 }
26761
26762 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26763
26764 static void
26765 arm_asm_emit_except_personality (rtx personality)
26766 {
26767 fputs ("\t.personality\t", asm_out_file);
26768 output_addr_const (asm_out_file, personality);
26769 fputc ('\n', asm_out_file);
26770 }
26771
26772 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26773
26774 static void
26775 arm_asm_init_sections (void)
26776 {
26777 exception_section = get_unnamed_section (0, output_section_asm_op,
26778 "\t.handlerdata");
26779 }
26780 #endif /* ARM_UNWIND_INFO */
26781
26782 /* Output unwind directives for the start/end of a function. */
26783
26784 void
26785 arm_output_fn_unwind (FILE * f, bool prologue)
26786 {
26787 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26788 return;
26789
26790 if (prologue)
26791 fputs ("\t.fnstart\n", f);
26792 else
26793 {
26794 /* If this function will never be unwound, then mark it as such.
26795 The same condition is used in arm_unwind_emit to suppress
26796 the frame annotations. */
26797 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26798 && (TREE_NOTHROW (current_function_decl)
26799 || crtl->all_throwers_are_sibcalls))
26800 fputs("\t.cantunwind\n", f);
26801
26802 fputs ("\t.fnend\n", f);
26803 }
26804 }
26805
26806 static bool
26807 arm_emit_tls_decoration (FILE *fp, rtx x)
26808 {
26809 enum tls_reloc reloc;
26810 rtx val;
26811
26812 val = XVECEXP (x, 0, 0);
26813 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26814
26815 output_addr_const (fp, val);
26816
26817 switch (reloc)
26818 {
26819 case TLS_GD32:
26820 fputs ("(tlsgd)", fp);
26821 break;
26822 case TLS_LDM32:
26823 fputs ("(tlsldm)", fp);
26824 break;
26825 case TLS_LDO32:
26826 fputs ("(tlsldo)", fp);
26827 break;
26828 case TLS_IE32:
26829 fputs ("(gottpoff)", fp);
26830 break;
26831 case TLS_LE32:
26832 fputs ("(tpoff)", fp);
26833 break;
26834 case TLS_DESCSEQ:
26835 fputs ("(tlsdesc)", fp);
26836 break;
26837 default:
26838 gcc_unreachable ();
26839 }
26840
26841 switch (reloc)
26842 {
26843 case TLS_GD32:
26844 case TLS_LDM32:
26845 case TLS_IE32:
26846 case TLS_DESCSEQ:
26847 fputs (" + (. - ", fp);
26848 output_addr_const (fp, XVECEXP (x, 0, 2));
26849 /* For TLS_DESCSEQ the 3rd operand encodes thumbness and is added rather than subtracted.  */
26850 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26851 output_addr_const (fp, XVECEXP (x, 0, 3));
26852 fputc (')', fp);
26853 break;
26854 default:
26855 break;
26856 }
26857
26858 return TRUE;
26859 }
26860
26861 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26862
26863 static void
26864 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26865 {
26866 gcc_assert (size == 4);
26867 fputs ("\t.word\t", file);
26868 output_addr_const (file, x);
26869 fputs ("(tlsldo)", file);
26870 }
26871
26872 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26873
26874 static bool
26875 arm_output_addr_const_extra (FILE *fp, rtx x)
26876 {
26877 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26878 return arm_emit_tls_decoration (fp, x);
26879 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26880 {
26881 char label[256];
26882 int labelno = INTVAL (XVECEXP (x, 0, 0));
26883
26884 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26885 assemble_name_raw (fp, label);
26886
26887 return TRUE;
26888 }
26889 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26890 {
26891 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26892 if (GOT_PCREL)
26893 fputs ("+.", fp);
26894 fputs ("-(", fp);
26895 output_addr_const (fp, XVECEXP (x, 0, 0));
26896 fputc (')', fp);
26897 return TRUE;
26898 }
26899 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26900 {
26901 output_addr_const (fp, XVECEXP (x, 0, 0));
26902 if (GOT_PCREL)
26903 fputs ("+.", fp);
26904 fputs ("-(", fp);
26905 output_addr_const (fp, XVECEXP (x, 0, 1));
26906 fputc (')', fp);
26907 return TRUE;
26908 }
26909 else if (GET_CODE (x) == CONST_VECTOR)
26910 return arm_emit_vector_const (fp, x);
26911
26912 return FALSE;
26913 }
26914
26915 /* Output assembly for a shift instruction.
26916 SET_FLAGS determines how the instruction modifies the condition codes.
26917 0 - Do not set condition codes.
26918 1 - Set condition codes.
26919 2 - Use smallest instruction. */
26920 const char *
26921 arm_output_shift(rtx * operands, int set_flags)
26922 {
26923 char pattern[100];
26924 static const char flag_chars[3] = {'?', '.', '!'};
26925 const char *shift;
26926 HOST_WIDE_INT val;
26927 char c;
26928
26929 c = flag_chars[set_flags];
26930 if (TARGET_UNIFIED_ASM)
26931 {
26932 shift = shift_op(operands[3], &val);
26933 if (shift)
26934 {
26935 if (val != -1)
26936 operands[2] = GEN_INT(val);
26937 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26938 }
26939 else
26940 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26941 }
26942 else
26943 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26944 output_asm_insn (pattern, operands);
26945 return "";
26946 }
26947
26948 /* Output assembly for a WMMX immediate shift instruction. */
26949 const char *
26950 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26951 {
26952 int shift = INTVAL (operands[2]);
26953 char templ[50];
26954 machine_mode opmode = GET_MODE (operands[0]);
26955
26956 gcc_assert (shift >= 0);
26957
26958 /* If the shift count exceeds the element width (63 for the D qualifier, 31 for W,
26959    15 for H): WROR and WSRA are emitted as shifts of 32, anything else just clears %0.  */
26960 if (((opmode == V4HImode) && (shift > 15))
26961 || ((opmode == V2SImode) && (shift > 31))
26962 || ((opmode == DImode) && (shift > 63)))
26963 {
26964 if (wror_or_wsra)
26965 {
26966 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26967 output_asm_insn (templ, operands);
26968 if (opmode == DImode)
26969 {
26970 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26971 output_asm_insn (templ, operands);
26972 }
26973 }
26974 else
26975 {
26976 /* The destination register will contain all zeros. */
26977 sprintf (templ, "wzero\t%%0");
26978 output_asm_insn (templ, operands);
26979 }
26980 return "";
26981 }
26982
26983 if ((opmode == DImode) && (shift > 32))
26984 {
26985 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26986 output_asm_insn (templ, operands);
26987 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26988 output_asm_insn (templ, operands);
26989 }
26990 else
26991 {
26992 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26993 output_asm_insn (templ, operands);
26994 }
26995 return "";
26996 }
26997
26998 /* Output assembly for a WMMX tinsr instruction. */
26999 const char *
27000 arm_output_iwmmxt_tinsr (rtx *operands)
27001 {
27002 int mask = INTVAL (operands[3]);
27003 int i;
27004 char templ[50];
27005 int units = mode_nunits[GET_MODE (operands[0])];
27006 gcc_assert ((mask & (mask - 1)) == 0);
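  /* MASK has exactly one bit set; its index is the vector lane to insert
     into.  */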
27007 for (i = 0; i < units; ++i)
27008 {
27009 if ((mask & 0x01) == 1)
27010 {
27011 break;
27012 }
27013 mask >>= 1;
27014 }
27015 gcc_assert (i < units);
27016 {
27017 switch (GET_MODE (operands[0]))
27018 {
27019 case V8QImode:
27020 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27021 break;
27022 case V4HImode:
27023 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27024 break;
27025 case V2SImode:
27026 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27027 break;
27028 default:
27029 gcc_unreachable ();
27030 break;
27031 }
27032 output_asm_insn (templ, operands);
27033 }
27034 return "";
27035 }
27036
27037 /* Output a Thumb-1 casesi dispatch sequence. */
27038 const char *
27039 thumb1_output_casesi (rtx *operands)
27040 {
27041 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27042
27043 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27044
27045 switch (GET_MODE(diff_vec))
27046 {
27047 case QImode:
27048 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27049 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27050 case HImode:
27051 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27052 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27053 case SImode:
27054 return "bl\t%___gnu_thumb1_case_si";
27055 default:
27056 gcc_unreachable ();
27057 }
27058 }
27059
27060 /* Output a Thumb-2 casesi instruction. */
27061 const char *
27062 thumb2_output_casesi (rtx *operands)
27063 {
27064 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27065
27066 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27067
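  /* Bounds check: if the index exceeds the table limit, branch to the
     out-of-range label (operand 3).  */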
27068 output_asm_insn ("cmp\t%0, %1", operands);
27069 output_asm_insn ("bhi\t%l3", operands);
27070 switch (GET_MODE(diff_vec))
27071 {
27072 case QImode:
27073 return "tbb\t[%|pc, %0]";
27074 case HImode:
27075 return "tbh\t[%|pc, %0, lsl #1]";
27076 case SImode:
27077 if (flag_pic)
27078 {
27079 output_asm_insn ("adr\t%4, %l2", operands);
27080 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27081 output_asm_insn ("add\t%4, %4, %5", operands);
27082 return "bx\t%4";
27083 }
27084 else
27085 {
27086 output_asm_insn ("adr\t%4, %l2", operands);
27087 return "ldr\t%|pc, [%4, %0, lsl #2]";
27088 }
27089 default:
27090 gcc_unreachable ();
27091 }
27092 }
27093
27094 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27095 per-core tuning structs. */
27096 static int
27097 arm_issue_rate (void)
27098 {
27099 return current_tune->issue_rate;
27100 }
27101
27102 /* Return how many instructions the scheduler should look ahead to choose
27103    the best one.  */
27104 static int
27105 arm_first_cycle_multipass_dfa_lookahead (void)
27106 {
27107 int issue_rate = arm_issue_rate ();
27108
27109 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27110 }
27111
27112 /* Enable modeling of L2 auto-prefetcher. */
27113 static int
27114 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27115 {
27116 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27117 }
27118
27119 const char *
27120 arm_mangle_type (const_tree type)
27121 {
27122 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27123    has to be mangled as if it is in the "std" namespace.  */
27124 if (TARGET_AAPCS_BASED
27125 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27126 return "St9__va_list";
27127
27128 /* Half-precision float. */
27129 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27130 return "Dh";
27131
27132   /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27133 builtin type. */
27134 if (TYPE_NAME (type) != NULL)
27135 return arm_mangle_builtin_type (type);
27136
27137 /* Use the default mangling. */
27138 return NULL;
27139 }
27140
27141 /* Order of allocation of core registers for Thumb: this allocation is
27142 written over the corresponding initial entries of the array
27143 initialized with REG_ALLOC_ORDER. We allocate all low registers
27144 first. Saving and restoring a low register is usually cheaper than
27145 using a call-clobbered high register. */
27146
27147 static const int thumb_core_reg_alloc_order[] =
27148 {
27149 3, 2, 1, 0, 4, 5, 6, 7,
27150 14, 12, 8, 9, 10, 11
27151 };
27152
27153 /* Adjust register allocation order when compiling for Thumb. */
27154
27155 void
27156 arm_order_regs_for_local_alloc (void)
27157 {
27158 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27159 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27160 if (TARGET_THUMB)
27161 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27162 sizeof (thumb_core_reg_alloc_order));
27163 }
27164
27165 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27166
27167 bool
27168 arm_frame_pointer_required (void)
27169 {
27170 return (cfun->has_nonlocal_label
27171 || SUBTARGET_FRAME_POINTER_REQUIRED
27172 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27173 }
27174
27175 /* Only Thumb-1 lacks conditional execution, so return true if the target
27176    is not Thumb-1.  */
27177 static bool
27178 arm_have_conditional_execution (void)
27179 {
27180 return !TARGET_THUMB1;
27181 }
27182
27183 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27184 static HOST_WIDE_INT
27185 arm_vector_alignment (const_tree type)
27186 {
27187 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27188
27189 if (TARGET_AAPCS_BASED)
27190 align = MIN (align, 64);
27191
27192 return align;
27193 }
27194
27195 static unsigned int
27196 arm_autovectorize_vector_sizes (void)
27197 {
27198 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27199 }
27200
27201 static bool
27202 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27203 {
27204 /* Vectors which aren't in packed structures will not be less aligned than
27205 the natural alignment of their element type, so this is safe. */
27206 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27207 return !is_packed;
27208
27209 return default_builtin_vector_alignment_reachable (type, is_packed);
27210 }
27211
27212 static bool
27213 arm_builtin_support_vector_misalignment (machine_mode mode,
27214 const_tree type, int misalignment,
27215 bool is_packed)
27216 {
27217 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27218 {
27219 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27220
27221 if (is_packed)
27222 return align == 1;
27223
27224 /* If the misalignment is unknown, we should be able to handle the access
27225 so long as it is not to a member of a packed data structure. */
27226 if (misalignment == -1)
27227 return true;
27228
27229 /* Return true if the misalignment is a multiple of the natural alignment
27230 of the vector's element type. This is probably always going to be
27231 true in practice, since we've already established that this isn't a
27232 packed access. */
27233 return ((misalignment % align) == 0);
27234 }
27235
27236 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27237 is_packed);
27238 }
27239
27240 static void
27241 arm_conditional_register_usage (void)
27242 {
27243 int regno;
27244
27245 if (TARGET_THUMB1 && optimize_size)
27246 {
27247 /* When optimizing for size on Thumb-1, it's better not
27248 to use the HI regs, because of the overhead of
27249 stacking them. */
27250 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27251 fixed_regs[regno] = call_used_regs[regno] = 1;
27252 }
27253
27254 /* The link register can be clobbered by any branch insn,
27255 but we have no way to track that at present, so mark
27256 it as unavailable. */
27257 if (TARGET_THUMB1)
27258 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27259
27260 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27261 {
27262 /* VFPv3 registers are disabled when earlier VFP
27263 versions are selected due to the definition of
27264 LAST_VFP_REGNUM. */
27265 for (regno = FIRST_VFP_REGNUM;
27266 regno <= LAST_VFP_REGNUM; ++ regno)
27267 {
27268 fixed_regs[regno] = 0;
27269 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
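	      /* Under the AAPCS, d8-d15 (s16-s31) are callee-saved; the
		 remaining VFP registers are call-clobbered.  */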
27270 || regno >= FIRST_VFP_REGNUM + 32;
27271 }
27272 }
27273
27274 if (TARGET_REALLY_IWMMXT)
27275 {
27276 regno = FIRST_IWMMXT_GR_REGNUM;
27277 /* The 2002/10/09 revision of the XScale ABI has wCG0
27278 and wCG1 as call-preserved registers. The 2002/11/21
27279 revision changed this so that all wCG registers are
27280 scratch registers. */
27281 for (regno = FIRST_IWMMXT_GR_REGNUM;
27282 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27283 fixed_regs[regno] = 0;
27284 /* The XScale ABI has wR0 - wR9 as scratch registers,
27285 the rest as call-preserved registers. */
27286 for (regno = FIRST_IWMMXT_REGNUM;
27287 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27288 {
27289 fixed_regs[regno] = 0;
27290 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27291 }
27292 }
27293
27294 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27295 {
27296 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27297 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27298 }
27299 else if (TARGET_APCS_STACK)
27300 {
27301 fixed_regs[10] = 1;
27302 call_used_regs[10] = 1;
27303 }
27304 /* -mcaller-super-interworking reserves r11 for calls to
27305 _interwork_r11_call_via_rN(). Making the register global
27306 is an easy way of ensuring that it remains valid for all
27307 calls. */
27308 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27309 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27310 {
27311 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27312 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27313 if (TARGET_CALLER_INTERWORKING)
27314 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27315 }
27316 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27317 }
27318
27319 static reg_class_t
27320 arm_preferred_rename_class (reg_class_t rclass)
27321 {
27322   /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27323      using GENERAL_REGS.  During the register rename pass we therefore prefer
27324      LO_REGS, which can reduce code size.  */
27325 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27326 return LO_REGS;
27327 else
27328 return NO_REGS;
27329 }
27330
27331 /* Compute the attribute "length" of insn "*push_multi".
27332 So this function MUST be kept in sync with that insn pattern. */
27333 int
27334 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27335 {
27336 int i, regno, hi_reg;
27337 int num_saves = XVECLEN (parallel_op, 0);
27338
27339 /* ARM mode. */
27340 if (TARGET_ARM)
27341 return 4;
27342 /* Thumb1 mode. */
27343 if (TARGET_THUMB1)
27344 return 2;
27345
27346 /* Thumb2 mode. */
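  /* A 16-bit PUSH encoding can only store low registers and LR; any other
     high register forces the 32-bit encoding.  */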
27347 regno = REGNO (first_op);
27348 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27349 for (i = 1; i < num_saves && !hi_reg; i++)
27350 {
27351 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27352 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27353 }
27354
27355 if (!hi_reg)
27356 return 2;
27357 return 4;
27358 }
27359
27360 /* Compute the number of instructions emitted by output_move_double. */
27361 int
27362 arm_count_output_move_double_insns (rtx *operands)
27363 {
27364 int count;
27365 rtx ops[2];
27366 /* output_move_double may modify the operands array, so call it
27367 here on a copy of the array. */
27368 ops[0] = operands[0];
27369 ops[1] = operands[1];
27370 output_move_double (ops, false, &count);
27371 return count;
27372 }
27373
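/* If OPERAND is a positive constant whose reciprocal is an exact power of two
   (within 32 bits), return the log2 of that reciprocal; otherwise return 0.  */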
27374 int
27375 vfp3_const_double_for_fract_bits (rtx operand)
27376 {
27377 REAL_VALUE_TYPE r0;
27378
27379 if (!CONST_DOUBLE_P (operand))
27380 return 0;
27381
27382 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27383 if (exact_real_inverse (DFmode, &r0)
27384 && !REAL_VALUE_NEGATIVE (r0))
27385 {
27386 if (exact_real_truncate (DFmode, &r0))
27387 {
27388 HOST_WIDE_INT value = real_to_integer (&r0);
27389 value = value & 0xffffffff;
27390 if ((value != 0) && ( (value & (value - 1)) == 0))
27391 return int_log2 (value);
27392 }
27393 }
27394 return 0;
27395 }
27396
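/* If OPERAND is a constant that is an exact power of two (viewed as a 32-bit
   value), return its log2; otherwise return 0.  */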
27397 int
27398 vfp3_const_double_for_bits (rtx operand)
27399 {
27400 REAL_VALUE_TYPE r0;
27401
27402 if (!CONST_DOUBLE_P (operand))
27403 return 0;
27404
27405 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27406 if (exact_real_truncate (DFmode, &r0))
27407 {
27408 HOST_WIDE_INT value = real_to_integer (&r0);
27409 value = value & 0xffffffff;
27410 if ((value != 0) && ( (value & (value - 1)) == 0))
27411 return int_log2 (value);
27412 }
27413
27414 return 0;
27415 }
27416 \f
27417 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27418
27419 static void
27420 arm_pre_atomic_barrier (enum memmodel model)
27421 {
27422 if (need_atomic_barrier_p (model, true))
27423 emit_insn (gen_memory_barrier ());
27424 }
27425
27426 static void
27427 arm_post_atomic_barrier (enum memmodel model)
27428 {
27429 if (need_atomic_barrier_p (model, false))
27430 emit_insn (gen_memory_barrier ());
27431 }
27432
27433 /* Emit the load-exclusive and store-exclusive instructions.
27434 Use acquire and release versions if necessary. */
27435
27436 static void
27437 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27438 {
27439 rtx (*gen) (rtx, rtx);
27440
27441 if (acq)
27442 {
27443 switch (mode)
27444 {
27445 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27446 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27447 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27448 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27449 default:
27450 gcc_unreachable ();
27451 }
27452 }
27453 else
27454 {
27455 switch (mode)
27456 {
27457 case QImode: gen = gen_arm_load_exclusiveqi; break;
27458 case HImode: gen = gen_arm_load_exclusivehi; break;
27459 case SImode: gen = gen_arm_load_exclusivesi; break;
27460 case DImode: gen = gen_arm_load_exclusivedi; break;
27461 default:
27462 gcc_unreachable ();
27463 }
27464 }
27465
27466 emit_insn (gen (rval, mem));
27467 }
27468
27469 static void
27470 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27471 rtx mem, bool rel)
27472 {
27473 rtx (*gen) (rtx, rtx, rtx);
27474
27475 if (rel)
27476 {
27477 switch (mode)
27478 {
27479 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27480 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27481 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27482 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27483 default:
27484 gcc_unreachable ();
27485 }
27486 }
27487 else
27488 {
27489 switch (mode)
27490 {
27491 case QImode: gen = gen_arm_store_exclusiveqi; break;
27492 case HImode: gen = gen_arm_store_exclusivehi; break;
27493 case SImode: gen = gen_arm_store_exclusivesi; break;
27494 case DImode: gen = gen_arm_store_exclusivedi; break;
27495 default:
27496 gcc_unreachable ();
27497 }
27498 }
27499
27500 emit_insn (gen (bval, rval, mem));
27501 }
27502
27503 /* Mark the previous jump instruction as unlikely. */
27504
27505 static void
27506 emit_unlikely_jump (rtx insn)
27507 {
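  /* Attach a REG_BR_PROB note of roughly 1%, i.e. the branch is very unlikely
     to be taken.  */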
27508 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27509
27510 insn = emit_jump_insn (insn);
27511 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27512 }
27513
27514 /* Expand a compare and swap pattern. */
27515
27516 void
27517 arm_expand_compare_and_swap (rtx operands[])
27518 {
27519 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27520 machine_mode mode;
27521 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27522
27523 bval = operands[0];
27524 rval = operands[1];
27525 mem = operands[2];
27526 oldval = operands[3];
27527 newval = operands[4];
27528 is_weak = operands[5];
27529 mod_s = operands[6];
27530 mod_f = operands[7];
27531 mode = GET_MODE (mem);
27532
27533 /* Normally the succ memory model must be stronger than fail, but in the
27534 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27535 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27536
27537 if (TARGET_HAVE_LDACQ
27538 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
27539 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
27540 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27541
27542 switch (mode)
27543 {
27544 case QImode:
27545 case HImode:
27546 /* For narrow modes, we're going to perform the comparison in SImode,
27547 so do the zero-extension now. */
27548 rval = gen_reg_rtx (SImode);
27549 oldval = convert_modes (SImode, mode, oldval, true);
27550 /* FALLTHRU */
27551
27552 case SImode:
27553 /* Force the value into a register if needed. We waited until after
27554 the zero-extension above to do this properly. */
27555 if (!arm_add_operand (oldval, SImode))
27556 oldval = force_reg (SImode, oldval);
27557 break;
27558
27559 case DImode:
27560 if (!cmpdi_operand (oldval, mode))
27561 oldval = force_reg (mode, oldval);
27562 break;
27563
27564 default:
27565 gcc_unreachable ();
27566 }
27567
27568 switch (mode)
27569 {
27570 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27571 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27572 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27573 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27574 default:
27575 gcc_unreachable ();
27576 }
27577
27578 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27579
27580 if (mode == QImode || mode == HImode)
27581 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27582
27583 /* In all cases, we arrange for success to be signaled by Z set.
27584 This arrangement allows for the boolean result to be used directly
27585 in a subsequent branch, post optimization. */
27586 x = gen_rtx_REG (CCmode, CC_REGNUM);
27587 x = gen_rtx_EQ (SImode, x, const0_rtx);
27588 emit_insn (gen_rtx_SET (bval, x));
27589 }
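/* Illustrative only -- a user-level sketch (not part of the back end) of
   code that reaches arm_expand_compare_and_swap through the __atomic
   builtins; the function name is hypothetical.

     int
     cas_int (int *p, int expected, int desired)
     {
       return __atomic_compare_exchange_n (p, &expected, desired,
                                           0, __ATOMIC_SEQ_CST,
                                           __ATOMIC_SEQ_CST);
     }

   The fourth argument selects a strong (0) rather than weak compare and
   swap.  The expander receives the boolean result, the output value, the
   memory operand, the expected and desired values, the weak flag and the
   success and failure memory models, and signals success through the Z
   flag as described above.  */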
27590
27591 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27592 another memory store between the load-exclusive and store-exclusive can
27593 reset the monitor from Exclusive to Open state. This means we must wait
27594 until after reload to split the pattern, lest we get a register spill in
27595 the middle of the atomic sequence. */
27596
27597 void
27598 arm_split_compare_and_swap (rtx operands[])
27599 {
27600 rtx rval, mem, oldval, newval, scratch;
27601 machine_mode mode;
27602 enum memmodel mod_s, mod_f;
27603 bool is_weak;
27604 rtx_code_label *label1, *label2;
27605 rtx x, cond;
27606
27607 rval = operands[0];
27608 mem = operands[1];
27609 oldval = operands[2];
27610 newval = operands[3];
27611 is_weak = (operands[4] != const0_rtx);
27612 mod_s = memmodel_from_int (INTVAL (operands[5]));
27613 mod_f = memmodel_from_int (INTVAL (operands[6]));
27614 scratch = operands[7];
27615 mode = GET_MODE (mem);
27616
27617 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
27618
27619 bool use_acquire = TARGET_HAVE_LDACQ
27620 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27621 || is_mm_release (mod_s));
27622
27623 bool use_release = TARGET_HAVE_LDACQ
27624 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27625 || is_mm_acquire (mod_s));
27626
27627 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
27628 a full barrier is emitted after the store-release. */
27629 if (is_armv8_sync)
27630 use_acquire = false;
27631
27632 /* Checks whether a barrier is needed and emits one accordingly. */
27633 if (!(use_acquire || use_release))
27634 arm_pre_atomic_barrier (mod_s);
27635
27636 label1 = NULL;
27637 if (!is_weak)
27638 {
27639 label1 = gen_label_rtx ();
27640 emit_label (label1);
27641 }
27642 label2 = gen_label_rtx ();
27643
27644 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27645
27646 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27647 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27648 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27649 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27650 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27651
27652 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27653
27654 /* Weak or strong, we want EQ to be true for success, so that we
27655 match the flags that we got from the compare above. */
27656 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27657 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27658 emit_insn (gen_rtx_SET (cond, x));
27659
27660 if (!is_weak)
27661 {
27662 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27663 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27664 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27665 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27666 }
27667
27668 if (!is_mm_relaxed (mod_f))
27669 emit_label (label2);
27670
27671 /* Checks whether a barrier is needed and emits one accordingly. */
27672 if (is_armv8_sync
27673 || !(use_acquire || use_release))
27674 arm_post_atomic_barrier (mod_s);
27675
27676 if (is_mm_relaxed (mod_f))
27677 emit_label (label2);
27678 }
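/* For illustration, a hedged sketch (the exact sequence depends on the
   memory model, the ISA level and register allocation) of what the strong
   SImode split above produces:

	1:	ldrex	r0, [r2]	@ ldaex when acquire semantics are used
		cmp	r0, r1
		bne	2f
		strex	r3, r4, [r2]	@ stlex when release semantics are used
		cmp	r3, #0
		bne	1b
	2:

   with explicit barriers emitted before and/or after the loop when the
   acquire/release forms are not available or a __sync model is used.  */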
27679
27680 void
27681 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27682 rtx value, rtx model_rtx, rtx cond)
27683 {
27684 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
27685 machine_mode mode = GET_MODE (mem);
27686 machine_mode wmode = (mode == DImode ? DImode : SImode);
27687 rtx_code_label *label;
27688 rtx x;
27689
27690 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
27691
27692 bool use_acquire = TARGET_HAVE_LDACQ
27693 && !(is_mm_relaxed (model) || is_mm_consume (model)
27694 || is_mm_release (model));
27695
27696 bool use_release = TARGET_HAVE_LDACQ
27697 && !(is_mm_relaxed (model) || is_mm_consume (model)
27698 || is_mm_acquire (model));
27699
27700 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
27701 a full barrier is emitted after the store-release. */
27702 if (is_armv8_sync)
27703 use_acquire = false;
27704
27705 /* Checks whether a barrier is needed and emits one accordingly. */
27706 if (!(use_acquire || use_release))
27707 arm_pre_atomic_barrier (model);
27708
27709 label = gen_label_rtx ();
27710 emit_label (label);
27711
27712 if (new_out)
27713 new_out = gen_lowpart (wmode, new_out);
27714 if (old_out)
27715 old_out = gen_lowpart (wmode, old_out);
27716 else
27717 old_out = new_out;
27718 value = simplify_gen_subreg (wmode, value, mode, 0);
27719
27720 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27721
27722 switch (code)
27723 {
27724 case SET:
27725 new_out = value;
27726 break;
27727
27728 case NOT:
27729 x = gen_rtx_AND (wmode, old_out, value);
27730 emit_insn (gen_rtx_SET (new_out, x));
27731 x = gen_rtx_NOT (wmode, new_out);
27732 emit_insn (gen_rtx_SET (new_out, x));
27733 break;
27734
27735 case MINUS:
27736 if (CONST_INT_P (value))
27737 {
27738 value = GEN_INT (-INTVAL (value));
27739 code = PLUS;
27740 }
27741 /* FALLTHRU */
27742
27743 case PLUS:
27744 if (mode == DImode)
27745 {
27746 /* DImode plus/minus need to clobber flags. */
27747 /* The adddi3 and subdi3 patterns are incorrectly written so that
27748 they require matching operands, even when we could easily support
27749 three operands. Thankfully, this can be fixed up post-splitting,
27750 as the individual add+adc patterns do accept three operands and
27751 post-reload cprop can make these moves go away. */
27752 emit_move_insn (new_out, old_out);
27753 if (code == PLUS)
27754 x = gen_adddi3 (new_out, new_out, value);
27755 else
27756 x = gen_subdi3 (new_out, new_out, value);
27757 emit_insn (x);
27758 break;
27759 }
27760 /* FALLTHRU */
27761
27762 default:
27763 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27764 emit_insn (gen_rtx_SET (new_out, x));
27765 break;
27766 }
27767
27768 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27769 use_release);
27770
27771 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27772 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27773
27774 /* Checks whether a barrier is needed and emits one accordingly. */
27775 if (is_armv8_sync
27776 || !(use_acquire || use_release))
27777 arm_post_atomic_barrier (model);
27778 }
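/* Illustrative only -- a user-level sketch of an operation that is expanded
   through arm_split_atomic_op; the function name is hypothetical.

     int
     fetch_add (int *p, int n)
     {
       return __atomic_fetch_add (p, n, __ATOMIC_ACQ_REL);
     }

   Here CODE is PLUS, OLD_OUT receives the value loaded by the
   load-exclusive, NEW_OUT holds the incremented value, and the loop above
   repeats until the store-exclusive reports success.  */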
27779 \f
27780 #define MAX_VECT_LEN 16
27781
27782 struct expand_vec_perm_d
27783 {
27784 rtx target, op0, op1;
27785 unsigned char perm[MAX_VECT_LEN];
27786 machine_mode vmode;
27787 unsigned char nelt;
27788 bool one_vector_p;
27789 bool testing_p;
27790 };
27791
27792 /* Generate a variable permutation. */
27793
27794 static void
27795 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27796 {
27797 machine_mode vmode = GET_MODE (target);
27798 bool one_vector_p = rtx_equal_p (op0, op1);
27799
27800 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27801 gcc_checking_assert (GET_MODE (op0) == vmode);
27802 gcc_checking_assert (GET_MODE (op1) == vmode);
27803 gcc_checking_assert (GET_MODE (sel) == vmode);
27804 gcc_checking_assert (TARGET_NEON);
27805
27806 if (one_vector_p)
27807 {
27808 if (vmode == V8QImode)
27809 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27810 else
27811 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27812 }
27813 else
27814 {
27815 rtx pair;
27816
27817 if (vmode == V8QImode)
27818 {
27819 pair = gen_reg_rtx (V16QImode);
27820 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27821 pair = gen_lowpart (TImode, pair);
27822 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27823 }
27824 else
27825 {
27826 pair = gen_reg_rtx (OImode);
27827 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27828 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27829 }
27830 }
27831 }
27832
27833 void
27834 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27835 {
27836 machine_mode vmode = GET_MODE (target);
27837 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27838 bool one_vector_p = rtx_equal_p (op0, op1);
27839 rtx rmask[MAX_VECT_LEN], mask;
27840
27841 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27842 numbering of elements for big-endian, we must reverse the order. */
27843 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27844
27845 /* The VTBL instruction does not use a modulo index, so we must take care
27846 of that ourselves. */
27847 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27848 for (i = 0; i < nelt; ++i)
27849 rmask[i] = mask;
27850 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27851 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27852
27853 arm_expand_vec_perm_1 (target, op0, op1, sel);
27854 }
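/* Illustrative only -- a user-level sketch of a variable permutation that
   is expanded by arm_expand_vec_perm; the names are hypothetical.

     typedef unsigned char v8qi __attribute__ ((vector_size (8)));

     v8qi
     shuffle_var (v8qi x, v8qi y, v8qi sel)
     {
       return __builtin_shuffle (x, y, sel);
     }

   VEC_PERM_EXPR treats selector elements modulo twice the element count,
   which is why the code above ANDs SEL with 2 * nelt - 1 (15 for two
   V8QImode inputs) before handing it to the VTBL-based expansion.  */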
27855
27856 /* Generate or test for an insn that supports a constant permutation. */
27857
27858 /* Recognize patterns for the VUZP insns. */
27859
27860 static bool
27861 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27862 {
27863 unsigned int i, odd, mask, nelt = d->nelt;
27864 rtx out0, out1, in0, in1, x;
27865 rtx (*gen)(rtx, rtx, rtx, rtx);
27866
27867 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27868 return false;
27869
27870 /* Note that these are little-endian tests. Adjust for big-endian later. */
27871 if (d->perm[0] == 0)
27872 odd = 0;
27873 else if (d->perm[0] == 1)
27874 odd = 1;
27875 else
27876 return false;
27877 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27878
27879 for (i = 0; i < nelt; i++)
27880 {
27881 unsigned elt = (i * 2 + odd) & mask;
27882 if (d->perm[i] != elt)
27883 return false;
27884 }
27885
27886 /* Success! */
27887 if (d->testing_p)
27888 return true;
27889
27890 switch (d->vmode)
27891 {
27892 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27893 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27894 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27895 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27896 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27897 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27898 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27899 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27900 default:
27901 gcc_unreachable ();
27902 }
27903
27904 in0 = d->op0;
27905 in1 = d->op1;
27906 if (BYTES_BIG_ENDIAN)
27907 {
27908 x = in0, in0 = in1, in1 = x;
27909 odd = !odd;
27910 }
27911
27912 out0 = d->target;
27913 out1 = gen_reg_rtx (d->vmode);
27914 if (odd)
27915 x = out0, out0 = out1, out1 = x;
27916
27917 emit_insn (gen (out0, in0, in1, out1));
27918 return true;
27919 }
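/* Worked example (little-endian, for illustration only): with two V8QImode
   inputs the even-extraction selector { 0, 2, 4, 6, 8, 10, 12, 14 } passes
   the check above with odd == 0 and is matched as a VUZP whose first output
   feeds the target; the odd selector { 1, 3, 5, 7, 9, 11, 13, 15 } matches
   with odd == 1, so the target is wired to the second output instead.  */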
27920
27921 /* Recognize patterns for the VZIP insns. */
27922
27923 static bool
27924 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27925 {
27926 unsigned int i, high, mask, nelt = d->nelt;
27927 rtx out0, out1, in0, in1, x;
27928 rtx (*gen)(rtx, rtx, rtx, rtx);
27929
27930 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27931 return false;
27932
27933 /* Note that these are little-endian tests. Adjust for big-endian later. */
27934 high = nelt / 2;
27935 if (d->perm[0] == high)
27936 ;
27937 else if (d->perm[0] == 0)
27938 high = 0;
27939 else
27940 return false;
27941 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27942
27943 for (i = 0; i < nelt / 2; i++)
27944 {
27945 unsigned elt = (i + high) & mask;
27946 if (d->perm[i * 2] != elt)
27947 return false;
27948 elt = (elt + nelt) & mask;
27949 if (d->perm[i * 2 + 1] != elt)
27950 return false;
27951 }
27952
27953 /* Success! */
27954 if (d->testing_p)
27955 return true;
27956
27957 switch (d->vmode)
27958 {
27959 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27960 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27961 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27962 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27963 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27964 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27965 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27966 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27967 default:
27968 gcc_unreachable ();
27969 }
27970
27971 in0 = d->op0;
27972 in1 = d->op1;
27973 if (BYTES_BIG_ENDIAN)
27974 {
27975 x = in0, in0 = in1, in1 = x;
27976 high = !high;
27977 }
27978
27979 out0 = d->target;
27980 out1 = gen_reg_rtx (d->vmode);
27981 if (high)
27982 x = out0, out0 = out1, out1 = x;
27983
27984 emit_insn (gen (out0, in0, in1, out1));
27985 return true;
27986 }
27987
27988 /* Recognize patterns for the VREV insns. */
27989
27990 static bool
27991 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27992 {
27993 unsigned int i, j, diff, nelt = d->nelt;
27994 rtx (*gen)(rtx, rtx);
27995
27996 if (!d->one_vector_p)
27997 return false;
27998
27999 diff = d->perm[0];
28000 switch (diff)
28001 {
28002 case 7:
28003 switch (d->vmode)
28004 {
28005 case V16QImode: gen = gen_neon_vrev64v16qi; break;
28006 case V8QImode: gen = gen_neon_vrev64v8qi; break;
28007 default:
28008 return false;
28009 }
28010 break;
28011 case 3:
28012 switch (d->vmode)
28013 {
28014 case V16QImode: gen = gen_neon_vrev32v16qi; break;
28015 case V8QImode: gen = gen_neon_vrev32v8qi; break;
28016 case V8HImode: gen = gen_neon_vrev64v8hi; break;
28017 case V4HImode: gen = gen_neon_vrev64v4hi; break;
28018 default:
28019 return false;
28020 }
28021 break;
28022 case 1:
28023 switch (d->vmode)
28024 {
28025 case V16QImode: gen = gen_neon_vrev16v16qi; break;
28026 case V8QImode: gen = gen_neon_vrev16v8qi; break;
28027 case V8HImode: gen = gen_neon_vrev32v8hi; break;
28028 case V4HImode: gen = gen_neon_vrev32v4hi; break;
28029 case V4SImode: gen = gen_neon_vrev64v4si; break;
28030 case V2SImode: gen = gen_neon_vrev64v2si; break;
28031 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
28032 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
28033 default:
28034 return false;
28035 }
28036 break;
28037 default:
28038 return false;
28039 }
28040
28041 for (i = 0; i < nelt ; i += diff + 1)
28042 for (j = 0; j <= diff; j += 1)
28043 {
28044 /* This is guaranteed to be true as the value of diff
28045 is 7, 3 or 1 and we should have enough elements in the
28046 queue to generate this. Getting a vector mask with a
28047 value of diff other than these values implies that
28048 something is wrong by the time we get here. */
28049 gcc_assert (i + j < nelt);
28050 if (d->perm[i + j] != i + diff - j)
28051 return false;
28052 }
28053
28054 /* Success! */
28055 if (d->testing_p)
28056 return true;
28057
28058 emit_insn (gen (d->target, d->op0));
28059 return true;
28060 }
28061
28062 /* Recognize patterns for the VTRN insns. */
28063
28064 static bool
28065 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28066 {
28067 unsigned int i, odd, mask, nelt = d->nelt;
28068 rtx out0, out1, in0, in1, x;
28069 rtx (*gen)(rtx, rtx, rtx, rtx);
28070
28071 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28072 return false;
28073
28074 /* Note that these are little-endian tests. Adjust for big-endian later. */
28075 if (d->perm[0] == 0)
28076 odd = 0;
28077 else if (d->perm[0] == 1)
28078 odd = 1;
28079 else
28080 return false;
28081 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28082
28083 for (i = 0; i < nelt; i += 2)
28084 {
28085 if (d->perm[i] != i + odd)
28086 return false;
28087 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28088 return false;
28089 }
28090
28091 /* Success! */
28092 if (d->testing_p)
28093 return true;
28094
28095 switch (d->vmode)
28096 {
28097 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28098 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28099 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28100 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28101 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28102 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28103 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28104 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28105 default:
28106 gcc_unreachable ();
28107 }
28108
28109 in0 = d->op0;
28110 in1 = d->op1;
28111 if (BYTES_BIG_ENDIAN)
28112 {
28113 x = in0, in0 = in1, in1 = x;
28114 odd = !odd;
28115 }
28116
28117 out0 = d->target;
28118 out1 = gen_reg_rtx (d->vmode);
28119 if (odd)
28120 x = out0, out0 = out1, out1 = x;
28121
28122 emit_insn (gen (out0, in0, in1, out1));
28123 return true;
28124 }
28125
28126 /* Recognize patterns for the VEXT insns. */
28127
28128 static bool
28129 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28130 {
28131 unsigned int i, nelt = d->nelt;
28132 rtx (*gen) (rtx, rtx, rtx, rtx);
28133 rtx offset;
28134
28135 unsigned int location;
28136
28137 unsigned int next = d->perm[0] + 1;
28138
28139 /* TODO: Handle GCC's numbering of elements for big-endian. */
28140 if (BYTES_BIG_ENDIAN)
28141 return false;
28142
28143 /* Check if the extracted indexes are increasing by one. */
28144 for (i = 1; i < nelt; next++, i++)
28145 {
28146 /* If we hit the most significant element of the 2nd vector in
28147 the previous iteration, no need to test further. */
28148 if (next == 2 * nelt)
28149 return false;
28150
28151 /* If we are operating on only one vector: it could be a
28152 rotation. If there are only two elements of size < 64, let
28153 arm_evpc_neon_vrev catch it. */
28154 if (d->one_vector_p && (next == nelt))
28155 {
28156 if ((nelt == 2) && (d->vmode != V2DImode))
28157 return false;
28158 else
28159 next = 0;
28160 }
28161
28162 if (d->perm[i] != next)
28163 return false;
28164 }
28165
28166 location = d->perm[0];
28167
28168 switch (d->vmode)
28169 {
28170 case V16QImode: gen = gen_neon_vextv16qi; break;
28171 case V8QImode: gen = gen_neon_vextv8qi; break;
28172 case V4HImode: gen = gen_neon_vextv4hi; break;
28173 case V8HImode: gen = gen_neon_vextv8hi; break;
28174 case V2SImode: gen = gen_neon_vextv2si; break;
28175 case V4SImode: gen = gen_neon_vextv4si; break;
28176 case V2SFmode: gen = gen_neon_vextv2sf; break;
28177 case V4SFmode: gen = gen_neon_vextv4sf; break;
28178 case V2DImode: gen = gen_neon_vextv2di; break;
28179 default:
28180 return false;
28181 }
28182
28183 /* Success! */
28184 if (d->testing_p)
28185 return true;
28186
28187 offset = GEN_INT (location);
28188 emit_insn (gen (d->target, d->op0, d->op1, offset));
28189 return true;
28190 }
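/* Worked example (little-endian, for illustration only): for two V8QImode
   inputs the selector { 3, 4, 5, 6, 7, 8, 9, 10 } increases by one starting
   at 3, so it is matched as a VEXT with offset 3; a one-vector selector
   such as { 2, 3, 4, 5, 6, 7, 0, 1 } wraps around at nelt and is accepted
   as a rotation (offset 2) by the same code.  */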
28191
28192 /* The NEON VTBL instruction is a fully variable permutation that's even
28193 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28194 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28195 can do slightly better by expanding this as a constant where we don't
28196 have to apply a mask. */
28197
28198 static bool
28199 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28200 {
28201 rtx rperm[MAX_VECT_LEN], sel;
28202 machine_mode vmode = d->vmode;
28203 unsigned int i, nelt = d->nelt;
28204
28205 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28206 numbering of elements for big-endian, we must reverse the order. */
28207 if (BYTES_BIG_ENDIAN)
28208 return false;
28209
28210 if (d->testing_p)
28211 return true;
28212
28213 /* Generic code will try constant permutation twice. Once with the
28214 original mode and again with the elements lowered to QImode.
28215 So wait and don't do the selector expansion ourselves. */
28216 if (vmode != V8QImode && vmode != V16QImode)
28217 return false;
28218
28219 for (i = 0; i < nelt; ++i)
28220 rperm[i] = GEN_INT (d->perm[i]);
28221 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28222 sel = force_reg (vmode, sel);
28223
28224 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28225 return true;
28226 }
28227
28228 static bool
28229 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28230 {
28231 /* Check if the input mask matches vext before reordering the
28232 operands. */
28233 if (TARGET_NEON)
28234 if (arm_evpc_neon_vext (d))
28235 return true;
28236
28237 /* The pattern matching functions above are written to look for a small
28238 number to begin the sequence (0, 1, N/2). If we begin with an index
28239 from the second operand, we can swap the operands. */
28240 if (d->perm[0] >= d->nelt)
28241 {
28242 unsigned i, nelt = d->nelt;
28243 rtx x;
28244
28245 for (i = 0; i < nelt; ++i)
28246 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28247
28248 x = d->op0;
28249 d->op0 = d->op1;
28250 d->op1 = x;
28251 }
28252
28253 if (TARGET_NEON)
28254 {
28255 if (arm_evpc_neon_vuzp (d))
28256 return true;
28257 if (arm_evpc_neon_vzip (d))
28258 return true;
28259 if (arm_evpc_neon_vrev (d))
28260 return true;
28261 if (arm_evpc_neon_vtrn (d))
28262 return true;
28263 return arm_evpc_neon_vtbl (d);
28264 }
28265 return false;
28266 }
28267
28268 /* Expand a vec_perm_const pattern. */
28269
28270 bool
28271 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28272 {
28273 struct expand_vec_perm_d d;
28274 int i, nelt, which;
28275
28276 d.target = target;
28277 d.op0 = op0;
28278 d.op1 = op1;
28279
28280 d.vmode = GET_MODE (target);
28281 gcc_assert (VECTOR_MODE_P (d.vmode));
28282 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28283 d.testing_p = false;
28284
28285 for (i = which = 0; i < nelt; ++i)
28286 {
28287 rtx e = XVECEXP (sel, 0, i);
28288 int ei = INTVAL (e) & (2 * nelt - 1);
28289 which |= (ei < nelt ? 1 : 2);
28290 d.perm[i] = ei;
28291 }
28292
28293 switch (which)
28294 {
28295 default:
28296 gcc_unreachable();
28297
28298 case 3:
28299 d.one_vector_p = false;
28300 if (!rtx_equal_p (op0, op1))
28301 break;
28302
28303 /* The elements of PERM do not suggest that only the first operand
28304 is used, but both operands are identical. Allow easier matching
28305 of the permutation by folding the permutation into the single
28306 input vector. */
28307 /* FALLTHRU */
28308 case 2:
28309 for (i = 0; i < nelt; ++i)
28310 d.perm[i] &= nelt - 1;
28311 d.op0 = op1;
28312 d.one_vector_p = true;
28313 break;
28314
28315 case 1:
28316 d.op1 = op0;
28317 d.one_vector_p = true;
28318 break;
28319 }
28320
28321 return arm_expand_vec_perm_const_1 (&d);
28322 }
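/* Illustrative only -- a user-level sketch of a constant permutation that
   reaches arm_expand_vec_perm_const; the function name is hypothetical.

     typedef unsigned char v8qi __attribute__ ((vector_size (8)));

     v8qi
     interleave_lo (v8qi x, v8qi y)
     {
       return __builtin_shuffle (x, y,
                                 (v8qi) { 0, 8, 1, 9, 2, 10, 3, 11 });
     }

   This selector interleaves the low halves of the two inputs, so
   arm_expand_vec_perm_const_1 should recognize it through
   arm_evpc_neon_vzip; selectors that match none of the fixed patterns fall
   back to the VTBL expansion.  */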
28323
28324 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28325
28326 static bool
28327 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28328 const unsigned char *sel)
28329 {
28330 struct expand_vec_perm_d d;
28331 unsigned int i, nelt, which;
28332 bool ret;
28333
28334 d.vmode = vmode;
28335 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28336 d.testing_p = true;
28337 memcpy (d.perm, sel, nelt);
28338
28339 /* Categorize the set of elements in the selector. */
28340 for (i = which = 0; i < nelt; ++i)
28341 {
28342 unsigned char e = d.perm[i];
28343 gcc_assert (e < 2 * nelt);
28344 which |= (e < nelt ? 1 : 2);
28345 }
28346
28347 /* For all elements from second vector, fold the elements to first. */
28348 if (which == 2)
28349 for (i = 0; i < nelt; ++i)
28350 d.perm[i] -= nelt;
28351
28352 /* Check whether the mask can be applied to the vector type. */
28353 d.one_vector_p = (which != 3);
28354
28355 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28356 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28357 if (!d.one_vector_p)
28358 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28359
28360 start_sequence ();
28361 ret = arm_expand_vec_perm_const_1 (&d);
28362 end_sequence ();
28363
28364 return ret;
28365 }
28366
28367 bool
28368 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28369 {
28370 /* If we are soft float, all auto-increment forms are OK provided we
28371 either have LDRD or the access fits in a single word. */
28372 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28373 return true;
28374
28375 switch (code)
28376 {
28377 /* Post increment and Pre Decrement are supported for all
28378 instruction forms except for vector forms. */
28379 case ARM_POST_INC:
28380 case ARM_PRE_DEC:
28381 if (VECTOR_MODE_P (mode))
28382 {
28383 if (code != ARM_PRE_DEC)
28384 return true;
28385 else
28386 return false;
28387 }
28388
28389 return true;
28390
28391 case ARM_POST_DEC:
28392 case ARM_PRE_INC:
28393 /* Without LDRD and mode size greater than
28394 word size, there is no point in auto-incrementing
28395 because ldm and stm will not have these forms. */
28396 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28397 return false;
28398
28399 /* Vector and floating point modes do not support
28400 these auto increment forms. */
28401 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28402 return false;
28403
28404 return true;
28405
28406 default:
28407 return false;
28408
28409 }
28410
28411 return false;
28412 }
28413
28414 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
28415 on ARM, since we know that shifts by negative amounts are no-ops.
28416 Additionally, the default expansion code is not available or suitable
28417 for post-reload insn splits (this can occur when the register allocator
28418 chooses not to do a shift in NEON).
28419
28420 This function is used in both initial expand and post-reload splits, and
28421 handles all kinds of 64-bit shifts.
28422
28423 Input requirements:
28424 - It is safe for the input and output to be the same register, but
28425 early-clobber rules apply for the shift amount and scratch registers.
28426 - Shift by register requires both scratch registers. In all other cases
28427 the scratch registers may be NULL.
28428 - Ashiftrt by a register also clobbers the CC register. */
28429 void
28430 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28431 rtx amount, rtx scratch1, rtx scratch2)
28432 {
28433 rtx out_high = gen_highpart (SImode, out);
28434 rtx out_low = gen_lowpart (SImode, out);
28435 rtx in_high = gen_highpart (SImode, in);
28436 rtx in_low = gen_lowpart (SImode, in);
28437
28438 /* Terminology:
28439 in = the register pair containing the input value.
28440 out = the destination register pair.
28441 up = the high- or low-part of each pair.
28442 down = the opposite part to "up".
28443 In a shift, we can consider bits to shift from "up"-stream to
28444 "down"-stream, so in a left-shift "up" is the low-part and "down"
28445 is the high-part of each register pair. */
28446
28447 rtx out_up = code == ASHIFT ? out_low : out_high;
28448 rtx out_down = code == ASHIFT ? out_high : out_low;
28449 rtx in_up = code == ASHIFT ? in_low : in_high;
28450 rtx in_down = code == ASHIFT ? in_high : in_low;
28451
28452 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28453 gcc_assert (out
28454 && (REG_P (out) || GET_CODE (out) == SUBREG)
28455 && GET_MODE (out) == DImode);
28456 gcc_assert (in
28457 && (REG_P (in) || GET_CODE (in) == SUBREG)
28458 && GET_MODE (in) == DImode);
28459 gcc_assert (amount
28460 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28461 && GET_MODE (amount) == SImode)
28462 || CONST_INT_P (amount)));
28463 gcc_assert (scratch1 == NULL
28464 || (GET_CODE (scratch1) == SCRATCH)
28465 || (GET_MODE (scratch1) == SImode
28466 && REG_P (scratch1)));
28467 gcc_assert (scratch2 == NULL
28468 || (GET_CODE (scratch2) == SCRATCH)
28469 || (GET_MODE (scratch2) == SImode
28470 && REG_P (scratch2)));
28471 gcc_assert (!REG_P (out) || !REG_P (amount)
28472 || !HARD_REGISTER_P (out)
28473 || (REGNO (out) != REGNO (amount)
28474 && REGNO (out) + 1 != REGNO (amount)));
28475
28476 /* Macros to make following code more readable. */
28477 #define SUB_32(DEST,SRC) \
28478 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28479 #define RSB_32(DEST,SRC) \
28480 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28481 #define SUB_S_32(DEST,SRC) \
28482 gen_addsi3_compare0 ((DEST), (SRC), \
28483 GEN_INT (-32))
28484 #define SET(DEST,SRC) \
28485 gen_rtx_SET ((DEST), (SRC))
28486 #define SHIFT(CODE,SRC,AMOUNT) \
28487 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28488 #define LSHIFT(CODE,SRC,AMOUNT) \
28489 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28490 SImode, (SRC), (AMOUNT))
28491 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28492 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28493 SImode, (SRC), (AMOUNT))
28494 #define ORR(A,B) \
28495 gen_rtx_IOR (SImode, (A), (B))
28496 #define BRANCH(COND,LABEL) \
28497 gen_arm_cond_branch ((LABEL), \
28498 gen_rtx_ ## COND (CCmode, cc_reg, \
28499 const0_rtx), \
28500 cc_reg)
28501
28502 /* Shifts by register and shifts by constant are handled separately. */
28503 if (CONST_INT_P (amount))
28504 {
28505 /* We have a shift-by-constant. */
28506
28507 /* First, handle out-of-range shift amounts.
28508 In both cases we try to match the result that an ARM instruction in a
28509 shift-by-register would give. This helps reduce execution
28510 differences between optimization levels, but it won't stop other
28511 parts of the compiler doing different things. This is "undefined
28512 behaviour", in any case. */
28513 if (INTVAL (amount) <= 0)
28514 emit_insn (gen_movdi (out, in));
28515 else if (INTVAL (amount) >= 64)
28516 {
28517 if (code == ASHIFTRT)
28518 {
28519 rtx const31_rtx = GEN_INT (31);
28520 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28521 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28522 }
28523 else
28524 emit_insn (gen_movdi (out, const0_rtx));
28525 }
28526
28527 /* Now handle valid shifts. */
28528 else if (INTVAL (amount) < 32)
28529 {
28530 /* Shifts by a constant less than 32. */
28531 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28532
28533 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28534 emit_insn (SET (out_down,
28535 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28536 out_down)));
28537 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28538 }
28539 else
28540 {
28541 /* Shifts by a constant greater than 31. */
28542 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28543
28544 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28545 if (code == ASHIFTRT)
28546 emit_insn (gen_ashrsi3 (out_up, in_up,
28547 GEN_INT (31)));
28548 else
28549 emit_insn (SET (out_up, const0_rtx));
28550 }
28551 }
28552 else
28553 {
28554 /* We have a shift-by-register. */
28555 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28556
28557 /* This alternative requires the scratch registers. */
28558 gcc_assert (scratch1 && REG_P (scratch1));
28559 gcc_assert (scratch2 && REG_P (scratch2));
28560
28561 /* We will need the values "amount-32" and "32-amount" later.
28562 Swapping them around now allows the later code to be more general. */
28563 switch (code)
28564 {
28565 case ASHIFT:
28566 emit_insn (SUB_32 (scratch1, amount));
28567 emit_insn (RSB_32 (scratch2, amount));
28568 break;
28569 case ASHIFTRT:
28570 emit_insn (RSB_32 (scratch1, amount));
28571 /* Also set CC = amount > 32. */
28572 emit_insn (SUB_S_32 (scratch2, amount));
28573 break;
28574 case LSHIFTRT:
28575 emit_insn (RSB_32 (scratch1, amount));
28576 emit_insn (SUB_32 (scratch2, amount));
28577 break;
28578 default:
28579 gcc_unreachable ();
28580 }
28581
28582 /* Emit code like this:
28583
28584 arithmetic-left:
28585 out_down = in_down << amount;
28586 out_down = (in_up << (amount - 32)) | out_down;
28587 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28588 out_up = in_up << amount;
28589
28590 arithmetic-right:
28591 out_down = in_down >> amount;
28592 out_down = (in_up << (32 - amount)) | out_down;
28593 if (amount < 32)
28594 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28595 out_up = in_up << amount;
28596
28597 logical-right:
28598 out_down = in_down >> amount;
28599 out_down = (in_up << (32 - amount)) | out_down;
28600 if (amount < 32)
28601 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28602 out_up = in_up << amount;
28603
28604 The ARM and Thumb2 variants are the same but implemented slightly
28605 differently. If this were only called during expand we could just
28606 use the Thumb2 case and let combine do the right thing, but this
28607 can also be called from post-reload splitters. */
28608
28609 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28610
28611 if (!TARGET_THUMB2)
28612 {
28613 /* Emit code for ARM mode. */
28614 emit_insn (SET (out_down,
28615 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28616 if (code == ASHIFTRT)
28617 {
28618 rtx_code_label *done_label = gen_label_rtx ();
28619 emit_jump_insn (BRANCH (LT, done_label));
28620 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28621 out_down)));
28622 emit_label (done_label);
28623 }
28624 else
28625 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28626 out_down)));
28627 }
28628 else
28629 {
28630 /* Emit code for Thumb2 mode.
28631 Thumb2 can't do shift and or in one insn. */
28632 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28633 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28634
28635 if (code == ASHIFTRT)
28636 {
28637 rtx_code_label *done_label = gen_label_rtx ();
28638 emit_jump_insn (BRANCH (LT, done_label));
28639 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28640 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28641 emit_label (done_label);
28642 }
28643 else
28644 {
28645 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28646 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28647 }
28648 }
28649
28650 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28651 }
28652
28653 #undef SUB_32
28654 #undef RSB_32
28655 #undef SUB_S_32
28656 #undef SET
28657 #undef SHIFT
28658 #undef LSHIFT
28659 #undef REV_LSHIFT
28660 #undef ORR
28661 #undef BRANCH
28662 }
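/* Illustrative only -- a user-level sketch of a shift that may be expanded
   by arm_emit_coreregs_64bit_shift when the value lives in core registers;
   the function name is hypothetical.

     unsigned long long
     lshr64 (unsigned long long x, int amount)
     {
       return x >> amount;
     }

   For the shift-by-register case this follows the logical-right sequence
   sketched in the comment above: the low word is shifted by AMOUNT, bits
   falling out of the high word are ORed in using the "32 - amount" and
   "amount - 32" scratch values, and finally the high word itself is
   shifted by AMOUNT.  */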
28663
28664
28665 /* Return TRUE if *COMPARISON is a comparison operation this back end can
28666 handle, canonicalizing it and forcing *OP1 and *OP2 into valid forms
28667 as a side effect. */
28667 bool
28668 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28669 {
28670 enum rtx_code code = GET_CODE (*comparison);
28671 int code_int;
28672 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28673 ? GET_MODE (*op2) : GET_MODE (*op1);
28674
28675 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28676
28677 if (code == UNEQ || code == LTGT)
28678 return false;
28679
28680 code_int = (int)code;
28681 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28682 PUT_CODE (*comparison, (enum rtx_code)code_int);
28683
28684 switch (mode)
28685 {
28686 case SImode:
28687 if (!arm_add_operand (*op1, mode))
28688 *op1 = force_reg (mode, *op1);
28689 if (!arm_add_operand (*op2, mode))
28690 *op2 = force_reg (mode, *op2);
28691 return true;
28692
28693 case DImode:
28694 if (!cmpdi_operand (*op1, mode))
28695 *op1 = force_reg (mode, *op1);
28696 if (!cmpdi_operand (*op2, mode))
28697 *op2 = force_reg (mode, *op2);
28698 return true;
28699
28700 case SFmode:
28701 case DFmode:
28702 if (!arm_float_compare_operand (*op1, mode))
28703 *op1 = force_reg (mode, *op1);
28704 if (!arm_float_compare_operand (*op2, mode))
28705 *op2 = force_reg (mode, *op2);
28706 return true;
28707 default:
28708 break;
28709 }
28710
28711 return false;
28712
28713 }
28714
28715 /* Maximum number of instructions to set block of memory. */
28716 static int
28717 arm_block_set_max_insns (void)
28718 {
28719 if (optimize_function_for_size_p (cfun))
28720 return 4;
28721 else
28722 return current_tune->max_insns_inline_memset;
28723 }
28724
28725 /* Return TRUE if it's profitable to set block of memory for
28726 non-vectorized case. VAL is the value to set the memory
28727 with. LENGTH is the number of bytes to set. ALIGN is the
28728 alignment of the destination memory in bytes. UNALIGNED_P
28729 is TRUE if we can only set the memory with instructions
28730 meeting alignment requirements. USE_STRD_P is TRUE if we
28731 can use strd to set the memory. */
28732 static bool
28733 arm_block_set_non_vect_profit_p (rtx val,
28734 unsigned HOST_WIDE_INT length,
28735 unsigned HOST_WIDE_INT align,
28736 bool unaligned_p, bool use_strd_p)
28737 {
28738 int num = 0;
28739 /* For leftovers of 0-7 bytes, we can set the memory block using
28740 strb/strh/str with the minimum number of instructions. */
28741 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28742
28743 if (unaligned_p)
28744 {
28745 num = arm_const_inline_cost (SET, val);
28746 num += length / align + length % align;
28747 }
28748 else if (use_strd_p)
28749 {
28750 num = arm_const_double_inline_cost (val);
28751 num += (length >> 3) + leftover[length & 7];
28752 }
28753 else
28754 {
28755 num = arm_const_inline_cost (SET, val);
28756 num += (length >> 2) + leftover[length & 3];
28757 }
28758
28759 /* We may be able to combine the last STRH/STRB pair into a single STR
28760 by shifting one byte back. */
28761 if (unaligned_access && length > 3 && (length & 3) == 3)
28762 num--;
28763
28764 return (num <= arm_block_set_max_insns ());
28765 }
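/* Worked example (illustrative, assuming unaligned access is available and
   arm_const_inline_cost returns 1 for the value): for an aligned 15-byte
   block set without strd, num = 1 + (15 >> 2) + leftover[15 & 3]
   = 1 + 3 + 2 = 6, and one is subtracted because the trailing STRH/STRB
   pair can be merged into a single unaligned STR, giving 5; the expansion
   is used only if this does not exceed arm_block_set_max_insns ().  */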
28766
28767 /* Return TRUE if it's profitable to set block of memory for
28768 vectorized case. LENGTH is the number of bytes to set.
28769 ALIGN is the alignment of destination memory in bytes.
28770 MODE is the vector mode used to set the memory. */
28771 static bool
28772 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28773 unsigned HOST_WIDE_INT align,
28774 machine_mode mode)
28775 {
28776 int num;
28777 bool unaligned_p = ((align & 3) != 0);
28778 unsigned int nelt = GET_MODE_NUNITS (mode);
28779
28780 /* Instruction loading constant value. */
28781 num = 1;
28782 /* Instructions storing the memory. */
28783 num += (length + nelt - 1) / nelt;
28784 /* Instructions adjusting the address expression. We only need to
28785 adjust the address if the block is 4-byte aligned and the leftover
28786 bytes can only be stored with a misaligned store instruction. */
28787 if (!unaligned_p && (length & 3) != 0)
28788 num++;
28789
28790 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28791 if (!unaligned_p && mode == V16QImode)
28792 num--;
28793
28794 return (num <= arm_block_set_max_insns ());
28795 }
28796
28797 /* Set a block of memory using vectorization instructions for the
28798 unaligned case. We fill the first LENGTH bytes of the memory
28799 area starting from DSTBASE with byte constant VALUE. ALIGN is
28800 the alignment requirement of memory. Return TRUE if succeeded. */
28801 static bool
28802 arm_block_set_unaligned_vect (rtx dstbase,
28803 unsigned HOST_WIDE_INT length,
28804 unsigned HOST_WIDE_INT value,
28805 unsigned HOST_WIDE_INT align)
28806 {
28807 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28808 rtx dst, mem;
28809 rtx val_elt, val_vec, reg;
28810 rtx rval[MAX_VECT_LEN];
28811 rtx (*gen_func) (rtx, rtx);
28812 machine_mode mode;
28813 unsigned HOST_WIDE_INT v = value;
28814
28815 gcc_assert ((align & 0x3) != 0);
28816 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28817 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28818 if (length >= nelt_v16)
28819 {
28820 mode = V16QImode;
28821 gen_func = gen_movmisalignv16qi;
28822 }
28823 else
28824 {
28825 mode = V8QImode;
28826 gen_func = gen_movmisalignv8qi;
28827 }
28828 nelt_mode = GET_MODE_NUNITS (mode);
28829 gcc_assert (length >= nelt_mode);
28830 /* Skip if it isn't profitable. */
28831 if (!arm_block_set_vect_profit_p (length, align, mode))
28832 return false;
28833
28834 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28835 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28836
28837 v = sext_hwi (v, BITS_PER_WORD);
28838 val_elt = GEN_INT (v);
28839 for (j = 0; j < nelt_mode; j++)
28840 rval[j] = val_elt;
28841
28842 reg = gen_reg_rtx (mode);
28843 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28844 /* Emit instruction loading the constant value. */
28845 emit_move_insn (reg, val_vec);
28846
28847 /* Handle nelt_mode bytes in a vector. */
28848 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28849 {
28850 emit_insn ((*gen_func) (mem, reg));
28851 if (i + 2 * nelt_mode <= length)
28852 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28853 }
28854
28855 /* If at least nelt_v8 bytes are left over, we must be in
28856 V16QImode. */
28857 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28858
28859 /* Handle (8, 16) bytes leftover. */
28860 if (i + nelt_v8 < length)
28861 {
28862 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28863 /* We are shifting bytes back, set the alignment accordingly. */
28864 if ((length & 1) != 0 && align >= 2)
28865 set_mem_align (mem, BITS_PER_UNIT);
28866
28867 emit_insn (gen_movmisalignv16qi (mem, reg));
28868 }
28869 /* Handle (0, 8] bytes leftover. */
28870 else if (i < length && i + nelt_v8 >= length)
28871 {
28872 if (mode == V16QImode)
28873 {
28874 reg = gen_lowpart (V8QImode, reg);
28875 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28876 }
28877 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28878 + (nelt_mode - nelt_v8))));
28879 /* We are shifting bytes back, set the alignment accordingly. */
28880 if ((length & 1) != 0 && align >= 2)
28881 set_mem_align (mem, BITS_PER_UNIT);
28882
28883 emit_insn (gen_movmisalignv8qi (mem, reg));
28884 }
28885
28886 return true;
28887 }
28888
28889 /* Set a block of memory using vectorization instructions for the
28890 aligned case. We fill the first LENGTH bytes of the memory area
28891 starting from DSTBASE with byte constant VALUE. ALIGN is the
28892 alignment requirement of memory. Return TRUE if succeeded. */
28893 static bool
28894 arm_block_set_aligned_vect (rtx dstbase,
28895 unsigned HOST_WIDE_INT length,
28896 unsigned HOST_WIDE_INT value,
28897 unsigned HOST_WIDE_INT align)
28898 {
28899 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28900 rtx dst, addr, mem;
28901 rtx val_elt, val_vec, reg;
28902 rtx rval[MAX_VECT_LEN];
28903 machine_mode mode;
28904 unsigned HOST_WIDE_INT v = value;
28905
28906 gcc_assert ((align & 0x3) == 0);
28907 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28908 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28909 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28910 mode = V16QImode;
28911 else
28912 mode = V8QImode;
28913
28914 nelt_mode = GET_MODE_NUNITS (mode);
28915 gcc_assert (length >= nelt_mode);
28916 /* Skip if it isn't profitable. */
28917 if (!arm_block_set_vect_profit_p (length, align, mode))
28918 return false;
28919
28920 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28921
28922 v = sext_hwi (v, BITS_PER_WORD);
28923 val_elt = GEN_INT (v);
28924 for (j = 0; j < nelt_mode; j++)
28925 rval[j] = val_elt;
28926
28927 reg = gen_reg_rtx (mode);
28928 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28929 /* Emit instruction loading the constant value. */
28930 emit_move_insn (reg, val_vec);
28931
28932 i = 0;
28933 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28934 if (mode == V16QImode)
28935 {
28936 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28937 emit_insn (gen_movmisalignv16qi (mem, reg));
28938 i += nelt_mode;
28939 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28940 if (i + nelt_v8 < length && i + nelt_v16 > length)
28941 {
28942 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28943 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28944 /* We are shifting bytes back, set the alignment accordingly. */
28945 if ((length & 0x3) == 0)
28946 set_mem_align (mem, BITS_PER_UNIT * 4);
28947 else if ((length & 0x1) == 0)
28948 set_mem_align (mem, BITS_PER_UNIT * 2);
28949 else
28950 set_mem_align (mem, BITS_PER_UNIT);
28951
28952 emit_insn (gen_movmisalignv16qi (mem, reg));
28953 return true;
28954 }
28955 /* Fall through for bytes leftover. */
28956 mode = V8QImode;
28957 nelt_mode = GET_MODE_NUNITS (mode);
28958 reg = gen_lowpart (V8QImode, reg);
28959 }
28960
28961 /* Handle 8 bytes in a vector. */
28962 for (; (i + nelt_mode <= length); i += nelt_mode)
28963 {
28964 addr = plus_constant (Pmode, dst, i);
28965 mem = adjust_automodify_address (dstbase, mode, addr, i);
28966 emit_move_insn (mem, reg);
28967 }
28968
28969 /* Handle single word leftover by shifting 4 bytes back. We can
28970 use aligned access for this case. */
28971 if (i + UNITS_PER_WORD == length)
28972 {
28973 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28974 mem = adjust_automodify_address (dstbase, mode,
28975 addr, i - UNITS_PER_WORD);
28976 /* We are shifting 4 bytes back, set the alignment accordingly. */
28977 if (align > UNITS_PER_WORD)
28978 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28979
28980 emit_move_insn (mem, reg);
28981 }
28982 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28983 We have to use unaligned access for this case. */
28984 else if (i < length)
28985 {
28986 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28987 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28988 /* We are shifting bytes back, set the alignment accordingly. */
28989 if ((length & 1) == 0)
28990 set_mem_align (mem, BITS_PER_UNIT * 2);
28991 else
28992 set_mem_align (mem, BITS_PER_UNIT);
28993
28994 emit_insn (gen_movmisalignv8qi (mem, reg));
28995 }
28996
28997 return true;
28998 }
28999
29000 /* Set a block of memory using plain strh/strb instructions, only
29001 using instructions allowed by ALIGN on the processor. We fill the
29002 first LENGTH bytes of the memory area starting from DSTBASE
29003 with byte constant VALUE. ALIGN is the alignment requirement
29004 of memory. */
29005 static bool
29006 arm_block_set_unaligned_non_vect (rtx dstbase,
29007 unsigned HOST_WIDE_INT length,
29008 unsigned HOST_WIDE_INT value,
29009 unsigned HOST_WIDE_INT align)
29010 {
29011 unsigned int i;
29012 rtx dst, addr, mem;
29013 rtx val_exp, val_reg, reg;
29014 machine_mode mode;
29015 HOST_WIDE_INT v = value;
29016
29017 gcc_assert (align == 1 || align == 2);
29018
29019 if (align == 2)
29020 v |= (value << BITS_PER_UNIT);
29021
29022 v = sext_hwi (v, BITS_PER_WORD);
29023 val_exp = GEN_INT (v);
29024 /* Skip if it isn't profitable. */
29025 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29026 align, true, false))
29027 return false;
29028
29029 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29030 mode = (align == 2 ? HImode : QImode);
29031 val_reg = force_reg (SImode, val_exp);
29032 reg = gen_lowpart (mode, val_reg);
29033
29034 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29035 {
29036 addr = plus_constant (Pmode, dst, i);
29037 mem = adjust_automodify_address (dstbase, mode, addr, i);
29038 emit_move_insn (mem, reg);
29039 }
29040
29041 /* Handle single byte leftover. */
29042 if (i + 1 == length)
29043 {
29044 reg = gen_lowpart (QImode, val_reg);
29045 addr = plus_constant (Pmode, dst, i);
29046 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29047 emit_move_insn (mem, reg);
29048 i++;
29049 }
29050
29051 gcc_assert (i == length);
29052 return true;
29053 }
29054
29055 /* Set a block of memory using plain strd/str/strh/strb instructions,
29056 to permit unaligned stores on processors which support unaligned
29057 semantics for those instructions. We fill the first LENGTH bytes
29058 of the memory area starting from DSTBASE with byte constant VALUE.
29059 ALIGN is the alignment requirement of memory. */
29060 static bool
29061 arm_block_set_aligned_non_vect (rtx dstbase,
29062 unsigned HOST_WIDE_INT length,
29063 unsigned HOST_WIDE_INT value,
29064 unsigned HOST_WIDE_INT align)
29065 {
29066 unsigned int i;
29067 rtx dst, addr, mem;
29068 rtx val_exp, val_reg, reg;
29069 unsigned HOST_WIDE_INT v;
29070 bool use_strd_p;
29071
29072 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
29073 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29074
29075 v = (value | (value << 8) | (value << 16) | (value << 24));
29076 if (length < UNITS_PER_WORD)
29077 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29078
29079 if (use_strd_p)
29080 v |= (v << BITS_PER_WORD);
29081 else
29082 v = sext_hwi (v, BITS_PER_WORD);
29083
29084 val_exp = GEN_INT (v);
29085 /* Skip if it isn't profitable. */
29086 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29087 align, false, use_strd_p))
29088 {
29089 if (!use_strd_p)
29090 return false;
29091
29092 /* Try without strd. */
29093 v = (v >> BITS_PER_WORD);
29094 v = sext_hwi (v, BITS_PER_WORD);
29095 val_exp = GEN_INT (v);
29096 use_strd_p = false;
29097 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29098 align, false, use_strd_p))
29099 return false;
29100 }
29101
29102 i = 0;
29103 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29104 /* Handle double words using strd if possible. */
29105 if (use_strd_p)
29106 {
29107 val_reg = force_reg (DImode, val_exp);
29108 reg = val_reg;
29109 for (; (i + 8 <= length); i += 8)
29110 {
29111 addr = plus_constant (Pmode, dst, i);
29112 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29113 emit_move_insn (mem, reg);
29114 }
29115 }
29116 else
29117 val_reg = force_reg (SImode, val_exp);
29118
29119 /* Handle words. */
29120 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29121 for (; (i + 4 <= length); i += 4)
29122 {
29123 addr = plus_constant (Pmode, dst, i);
29124 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29125 if ((align & 3) == 0)
29126 emit_move_insn (mem, reg);
29127 else
29128 emit_insn (gen_unaligned_storesi (mem, reg));
29129 }
29130
29131 /* Merge last pair of STRH and STRB into a STR if possible. */
29132 if (unaligned_access && i > 0 && (i + 3) == length)
29133 {
29134 addr = plus_constant (Pmode, dst, i - 1);
29135 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29136 /* We are shifting one byte back, set the alignment accordingly. */
29137 if ((align & 1) == 0)
29138 set_mem_align (mem, BITS_PER_UNIT);
29139
29140 /* Most likely this is an unaligned access, and we can't tell at
29141 compilation time. */
29142 emit_insn (gen_unaligned_storesi (mem, reg));
29143 return true;
29144 }
29145
29146 /* Handle half word leftover. */
29147 if (i + 2 <= length)
29148 {
29149 reg = gen_lowpart (HImode, val_reg);
29150 addr = plus_constant (Pmode, dst, i);
29151 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29152 if ((align & 1) == 0)
29153 emit_move_insn (mem, reg);
29154 else
29155 emit_insn (gen_unaligned_storehi (mem, reg));
29156
29157 i += 2;
29158 }
29159
29160 /* Handle single byte leftover. */
29161 if (i + 1 == length)
29162 {
29163 reg = gen_lowpart (QImode, val_reg);
29164 addr = plus_constant (Pmode, dst, i);
29165 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29166 emit_move_insn (mem, reg);
29167 }
29168
29169 return true;
29170 }
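/* Worked example (illustrative): setting 7 bytes of a word-aligned block
   stores one word for bytes 0-3; with unaligned access available the
   remaining three bytes are then covered by a single unaligned STR issued
   one byte back (at offset 3), otherwise the halfword and single-byte
   leftover paths above emit an STRH for bytes 4-5 and an STRB for
   byte 6.  */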
29171
29172 /* Set a block of memory using vectorization instructions for both
29173 aligned and unaligned cases. We fill the first LENGTH bytes of
29174 the memory area starting from DSTBASE with byte constant VALUE.
29175 ALIGN is the alignment requirement of memory. */
29176 static bool
29177 arm_block_set_vect (rtx dstbase,
29178 unsigned HOST_WIDE_INT length,
29179 unsigned HOST_WIDE_INT value,
29180 unsigned HOST_WIDE_INT align)
29181 {
29182 /* Check whether we need to use unaligned store instruction. */
29183 if (((align & 3) != 0 || (length & 3) != 0)
29184 /* Check whether unaligned store instruction is available. */
29185 && (!unaligned_access || BYTES_BIG_ENDIAN))
29186 return false;
29187
29188 if ((align & 3) == 0)
29189 return arm_block_set_aligned_vect (dstbase, length, value, align);
29190 else
29191 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29192 }
29193
29194 /* Expand a block-set (memset) operation. First we try to do it using
29195 vectorization instructions, then with ARM unaligned access and
29196 double-word stores if profitable. OPERANDS[0] is the destination,
29197 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29198 initialize the memory, OPERANDS[3] is the known alignment of the
29199 destination. */
29200 bool
29201 arm_gen_setmem (rtx *operands)
29202 {
29203 rtx dstbase = operands[0];
29204 unsigned HOST_WIDE_INT length;
29205 unsigned HOST_WIDE_INT value;
29206 unsigned HOST_WIDE_INT align;
29207
29208 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29209 return false;
29210
29211 length = UINTVAL (operands[1]);
29212 if (length > 64)
29213 return false;
29214
29215 value = (UINTVAL (operands[2]) & 0xFF);
29216 align = UINTVAL (operands[3]);
29217 if (TARGET_NEON && length >= 8
29218 && current_tune->string_ops_prefer_neon
29219 && arm_block_set_vect (dstbase, length, value, align))
29220 return true;
29221
29222 if (!unaligned_access && (align & 3) != 0)
29223 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29224
29225 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29226 }
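/* Illustrative only -- a user-level sketch of a call that is expanded via
   arm_gen_setmem; the function name is hypothetical.

     void
     clear_header (char *p)
     {
       __builtin_memset (p, 0, 24);
     }

   The setmem expander is handed the destination, the constant length (24),
   the constant value (0) and the known alignment; it tries the NEON path
   first when that is enabled and profitable, and otherwise falls back to
   the non-vectorized strategies above, giving up on non-constant operands
   or lengths above 64 bytes.  */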
29227
29228
29229 static bool
29230 arm_macro_fusion_p (void)
29231 {
29232 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
29233 }
29234
29235
29236 static bool
29237 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29238 {
29239 rtx set_dest;
29240 rtx prev_set = single_set (prev);
29241 rtx curr_set = single_set (curr);
29242
29243 if (!prev_set
29244 || !curr_set)
29245 return false;
29246
29247 if (any_condjump_p (curr))
29248 return false;
29249
29250 if (!arm_macro_fusion_p ())
29251 return false;
29252
29253 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT)
29254 {
29255 /* We are trying to fuse
29256 movw imm / movt imm
29257 instructions as a group that gets scheduled together. */
29258
29259 set_dest = SET_DEST (curr_set);
29260
29261 if (GET_MODE (set_dest) != SImode)
29262 return false;
29263
29264 /* We are trying to match:
29265 prev (movw) == (set (reg r0) (const_int imm16))
29266 curr (movt) == (set (zero_extract (reg r0)
29267 (const_int 16)
29268 (const_int 16))
29269 (const_int imm16_1))
29270 or
29271 prev (movw) == (set (reg r1)
29272 (high (symbol_ref ("SYM"))))
29273 curr (movt) == (set (reg r0)
29274 (lo_sum (reg r1)
29275 (symbol_ref ("SYM")))) */
29276 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29277 {
29278 if (CONST_INT_P (SET_SRC (curr_set))
29279 && CONST_INT_P (SET_SRC (prev_set))
29280 && REG_P (XEXP (set_dest, 0))
29281 && REG_P (SET_DEST (prev_set))
29282 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29283 return true;
29284 }
29285 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29286 && REG_P (SET_DEST (curr_set))
29287 && REG_P (SET_DEST (prev_set))
29288 && GET_CODE (SET_SRC (prev_set)) == HIGH
29289 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29290 return true;
29291 }
29292 return false;
29293 }
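/* For illustration, a hedged sketch (exact operand syntax depends on the
   assembler) of a pair that the FUSE_MOVW_MOVT check above is intended to
   keep adjacent during scheduling:

	movw	r0, #:lower16:some_symbol
	movt	r0, #:upper16:some_symbol

   i.e. a MOVW writing the low 16 bits of a register immediately followed
   by a MOVT writing the high 16 bits of the same register, matching either
   the zero_extract or the high/lo_sum RTL shapes listed above.  */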
29294
29295 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29296
29297 static unsigned HOST_WIDE_INT
29298 arm_asan_shadow_offset (void)
29299 {
29300 return (unsigned HOST_WIDE_INT) 1 << 29;
29301 }
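
/* Illustrative arithmetic (assuming the usual AddressSanitizer mapping
   shadow = (addr >> 3) + offset): with this offset of 1 << 29
   (0x20000000), an application address such as 0x40001000 has its
   shadow byte at (0x40001000 >> 3) + 0x20000000 == 0x28000200.  */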
29302
29303
29304 /* This is a temporary fix for PR60655.  Ideally we should handle
29305 most of these cases in the generic part, but currently we reject
29306 minus (..) (sym_ref).  We try to ameliorate the case of
29307 minus (sym_ref1) (sym_ref2) where both symbols are in the
29308 same section.  */
29309
29310 static bool
29311 arm_const_not_ok_for_debug_p (rtx p)
29312 {
29313 tree decl_op0 = NULL;
29314 tree decl_op1 = NULL;
29315
29316 if (GET_CODE (p) == MINUS)
29317 {
29318 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29319 {
29320 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29321 if (decl_op1
29322 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29323 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29324 {
29325 if ((TREE_CODE (decl_op1) == VAR_DECL
29326 || TREE_CODE (decl_op1) == CONST_DECL)
29327 && (TREE_CODE (decl_op0) == VAR_DECL
29328 || TREE_CODE (decl_op0) == CONST_DECL))
29329 return (get_variable_section (decl_op1, false)
29330 != get_variable_section (decl_op0, false));
29331
29332 if (TREE_CODE (decl_op1) == LABEL_DECL
29333 && TREE_CODE (decl_op0) == LABEL_DECL)
29334 return (DECL_CONTEXT (decl_op1)
29335 != DECL_CONTEXT (decl_op0));
29336 }
29337
29338 return true;
29339 }
29340 }
29341
29342 return false;
29343 }
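
/* Illustrative example (assumed): for two file-scope variables placed in
   the same section, say

     static int a, b;

   a debug expression of the form (minus (symbol_ref "b") (symbol_ref "a"))
   is acceptable (the function returns false), whereas the difference of a
   .data symbol and a .rodata symbol is rejected because the two decls end
   up in different sections.  */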
29344
29345 /* Return TRUE if X is a reference to a value in a constant pool.  */
29346 extern bool
29347 arm_is_constant_pool_ref (rtx x)
29348 {
29349 return (MEM_P (x)
29350 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29351 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29352 }
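
/* Illustrative example (assumed RTL): a literal-pool load emitted as
   "ldr r0, .LC0" appears as (mem (symbol_ref ".LC0")) with
   CONSTANT_POOL_ADDRESS_P set on the SYMBOL_REF, so the predicate above
   returns true; a reference to an ordinary global, (mem (symbol_ref "x")),
   returns false.  */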
29353
29354 /* Remember the last target of arm_set_current_function. */
29355 static GTY(()) tree arm_previous_fndecl;
29356
29357 /* Invalidate arm_previous_fndecl. */
29358 void
29359 arm_reset_previous_fndecl (void)
29360 {
29361 arm_previous_fndecl = NULL_TREE;
29362 }
29363
29364 /* Establish appropriate back-end context for processing the function
29365 FNDECL. The argument might be NULL to indicate processing at top
29366 level, outside of any function scope. */
29367 static void
29368 arm_set_current_function (tree fndecl)
29369 {
29370 if (!fndecl || fndecl == arm_previous_fndecl)
29371 return;
29372
29373 tree old_tree = (arm_previous_fndecl
29374 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
29375 : NULL_TREE);
29376
29377 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
29378
29379 arm_previous_fndecl = fndecl;
29380 if (old_tree == new_tree)
29381 return;
29382
29383 if (new_tree && new_tree != target_option_default_node)
29384 {
29385 cl_target_option_restore (&global_options,
29386 TREE_TARGET_OPTION (new_tree));
29387
29388 if (TREE_TARGET_GLOBALS (new_tree))
29389 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
29390 else
29391 TREE_TARGET_GLOBALS (new_tree)
29392 = save_target_globals_default_opts ();
29393 }
29394
29395 else if (old_tree && old_tree != target_option_default_node)
29396 {
29397 new_tree = target_option_current_node;
29398
29399 cl_target_option_restore (&global_options,
29400 TREE_TARGET_OPTION (new_tree));
29401 if (TREE_TARGET_GLOBALS (new_tree))
29402 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
29403 else if (new_tree == target_option_default_node)
29404 restore_target_globals (&default_target_globals);
29405 else
29406 TREE_TARGET_GLOBALS (new_tree)
29407 = save_target_globals_default_opts ();
29408 }
29409
29410 arm_option_params_internal (&global_options);
29411 }
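
/* Illustrative example (assumed): when the compiler switches from expanding

     void __attribute__ ((target ("thumb"))) f (void) { }

   to a following function with no target attribute, the first branch above
   restores F's saved options before F is expanded, and the second branch
   then restores the command-line defaults via target_option_current_node
   for the next function.  */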
29412
29413 /* Hook to determine if one function can safely inline another. */
29414
29415 static bool
29416 arm_can_inline_p (tree caller ATTRIBUTE_UNUSED, tree callee ATTRIBUTE_UNUSED)
29417 {
29418 /* Override the default hook: it is always OK to inline between different modes.
29419 Functions with mode-specific instructions, e.g. using asm, must be explicitly
29420 protected with noinline.  */
29421 return true;
29422 }
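
/* Illustrative consequence (assumed): with this hook a Thumb caller may
   inline an ARM-mode callee such as

     int __attribute__ ((target ("arm"))) g (void) { return 1; }

   so a callee containing mode-specific inline asm has to carry an explicit
   noinline attribute to stay in its own mode.  */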
29423
29424 /* Inner function to process attribute ((target (...))): take an argument
29425 and set the current options from it.  If we have a list, recursively
29426 go over the list.  */
29427
29428 static bool
29429 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
29430 {
29431 if (TREE_CODE (args) == TREE_LIST)
29432 {
29433 bool ret = true;
29434 for (; args; args = TREE_CHAIN (args))
29435 if (TREE_VALUE (args)
29436 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
29437 ret = false;
29438 return ret;
29439 }
29440
29441 else if (TREE_CODE (args) != STRING_CST)
29442 {
29443 error ("attribute %<target%> argument not a string");
29444 return false;
29445 }
29446
29447 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
29448 while (argstr && *argstr != '\0')
29449 {
29450 while (ISSPACE (*argstr))
29451 argstr++;
29452
29453 if (!strcmp (argstr, "thumb"))
29454 {
29455 opts->x_target_flags |= MASK_THUMB;
29456 arm_option_check_internal (opts);
29457 return true;
29458 }
29459
29460 if (!strcmp (argstr, "arm"))
29461 {
29462 opts->x_target_flags &= ~MASK_THUMB;
29463 arm_option_check_internal (opts);
29464 return true;
29465 }
29466
29467 warning (0, "attribute(target(\"%s\")) is unknown", argstr);
29468 return false;
29469 }
29470
29471 return false;
29472 }
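
/* Illustrative usage (assumed): the parser above accepts exactly the bare
   strings "thumb" and "arm", so

     void __attribute__ ((target ("thumb"))) f (void) { }
     void __attribute__ ((target ("arm")))   g (void) { }

   select the instruction set per function, while any other string, e.g.
   target ("neon"), is diagnosed as unknown.  */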
29473
29474 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
29475
29476 tree
29477 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
29478 struct gcc_options *opts_set)
29479 {
29480 if (!arm_valid_target_attribute_rec (args, opts))
29481 return NULL_TREE;
29482
29483 /* Do any overrides, such as global options arch=xxx. */
29484 arm_option_override_internal (opts, opts_set);
29485
29486 return build_target_option_node (opts);
29487 }
29488
29489 static void
29490 add_attribute (const char * mode, tree *attributes)
29491 {
29492 size_t len = strlen (mode);
29493 tree value = build_string (len, mode);
29494
29495 TREE_TYPE (value) = build_array_type (char_type_node,
29496 build_index_type (size_int (len)));
29497
29498 *attributes = tree_cons (get_identifier ("target"),
29499 build_tree_list (NULL_TREE, value),
29500 *attributes);
29501 }
29502
29503 /* For testing.  Insert thumb or arm modes alternately on functions.  */
29504
29505 static void
29506 arm_insert_attributes (tree fndecl, tree * attributes)
29507 {
29508 const char *mode;
29509
29510 if (! TARGET_FLIP_THUMB)
29511 return;
29512
29513 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL (fndecl)
29514 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
29515 return;
29516
29517 /* Nested definitions must inherit mode. */
29518 if (current_function_decl)
29519 {
29520 mode = TARGET_THUMB ? "thumb" : "arm";
29521 add_attribute (mode, attributes);
29522 return;
29523 }
29524
29525 /* If there is already a setting, don't change it.  */
29526 if (lookup_attribute ("target", *attributes) != NULL)
29527 return;
29528
29529 mode = thumb_flipper ? "thumb" : "arm";
29530 add_attribute (mode, attributes);
29531
29532 thumb_flipper = !thumb_flipper;
29533 }
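
/* Illustrative behaviour (assumed to correspond to the -mflip-thumb testing
   option that sets TARGET_FLIP_THUMB): successive top-level function
   definitions in a translation unit are given alternating
   target ("thumb") / target ("arm") attributes, except for nested
   functions, which inherit the mode of their containing function, and
   functions that already carry an explicit target attribute.  */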
29534
29535 /* Hook to validate attribute((target("string"))). */
29536
29537 static bool
29538 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
29539 tree args, int ARG_UNUSED (flags))
29540 {
29541 bool ret = true;
29542 struct gcc_options func_options;
29543 tree cur_tree, new_optimize;
29544 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
29545
29546 /* Get the optimization options of the current function. */
29547 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
29548
29549 /* If the function changed the optimization levels as well as setting target
29550 options, start with the optimizations specified; otherwise use the defaults.  */
29551 if (!func_optimize)
29552 func_optimize = optimization_default_node;
29553
29554 /* Init func_options. */
29555 memset (&func_options, 0, sizeof (func_options));
29556 init_options_struct (&func_options, NULL);
29557 lang_hooks.init_options_struct (&func_options);
29558
29559 /* Initialize func_options to the defaults. */
29560 cl_optimization_restore (&func_options,
29561 TREE_OPTIMIZATION (func_optimize));
29562
29563 cl_target_option_restore (&func_options,
29564 TREE_TARGET_OPTION (target_option_default_node));
29565
29566 /* Set func_options flags with new target mode. */
29567 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
29568 &global_options_set);
29569
29570 if (cur_tree == NULL_TREE)
29571 ret = false;
29572
29573 new_optimize = build_optimization_node (&func_options);
29574
29575 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
29576
29577 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
29578
29579 return ret;
29580 }
29581
29582 void
29583 arm_declare_function_name (FILE *stream, const char *name, tree decl)
29584 {
29585 if (TARGET_UNIFIED_ASM)
29586 fprintf (stream, "\t.syntax unified\n");
29587 else
29588 fprintf (stream, "\t.syntax divided\n");
29589
29590 if (TARGET_THUMB)
29591 {
29592 if (is_called_in_ARM_mode (decl)
29593 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
29594 && cfun->is_thunk))
29595 fprintf (stream, "\t.code 32\n");
29596 else if (TARGET_THUMB1)
29597 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
29598 else
29599 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
29600 }
29601 else
29602 fprintf (stream, "\t.arm\n");
29603
29604 if (TARGET_POKE_FUNCTION_NAME)
29605 arm_poke_function_name (stream, (const char *) name);
29606 }
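
/* Illustrative output (assumed): for a Thumb-2 function compiled with
   unified syntax the directives emitted above are roughly

       .syntax unified
       .thumb
       .thumb_func

   while an ARM-state function gets ".arm" instead, and a function that is
   called in ARM mode (or certain Thumb-1 thunks) gets ".code 32".  */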
29607
29608 /* If the address of MEM is in the form [base+offset], extract the two
29609 parts into BASE and OFFSET and return true; otherwise return false
29610 after clearing BASE and OFFSET.  */
29611
29612 static bool
29613 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29614 {
29615 rtx addr;
29616
29617 gcc_assert (MEM_P (mem));
29618
29619 addr = XEXP (mem, 0);
29620
29621 /* Strip off const from addresses like (const (addr)). */
29622 if (GET_CODE (addr) == CONST)
29623 addr = XEXP (addr, 0);
29624
29625 if (GET_CODE (addr) == REG)
29626 {
29627 *base = addr;
29628 *offset = const0_rtx;
29629 return true;
29630 }
29631
29632 if (GET_CODE (addr) == PLUS
29633 && GET_CODE (XEXP (addr, 0)) == REG
29634 && CONST_INT_P (XEXP (addr, 1)))
29635 {
29636 *base = XEXP (addr, 0);
29637 *offset = XEXP (addr, 1);
29638 return true;
29639 }
29640
29641 *base = NULL_RTX;
29642 *offset = NULL_RTX;
29643
29644 return false;
29645 }
29646
29647 /* If INSN is a load or store of an address in the form [base+offset],
29648 extract the two parts into BASE and OFFSET.  Set IS_LOAD to TRUE if
29649 it is a load.  Return TRUE if INSN is such an instruction,
29650 otherwise return FALSE.  */
29651
29652 static bool
29653 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29654 {
29655 rtx x, dest, src;
29656
29657 gcc_assert (INSN_P (insn));
29658 x = PATTERN (insn);
29659 if (GET_CODE (x) != SET)
29660 return false;
29661
29662 src = SET_SRC (x);
29663 dest = SET_DEST (x);
29664 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29665 {
29666 *is_load = false;
29667 extract_base_offset_in_addr (dest, base, offset);
29668 }
29669 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29670 {
29671 *is_load = true;
29672 extract_base_offset_in_addr (src, base, offset);
29673 }
29674 else
29675 return false;
29676
29677 return (*base != NULL_RTX && *offset != NULL_RTX);
29678 }
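
/* Illustrative examples (assumed RTL): a store such as

     (set (mem:SI (plus:SI (reg:SI 4) (const_int 8))) (reg:SI 0))

   yields *IS_LOAD == false, *BASE == (reg 4) and *OFFSET == (const_int 8),
   while a load from a bare register address,

     (set (reg:SI 0) (mem:SI (reg:SI 4)))

   yields *IS_LOAD == true with *OFFSET == const0_rtx.  */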
29679
29680 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29681
29682 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29683 and PRI are only calculated for these instructions.  For other instructions,
29684 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
29685 instruction fusion can be supported by returning different priorities.
29686
29687 It's important that irrelevant instructions get the largest FUSION_PRI. */
29688
29689 static void
29690 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29691 int *fusion_pri, int *pri)
29692 {
29693 int tmp, off_val;
29694 bool is_load;
29695 rtx base, offset;
29696
29697 gcc_assert (INSN_P (insn));
29698
29699 tmp = max_pri - 1;
29700 if (!fusion_load_store (insn, &base, &offset, &is_load))
29701 {
29702 *pri = tmp;
29703 *fusion_pri = tmp;
29704 return;
29705 }
29706
29707 /* Load goes first. */
29708 if (is_load)
29709 *fusion_pri = tmp - 1;
29710 else
29711 *fusion_pri = tmp - 2;
29712
29713 tmp /= 2;
29714
29715 /* INSN with smaller base register goes first. */
29716 tmp -= ((REGNO (base) & 0xff) << 20);
29717
29718 /* INSN with smaller offset goes first. */
29719 off_val = (int)(INTVAL (offset));
29720 if (off_val >= 0)
29721 tmp -= (off_val & 0xfffff);
29722 else
29723 tmp += ((- off_val) & 0xfffff);
29724
29725 *pri = tmp;
29726 return;
29727 }
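
/* Illustrative ordering (assumed): for two loads "ldr r0, [r4]" and
   "ldr r1, [r4, #4]" the priorities computed above give both the same
   FUSION_PRI (loads sort before stores) and give the zero-offset access
   the larger PRI, so the scheduler keeps the pair adjacent and in
   increasing-offset order, which lets a later pass combine them, e.g.
   into an ldrd.  */
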
29728 #include "gt-arm.h"