[ARM] [Trivial] Fix shortening of field name extend.
[gcc.git] / gcc / config / arm / arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2014 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "optabs.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "cgraph.h"
49 #include "ggc.h"
50 #include "except.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "debug.h"
55 #include "langhooks.h"
56 #include "df.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62
63 /* Forward definitions of types. */
64 typedef struct minipool_node Mnode;
65 typedef struct minipool_fixup Mfix;
66
67 void (*arm_lang_output_object_attributes_hook)(void);
68
69 struct four_ints
70 {
71 int i[4];
72 };
73
74 /* Forward function declarations. */
75 static bool arm_lra_p (void);
76 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
77 static int arm_compute_static_chain_stack_bytes (void);
78 static arm_stack_offsets *arm_get_frame_offsets (void);
79 static void arm_add_gc_roots (void);
80 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
81 HOST_WIDE_INT, rtx, rtx, int, int);
82 static unsigned bit_count (unsigned long);
83 static int arm_address_register_rtx_p (rtx, int);
84 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
85 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
86 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
87 static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
88 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
89 static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
90 inline static int thumb1_index_register_rtx_p (rtx, int);
91 static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
92 static int thumb_far_jump_used_p (void);
93 static bool thumb_force_lr_save (void);
94 static unsigned arm_size_return_regs (void);
95 static bool arm_assemble_integer (rtx, unsigned int, int);
96 static void arm_print_operand (FILE *, rtx, int);
97 static void arm_print_operand_address (FILE *, rtx);
98 static bool arm_print_operand_punct_valid_p (unsigned char code);
99 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
100 static arm_cc get_arm_condition_code (rtx);
101 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
102 static const char *output_multi_immediate (rtx *, const char *, const char *,
103 int, HOST_WIDE_INT);
104 static const char *shift_op (rtx, HOST_WIDE_INT *);
105 static struct machine_function *arm_init_machine_status (void);
106 static void thumb_exit (FILE *, int);
107 static HOST_WIDE_INT get_jump_table_size (rtx);
108 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
109 static Mnode *add_minipool_forward_ref (Mfix *);
110 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
111 static Mnode *add_minipool_backward_ref (Mfix *);
112 static void assign_minipool_offsets (Mfix *);
113 static void arm_print_value (FILE *, rtx);
114 static void dump_minipool (rtx);
115 static int arm_barrier_cost (rtx);
116 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
117 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
118 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
119 rtx);
120 static void arm_reorg (void);
121 static void note_invalid_constants (rtx, HOST_WIDE_INT, int);
122 static unsigned long arm_compute_save_reg0_reg12_mask (void);
123 static unsigned long arm_compute_save_reg_mask (void);
124 static unsigned long arm_isr_value (tree);
125 static unsigned long arm_compute_func_type (void);
126 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
127 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
128 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
129 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
130 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
131 #endif
132 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
133 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
134 static int arm_comp_type_attributes (const_tree, const_tree);
135 static void arm_set_default_type_attributes (tree);
136 static int arm_adjust_cost (rtx, rtx, rtx, int);
137 static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
138 static int optimal_immediate_sequence (enum rtx_code code,
139 unsigned HOST_WIDE_INT val,
140 struct four_ints *return_sequence);
141 static int optimal_immediate_sequence_1 (enum rtx_code code,
142 unsigned HOST_WIDE_INT val,
143 struct four_ints *return_sequence,
144 int i);
145 static int arm_get_strip_length (int);
146 static bool arm_function_ok_for_sibcall (tree, tree);
147 static enum machine_mode arm_promote_function_mode (const_tree,
148 enum machine_mode, int *,
149 const_tree, int);
150 static bool arm_return_in_memory (const_tree, const_tree);
151 static rtx arm_function_value (const_tree, const_tree, bool);
152 static rtx arm_libcall_value_1 (enum machine_mode);
153 static rtx arm_libcall_value (enum machine_mode, const_rtx);
154 static bool arm_function_value_regno_p (const unsigned int);
155 static void arm_internal_label (FILE *, const char *, unsigned long);
156 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
157 tree);
158 static bool arm_have_conditional_execution (void);
159 static bool arm_cannot_force_const_mem (enum machine_mode, rtx);
160 static bool arm_legitimate_constant_p (enum machine_mode, rtx);
161 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
162 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
163 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
164 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
165 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
166 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
167 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
168 static int arm_address_cost (rtx, enum machine_mode, addr_space_t, bool);
169 static int arm_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
170 static int arm_memory_move_cost (enum machine_mode, reg_class_t, bool);
171 static void arm_init_builtins (void);
172 static void arm_init_iwmmxt_builtins (void);
173 static rtx safe_vector_operand (rtx, enum machine_mode);
174 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
175 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
176 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
177 static tree arm_builtin_decl (unsigned, bool);
178 static void emit_constant_insn (rtx cond, rtx pattern);
179 static rtx emit_set_insn (rtx, rtx);
180 static rtx emit_multi_reg_push (unsigned long, unsigned long);
181 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
182 tree, bool);
183 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
184 const_tree, bool);
185 static void arm_function_arg_advance (cumulative_args_t, enum machine_mode,
186 const_tree, bool);
187 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
188 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
189 const_tree);
190 static rtx aapcs_libcall_value (enum machine_mode);
191 static int aapcs_select_return_coproc (const_tree, const_tree);
192
193 #ifdef OBJECT_FORMAT_ELF
194 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
195 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
196 #endif
197 #ifndef ARM_PE
198 static void arm_encode_section_info (tree, rtx, int);
199 #endif
200
201 static void arm_file_end (void);
202 static void arm_file_start (void);
203
204 static void arm_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
205 tree, int *, int);
206 static bool arm_pass_by_reference (cumulative_args_t,
207 enum machine_mode, const_tree, bool);
208 static bool arm_promote_prototypes (const_tree);
209 static bool arm_default_short_enums (void);
210 static bool arm_align_anon_bitfield (void);
211 static bool arm_return_in_msb (const_tree);
212 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
213 static bool arm_return_in_memory (const_tree, const_tree);
214 #if ARM_UNWIND_INFO
215 static void arm_unwind_emit (FILE *, rtx);
216 static bool arm_output_ttype (rtx);
217 static void arm_asm_emit_except_personality (rtx);
218 static void arm_asm_init_sections (void);
219 #endif
220 static rtx arm_dwarf_register_span (rtx);
221
222 static tree arm_cxx_guard_type (void);
223 static bool arm_cxx_guard_mask_bit (void);
224 static tree arm_get_cookie_size (tree);
225 static bool arm_cookie_has_size (void);
226 static bool arm_cxx_cdtor_returns_this (void);
227 static bool arm_cxx_key_method_may_be_inline (void);
228 static void arm_cxx_determine_class_data_visibility (tree);
229 static bool arm_cxx_class_data_always_comdat (void);
230 static bool arm_cxx_use_aeabi_atexit (void);
231 static void arm_init_libfuncs (void);
232 static tree arm_build_builtin_va_list (void);
233 static void arm_expand_builtin_va_start (tree, rtx);
234 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
235 static void arm_option_override (void);
236 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
237 static bool arm_cannot_copy_insn_p (rtx);
238 static int arm_issue_rate (void);
239 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
240 static bool arm_output_addr_const_extra (FILE *, rtx);
241 static bool arm_allocate_stack_slots_for_args (void);
242 static bool arm_warn_func_return (tree);
243 static const char *arm_invalid_parameter_type (const_tree t);
244 static const char *arm_invalid_return_type (const_tree t);
245 static tree arm_promoted_type (const_tree t);
246 static tree arm_convert_to_type (tree type, tree expr);
247 static bool arm_scalar_mode_supported_p (enum machine_mode);
248 static bool arm_frame_pointer_required (void);
249 static bool arm_can_eliminate (const int, const int);
250 static void arm_asm_trampoline_template (FILE *);
251 static void arm_trampoline_init (rtx, tree, rtx);
252 static rtx arm_trampoline_adjust_address (rtx);
253 static rtx arm_pic_static_addr (rtx orig, rtx reg);
254 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
255 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
256 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
257 static bool arm_array_mode_supported_p (enum machine_mode,
258 unsigned HOST_WIDE_INT);
259 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
260 static bool arm_class_likely_spilled_p (reg_class_t);
261 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
262 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
263 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
264 const_tree type,
265 int misalignment,
266 bool is_packed);
267 static void arm_conditional_register_usage (void);
268 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
269 static unsigned int arm_autovectorize_vector_sizes (void);
270 static int arm_default_branch_cost (bool, bool);
271 static int arm_cortex_a5_branch_cost (bool, bool);
272 static int arm_cortex_m_branch_cost (bool, bool);
273
274 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
275 const unsigned char *sel);
276
277 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
278 tree vectype,
279 int misalign ATTRIBUTE_UNUSED);
280 static unsigned arm_add_stmt_cost (void *data, int count,
281 enum vect_cost_for_stmt kind,
282 struct _stmt_vec_info *stmt_info,
283 int misalign,
284 enum vect_cost_model_location where);
285
286 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
287 bool op0_preserve_value);
288 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
289 \f
290 /* Table of machine attributes. */
291 static const struct attribute_spec arm_attribute_table[] =
292 {
293 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
294 affects_type_identity } */
295 /* Function calls made to this symbol must be done indirectly, because
296 it may lie outside of the 26 bit addressing range of a normal function
297 call. */
298 { "long_call", 0, 0, false, true, true, NULL, false },
299 /* Whereas these functions are always known to reside within the 26 bit
300 addressing range. */
301 { "short_call", 0, 0, false, true, true, NULL, false },
302 /* Specify the procedure call conventions for a function. */
303 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
304 false },
305 /* Interrupt Service Routines have special prologue and epilogue requirements. */
306 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
307 false },
308 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
309 false },
310 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
311 false },
312 #ifdef ARM_PE
313 /* ARM/PE has three new attributes:
314 interfacearm - ?
315 dllexport - for exporting a function/variable that will live in a dll
316 dllimport - for importing a function/variable from a dll
317
318 Microsoft allows multiple declspecs in one __declspec, separating
319 them with spaces. We do NOT support this. Instead, use __declspec
320 multiple times.
321 */
322 { "dllimport", 0, 0, true, false, false, NULL, false },
323 { "dllexport", 0, 0, true, false, false, NULL, false },
324 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
325 false },
326 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
327 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
328 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
329 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
330 false },
331 #endif
332 { NULL, 0, 0, false, false, false, NULL, false }
333 };
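/* Illustrative usage sketch (not part of this file; the function names are
   made up): the attributes declared in the table above are applied in user
   code roughly like so, and any attribute with a non-NULL handler above is
   validated by that handler.

     void far_away_fn (void) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_fn (double) __attribute__ ((pcs ("aapcs-vfp")));
*/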
334 \f
335 /* Initialize the GCC target structure. */
336 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
337 #undef TARGET_MERGE_DECL_ATTRIBUTES
338 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
339 #endif
340
341 #undef TARGET_LEGITIMIZE_ADDRESS
342 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
343
344 #undef TARGET_LRA_P
345 #define TARGET_LRA_P arm_lra_p
346
347 #undef TARGET_ATTRIBUTE_TABLE
348 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
349
350 #undef TARGET_ASM_FILE_START
351 #define TARGET_ASM_FILE_START arm_file_start
352 #undef TARGET_ASM_FILE_END
353 #define TARGET_ASM_FILE_END arm_file_end
354
355 #undef TARGET_ASM_ALIGNED_SI_OP
356 #define TARGET_ASM_ALIGNED_SI_OP NULL
357 #undef TARGET_ASM_INTEGER
358 #define TARGET_ASM_INTEGER arm_assemble_integer
359
360 #undef TARGET_PRINT_OPERAND
361 #define TARGET_PRINT_OPERAND arm_print_operand
362 #undef TARGET_PRINT_OPERAND_ADDRESS
363 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
364 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
365 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
366
367 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
368 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
369
370 #undef TARGET_ASM_FUNCTION_PROLOGUE
371 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
372
373 #undef TARGET_ASM_FUNCTION_EPILOGUE
374 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
375
376 #undef TARGET_OPTION_OVERRIDE
377 #define TARGET_OPTION_OVERRIDE arm_option_override
378
379 #undef TARGET_COMP_TYPE_ATTRIBUTES
380 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
381
382 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
383 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
384
385 #undef TARGET_SCHED_ADJUST_COST
386 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
387
388 #undef TARGET_SCHED_REORDER
389 #define TARGET_SCHED_REORDER arm_sched_reorder
390
391 #undef TARGET_REGISTER_MOVE_COST
392 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
393
394 #undef TARGET_MEMORY_MOVE_COST
395 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
396
397 #undef TARGET_ENCODE_SECTION_INFO
398 #ifdef ARM_PE
399 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
400 #else
401 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
402 #endif
403
404 #undef TARGET_STRIP_NAME_ENCODING
405 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
406
407 #undef TARGET_ASM_INTERNAL_LABEL
408 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
409
410 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
411 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
412
413 #undef TARGET_FUNCTION_VALUE
414 #define TARGET_FUNCTION_VALUE arm_function_value
415
416 #undef TARGET_LIBCALL_VALUE
417 #define TARGET_LIBCALL_VALUE arm_libcall_value
418
419 #undef TARGET_FUNCTION_VALUE_REGNO_P
420 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
421
422 #undef TARGET_ASM_OUTPUT_MI_THUNK
423 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
424 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
425 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
426
427 #undef TARGET_RTX_COSTS
428 #define TARGET_RTX_COSTS arm_rtx_costs
429 #undef TARGET_ADDRESS_COST
430 #define TARGET_ADDRESS_COST arm_address_cost
431
432 #undef TARGET_SHIFT_TRUNCATION_MASK
433 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
434 #undef TARGET_VECTOR_MODE_SUPPORTED_P
435 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
436 #undef TARGET_ARRAY_MODE_SUPPORTED_P
437 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
438 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
439 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
440 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
441 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
442 arm_autovectorize_vector_sizes
443
444 #undef TARGET_MACHINE_DEPENDENT_REORG
445 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
446
447 #undef TARGET_INIT_BUILTINS
448 #define TARGET_INIT_BUILTINS arm_init_builtins
449 #undef TARGET_EXPAND_BUILTIN
450 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
451 #undef TARGET_BUILTIN_DECL
452 #define TARGET_BUILTIN_DECL arm_builtin_decl
453
454 #undef TARGET_INIT_LIBFUNCS
455 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
456
457 #undef TARGET_PROMOTE_FUNCTION_MODE
458 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
459 #undef TARGET_PROMOTE_PROTOTYPES
460 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
461 #undef TARGET_PASS_BY_REFERENCE
462 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
463 #undef TARGET_ARG_PARTIAL_BYTES
464 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
465 #undef TARGET_FUNCTION_ARG
466 #define TARGET_FUNCTION_ARG arm_function_arg
467 #undef TARGET_FUNCTION_ARG_ADVANCE
468 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
469 #undef TARGET_FUNCTION_ARG_BOUNDARY
470 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
471
472 #undef TARGET_SETUP_INCOMING_VARARGS
473 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
474
475 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
476 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
477
478 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
479 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
480 #undef TARGET_TRAMPOLINE_INIT
481 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
482 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
483 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
484
485 #undef TARGET_WARN_FUNC_RETURN
486 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
487
488 #undef TARGET_DEFAULT_SHORT_ENUMS
489 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
490
491 #undef TARGET_ALIGN_ANON_BITFIELD
492 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
493
494 #undef TARGET_NARROW_VOLATILE_BITFIELD
495 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
496
497 #undef TARGET_CXX_GUARD_TYPE
498 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
499
500 #undef TARGET_CXX_GUARD_MASK_BIT
501 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
502
503 #undef TARGET_CXX_GET_COOKIE_SIZE
504 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
505
506 #undef TARGET_CXX_COOKIE_HAS_SIZE
507 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
508
509 #undef TARGET_CXX_CDTOR_RETURNS_THIS
510 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
511
512 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
513 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
514
515 #undef TARGET_CXX_USE_AEABI_ATEXIT
516 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
517
518 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
519 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
520 arm_cxx_determine_class_data_visibility
521
522 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
523 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
524
525 #undef TARGET_RETURN_IN_MSB
526 #define TARGET_RETURN_IN_MSB arm_return_in_msb
527
528 #undef TARGET_RETURN_IN_MEMORY
529 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
530
531 #undef TARGET_MUST_PASS_IN_STACK
532 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
533
534 #if ARM_UNWIND_INFO
535 #undef TARGET_ASM_UNWIND_EMIT
536 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
537
538 /* EABI unwinding tables use a different format for the typeinfo tables. */
539 #undef TARGET_ASM_TTYPE
540 #define TARGET_ASM_TTYPE arm_output_ttype
541
542 #undef TARGET_ARM_EABI_UNWINDER
543 #define TARGET_ARM_EABI_UNWINDER true
544
545 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
546 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
547
548 #undef TARGET_ASM_INIT_SECTIONS
549 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
550 #endif /* ARM_UNWIND_INFO */
551
552 #undef TARGET_DWARF_REGISTER_SPAN
553 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
554
555 #undef TARGET_CANNOT_COPY_INSN_P
556 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
557
558 #ifdef HAVE_AS_TLS
559 #undef TARGET_HAVE_TLS
560 #define TARGET_HAVE_TLS true
561 #endif
562
563 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
564 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
565
566 #undef TARGET_LEGITIMATE_CONSTANT_P
567 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
568
569 #undef TARGET_CANNOT_FORCE_CONST_MEM
570 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
571
572 #undef TARGET_MAX_ANCHOR_OFFSET
573 #define TARGET_MAX_ANCHOR_OFFSET 4095
574
575 /* The minimum is set such that the total size of the block
576 for a particular anchor is -4088 + 1 + 4095 bytes, which is
577 divisible by eight, ensuring natural spacing of anchors. */
578 #undef TARGET_MIN_ANCHOR_OFFSET
579 #define TARGET_MIN_ANCHOR_OFFSET -4088
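/* Worked check of the comment above (illustration only): the anchor range
   runs from -4088 to +4095 inclusive, i.e. 4095 - (-4088) + 1 = 8184 bytes,
   and 8184 / 8 = 1023, so the block is indeed a whole number of eight-byte
   units. */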
580
581 #undef TARGET_SCHED_ISSUE_RATE
582 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
583
584 #undef TARGET_MANGLE_TYPE
585 #define TARGET_MANGLE_TYPE arm_mangle_type
586
587 #undef TARGET_BUILD_BUILTIN_VA_LIST
588 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
589 #undef TARGET_EXPAND_BUILTIN_VA_START
590 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
591 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
592 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
593
594 #ifdef HAVE_AS_TLS
595 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
596 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
597 #endif
598
599 #undef TARGET_LEGITIMATE_ADDRESS_P
600 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
601
602 #undef TARGET_PREFERRED_RELOAD_CLASS
603 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
604
605 #undef TARGET_INVALID_PARAMETER_TYPE
606 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
607
608 #undef TARGET_INVALID_RETURN_TYPE
609 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
610
611 #undef TARGET_PROMOTED_TYPE
612 #define TARGET_PROMOTED_TYPE arm_promoted_type
613
614 #undef TARGET_CONVERT_TO_TYPE
615 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
616
617 #undef TARGET_SCALAR_MODE_SUPPORTED_P
618 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
619
620 #undef TARGET_FRAME_POINTER_REQUIRED
621 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
622
623 #undef TARGET_CAN_ELIMINATE
624 #define TARGET_CAN_ELIMINATE arm_can_eliminate
625
626 #undef TARGET_CONDITIONAL_REGISTER_USAGE
627 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
628
629 #undef TARGET_CLASS_LIKELY_SPILLED_P
630 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
631
632 #undef TARGET_VECTORIZE_BUILTINS
633 #define TARGET_VECTORIZE_BUILTINS
634
635 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
636 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
637 arm_builtin_vectorized_function
638
639 #undef TARGET_VECTOR_ALIGNMENT
640 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
641
642 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
643 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
644 arm_vector_alignment_reachable
645
646 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
647 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
648 arm_builtin_support_vector_misalignment
649
650 #undef TARGET_PREFERRED_RENAME_CLASS
651 #define TARGET_PREFERRED_RENAME_CLASS \
652 arm_preferred_rename_class
653
654 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
655 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
656 arm_vectorize_vec_perm_const_ok
657
658 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
659 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
660 arm_builtin_vectorization_cost
661 #undef TARGET_VECTORIZE_ADD_STMT_COST
662 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
663
664 #undef TARGET_CANONICALIZE_COMPARISON
665 #define TARGET_CANONICALIZE_COMPARISON \
666 arm_canonicalize_comparison
667
668 #undef TARGET_ASAN_SHADOW_OFFSET
669 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
670
671 #undef MAX_INSN_PER_IT_BLOCK
672 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
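/* Note (illustration, not from the original source): when arm_restrict_it is
   set (e.g. via -mrestrict-it) only a single instruction is allowed per IT
   block, matching the ARMv8 preference for simple IT blocks; otherwise up to
   four instructions, the architectural maximum, may share one IT block. */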
673
674 #undef TARGET_CAN_USE_DOLOOP_P
675 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
676
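/* A sketch of how the #undef/#define pairs above take effect (an assumption
   based on the generic target-hook mechanism, not code in this excerpt):
   TARGET_INITIALIZER, defined via target-def.h, expands to an aggregate
   initializer whose fields are the TARGET_* macros, so after the overrides
   above e.g. targetm.rtx_costs points at arm_rtx_costs and
   targetm.sched.adjust_cost at arm_adjust_cost. */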
677 struct gcc_target targetm = TARGET_INITIALIZER;
678 \f
679 /* Obstack for minipool constant handling. */
680 static struct obstack minipool_obstack;
681 static char * minipool_startobj;
682
683 /* The maximum number of insns skipped which
684 will be conditionalised if possible. */
685 static int max_insns_skipped = 5;
686
687 extern FILE * asm_out_file;
688
689 /* True if we are currently building a constant table. */
690 int making_const_table;
691
692 /* The processor for which instructions should be scheduled. */
693 enum processor_type arm_tune = arm_none;
694
695 /* The current tuning set. */
696 const struct tune_params *current_tune;
697
698 /* Which floating point hardware to schedule for. */
699 int arm_fpu_attr;
700
701 /* Which floating point hardware to use. */
702 const struct arm_fpu_desc *arm_fpu_desc;
703
704 /* Used for Thumb call_via trampolines. */
705 rtx thumb_call_via_label[14];
706 static int thumb_call_reg_needed;
707
708 /* Bit values used to identify processor capabilities. */
709 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
710 #define FL_ARCH3M (1 << 1) /* Extended multiply */
711 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
712 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
713 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
714 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
715 #define FL_THUMB (1 << 6) /* Thumb aware */
716 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
717 #define FL_STRONG (1 << 8) /* StrongARM */
718 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
719 #define FL_XSCALE (1 << 10) /* XScale */
720 /* spare (1 << 11) */
721 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
722 media instructions. */
723 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
724 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
725 Note: ARM6 & 7 derivatives only. */
726 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
727 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
728 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
729 profile. */
730 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
731 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
732 #define FL_NEON (1 << 20) /* Neon instructions. */
733 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
734 architecture. */
735 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
736 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
737 #define FL_ARCH8 (1 << 24) /* Architecture 8. */
738 #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */
739
740 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
741 #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */
742
743 /* Flags that only affect tuning, not available instructions. */
744 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
745 | FL_CO_PROC)
746
747 #define FL_FOR_ARCH2 FL_NOTM
748 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
749 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
750 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
751 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
752 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
753 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
754 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
755 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
756 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
757 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
758 #define FL_FOR_ARCH6J FL_FOR_ARCH6
759 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
760 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
761 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
762 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
763 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
764 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
765 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
766 #define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV)
767 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
768 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
769 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
770 #define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8)
771
772 /* The bits in this mask specify which
773 instructions we are allowed to generate. */
774 static unsigned long insn_flags = 0;
775
776 /* The bits in this mask specify which instruction scheduling options should
777 be used. */
778 static unsigned long tune_flags = 0;
779
780 /* The highest ARM architecture version supported by the
781 target. */
782 enum base_architecture arm_base_arch = BASE_ARCH_0;
783
784 /* The following are used in the arm.md file as equivalents to bits
785 in the above two flag variables. */
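/* Illustration (an assumption about code further down in this file): each of
   these variables is derived from insn_flags in arm_option_override with a
   test of the form

     arm_arch6k = (insn_flags & FL_ARCH6K) != 0;

   so the .md file can test a cached capability bit instead of the raw mask. */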
786
787 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
788 int arm_arch3m = 0;
789
790 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
791 int arm_arch4 = 0;
792
793 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
794 int arm_arch4t = 0;
795
796 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
797 int arm_arch5 = 0;
798
799 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
800 int arm_arch5e = 0;
801
802 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
803 int arm_arch6 = 0;
804
805 /* Nonzero if this chip supports the ARM 6K extensions. */
806 int arm_arch6k = 0;
807
808 /* Nonzero if instructions present in ARMv6-M can be used. */
809 int arm_arch6m = 0;
810
811 /* Nonzero if this chip supports the ARM 7 extensions. */
812 int arm_arch7 = 0;
813
814 /* Nonzero if instructions not present in the 'M' profile can be used. */
815 int arm_arch_notm = 0;
816
817 /* Nonzero if instructions present in ARMv7E-M can be used. */
818 int arm_arch7em = 0;
819
820 /* Nonzero if instructions present in ARMv8 can be used. */
821 int arm_arch8 = 0;
822
823 /* Nonzero if this chip can benefit from load scheduling. */
824 int arm_ld_sched = 0;
825
826 /* Nonzero if this chip is a StrongARM. */
827 int arm_tune_strongarm = 0;
828
829 /* Nonzero if this chip supports Intel Wireless MMX technology. */
830 int arm_arch_iwmmxt = 0;
831
832 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
833 int arm_arch_iwmmxt2 = 0;
834
835 /* Nonzero if this chip is an XScale. */
836 int arm_arch_xscale = 0;
837
838 /* Nonzero if tuning for XScale. */
839 int arm_tune_xscale = 0;
840
841 /* Nonzero if we want to tune for stores that access the write-buffer.
842 This typically means an ARM6 or ARM7 with MMU or MPU. */
843 int arm_tune_wbuf = 0;
844
845 /* Nonzero if tuning for Cortex-A9. */
846 int arm_tune_cortex_a9 = 0;
847
848 /* Nonzero if generating Thumb instructions. */
849 int thumb_code = 0;
850
851 /* Nonzero if generating Thumb-1 instructions. */
852 int thumb1_code = 0;
853
854 /* Nonzero if we should define __THUMB_INTERWORK__ in the
855 preprocessor.
856 XXX This is a bit of a hack, it's intended to help work around
857 problems in GLD which doesn't understand that armv5t code is
858 interworking clean. */
859 int arm_cpp_interwork = 0;
860
861 /* Nonzero if chip supports Thumb 2. */
862 int arm_arch_thumb2;
863
864 /* Nonzero if chip supports integer division instruction. */
865 int arm_arch_arm_hwdiv;
866 int arm_arch_thumb_hwdiv;
867
868 /* Nonzero if we should use Neon to handle 64-bit operations rather
869 than core registers. */
870 int prefer_neon_for_64bits = 0;
871
872 /* Nonzero if we shouldn't use literal pools. */
873 bool arm_disable_literal_pool = false;
874
875 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
876 we must report the mode of the memory reference from
877 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
878 enum machine_mode output_memory_reference_mode;
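/* For example (a hedged description; the code is elsewhere in this file):
   when arm_print_operand prints a POST_INC memory operand it records the
   access mode here, so that arm_print_operand_address can print the
   writeback offset as GET_MODE_SIZE of that mode. */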
879
880 /* The register number to be used for the PIC offset register. */
881 unsigned arm_pic_register = INVALID_REGNUM;
882
883 /* Set to 1 after arm_reorg has started. Reset at the start of the
884 next function. */
885 static int after_arm_reorg = 0;
886
887 enum arm_pcs arm_pcs_default;
888
889 /* For an explanation of these variables, see final_prescan_insn below. */
890 int arm_ccfsm_state;
891 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
892 enum arm_cond_code arm_current_cc;
893
894 rtx arm_target_insn;
895 int arm_target_label;
896 /* The number of conditionally executed insns, including the current insn. */
897 int arm_condexec_count = 0;
898 /* A bitmask specifying the patterns for the IT block.
899 Zero means do not output an IT block before this insn. */
900 int arm_condexec_mask = 0;
901 /* The number of bits used in arm_condexec_mask. */
902 int arm_condexec_masklen = 0;
903
904 /* Nonzero if chip supports the ARMv8 CRC instructions. */
905 int arm_arch_crc = 0;
906
907 /* The condition codes of the ARM, and the inverse function. */
908 static const char * const arm_condition_codes[] =
909 {
910 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
911 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
912 };
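/* Observation offered for illustration: the codes are laid out so that each
   even/odd pair are logical inverses (eq/ne, cs/cc, mi/pl, ...), which is
   what lets the "inverse function" mentioned above be computed by simply
   XOR-ing the low bit of the index. */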
913
914 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
915 int arm_regs_in_sequence[] =
916 {
917 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
918 };
919
920 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
921 #define streq(string1, string2) (strcmp (string1, string2) == 0)
922
923 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
924 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
925 | (1 << PIC_OFFSET_TABLE_REGNUM)))
926 \f
927 /* Initialization code. */
928
929 struct processors
930 {
931 const char *const name;
932 enum processor_type core;
933 const char *arch;
934 enum base_architecture base_arch;
935 const unsigned long flags;
936 const struct tune_params *const tune;
937 };
938
939
940 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
941 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
942 prefetch_slots, \
943 l1_size, \
944 l1_line_size
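/* Usage sketch: a tune_params initializer below either says
   ARM_PREFETCH_NOT_BENEFICIAL (0 prefetch slots, unknown L1 size and line
   size) or something like ARM_PREFETCH_BENEFICIAL (4,32,32), which fills the
   same three prefetch-related fields with 4 slots and L1 size/line-size
   values of 32. */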
945
946 /* arm generic vectorizer costs. */
947 static const
948 struct cpu_vec_costs arm_default_vec_cost = {
949 1, /* scalar_stmt_cost. */
950 1, /* scalar_load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 1, /* vec_unalign_load_cost. */
957 1, /* vec_unalign_store_cost. */
958 1, /* vec_store_cost. */
959 3, /* cond_taken_branch_cost. */
960 1, /* cond_not_taken_branch_cost. */
961 };
962
963 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
964 #include "aarch-cost-tables.h"
965
966
967
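/* Reading note (an assumption about the cost units): COSTS_N_INSNS (n) is
   GCC's conventional way of expressing the cost of n average instructions in
   the rtx-cost scale, so an entry such as COSTS_N_INSNS (2) charges roughly
   two instructions' worth, while a bare 0 means no extra cost over the
   baseline operation. */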
968 const struct cpu_cost_table cortexa9_extra_costs =
969 {
970 /* ALU */
971 {
972 0, /* arith. */
973 0, /* logical. */
974 0, /* shift. */
975 COSTS_N_INSNS (1), /* shift_reg. */
976 COSTS_N_INSNS (1), /* arith_shift. */
977 COSTS_N_INSNS (2), /* arith_shift_reg. */
978 0, /* log_shift. */
979 COSTS_N_INSNS (1), /* log_shift_reg. */
980 COSTS_N_INSNS (1), /* extend. */
981 COSTS_N_INSNS (2), /* extend_arith. */
982 COSTS_N_INSNS (1), /* bfi. */
983 COSTS_N_INSNS (1), /* bfx. */
984 0, /* clz. */
985 0, /* non_exec. */
986 true /* non_exec_costs_exec. */
987 },
988 {
989 /* MULT SImode */
990 {
991 COSTS_N_INSNS (3), /* simple. */
992 COSTS_N_INSNS (3), /* flag_setting. */
993 COSTS_N_INSNS (2), /* extend. */
994 COSTS_N_INSNS (3), /* add. */
995 COSTS_N_INSNS (2), /* extend_add. */
996 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
997 },
998 /* MULT DImode */
999 {
1000 0, /* simple (N/A). */
1001 0, /* flag_setting (N/A). */
1002 COSTS_N_INSNS (4), /* extend. */
1003 0, /* add (N/A). */
1004 COSTS_N_INSNS (4), /* extend_add. */
1005 0 /* idiv (N/A). */
1006 }
1007 },
1008 /* LD/ST */
1009 {
1010 COSTS_N_INSNS (2), /* load. */
1011 COSTS_N_INSNS (2), /* load_sign_extend. */
1012 COSTS_N_INSNS (2), /* ldrd. */
1013 COSTS_N_INSNS (2), /* ldm_1st. */
1014 1, /* ldm_regs_per_insn_1st. */
1015 2, /* ldm_regs_per_insn_subsequent. */
1016 COSTS_N_INSNS (5), /* loadf. */
1017 COSTS_N_INSNS (5), /* loadd. */
1018 COSTS_N_INSNS (1), /* load_unaligned. */
1019 COSTS_N_INSNS (2), /* store. */
1020 COSTS_N_INSNS (2), /* strd. */
1021 COSTS_N_INSNS (2), /* stm_1st. */
1022 1, /* stm_regs_per_insn_1st. */
1023 2, /* stm_regs_per_insn_subsequent. */
1024 COSTS_N_INSNS (1), /* storef. */
1025 COSTS_N_INSNS (1), /* stored. */
1026 COSTS_N_INSNS (1) /* store_unaligned. */
1027 },
1028 {
1029 /* FP SFmode */
1030 {
1031 COSTS_N_INSNS (14), /* div. */
1032 COSTS_N_INSNS (4), /* mult. */
1033 COSTS_N_INSNS (7), /* mult_addsub. */
1034 COSTS_N_INSNS (30), /* fma. */
1035 COSTS_N_INSNS (3), /* addsub. */
1036 COSTS_N_INSNS (1), /* fpconst. */
1037 COSTS_N_INSNS (1), /* neg. */
1038 COSTS_N_INSNS (3), /* compare. */
1039 COSTS_N_INSNS (3), /* widen. */
1040 COSTS_N_INSNS (3), /* narrow. */
1041 COSTS_N_INSNS (3), /* toint. */
1042 COSTS_N_INSNS (3), /* fromint. */
1043 COSTS_N_INSNS (3) /* roundint. */
1044 },
1045 /* FP DFmode */
1046 {
1047 COSTS_N_INSNS (24), /* div. */
1048 COSTS_N_INSNS (5), /* mult. */
1049 COSTS_N_INSNS (8), /* mult_addsub. */
1050 COSTS_N_INSNS (30), /* fma. */
1051 COSTS_N_INSNS (3), /* addsub. */
1052 COSTS_N_INSNS (1), /* fpconst. */
1053 COSTS_N_INSNS (1), /* neg. */
1054 COSTS_N_INSNS (3), /* compare. */
1055 COSTS_N_INSNS (3), /* widen. */
1056 COSTS_N_INSNS (3), /* narrow. */
1057 COSTS_N_INSNS (3), /* toint. */
1058 COSTS_N_INSNS (3), /* fromint. */
1059 COSTS_N_INSNS (3) /* roundint. */
1060 }
1061 },
1062 /* Vector */
1063 {
1064 COSTS_N_INSNS (1) /* alu. */
1065 }
1066 };
1067
1068
1069 const struct cpu_cost_table cortexa7_extra_costs =
1070 {
1071 /* ALU */
1072 {
1073 0, /* arith. */
1074 0, /* logical. */
1075 COSTS_N_INSNS (1), /* shift. */
1076 COSTS_N_INSNS (1), /* shift_reg. */
1077 COSTS_N_INSNS (1), /* arith_shift. */
1078 COSTS_N_INSNS (1), /* arith_shift_reg. */
1079 COSTS_N_INSNS (1), /* log_shift. */
1080 COSTS_N_INSNS (1), /* log_shift_reg. */
1081 COSTS_N_INSNS (1), /* extend. */
1082 COSTS_N_INSNS (1), /* extend_arith. */
1083 COSTS_N_INSNS (1), /* bfi. */
1084 COSTS_N_INSNS (1), /* bfx. */
1085 COSTS_N_INSNS (1), /* clz. */
1086 0, /* non_exec. */
1087 true /* non_exec_costs_exec. */
1088 },
1089
1090 {
1091 /* MULT SImode */
1092 {
1093 0, /* simple. */
1094 COSTS_N_INSNS (1), /* flag_setting. */
1095 COSTS_N_INSNS (1), /* extend. */
1096 COSTS_N_INSNS (1), /* add. */
1097 COSTS_N_INSNS (1), /* extend_add. */
1098 COSTS_N_INSNS (7) /* idiv. */
1099 },
1100 /* MULT DImode */
1101 {
1102 0, /* simple (N/A). */
1103 0, /* flag_setting (N/A). */
1104 COSTS_N_INSNS (1), /* extend. */
1105 0, /* add. */
1106 COSTS_N_INSNS (2), /* extend_add. */
1107 0 /* idiv (N/A). */
1108 }
1109 },
1110 /* LD/ST */
1111 {
1112 COSTS_N_INSNS (1), /* load. */
1113 COSTS_N_INSNS (1), /* load_sign_extend. */
1114 COSTS_N_INSNS (3), /* ldrd. */
1115 COSTS_N_INSNS (1), /* ldm_1st. */
1116 1, /* ldm_regs_per_insn_1st. */
1117 2, /* ldm_regs_per_insn_subsequent. */
1118 COSTS_N_INSNS (2), /* loadf. */
1119 COSTS_N_INSNS (2), /* loadd. */
1120 COSTS_N_INSNS (1), /* load_unaligned. */
1121 COSTS_N_INSNS (1), /* store. */
1122 COSTS_N_INSNS (3), /* strd. */
1123 COSTS_N_INSNS (1), /* stm_1st. */
1124 1, /* stm_regs_per_insn_1st. */
1125 2, /* stm_regs_per_insn_subsequent. */
1126 COSTS_N_INSNS (2), /* storef. */
1127 COSTS_N_INSNS (2), /* stored. */
1128 COSTS_N_INSNS (1) /* store_unaligned. */
1129 },
1130 {
1131 /* FP SFmode */
1132 {
1133 COSTS_N_INSNS (15), /* div. */
1134 COSTS_N_INSNS (3), /* mult. */
1135 COSTS_N_INSNS (7), /* mult_addsub. */
1136 COSTS_N_INSNS (7), /* fma. */
1137 COSTS_N_INSNS (3), /* addsub. */
1138 COSTS_N_INSNS (3), /* fpconst. */
1139 COSTS_N_INSNS (3), /* neg. */
1140 COSTS_N_INSNS (3), /* compare. */
1141 COSTS_N_INSNS (3), /* widen. */
1142 COSTS_N_INSNS (3), /* narrow. */
1143 COSTS_N_INSNS (3), /* toint. */
1144 COSTS_N_INSNS (3), /* fromint. */
1145 COSTS_N_INSNS (3) /* roundint. */
1146 },
1147 /* FP DFmode */
1148 {
1149 COSTS_N_INSNS (30), /* div. */
1150 COSTS_N_INSNS (6), /* mult. */
1151 COSTS_N_INSNS (10), /* mult_addsub. */
1152 COSTS_N_INSNS (7), /* fma. */
1153 COSTS_N_INSNS (3), /* addsub. */
1154 COSTS_N_INSNS (3), /* fpconst. */
1155 COSTS_N_INSNS (3), /* neg. */
1156 COSTS_N_INSNS (3), /* compare. */
1157 COSTS_N_INSNS (3), /* widen. */
1158 COSTS_N_INSNS (3), /* narrow. */
1159 COSTS_N_INSNS (3), /* toint. */
1160 COSTS_N_INSNS (3), /* fromint. */
1161 COSTS_N_INSNS (3) /* roundint. */
1162 }
1163 },
1164 /* Vector */
1165 {
1166 COSTS_N_INSNS (1) /* alu. */
1167 }
1168 };
1169
1170 const struct cpu_cost_table cortexa12_extra_costs =
1171 {
1172 /* ALU */
1173 {
1174 0, /* arith. */
1175 0, /* logical. */
1176 0, /* shift. */
1177 COSTS_N_INSNS (1), /* shift_reg. */
1178 COSTS_N_INSNS (1), /* arith_shift. */
1179 COSTS_N_INSNS (1), /* arith_shift_reg. */
1180 COSTS_N_INSNS (1), /* log_shift. */
1181 COSTS_N_INSNS (1), /* log_shift_reg. */
1182 0, /* extend. */
1183 COSTS_N_INSNS (1), /* extend_arith. */
1184 0, /* bfi. */
1185 COSTS_N_INSNS (1), /* bfx. */
1186 COSTS_N_INSNS (1), /* clz. */
1187 0, /* non_exec. */
1188 true /* non_exec_costs_exec. */
1189 },
1190 /* MULT SImode */
1191 {
1192 {
1193 COSTS_N_INSNS (2), /* simple. */
1194 COSTS_N_INSNS (3), /* flag_setting. */
1195 COSTS_N_INSNS (2), /* extend. */
1196 COSTS_N_INSNS (3), /* add. */
1197 COSTS_N_INSNS (2), /* extend_add. */
1198 COSTS_N_INSNS (18) /* idiv. */
1199 },
1200 /* MULT DImode */
1201 {
1202 0, /* simple (N/A). */
1203 0, /* flag_setting (N/A). */
1204 COSTS_N_INSNS (3), /* extend. */
1205 0, /* add (N/A). */
1206 COSTS_N_INSNS (3), /* extend_add. */
1207 0 /* idiv (N/A). */
1208 }
1209 },
1210 /* LD/ST */
1211 {
1212 COSTS_N_INSNS (3), /* load. */
1213 COSTS_N_INSNS (3), /* load_sign_extend. */
1214 COSTS_N_INSNS (3), /* ldrd. */
1215 COSTS_N_INSNS (3), /* ldm_1st. */
1216 1, /* ldm_regs_per_insn_1st. */
1217 2, /* ldm_regs_per_insn_subsequent. */
1218 COSTS_N_INSNS (3), /* loadf. */
1219 COSTS_N_INSNS (3), /* loadd. */
1220 0, /* load_unaligned. */
1221 0, /* store. */
1222 0, /* strd. */
1223 0, /* stm_1st. */
1224 1, /* stm_regs_per_insn_1st. */
1225 2, /* stm_regs_per_insn_subsequent. */
1226 COSTS_N_INSNS (2), /* storef. */
1227 COSTS_N_INSNS (2), /* stored. */
1228 0 /* store_unaligned. */
1229 },
1230 {
1231 /* FP SFmode */
1232 {
1233 COSTS_N_INSNS (17), /* div. */
1234 COSTS_N_INSNS (4), /* mult. */
1235 COSTS_N_INSNS (8), /* mult_addsub. */
1236 COSTS_N_INSNS (8), /* fma. */
1237 COSTS_N_INSNS (4), /* addsub. */
1238 COSTS_N_INSNS (2), /* fpconst. */
1239 COSTS_N_INSNS (2), /* neg. */
1240 COSTS_N_INSNS (2), /* compare. */
1241 COSTS_N_INSNS (4), /* widen. */
1242 COSTS_N_INSNS (4), /* narrow. */
1243 COSTS_N_INSNS (4), /* toint. */
1244 COSTS_N_INSNS (4), /* fromint. */
1245 COSTS_N_INSNS (4) /* roundint. */
1246 },
1247 /* FP DFmode */
1248 {
1249 COSTS_N_INSNS (31), /* div. */
1250 COSTS_N_INSNS (4), /* mult. */
1251 COSTS_N_INSNS (8), /* mult_addsub. */
1252 COSTS_N_INSNS (8), /* fma. */
1253 COSTS_N_INSNS (4), /* addsub. */
1254 COSTS_N_INSNS (2), /* fpconst. */
1255 COSTS_N_INSNS (2), /* neg. */
1256 COSTS_N_INSNS (2), /* compare. */
1257 COSTS_N_INSNS (4), /* widen. */
1258 COSTS_N_INSNS (4), /* narrow. */
1259 COSTS_N_INSNS (4), /* toint. */
1260 COSTS_N_INSNS (4), /* fromint. */
1261 COSTS_N_INSNS (4) /* roundint. */
1262 }
1263 },
1264 /* Vector */
1265 {
1266 COSTS_N_INSNS (1) /* alu. */
1267 }
1268 };
1269
1270 const struct cpu_cost_table cortexa15_extra_costs =
1271 {
1272 /* ALU */
1273 {
1274 0, /* arith. */
1275 0, /* logical. */
1276 0, /* shift. */
1277 0, /* shift_reg. */
1278 COSTS_N_INSNS (1), /* arith_shift. */
1279 COSTS_N_INSNS (1), /* arith_shift_reg. */
1280 COSTS_N_INSNS (1), /* log_shift. */
1281 COSTS_N_INSNS (1), /* log_shift_reg. */
1282 0, /* extend. */
1283 COSTS_N_INSNS (1), /* extend_arith. */
1284 COSTS_N_INSNS (1), /* bfi. */
1285 0, /* bfx. */
1286 0, /* clz. */
1287 0, /* non_exec. */
1288 true /* non_exec_costs_exec. */
1289 },
1290 /* MULT SImode */
1291 {
1292 {
1293 COSTS_N_INSNS (2), /* simple. */
1294 COSTS_N_INSNS (3), /* flag_setting. */
1295 COSTS_N_INSNS (2), /* extend. */
1296 COSTS_N_INSNS (2), /* add. */
1297 COSTS_N_INSNS (2), /* extend_add. */
1298 COSTS_N_INSNS (18) /* idiv. */
1299 },
1300 /* MULT DImode */
1301 {
1302 0, /* simple (N/A). */
1303 0, /* flag_setting (N/A). */
1304 COSTS_N_INSNS (3), /* extend. */
1305 0, /* add (N/A). */
1306 COSTS_N_INSNS (3), /* extend_add. */
1307 0 /* idiv (N/A). */
1308 }
1309 },
1310 /* LD/ST */
1311 {
1312 COSTS_N_INSNS (3), /* load. */
1313 COSTS_N_INSNS (3), /* load_sign_extend. */
1314 COSTS_N_INSNS (3), /* ldrd. */
1315 COSTS_N_INSNS (4), /* ldm_1st. */
1316 1, /* ldm_regs_per_insn_1st. */
1317 2, /* ldm_regs_per_insn_subsequent. */
1318 COSTS_N_INSNS (4), /* loadf. */
1319 COSTS_N_INSNS (4), /* loadd. */
1320 0, /* load_unaligned. */
1321 0, /* store. */
1322 0, /* strd. */
1323 COSTS_N_INSNS (1), /* stm_1st. */
1324 1, /* stm_regs_per_insn_1st. */
1325 2, /* stm_regs_per_insn_subsequent. */
1326 0, /* storef. */
1327 0, /* stored. */
1328 0 /* store_unaligned. */
1329 },
1330 {
1331 /* FP SFmode */
1332 {
1333 COSTS_N_INSNS (17), /* div. */
1334 COSTS_N_INSNS (4), /* mult. */
1335 COSTS_N_INSNS (8), /* mult_addsub. */
1336 COSTS_N_INSNS (8), /* fma. */
1337 COSTS_N_INSNS (4), /* addsub. */
1338 COSTS_N_INSNS (2), /* fpconst. */
1339 COSTS_N_INSNS (2), /* neg. */
1340 COSTS_N_INSNS (5), /* compare. */
1341 COSTS_N_INSNS (4), /* widen. */
1342 COSTS_N_INSNS (4), /* narrow. */
1343 COSTS_N_INSNS (4), /* toint. */
1344 COSTS_N_INSNS (4), /* fromint. */
1345 COSTS_N_INSNS (4) /* roundint. */
1346 },
1347 /* FP DFmode */
1348 {
1349 COSTS_N_INSNS (31), /* div. */
1350 COSTS_N_INSNS (4), /* mult. */
1351 COSTS_N_INSNS (8), /* mult_addsub. */
1352 COSTS_N_INSNS (8), /* fma. */
1353 COSTS_N_INSNS (4), /* addsub. */
1354 COSTS_N_INSNS (2), /* fpconst. */
1355 COSTS_N_INSNS (2), /* neg. */
1356 COSTS_N_INSNS (2), /* compare. */
1357 COSTS_N_INSNS (4), /* widen. */
1358 COSTS_N_INSNS (4), /* narrow. */
1359 COSTS_N_INSNS (4), /* toint. */
1360 COSTS_N_INSNS (4), /* fromint. */
1361 COSTS_N_INSNS (4) /* roundint. */
1362 }
1363 },
1364 /* Vector */
1365 {
1366 COSTS_N_INSNS (1) /* alu. */
1367 }
1368 };
1369
1370 const struct cpu_cost_table v7m_extra_costs =
1371 {
1372 /* ALU */
1373 {
1374 0, /* arith. */
1375 0, /* logical. */
1376 0, /* shift. */
1377 0, /* shift_reg. */
1378 0, /* arith_shift. */
1379 COSTS_N_INSNS (1), /* arith_shift_reg. */
1380 0, /* log_shift. */
1381 COSTS_N_INSNS (1), /* log_shift_reg. */
1382 0, /* extend. */
1383 COSTS_N_INSNS (1), /* extend_arith. */
1384 0, /* bfi. */
1385 0, /* bfx. */
1386 0, /* clz. */
1387 COSTS_N_INSNS (1), /* non_exec. */
1388 false /* non_exec_costs_exec. */
1389 },
1390 {
1391 /* MULT SImode */
1392 {
1393 COSTS_N_INSNS (1), /* simple. */
1394 COSTS_N_INSNS (1), /* flag_setting. */
1395 COSTS_N_INSNS (2), /* extend. */
1396 COSTS_N_INSNS (1), /* add. */
1397 COSTS_N_INSNS (3), /* extend_add. */
1398 COSTS_N_INSNS (8) /* idiv. */
1399 },
1400 /* MULT DImode */
1401 {
1402 0, /* simple (N/A). */
1403 0, /* flag_setting (N/A). */
1404 COSTS_N_INSNS (2), /* extend. */
1405 0, /* add (N/A). */
1406 COSTS_N_INSNS (3), /* extend_add. */
1407 0 /* idiv (N/A). */
1408 }
1409 },
1410 /* LD/ST */
1411 {
1412 COSTS_N_INSNS (2), /* load. */
1413 0, /* load_sign_extend. */
1414 COSTS_N_INSNS (3), /* ldrd. */
1415 COSTS_N_INSNS (2), /* ldm_1st. */
1416 1, /* ldm_regs_per_insn_1st. */
1417 1, /* ldm_regs_per_insn_subsequent. */
1418 COSTS_N_INSNS (2), /* loadf. */
1419 COSTS_N_INSNS (3), /* loadd. */
1420 COSTS_N_INSNS (1), /* load_unaligned. */
1421 COSTS_N_INSNS (2), /* store. */
1422 COSTS_N_INSNS (3), /* strd. */
1423 COSTS_N_INSNS (2), /* stm_1st. */
1424 1, /* stm_regs_per_insn_1st. */
1425 1, /* stm_regs_per_insn_subsequent. */
1426 COSTS_N_INSNS (2), /* storef. */
1427 COSTS_N_INSNS (3), /* stored. */
1428 COSTS_N_INSNS (1) /* store_unaligned. */
1429 },
1430 {
1431 /* FP SFmode */
1432 {
1433 COSTS_N_INSNS (7), /* div. */
1434 COSTS_N_INSNS (2), /* mult. */
1435 COSTS_N_INSNS (5), /* mult_addsub. */
1436 COSTS_N_INSNS (3), /* fma. */
1437 COSTS_N_INSNS (1), /* addsub. */
1438 0, /* fpconst. */
1439 0, /* neg. */
1440 0, /* compare. */
1441 0, /* widen. */
1442 0, /* narrow. */
1443 0, /* toint. */
1444 0, /* fromint. */
1445 0 /* roundint. */
1446 },
1447 /* FP DFmode */
1448 {
1449 COSTS_N_INSNS (15), /* div. */
1450 COSTS_N_INSNS (5), /* mult. */
1451 COSTS_N_INSNS (7), /* mult_addsub. */
1452 COSTS_N_INSNS (7), /* fma. */
1453 COSTS_N_INSNS (3), /* addsub. */
1454 0, /* fpconst. */
1455 0, /* neg. */
1456 0, /* compare. */
1457 0, /* widen. */
1458 0, /* narrow. */
1459 0, /* toint. */
1460 0, /* fromint. */
1461 0 /* roundint. */
1462 }
1463 },
1464 /* Vector */
1465 {
1466 COSTS_N_INSNS (1) /* alu. */
1467 }
1468 };
1469
1470 const struct tune_params arm_slowmul_tune =
1471 {
1472 arm_slowmul_rtx_costs,
1473 NULL,
1474 NULL, /* Sched adj cost. */
1475 3, /* Constant limit. */
1476 5, /* Max cond insns. */
1477 ARM_PREFETCH_NOT_BENEFICIAL,
1478 true, /* Prefer constant pool. */
1479 arm_default_branch_cost,
1480 false, /* Prefer LDRD/STRD. */
1481 {true, true}, /* Prefer non short circuit. */
1482 &arm_default_vec_cost, /* Vectorizer costs. */
1483 false /* Prefer Neon for 64-bits bitops. */
1484 };
1485
1486 const struct tune_params arm_fastmul_tune =
1487 {
1488 arm_fastmul_rtx_costs,
1489 NULL,
1490 NULL, /* Sched adj cost. */
1491 1, /* Constant limit. */
1492 5, /* Max cond insns. */
1493 ARM_PREFETCH_NOT_BENEFICIAL,
1494 true, /* Prefer constant pool. */
1495 arm_default_branch_cost,
1496 false, /* Prefer LDRD/STRD. */
1497 {true, true}, /* Prefer non short circuit. */
1498 &arm_default_vec_cost, /* Vectorizer costs. */
1499 false /* Prefer Neon for 64-bits bitops. */
1500 };
1501
1502 /* StrongARM has early execution of branches, so a sequence that is worth
1503 skipping is shorter. Set max_insns_skipped to a lower value. */
1504
1505 const struct tune_params arm_strongarm_tune =
1506 {
1507 arm_fastmul_rtx_costs,
1508 NULL,
1509 NULL, /* Sched adj cost. */
1510 1, /* Constant limit. */
1511 3, /* Max cond insns. */
1512 ARM_PREFETCH_NOT_BENEFICIAL,
1513 true, /* Prefer constant pool. */
1514 arm_default_branch_cost,
1515 false, /* Prefer LDRD/STRD. */
1516 {true, true}, /* Prefer non short circuit. */
1517 &arm_default_vec_cost, /* Vectorizer costs. */
1518 false /* Prefer Neon for 64-bits bitops. */
1519 };
1520
1521 const struct tune_params arm_xscale_tune =
1522 {
1523 arm_xscale_rtx_costs,
1524 NULL,
1525 xscale_sched_adjust_cost,
1526 2, /* Constant limit. */
1527 3, /* Max cond insns. */
1528 ARM_PREFETCH_NOT_BENEFICIAL,
1529 true, /* Prefer constant pool. */
1530 arm_default_branch_cost,
1531 false, /* Prefer LDRD/STRD. */
1532 {true, true}, /* Prefer non short circuit. */
1533 &arm_default_vec_cost, /* Vectorizer costs. */
1534 false /* Prefer Neon for 64-bits bitops. */
1535 };
1536
1537 const struct tune_params arm_9e_tune =
1538 {
1539 arm_9e_rtx_costs,
1540 NULL,
1541 NULL, /* Sched adj cost. */
1542 1, /* Constant limit. */
1543 5, /* Max cond insns. */
1544 ARM_PREFETCH_NOT_BENEFICIAL,
1545 true, /* Prefer constant pool. */
1546 arm_default_branch_cost,
1547 false, /* Prefer LDRD/STRD. */
1548 {true, true}, /* Prefer non short circuit. */
1549 &arm_default_vec_cost, /* Vectorizer costs. */
1550 false /* Prefer Neon for 64-bits bitops. */
1551 };
1552
1553 const struct tune_params arm_v6t2_tune =
1554 {
1555 arm_9e_rtx_costs,
1556 NULL,
1557 NULL, /* Sched adj cost. */
1558 1, /* Constant limit. */
1559 5, /* Max cond insns. */
1560 ARM_PREFETCH_NOT_BENEFICIAL,
1561 false, /* Prefer constant pool. */
1562 arm_default_branch_cost,
1563 false, /* Prefer LDRD/STRD. */
1564 {true, true}, /* Prefer non short circuit. */
1565 &arm_default_vec_cost, /* Vectorizer costs. */
1566 false /* Prefer Neon for 64-bits bitops. */
1567 };
1568
1569 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1570 const struct tune_params arm_cortex_tune =
1571 {
1572 arm_9e_rtx_costs,
1573 &generic_extra_costs,
1574 NULL, /* Sched adj cost. */
1575 1, /* Constant limit. */
1576 5, /* Max cond insns. */
1577 ARM_PREFETCH_NOT_BENEFICIAL,
1578 false, /* Prefer constant pool. */
1579 arm_default_branch_cost,
1580 false, /* Prefer LDRD/STRD. */
1581 {true, true}, /* Prefer non short circuit. */
1582 &arm_default_vec_cost, /* Vectorizer costs. */
1583 false /* Prefer Neon for 64-bits bitops. */
1584 };
1585
1586 const struct tune_params arm_cortex_a7_tune =
1587 {
1588 arm_9e_rtx_costs,
1589 &cortexa7_extra_costs,
1590 NULL,
1591 1, /* Constant limit. */
1592 5, /* Max cond insns. */
1593 ARM_PREFETCH_NOT_BENEFICIAL,
1594 false, /* Prefer constant pool. */
1595 arm_default_branch_cost,
1596 false, /* Prefer LDRD/STRD. */
1597 {true, true}, /* Prefer non short circuit. */
1598 &arm_default_vec_cost, /* Vectorizer costs. */
1599 false /* Prefer Neon for 64-bit bitops. */
1600 };
1601
1602 const struct tune_params arm_cortex_a15_tune =
1603 {
1604 arm_9e_rtx_costs,
1605 &cortexa15_extra_costs,
1606 NULL, /* Sched adj cost. */
1607 1, /* Constant limit. */
1608 2, /* Max cond insns. */
1609 ARM_PREFETCH_NOT_BENEFICIAL,
1610 false, /* Prefer constant pool. */
1611 arm_default_branch_cost,
1612 true, /* Prefer LDRD/STRD. */
1613 {true, true}, /* Prefer non short circuit. */
1614 &arm_default_vec_cost, /* Vectorizer costs. */
1615 false /* Prefer Neon for 64-bit bitops. */
1616 };
1617
1618 const struct tune_params arm_cortex_a53_tune =
1619 {
1620 arm_9e_rtx_costs,
1621 &cortexa53_extra_costs,
1622 NULL, /* Scheduler cost adjustment. */
1623 1, /* Constant limit. */
1624 5, /* Max cond insns. */
1625 ARM_PREFETCH_NOT_BENEFICIAL,
1626 false, /* Prefer constant pool. */
1627 arm_default_branch_cost,
1628 false, /* Prefer LDRD/STRD. */
1629 {true, true}, /* Prefer non short circuit. */
1630 &arm_default_vec_cost, /* Vectorizer costs. */
1631 false /* Prefer Neon for 64-bit bitops. */
1632 };
1633
1634 const struct tune_params arm_cortex_a57_tune =
1635 {
1636 arm_9e_rtx_costs,
1637 &cortexa57_extra_costs,
1638 NULL, /* Scheduler cost adjustment. */
1639 1, /* Constant limit. */
1640 2, /* Max cond insns. */
1641 ARM_PREFETCH_NOT_BENEFICIAL,
1642 false, /* Prefer constant pool. */
1643 arm_default_branch_cost,
1644 true, /* Prefer LDRD/STRD. */
1645 {true, true}, /* Prefer non short circuit. */
1646 &arm_default_vec_cost, /* Vectorizer costs. */
1647 false /* Prefer Neon for 64-bit bitops. */
1648 };
1649
1650 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1651 less appealing. Set max_insns_skipped to a low value. */
1652
1653 const struct tune_params arm_cortex_a5_tune =
1654 {
1655 arm_9e_rtx_costs,
1656 NULL,
1657 NULL, /* Sched adj cost. */
1658 1, /* Constant limit. */
1659 1, /* Max cond insns. */
1660 ARM_PREFETCH_NOT_BENEFICIAL,
1661 false, /* Prefer constant pool. */
1662 arm_cortex_a5_branch_cost,
1663 false, /* Prefer LDRD/STRD. */
1664 {false, false}, /* Prefer non short circuit. */
1665 &arm_default_vec_cost, /* Vectorizer costs. */
1666 false /* Prefer Neon for 64-bit bitops. */
1667 };
1668
1669 const struct tune_params arm_cortex_a9_tune =
1670 {
1671 arm_9e_rtx_costs,
1672 &cortexa9_extra_costs,
1673 cortex_a9_sched_adjust_cost,
1674 1, /* Constant limit. */
1675 5, /* Max cond insns. */
1676 ARM_PREFETCH_BENEFICIAL(4,32,32),
1677 false, /* Prefer constant pool. */
1678 arm_default_branch_cost,
1679 false, /* Prefer LDRD/STRD. */
1680 {true, true}, /* Prefer non short circuit. */
1681 &arm_default_vec_cost, /* Vectorizer costs. */
1682 false /* Prefer Neon for 64-bit bitops. */
1683 };
1684
1685 const struct tune_params arm_cortex_a12_tune =
1686 {
1687 arm_9e_rtx_costs,
1688 &cortexa12_extra_costs,
1689 NULL,
1690 1, /* Constant limit. */
1691 5, /* Max cond insns. */
1692 ARM_PREFETCH_BENEFICIAL(4,32,32),
1693 false, /* Prefer constant pool. */
1694 arm_default_branch_cost,
1695 true, /* Prefer LDRD/STRD. */
1696 {true, true}, /* Prefer non short circuit. */
1697 &arm_default_vec_cost, /* Vectorizer costs. */
1698 false /* Prefer Neon for 64-bit bitops. */
1699 };
1700
1701 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT each take a
1702 single cycle, so the pair costs two cycles in total. An LDR from the constant
1703 pool likewise takes two cycles to execute, but mildly increases pipelining
1704 opportunity (consecutive loads/stores can be pipelined together, saving one
1705 cycle), and may also improve icache utilisation. Hence we prefer the
1706 constant pool for such processors. */
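/* Illustrative only (not literal compiler output): a constant such as
   0x12345678 can be built either with the two-instruction immediate
   sequence

       movw  r0, #0x5678
       movt  r0, #0x1234

   or with a single pc-relative literal-pool load

       ldr   r0, .Lconst
       ...
     .Lconst:
       .word 0x12345678

   and the trade-off described above is what tips armv7m towards the
   latter. */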
1707
1708 const struct tune_params arm_v7m_tune =
1709 {
1710 arm_9e_rtx_costs,
1711 &v7m_extra_costs,
1712 NULL, /* Sched adj cost. */
1713 1, /* Constant limit. */
1714 2, /* Max cond insns. */
1715 ARM_PREFETCH_NOT_BENEFICIAL,
1716 true, /* Prefer constant pool. */
1717 arm_cortex_m_branch_cost,
1718 false, /* Prefer LDRD/STRD. */
1719 {false, false}, /* Prefer non short circuit. */
1720 &arm_default_vec_cost, /* Vectorizer costs. */
1721 false /* Prefer Neon for 64-bit bitops. */
1722 };
1723
1724 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1725 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1726 const struct tune_params arm_v6m_tune =
1727 {
1728 arm_9e_rtx_costs,
1729 NULL,
1730 NULL, /* Sched adj cost. */
1731 1, /* Constant limit. */
1732 5, /* Max cond insns. */
1733 ARM_PREFETCH_NOT_BENEFICIAL,
1734 false, /* Prefer constant pool. */
1735 arm_default_branch_cost,
1736 false, /* Prefer LDRD/STRD. */
1737 {false, false}, /* Prefer non short circuit. */
1738 &arm_default_vec_cost, /* Vectorizer costs. */
1739 false /* Prefer Neon for 64-bit bitops. */
1740 };
1741
1742 const struct tune_params arm_fa726te_tune =
1743 {
1744 arm_9e_rtx_costs,
1745 NULL,
1746 fa726te_sched_adjust_cost,
1747 1, /* Constant limit. */
1748 5, /* Max cond insns. */
1749 ARM_PREFETCH_NOT_BENEFICIAL,
1750 true, /* Prefer constant pool. */
1751 arm_default_branch_cost,
1752 false, /* Prefer LDRD/STRD. */
1753 {true, true}, /* Prefer non short circuit. */
1754 &arm_default_vec_cost, /* Vectorizer costs. */
1755 false /* Prefer Neon for 64-bit bitops. */
1756 };
1757
1758
1759 /* Not all of these give usefully different compilation alternatives,
1760 but there is no simple way of generalizing them. */
1761 static const struct processors all_cores[] =
1762 {
1763 /* ARM Cores */
1764 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
1765 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
1766 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
1767 #include "arm-cores.def"
1768 #undef ARM_CORE
1769 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1770 };
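/* As a rough sketch of how the table above gets filled in (the real
   entries live in arm-cores.def and may differ), a line such as

     ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9)

   would expand to

     {"cortex-a9", cortexa9, "7A", BASE_ARCH_7A,
      FL_LDSCHED | FL_FOR_ARCH7A, &arm_cortex_a9_tune},  */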
1771
1772 static const struct processors all_architectures[] =
1773 {
1774 /* ARM Architectures */
1775 /* We don't specify tuning costs here as it will be figured out
1776 from the core. */
1777
1778 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
1779 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
1780 #include "arm-arches.def"
1781 #undef ARM_ARCH
1782 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
1783 };
1784
1785
1786 /* These are populated as commandline arguments are processed, or NULL
1787 if not specified. */
1788 static const struct processors *arm_selected_arch;
1789 static const struct processors *arm_selected_cpu;
1790 static const struct processors *arm_selected_tune;
1791
1792 /* The name of the preprocessor macro to define for this architecture. */
1793
1794 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
1795
1796 /* Available values for -mfpu=. */
1797
1798 static const struct arm_fpu_desc all_fpus[] =
1799 {
1800 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
1801 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
1802 #include "arm-fpus.def"
1803 #undef ARM_FPU
1804 };
1805
1806
1807 /* Supported TLS relocations. */
1808
1809 enum tls_reloc {
1810 TLS_GD32,
1811 TLS_LDM32,
1812 TLS_LDO32,
1813 TLS_IE32,
1814 TLS_LE32,
1815 TLS_DESCSEQ /* GNU scheme */
1816 };
1817
1818 /* The maximum number of insns to be used when loading a constant. */
1819 inline static int
1820 arm_constant_limit (bool size_p)
1821 {
1822 return size_p ? 1 : current_tune->constant_limit;
1823 }
1824
1825 /* Emit an insn that's a simple single-set. Both the operands must be known
1826 to be valid. */
1827 inline static rtx
1828 emit_set_insn (rtx x, rtx y)
1829 {
1830 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
1831 }
1832
1833 /* Return the number of bits set in VALUE. */
1834 static unsigned
1835 bit_count (unsigned long value)
1836 {
1837 unsigned long count = 0;
1838
1839 while (value)
1840 {
1841 count++;
1842 value &= value - 1; /* Clear the least-significant set bit. */
1843 }
1844
1845 return count;
1846 }
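/* For example, bit_count (0x29) loops three times:
   0x29 & 0x28 = 0x28, 0x28 & 0x27 = 0x20, 0x20 & 0x1f = 0x00,
   so it returns 3; each iteration clears exactly one set bit. */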
1847
1848 typedef struct
1849 {
1850 enum machine_mode mode;
1851 const char *name;
1852 } arm_fixed_mode_set;
1853
1854 /* A small helper for setting fixed-point libfuncs. */
1855
1856 static void
1857 arm_set_fixed_optab_libfunc (optab optable, enum machine_mode mode,
1858 const char *funcname, const char *modename,
1859 int num_suffix)
1860 {
1861 char buffer[50];
1862
1863 if (num_suffix == 0)
1864 sprintf (buffer, "__gnu_%s%s", funcname, modename);
1865 else
1866 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
1867
1868 set_optab_libfunc (optable, mode, buffer);
1869 }
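/* For instance (mirroring one of the calls made further down), the call
     arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3);
   registers the name "__gnu_addsq3" for SQmode addition, matching the
   __gnu_-prefixed fixed-point helpers provided by libgcc. */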
1870
1871 static void
1872 arm_set_fixed_conv_libfunc (convert_optab optable, enum machine_mode to,
1873 enum machine_mode from, const char *funcname,
1874 const char *toname, const char *fromname)
1875 {
1876 char buffer[50];
1877 const char *maybe_suffix_2 = "";
1878
1879 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1880 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
1881 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
1882 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
1883 maybe_suffix_2 = "2";
1884
1885 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
1886 maybe_suffix_2);
1887
1888 set_conv_libfunc (optable, to, from, buffer);
1889 }
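/* Following the fixed-bit.h rule above: converting QQmode to HQmode (both
   signed fractional modes) keeps the "2" suffix and registers
   "__gnu_fractqqhq2", whereas an integer/fixed-point conversion such as
   SImode to SQmode drops it and registers "__gnu_fractsisq". */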
1890
1891 /* Set up library functions unique to ARM. */
1892
1893 static void
1894 arm_init_libfuncs (void)
1895 {
1896 /* For Linux, we have access to kernel support for atomic operations. */
1897 if (arm_abi == ARM_ABI_AAPCS_LINUX)
1898 init_sync_libfuncs (2 * UNITS_PER_WORD);
1899
1900 /* There are no special library functions unless we are using the
1901 ARM BPABI. */
1902 if (!TARGET_BPABI)
1903 return;
1904
1905 /* The functions below are described in Section 4 of the "Run-Time
1906 ABI for the ARM architecture", Version 1.0. */
1907
1908 /* Double-precision floating-point arithmetic. Table 2. */
1909 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
1910 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
1911 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
1912 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
1913 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
1914
1915 /* Double-precision comparisons. Table 3. */
1916 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
1917 set_optab_libfunc (ne_optab, DFmode, NULL);
1918 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
1919 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
1920 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
1921 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
1922 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
1923
1924 /* Single-precision floating-point arithmetic. Table 4. */
1925 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
1926 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
1927 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
1928 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
1929 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
1930
1931 /* Single-precision comparisons. Table 5. */
1932 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
1933 set_optab_libfunc (ne_optab, SFmode, NULL);
1934 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
1935 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
1936 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
1937 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
1938 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
1939
1940 /* Floating-point to integer conversions. Table 6. */
1941 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
1942 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
1943 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
1944 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
1945 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
1946 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
1947 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
1948 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
1949
1950 /* Conversions between floating types. Table 7. */
1951 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
1952 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
1953
1954 /* Integer to floating-point conversions. Table 8. */
1955 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
1956 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
1957 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
1958 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
1959 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
1960 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
1961 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
1962 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
1963
1964 /* Long long. Table 9. */
1965 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
1966 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
1967 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
1968 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
1969 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
1970 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
1971 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
1972 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
1973
1974 /* Integer (32/32->32) division. \S 4.3.1. */
1975 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
1976 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
1977
1978 /* The divmod functions are designed so that they can be used for
1979 plain division, even though they return both the quotient and the
1980 remainder. The quotient is returned in the usual location (i.e.,
1981 r0 for SImode, {r0, r1} for DImode), just as would be expected
1982 for an ordinary division routine. Because the AAPCS calling
1983 conventions specify that all of { r0, r1, r2, r3 } are
1984 call-clobbered registers, there is no need to tell the compiler
1985 explicitly that those registers are clobbered by these
1986 routines. */
1987 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
1988 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
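/* As a sketch of what this buys us: for "long long q = a / b;" the
   compiler can simply call __aeabi_ldivmod, use the quotient that comes
   back in {r0, r1} and ignore the remainder in {r2, r3}; no extra clobber
   information is needed because those registers are call-clobbered under
   the AAPCS anyway. */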
1989
1990 /* For SImode division the ABI provides div-without-mod routines,
1991 which are faster. */
1992 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
1993 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
1994
1995 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1996 divmod libcalls instead. */
1997 set_optab_libfunc (smod_optab, DImode, NULL);
1998 set_optab_libfunc (umod_optab, DImode, NULL);
1999 set_optab_libfunc (smod_optab, SImode, NULL);
2000 set_optab_libfunc (umod_optab, SImode, NULL);
2001
2002 /* Half-precision float operations. The compiler handles all operations
2003 with NULL libfuncs by converting to SFmode. */
2004 switch (arm_fp16_format)
2005 {
2006 case ARM_FP16_FORMAT_IEEE:
2007 case ARM_FP16_FORMAT_ALTERNATIVE:
2008
2009 /* Conversions. */
2010 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2011 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2012 ? "__gnu_f2h_ieee"
2013 : "__gnu_f2h_alternative"));
2014 set_conv_libfunc (sext_optab, SFmode, HFmode,
2015 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2016 ? "__gnu_h2f_ieee"
2017 : "__gnu_h2f_alternative"));
2018
2019 /* Arithmetic. */
2020 set_optab_libfunc (add_optab, HFmode, NULL);
2021 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2022 set_optab_libfunc (smul_optab, HFmode, NULL);
2023 set_optab_libfunc (neg_optab, HFmode, NULL);
2024 set_optab_libfunc (sub_optab, HFmode, NULL);
2025
2026 /* Comparisons. */
2027 set_optab_libfunc (eq_optab, HFmode, NULL);
2028 set_optab_libfunc (ne_optab, HFmode, NULL);
2029 set_optab_libfunc (lt_optab, HFmode, NULL);
2030 set_optab_libfunc (le_optab, HFmode, NULL);
2031 set_optab_libfunc (ge_optab, HFmode, NULL);
2032 set_optab_libfunc (gt_optab, HFmode, NULL);
2033 set_optab_libfunc (unord_optab, HFmode, NULL);
2034 break;
2035
2036 default:
2037 break;
2038 }
2039
2040 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2041 {
2042 const arm_fixed_mode_set fixed_arith_modes[] =
2043 {
2044 { QQmode, "qq" },
2045 { UQQmode, "uqq" },
2046 { HQmode, "hq" },
2047 { UHQmode, "uhq" },
2048 { SQmode, "sq" },
2049 { USQmode, "usq" },
2050 { DQmode, "dq" },
2051 { UDQmode, "udq" },
2052 { TQmode, "tq" },
2053 { UTQmode, "utq" },
2054 { HAmode, "ha" },
2055 { UHAmode, "uha" },
2056 { SAmode, "sa" },
2057 { USAmode, "usa" },
2058 { DAmode, "da" },
2059 { UDAmode, "uda" },
2060 { TAmode, "ta" },
2061 { UTAmode, "uta" }
2062 };
2063 const arm_fixed_mode_set fixed_conv_modes[] =
2064 {
2065 { QQmode, "qq" },
2066 { UQQmode, "uqq" },
2067 { HQmode, "hq" },
2068 { UHQmode, "uhq" },
2069 { SQmode, "sq" },
2070 { USQmode, "usq" },
2071 { DQmode, "dq" },
2072 { UDQmode, "udq" },
2073 { TQmode, "tq" },
2074 { UTQmode, "utq" },
2075 { HAmode, "ha" },
2076 { UHAmode, "uha" },
2077 { SAmode, "sa" },
2078 { USAmode, "usa" },
2079 { DAmode, "da" },
2080 { UDAmode, "uda" },
2081 { TAmode, "ta" },
2082 { UTAmode, "uta" },
2083 { QImode, "qi" },
2084 { HImode, "hi" },
2085 { SImode, "si" },
2086 { DImode, "di" },
2087 { TImode, "ti" },
2088 { SFmode, "sf" },
2089 { DFmode, "df" }
2090 };
2091 unsigned int i, j;
2092
2093 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2094 {
2095 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2096 "add", fixed_arith_modes[i].name, 3);
2097 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2098 "ssadd", fixed_arith_modes[i].name, 3);
2099 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2100 "usadd", fixed_arith_modes[i].name, 3);
2101 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2102 "sub", fixed_arith_modes[i].name, 3);
2103 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2104 "sssub", fixed_arith_modes[i].name, 3);
2105 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2106 "ussub", fixed_arith_modes[i].name, 3);
2107 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2108 "mul", fixed_arith_modes[i].name, 3);
2109 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2110 "ssmul", fixed_arith_modes[i].name, 3);
2111 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2112 "usmul", fixed_arith_modes[i].name, 3);
2113 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2114 "div", fixed_arith_modes[i].name, 3);
2115 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2116 "udiv", fixed_arith_modes[i].name, 3);
2117 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2118 "ssdiv", fixed_arith_modes[i].name, 3);
2119 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2120 "usdiv", fixed_arith_modes[i].name, 3);
2121 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2122 "neg", fixed_arith_modes[i].name, 2);
2123 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2124 "ssneg", fixed_arith_modes[i].name, 2);
2125 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2126 "usneg", fixed_arith_modes[i].name, 2);
2127 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2128 "ashl", fixed_arith_modes[i].name, 3);
2129 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2130 "ashr", fixed_arith_modes[i].name, 3);
2131 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2132 "lshr", fixed_arith_modes[i].name, 3);
2133 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2134 "ssashl", fixed_arith_modes[i].name, 3);
2135 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2136 "usashl", fixed_arith_modes[i].name, 3);
2137 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2138 "cmp", fixed_arith_modes[i].name, 2);
2139 }
2140
2141 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2142 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2143 {
2144 if (i == j
2145 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2146 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2147 continue;
2148
2149 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2150 fixed_conv_modes[j].mode, "fract",
2151 fixed_conv_modes[i].name,
2152 fixed_conv_modes[j].name);
2153 arm_set_fixed_conv_libfunc (satfract_optab,
2154 fixed_conv_modes[i].mode,
2155 fixed_conv_modes[j].mode, "satfract",
2156 fixed_conv_modes[i].name,
2157 fixed_conv_modes[j].name);
2158 arm_set_fixed_conv_libfunc (fractuns_optab,
2159 fixed_conv_modes[i].mode,
2160 fixed_conv_modes[j].mode, "fractuns",
2161 fixed_conv_modes[i].name,
2162 fixed_conv_modes[j].name);
2163 arm_set_fixed_conv_libfunc (satfractuns_optab,
2164 fixed_conv_modes[i].mode,
2165 fixed_conv_modes[j].mode, "satfractuns",
2166 fixed_conv_modes[i].name,
2167 fixed_conv_modes[j].name);
2168 }
2169 }
2170
2171 if (TARGET_AAPCS_BASED)
2172 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2173 }
2174
2175 /* On AAPCS systems, this is the "struct __va_list". */
2176 static GTY(()) tree va_list_type;
2177
2178 /* Return the type to use as __builtin_va_list. */
2179 static tree
2180 arm_build_builtin_va_list (void)
2181 {
2182 tree va_list_name;
2183 tree ap_field;
2184
2185 if (!TARGET_AAPCS_BASED)
2186 return std_build_builtin_va_list ();
2187
2188 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2189 defined as:
2190
2191 struct __va_list
2192 {
2193 void *__ap;
2194 };
2195
2196 The C Library ABI further reinforces this definition in \S
2197 4.1.
2198
2199 We must follow this definition exactly. The structure tag
2200 name is visible in C++ mangled names, and thus forms a part
2201 of the ABI. The field name may be used by people who
2202 #include <stdarg.h>. */
2203 /* Create the type. */
2204 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2205 /* Give it the required name. */
2206 va_list_name = build_decl (BUILTINS_LOCATION,
2207 TYPE_DECL,
2208 get_identifier ("__va_list"),
2209 va_list_type);
2210 DECL_ARTIFICIAL (va_list_name) = 1;
2211 TYPE_NAME (va_list_type) = va_list_name;
2212 TYPE_STUB_DECL (va_list_type) = va_list_name;
2213 /* Create the __ap field. */
2214 ap_field = build_decl (BUILTINS_LOCATION,
2215 FIELD_DECL,
2216 get_identifier ("__ap"),
2217 ptr_type_node);
2218 DECL_ARTIFICIAL (ap_field) = 1;
2219 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2220 TYPE_FIELDS (va_list_type) = ap_field;
2221 /* Compute its layout. */
2222 layout_type (va_list_type);
2223
2224 return va_list_type;
2225 }
2226
2227 /* Return an expression of type "void *" pointing to the next
2228 available argument in a variable-argument list. VALIST is the
2229 user-level va_list object, of type __builtin_va_list. */
2230 static tree
2231 arm_extract_valist_ptr (tree valist)
2232 {
2233 if (TREE_TYPE (valist) == error_mark_node)
2234 return error_mark_node;
2235
2236 /* On an AAPCS target, the pointer is stored within "struct
2237 va_list". */
2238 if (TARGET_AAPCS_BASED)
2239 {
2240 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2241 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2242 valist, ap_field, NULL_TREE);
2243 }
2244
2245 return valist;
2246 }
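/* Roughly speaking, on an AAPCS target this turns a use of "ap" (of type
   struct __va_list) into "ap.__ap", so the generic va_arg machinery below
   only ever sees the underlying "void *" cursor. */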
2247
2248 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2249 static void
2250 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2251 {
2252 valist = arm_extract_valist_ptr (valist);
2253 std_expand_builtin_va_start (valist, nextarg);
2254 }
2255
2256 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2257 static tree
2258 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2259 gimple_seq *post_p)
2260 {
2261 valist = arm_extract_valist_ptr (valist);
2262 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2263 }
2264
2265 /* Fix up any incompatible options that the user has specified. */
2266 static void
2267 arm_option_override (void)
2268 {
2269 if (global_options_set.x_arm_arch_option)
2270 arm_selected_arch = &all_architectures[arm_arch_option];
2271
2272 if (global_options_set.x_arm_cpu_option)
2273 {
2274 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2275 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2276 }
2277
2278 if (global_options_set.x_arm_tune_option)
2279 arm_selected_tune = &all_cores[(int) arm_tune_option];
2280
2281 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2282 SUBTARGET_OVERRIDE_OPTIONS;
2283 #endif
2284
2285 if (arm_selected_arch)
2286 {
2287 if (arm_selected_cpu)
2288 {
2289 /* Check for conflict between mcpu and march. */
2290 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2291 {
2292 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2293 arm_selected_cpu->name, arm_selected_arch->name);
2294 /* -march wins for code generation.
2295 -mcpu wins for default tuning. */
2296 if (!arm_selected_tune)
2297 arm_selected_tune = arm_selected_cpu;
2298
2299 arm_selected_cpu = arm_selected_arch;
2300 }
2301 else
2302 /* -mcpu wins. */
2303 arm_selected_arch = NULL;
2304 }
2305 else
2306 /* Pick a CPU based on the architecture. */
2307 arm_selected_cpu = arm_selected_arch;
2308 }
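/* For example (illustrative only): "-march=armv5te -mcpu=cortex-a8" hits
   the warning above, with armv5te driving code generation and cortex-a8
   supplying the default tuning, whereas a -mcpu/-march pair that agrees
   on the architecture flags produces no warning and -mcpu simply wins. */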
2309
2310 /* If the user did not specify a processor, choose one for them. */
2311 if (!arm_selected_cpu)
2312 {
2313 const struct processors * sel;
2314 unsigned int sought;
2315
2316 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2317 if (!arm_selected_cpu->name)
2318 {
2319 #ifdef SUBTARGET_CPU_DEFAULT
2320 /* Use the subtarget default CPU if none was specified by
2321 configure. */
2322 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2323 #endif
2324 /* Default to ARM6. */
2325 if (!arm_selected_cpu->name)
2326 arm_selected_cpu = &all_cores[arm6];
2327 }
2328
2329 sel = arm_selected_cpu;
2330 insn_flags = sel->flags;
2331
2332 /* Now check to see if the user has specified some command line
2333 switches that require certain abilities from the CPU. */
2334 sought = 0;
2335
2336 if (TARGET_INTERWORK || TARGET_THUMB)
2337 {
2338 sought |= (FL_THUMB | FL_MODE32);
2339
2340 /* There are no ARM processors that support both APCS-26 and
2341 interworking. Therefore we force FL_MODE26 to be removed
2342 from insn_flags here (if it was set), so that the search
2343 below will always be able to find a compatible processor. */
2344 insn_flags &= ~FL_MODE26;
2345 }
2346
2347 if (sought != 0 && ((sought & insn_flags) != sought))
2348 {
2349 /* Try to locate a CPU type that supports all of the abilities
2350 of the default CPU, plus the extra abilities requested by
2351 the user. */
2352 for (sel = all_cores; sel->name != NULL; sel++)
2353 if ((sel->flags & sought) == (sought | insn_flags))
2354 break;
2355
2356 if (sel->name == NULL)
2357 {
2358 unsigned current_bit_count = 0;
2359 const struct processors * best_fit = NULL;
2360
2361 /* Ideally we would like to issue an error message here
2362 saying that it was not possible to find a CPU compatible
2363 with the default CPU, but which also supports the command
2364 line options specified by the programmer, and so they
2365 ought to use the -mcpu=<name> command line option to
2366 override the default CPU type.
2367
2368 If we cannot find a cpu that has both the
2369 characteristics of the default cpu and the given
2370 command line options we scan the array again looking
2371 for a best match. */
2372 for (sel = all_cores; sel->name != NULL; sel++)
2373 if ((sel->flags & sought) == sought)
2374 {
2375 unsigned count;
2376
2377 count = bit_count (sel->flags & insn_flags);
2378
2379 if (count >= current_bit_count)
2380 {
2381 best_fit = sel;
2382 current_bit_count = count;
2383 }
2384 }
2385
2386 gcc_assert (best_fit);
2387 sel = best_fit;
2388 }
2389
2390 arm_selected_cpu = sel;
2391 }
2392 }
2393
2394 gcc_assert (arm_selected_cpu);
2395 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2396 if (!arm_selected_tune)
2397 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2398
2399 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2400 insn_flags = arm_selected_cpu->flags;
2401 arm_base_arch = arm_selected_cpu->base_arch;
2402
2403 arm_tune = arm_selected_tune->core;
2404 tune_flags = arm_selected_tune->flags;
2405 current_tune = arm_selected_tune->tune;
2406
2407 /* Make sure that the processor choice does not conflict with any of the
2408 other command line choices. */
2409 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2410 error ("target CPU does not support ARM mode");
2411
2412 /* BPABI targets use linker tricks to allow interworking on cores
2413 without thumb support. */
2414 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2415 {
2416 warning (0, "target CPU does not support interworking" );
2417 target_flags &= ~MASK_INTERWORK;
2418 }
2419
2420 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2421 {
2422 warning (0, "target CPU does not support THUMB instructions");
2423 target_flags &= ~MASK_THUMB;
2424 }
2425
2426 if (TARGET_APCS_FRAME && TARGET_THUMB)
2427 {
2428 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2429 target_flags &= ~MASK_APCS_FRAME;
2430 }
2431
2432 /* Callee super interworking implies thumb interworking. Adding
2433 this to the flags here simplifies the logic elsewhere. */
2434 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2435 target_flags |= MASK_INTERWORK;
2436
2437 /* TARGET_BACKTRACE calls leaf_function_p, which would crash if called
2438 from here, where no function is currently being compiled. */
2439 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2440 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2441
2442 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2443 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2444
2445 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2446 {
2447 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2448 target_flags |= MASK_APCS_FRAME;
2449 }
2450
2451 if (TARGET_POKE_FUNCTION_NAME)
2452 target_flags |= MASK_APCS_FRAME;
2453
2454 if (TARGET_APCS_REENT && flag_pic)
2455 error ("-fpic and -mapcs-reent are incompatible");
2456
2457 if (TARGET_APCS_REENT)
2458 warning (0, "APCS reentrant code not supported. Ignored");
2459
2460 /* If this target is normally configured to use APCS frames, warn if they
2461 are turned off and debugging is turned on. */
2462 if (TARGET_ARM
2463 && write_symbols != NO_DEBUG
2464 && !TARGET_APCS_FRAME
2465 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2466 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2467
2468 if (TARGET_APCS_FLOAT)
2469 warning (0, "passing floating point arguments in fp regs not yet supported");
2470
2471 if (TARGET_LITTLE_WORDS)
2472 warning (OPT_Wdeprecated, "%<mwords-little-endian%> is deprecated and "
2473 "will be removed in a future release");
2474
2475 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2476 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2477 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2478 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2479 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2480 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2481 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2482 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2483 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2484 arm_arch6m = arm_arch6 && !arm_arch_notm;
2485 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2486 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2487 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2488 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2489 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2490
2491 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2492 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2493 thumb_code = TARGET_ARM == 0;
2494 thumb1_code = TARGET_THUMB1 != 0;
2495 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2496 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2497 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2498 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2499 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2500 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2501 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2502 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2503 if (arm_restrict_it == 2)
2504 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2505
2506 if (!TARGET_THUMB2)
2507 arm_restrict_it = 0;
2508
2509 /* If we are not using the default (ARM mode) section anchor offset
2510 ranges, then set the correct ranges now. */
2511 if (TARGET_THUMB1)
2512 {
2513 /* Thumb-1 LDR instructions cannot have negative offsets.
2514 Permissible positive offset ranges are 5-bit (for byte loads),
2515 6-bit (for halfword loads), or 7-bit (for word loads).
2516 Empirical results suggest a 7-bit anchor range gives the best
2517 overall code size. */
2518 targetm.min_anchor_offset = 0;
2519 targetm.max_anchor_offset = 127;
2520 }
2521 else if (TARGET_THUMB2)
2522 {
2523 /* The minimum is set such that the total size of the block
2524 for a particular anchor is 248 + 1 + 4095 bytes, which is
2525 divisible by eight, ensuring natural spacing of anchors. */
2526 targetm.min_anchor_offset = -248;
2527 targetm.max_anchor_offset = 4095;
2528 }
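/* For the Thumb-2 case above, the block covered by one anchor spans
   offsets -248 .. 4095, i.e. 248 + 1 + 4095 = 4344 bytes, and
   4344 = 8 * 543, hence the divisibility-by-eight claim. */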
2529
2530 /* V5 code we generate is completely interworking capable, so we turn off
2531 TARGET_INTERWORK here to avoid many tests later on. */
2532
2533 /* XXX However, we must pass the right pre-processor defines to CPP
2534 or GLD can get confused. This is a hack. */
2535 if (TARGET_INTERWORK)
2536 arm_cpp_interwork = 1;
2537
2538 if (arm_arch5)
2539 target_flags &= ~MASK_INTERWORK;
2540
2541 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2542 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2543
2544 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2545 error ("iwmmxt abi requires an iwmmxt capable cpu");
2546
2547 if (!global_options_set.x_arm_fpu_index)
2548 {
2549 const char *target_fpu_name;
2550 bool ok;
2551
2552 #ifdef FPUTYPE_DEFAULT
2553 target_fpu_name = FPUTYPE_DEFAULT;
2554 #else
2555 target_fpu_name = "vfp";
2556 #endif
2557
2558 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2559 CL_TARGET);
2560 gcc_assert (ok);
2561 }
2562
2563 arm_fpu_desc = &all_fpus[arm_fpu_index];
2564
2565 switch (arm_fpu_desc->model)
2566 {
2567 case ARM_FP_MODEL_VFP:
2568 arm_fpu_attr = FPU_VFP;
2569 break;
2570
2571 default:
2572 gcc_unreachable();
2573 }
2574
2575 if (TARGET_AAPCS_BASED)
2576 {
2577 if (TARGET_CALLER_INTERWORKING)
2578 error ("AAPCS does not support -mcaller-super-interworking");
2579 else
2580 if (TARGET_CALLEE_INTERWORKING)
2581 error ("AAPCS does not support -mcallee-super-interworking");
2582 }
2583
2584 /* iWMMXt and NEON are incompatible. */
2585 if (TARGET_IWMMXT && TARGET_NEON)
2586 error ("iWMMXt and NEON are incompatible");
2587
2588 /* iWMMXt unsupported under Thumb mode. */
2589 if (TARGET_THUMB && TARGET_IWMMXT)
2590 error ("iWMMXt unsupported under Thumb mode");
2591
2592 /* __fp16 support currently assumes the core has ldrh. */
2593 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2594 sorry ("__fp16 and no ldrh");
2595
2596 /* If soft-float is specified then don't use FPU. */
2597 if (TARGET_SOFT_FLOAT)
2598 arm_fpu_attr = FPU_NONE;
2599
2600 if (TARGET_AAPCS_BASED)
2601 {
2602 if (arm_abi == ARM_ABI_IWMMXT)
2603 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2604 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2605 && TARGET_HARD_FLOAT
2606 && TARGET_VFP)
2607 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2608 else
2609 arm_pcs_default = ARM_PCS_AAPCS;
2610 }
2611 else
2612 {
2613 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2614 sorry ("-mfloat-abi=hard and VFP");
2615
2616 if (arm_abi == ARM_ABI_APCS)
2617 arm_pcs_default = ARM_PCS_APCS;
2618 else
2619 arm_pcs_default = ARM_PCS_ATPCS;
2620 }
2621
2622 /* For arm2/3 there is no need to do any scheduling if we are doing
2623 software floating-point. */
2624 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2625 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2626
2627 /* Use the cp15 method if it is available. */
2628 if (target_thread_pointer == TP_AUTO)
2629 {
2630 if (arm_arch6k && !TARGET_THUMB1)
2631 target_thread_pointer = TP_CP15;
2632 else
2633 target_thread_pointer = TP_SOFT;
2634 }
2635
2636 if (TARGET_HARD_TP && TARGET_THUMB1)
2637 error ("can not use -mtp=cp15 with 16-bit Thumb");
2638
2639 /* Override the default structure alignment for AAPCS ABI. */
2640 if (!global_options_set.x_arm_structure_size_boundary)
2641 {
2642 if (TARGET_AAPCS_BASED)
2643 arm_structure_size_boundary = 8;
2644 }
2645 else
2646 {
2647 if (arm_structure_size_boundary != 8
2648 && arm_structure_size_boundary != 32
2649 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2650 {
2651 if (ARM_DOUBLEWORD_ALIGN)
2652 warning (0,
2653 "structure size boundary can only be set to 8, 32 or 64");
2654 else
2655 warning (0, "structure size boundary can only be set to 8 or 32");
2656 arm_structure_size_boundary
2657 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2658 }
2659 }
2660
2661 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2662 {
2663 error ("RTP PIC is incompatible with Thumb");
2664 flag_pic = 0;
2665 }
2666
2667 /* If stack checking is disabled, we can use r10 as the PIC register,
2668 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2669 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2670 {
2671 if (TARGET_VXWORKS_RTP)
2672 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2673 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2674 }
2675
2676 if (flag_pic && TARGET_VXWORKS_RTP)
2677 arm_pic_register = 9;
2678
2679 if (arm_pic_register_string != NULL)
2680 {
2681 int pic_register = decode_reg_name (arm_pic_register_string);
2682
2683 if (!flag_pic)
2684 warning (0, "-mpic-register= is useless without -fpic");
2685
2686 /* Prevent the user from choosing an obviously stupid PIC register. */
2687 else if (pic_register < 0 || call_used_regs[pic_register]
2688 || pic_register == HARD_FRAME_POINTER_REGNUM
2689 || pic_register == STACK_POINTER_REGNUM
2690 || pic_register >= PC_REGNUM
2691 || (TARGET_VXWORKS_RTP
2692 && (unsigned int) pic_register != arm_pic_register))
2693 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2694 else
2695 arm_pic_register = pic_register;
2696 }
2697
2698 if (TARGET_VXWORKS_RTP
2699 && !global_options_set.x_arm_pic_data_is_text_relative)
2700 arm_pic_data_is_text_relative = 0;
2701
2702 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2703 if (fix_cm3_ldrd == 2)
2704 {
2705 if (arm_selected_cpu->core == cortexm3)
2706 fix_cm3_ldrd = 1;
2707 else
2708 fix_cm3_ldrd = 0;
2709 }
2710
2711 /* Enable -munaligned-access by default for
2712 - all ARMv6 architecture-based processors
2713 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2714 - ARMv8 architecture-based processors.
2715
2716 Disable -munaligned-access by default for
2717 - all pre-ARMv6 architecture-based processors
2718 - ARMv6-M architecture-based processors. */
2719
2720 if (unaligned_access == 2)
2721 {
2722 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2723 unaligned_access = 1;
2724 else
2725 unaligned_access = 0;
2726 }
2727 else if (unaligned_access == 1
2728 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2729 {
2730 warning (0, "target CPU does not support unaligned accesses");
2731 unaligned_access = 0;
2732 }
2733
2734 if (TARGET_THUMB1 && flag_schedule_insns)
2735 {
2736 /* Don't warn since it's on by default in -O2. */
2737 flag_schedule_insns = 0;
2738 }
2739
2740 if (optimize_size)
2741 {
2742 /* If optimizing for size, bump the number of instructions that we
2743 are prepared to conditionally execute (even on a StrongARM). */
2744 max_insns_skipped = 6;
2745 }
2746 else
2747 max_insns_skipped = current_tune->max_insns_skipped;
2748
2749 /* Hot/Cold partitioning is not currently supported, since we can't
2750 handle literal pool placement in that case. */
2751 if (flag_reorder_blocks_and_partition)
2752 {
2753 inform (input_location,
2754 "-freorder-blocks-and-partition not supported on this architecture");
2755 flag_reorder_blocks_and_partition = 0;
2756 flag_reorder_blocks = 1;
2757 }
2758
2759 if (flag_pic)
2760 /* Hoisting PIC address calculations more aggressively provides a small,
2761 but measurable, size reduction for PIC code. Therefore, we decrease
2762 the bar for unrestricted expression hoisting to the cost of PIC address
2763 calculation, which is 2 instructions. */
2764 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2765 global_options.x_param_values,
2766 global_options_set.x_param_values);
2767
2768 /* ARM EABI defaults to strict volatile bitfields. */
2769 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
2770 && abi_version_at_least(2))
2771 flag_strict_volatile_bitfields = 1;
2772
2773 /* Enable software prefetching at -O3 for CPUs that have prefetch, and where
2774 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
2775 if (flag_prefetch_loop_arrays < 0
2776 && HAVE_prefetch
2777 && optimize >= 3
2778 && current_tune->num_prefetch_slots > 0)
2779 flag_prefetch_loop_arrays = 1;
2780
2781 /* Set up parameters to be used in the prefetching algorithm. Do not override
2782 the defaults unless we are tuning for a core we have researched values for. */
2783 if (current_tune->num_prefetch_slots > 0)
2784 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2785 current_tune->num_prefetch_slots,
2786 global_options.x_param_values,
2787 global_options_set.x_param_values);
2788 if (current_tune->l1_cache_line_size >= 0)
2789 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2790 current_tune->l1_cache_line_size,
2791 global_options.x_param_values,
2792 global_options_set.x_param_values);
2793 if (current_tune->l1_cache_size >= 0)
2794 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2795 current_tune->l1_cache_size,
2796 global_options.x_param_values,
2797 global_options_set.x_param_values);
2798
2799 /* Use Neon rather than core registers to perform 64-bit
2800 operations. */
2801 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
2802 if (use_neon_for_64bits == 1)
2803 prefer_neon_for_64bits = true;
2804
2805 /* Use the alternative scheduling-pressure algorithm by default. */
2806 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
2807 global_options.x_param_values,
2808 global_options_set.x_param_values);
2809
2810 /* Disable shrink-wrap when optimizing function for size, since it tends to
2811 generate additional returns. */
2812 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
2813 flag_shrink_wrap = false;
2814 /* TBD: Dwarf info for apcs frame is not handled yet. */
2815 if (TARGET_APCS_FRAME)
2816 flag_shrink_wrap = false;
2817
2818 /* We only support -mslow-flash-data on armv7-m targets. */
2819 if (target_slow_flash_data
2820 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2821 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
2822 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2823
2824 /* Currently, for slow flash data, we just disable literal pools. */
2825 if (target_slow_flash_data)
2826 arm_disable_literal_pool = true;
2827
2828 /* Register global variables with the garbage collector. */
2829 arm_add_gc_roots ();
2830 }
2831
2832 static void
2833 arm_add_gc_roots (void)
2834 {
2835 gcc_obstack_init(&minipool_obstack);
2836 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
2837 }
2838 \f
2839 /* A table of known ARM exception types.
2840 For use with the interrupt function attribute. */
2841
2842 typedef struct
2843 {
2844 const char *const arg;
2845 const unsigned long return_value;
2846 }
2847 isr_attribute_arg;
2848
2849 static const isr_attribute_arg isr_attribute_args [] =
2850 {
2851 { "IRQ", ARM_FT_ISR },
2852 { "irq", ARM_FT_ISR },
2853 { "FIQ", ARM_FT_FIQ },
2854 { "fiq", ARM_FT_FIQ },
2855 { "ABORT", ARM_FT_ISR },
2856 { "abort", ARM_FT_ISR },
2857 { "ABORT", ARM_FT_ISR },
2858 { "abort", ARM_FT_ISR },
2859 { "UNDEF", ARM_FT_EXCEPTION },
2860 { "undef", ARM_FT_EXCEPTION },
2861 { "SWI", ARM_FT_EXCEPTION },
2862 { "swi", ARM_FT_EXCEPTION },
2863 { NULL, ARM_FT_NORMAL }
2864 };
2865
2866 /* Returns the (interrupt) function type of the current
2867 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2868
2869 static unsigned long
2870 arm_isr_value (tree argument)
2871 {
2872 const isr_attribute_arg * ptr;
2873 const char * arg;
2874
2875 if (!arm_arch_notm)
2876 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
2877
2878 /* No argument - default to IRQ. */
2879 if (argument == NULL_TREE)
2880 return ARM_FT_ISR;
2881
2882 /* Get the value of the argument. */
2883 if (TREE_VALUE (argument) == NULL_TREE
2884 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
2885 return ARM_FT_UNKNOWN;
2886
2887 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
2888
2889 /* Check it against the list of known arguments. */
2890 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
2891 if (streq (arg, ptr->arg))
2892 return ptr->return_value;
2893
2894 /* An unrecognized interrupt type. */
2895 return ARM_FT_UNKNOWN;
2896 }
2897
2898 /* Computes the type of the current function. */
2899
2900 static unsigned long
2901 arm_compute_func_type (void)
2902 {
2903 unsigned long type = ARM_FT_UNKNOWN;
2904 tree a;
2905 tree attr;
2906
2907 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
2908
2909 /* Decide if the current function is volatile. Such functions
2910 never return, and many memory cycles can be saved by not storing
2911 register values that will never be needed again. This optimization
2912 was added to speed up context switching in a kernel application. */
2913 if (optimize > 0
2914 && (TREE_NOTHROW (current_function_decl)
2915 || !(flag_unwind_tables
2916 || (flag_exceptions
2917 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
2918 && TREE_THIS_VOLATILE (current_function_decl))
2919 type |= ARM_FT_VOLATILE;
2920
2921 if (cfun->static_chain_decl != NULL)
2922 type |= ARM_FT_NESTED;
2923
2924 attr = DECL_ATTRIBUTES (current_function_decl);
2925
2926 a = lookup_attribute ("naked", attr);
2927 if (a != NULL_TREE)
2928 type |= ARM_FT_NAKED;
2929
2930 a = lookup_attribute ("isr", attr);
2931 if (a == NULL_TREE)
2932 a = lookup_attribute ("interrupt", attr);
2933
2934 if (a == NULL_TREE)
2935 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
2936 else
2937 type |= arm_isr_value (TREE_VALUE (a));
2938
2939 return type;
2940 }
2941
2942 /* Returns the type of the current function. */
2943
2944 unsigned long
2945 arm_current_func_type (void)
2946 {
2947 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
2948 cfun->machine->func_type = arm_compute_func_type ();
2949
2950 return cfun->machine->func_type;
2951 }
2952
2953 bool
2954 arm_allocate_stack_slots_for_args (void)
2955 {
2956 /* Naked functions should not allocate stack slots for arguments. */
2957 return !IS_NAKED (arm_current_func_type ());
2958 }
2959
2960 static bool
2961 arm_warn_func_return (tree decl)
2962 {
2963 /* Naked functions are implemented entirely in assembly, including the
2964 return sequence, so suppress warnings about this. */
2965 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
2966 }
2967
2968 \f
2969 /* Output assembler code for a block containing the constant parts
2970 of a trampoline, leaving space for the variable parts.
2971
2972 On the ARM, (if r8 is the static chain regnum, and remembering that
2973 referencing pc adds an offset of 8) the trampoline looks like:
2974 ldr r8, [pc, #0]
2975 ldr pc, [pc]
2976 .word static chain value
2977 .word function's address
2978 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2979
2980 static void
2981 arm_asm_trampoline_template (FILE *f)
2982 {
2983 if (TARGET_ARM)
2984 {
2985 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
2986 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
2987 }
2988 else if (TARGET_THUMB2)
2989 {
2990 /* The Thumb-2 trampoline is similar to the arm implementation.
2991 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2992 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
2993 STATIC_CHAIN_REGNUM, PC_REGNUM);
2994 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
2995 }
2996 else
2997 {
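/* 16-bit Thumb cannot load a literal into a high register or into the
   pc, so the stub below saves r0/r1, loads the static chain value and
   the target address from the trampoline's data words through r0,
   stores the target over the saved r1 slot on the stack, and finally
   pops {r0, pc} to restore r0 and branch to the target. */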
2998 ASM_OUTPUT_ALIGN (f, 2);
2999 fprintf (f, "\t.code\t16\n");
3000 fprintf (f, ".Ltrampoline_start:\n");
3001 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3002 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3003 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3004 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3005 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3006 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3007 }
3008 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3009 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3010 }
3011
3012 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3013
3014 static void
3015 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3016 {
3017 rtx fnaddr, mem, a_tramp;
3018
3019 emit_block_move (m_tramp, assemble_trampoline_template (),
3020 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3021
3022 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3023 emit_move_insn (mem, chain_value);
3024
3025 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3026 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3027 emit_move_insn (mem, fnaddr);
3028
3029 a_tramp = XEXP (m_tramp, 0);
3030 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3031 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3032 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3033 }
3034
3035 /* Thumb trampolines should be entered in thumb mode, so set
3036 the bottom bit of the address. */
3037
3038 static rtx
3039 arm_trampoline_adjust_address (rtx addr)
3040 {
3041 if (TARGET_THUMB)
3042 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3043 NULL, 0, OPTAB_LIB_WIDEN);
3044 return addr;
3045 }
3046 \f
3047 /* Return 1 if it is possible to return using a single instruction.
3048 If SIBLING is non-null, this is a test for a return before a sibling
3049 call. SIBLING is the call insn, so we can examine its register usage. */
3050
3051 int
3052 use_return_insn (int iscond, rtx sibling)
3053 {
3054 int regno;
3055 unsigned int func_type;
3056 unsigned long saved_int_regs;
3057 unsigned HOST_WIDE_INT stack_adjust;
3058 arm_stack_offsets *offsets;
3059
3060 /* Never use a return instruction before reload has run. */
3061 if (!reload_completed)
3062 return 0;
3063
3064 func_type = arm_current_func_type ();
3065
3066 /* Naked, volatile and stack alignment functions need special
3067 consideration. */
3068 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3069 return 0;
3070
3071 /* So do interrupt functions that use the frame pointer and Thumb
3072 interrupt functions. */
3073 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3074 return 0;
3075
3076 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3077 && !optimize_function_for_size_p (cfun))
3078 return 0;
3079
3080 offsets = arm_get_frame_offsets ();
3081 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3082
3083 /* As do variadic functions. */
3084 if (crtl->args.pretend_args_size
3085 || cfun->machine->uses_anonymous_args
3086 /* Or if the function calls __builtin_eh_return () */
3087 || crtl->calls_eh_return
3088 /* Or if the function calls alloca */
3089 || cfun->calls_alloca
3090 /* Or if there is a stack adjustment. However, if the stack pointer
3091 is saved on the stack, we can use a pre-incrementing stack load. */
3092 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3093 && stack_adjust == 4)))
3094 return 0;
3095
3096 saved_int_regs = offsets->saved_regs_mask;
3097
3098 /* Unfortunately, the insn
3099
3100 ldmib sp, {..., sp, ...}
3101
3102 triggers a bug on most SA-110 based devices, such that the stack
3103 pointer won't be correctly restored if the instruction takes a
3104 page fault. We work around this problem by popping r3 along with
3105 the other registers, since that is never slower than executing
3106 another instruction.
3107
3108 We test for !arm_arch5 here, because code for any architecture
3109 less than this could potentially be run on one of the buggy
3110 chips. */
3111 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3112 {
3113 /* Validate that r3 is a call-clobbered register (always true in
3114 the default abi) ... */
3115 if (!call_used_regs[3])
3116 return 0;
3117
3118 /* ... that it isn't being used for a return value ... */
3119 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3120 return 0;
3121
3122 /* ... or for a tail-call argument ... */
3123 if (sibling)
3124 {
3125 gcc_assert (CALL_P (sibling));
3126
3127 if (find_regno_fusage (sibling, USE, 3))
3128 return 0;
3129 }
3130
3131 /* ... and that there are no call-saved registers in r0-r2
3132 (always true in the default ABI). */
3133 if (saved_int_regs & 0x7)
3134 return 0;
3135 }
3136
3137 /* Can't be done if interworking with Thumb, and any registers have been
3138 stacked. */
3139 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3140 return 0;
3141
3142 /* On StrongARM, conditional returns are expensive if they aren't
3143 taken and multiple registers have been stacked. */
3144 if (iscond && arm_tune_strongarm)
3145 {
3146 /* Conditional return when just the LR is stored is a simple
3147 conditional-load instruction, that's not expensive. */
3148 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3149 return 0;
3150
3151 if (flag_pic
3152 && arm_pic_register != INVALID_REGNUM
3153 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3154 return 0;
3155 }
3156
3157 /* If there are saved registers but the LR isn't saved, then we need
3158 two instructions for the return. */
3159 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3160 return 0;
3161
3162 /* Can't be done if any of the VFP regs are pushed,
3163 since this also requires an insn. */
3164 if (TARGET_HARD_FLOAT && TARGET_VFP)
3165 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3166 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3167 return 0;
3168
3169 if (TARGET_REALLY_IWMMXT)
3170 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3171 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3172 return 0;
3173
3174 return 1;
3175 }
3176
3177 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3178 shrink-wrapping if possible. This is the case if we need to emit a
3179 prologue, which we can test by looking at the offsets. */
3180 bool
3181 use_simple_return_p (void)
3182 {
3183 arm_stack_offsets *offsets;
3184
3185 offsets = arm_get_frame_offsets ();
3186 return offsets->outgoing_args != 0;
3187 }
3188
3189 /* Return TRUE if int I is a valid immediate ARM constant. */
3190
3191 int
3192 const_ok_for_arm (HOST_WIDE_INT i)
3193 {
3194 int lowbit;
3195
3196 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3197 be all zero, or all one. */
3198 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3199 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3200 != ((~(unsigned HOST_WIDE_INT) 0)
3201 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3202 return FALSE;
3203
3204 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3205
3206 /* Fast return for 0 and small values. We must do this for zero, since
3207 the code below can't handle that one case. */
3208 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3209 return TRUE;
3210
3211 /* Get the number of trailing zeros. */
3212 lowbit = ffs((int) i) - 1;
3213
3214 /* Only even shifts are allowed in ARM mode so round down to the
3215 nearest even number. */
3216 if (TARGET_ARM)
3217 lowbit &= ~1;
3218
3219 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3220 return TRUE;
3221
3222 if (TARGET_ARM)
3223 {
3224 /* Allow rotated constants in ARM mode. */
3225 if (lowbit <= 4
3226 && ((i & ~0xc000003f) == 0
3227 || (i & ~0xf000000f) == 0
3228 || (i & ~0xfc000003) == 0))
3229 return TRUE;
3230 }
3231 else
3232 {
3233 HOST_WIDE_INT v;
3234
3235 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3236 v = i & 0xff;
3237 v |= v << 16;
3238 if (i == v || i == (v | (v << 8)))
3239 return TRUE;
3240
3241 /* Allow repeated pattern 0xXY00XY00. */
3242 v = i & 0xff00;
3243 v |= v << 16;
3244 if (i == v)
3245 return TRUE;
3246 }
3247
3248 return FALSE;
3249 }
3250
3251 /* Return true if I is a valid constant for the operation CODE. */
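/* For example (illustrative): for PLUS, 0xFFFFFFFC (-4) is accepted because
   the addition can be done as a subtraction of 4; for SET and AND,
   0xFFFFFF00 is accepted because its inverse 0xFF is a valid immediate
   (using mvn and bic respectively).  */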
3252 int
3253 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3254 {
3255 if (const_ok_for_arm (i))
3256 return 1;
3257
3258 switch (code)
3259 {
3260 case SET:
3261 /* See if we can use movw. */
3262 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3263 return 1;
3264 else
3265 /* Otherwise, try mvn. */
3266 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3267
3268 case PLUS:
3269 /* See if we can use addw or subw. */
3270 if (TARGET_THUMB2
3271 && ((i & 0xfffff000) == 0
3272 || ((-i) & 0xfffff000) == 0))
3273 return 1;
3274 /* else fall through. */
3275
3276 case COMPARE:
3277 case EQ:
3278 case NE:
3279 case GT:
3280 case LE:
3281 case LT:
3282 case GE:
3283 case GEU:
3284 case LTU:
3285 case GTU:
3286 case LEU:
3287 case UNORDERED:
3288 case ORDERED:
3289 case UNEQ:
3290 case UNGE:
3291 case UNLT:
3292 case UNGT:
3293 case UNLE:
3294 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3295
3296 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3297 case XOR:
3298 return 0;
3299
3300 case IOR:
3301 if (TARGET_THUMB2)
3302 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3303 return 0;
3304
3305 case AND:
3306 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3307
3308 default:
3309 gcc_unreachable ();
3310 }
3311 }
3312
3313 /* Return true if I is a valid di mode constant for the operation CODE. */
3314 int
3315 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3316 {
3317 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3318 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3319 rtx hi = GEN_INT (hi_val);
3320 rtx lo = GEN_INT (lo_val);
3321
3322 if (TARGET_THUMB1)
3323 return 0;
3324
3325 switch (code)
3326 {
3327 case AND:
3328 case IOR:
3329 case XOR:
3330 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3331 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3332 case PLUS:
3333 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3334
3335 default:
3336 return 0;
3337 }
3338 }
3339
3340 /* Emit a sequence of insns to handle a large constant.
3341 CODE is the code of the operation required, it can be any of SET, PLUS,
3342 IOR, AND, XOR, MINUS;
3343 MODE is the mode in which the operation is being performed;
3344 VAL is the integer to operate on;
3345 SOURCE is the other operand (a register, or a null-pointer for SET);
3346 SUBTARGETS means it is safe to create scratch registers if that will
3347 either produce a simpler sequence, or we will want to cse the values.
3348 Return value is the number of insns emitted. */
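/* For example, 0x0000FFFF is not a valid immediate; depending on the target
   and the cost checks below it is either loaded with a single movw, left as
   a plain SET for arm_reorg to place in a literal pool, or synthesized by
   arm_gen_constant as two instructions such as
       mov  rD, #0xFF00
       orr  rD, rD, #0xFF
   (illustrative sequence).  */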
3349
3350 /* ??? Tweak this for thumb2. */
3351 int
3352 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
3353 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3354 {
3355 rtx cond;
3356
3357 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3358 cond = COND_EXEC_TEST (PATTERN (insn));
3359 else
3360 cond = NULL_RTX;
3361
3362 if (subtargets || code == SET
3363 || (REG_P (target) && REG_P (source)
3364 && REGNO (target) != REGNO (source)))
3365 {
3366 /* After arm_reorg has been called, we can't fix up expensive
3367 constants by pushing them into memory so we must synthesize
3368 them in-line, regardless of the cost. This is only likely to
3369 be more costly on chips that have load delay slots and we are
3370 compiling without running the scheduler (so no splitting
3371 occurred before the final instruction emission).
3372
3373 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3374 */
3375 if (!after_arm_reorg
3376 && !cond
3377 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3378 1, 0)
3379 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3380 + (code != SET))))
3381 {
3382 if (code == SET)
3383 {
3384 /* Currently SET is the only monadic value for CODE, all
3385 the rest are dyadic. */
3386 if (TARGET_USE_MOVT)
3387 arm_emit_movpair (target, GEN_INT (val));
3388 else
3389 emit_set_insn (target, GEN_INT (val));
3390
3391 return 1;
3392 }
3393 else
3394 {
3395 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3396
3397 if (TARGET_USE_MOVT)
3398 arm_emit_movpair (temp, GEN_INT (val));
3399 else
3400 emit_set_insn (temp, GEN_INT (val));
3401
3402 /* For MINUS, the value is subtracted from, since we never
3403 have subtraction of a constant. */
3404 if (code == MINUS)
3405 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3406 else
3407 emit_set_insn (target,
3408 gen_rtx_fmt_ee (code, mode, source, temp));
3409 return 2;
3410 }
3411 }
3412 }
3413
3414 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3415 1);
3416 }
3417
3418 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3419 ARM/THUMB2 immediates and add up to VAL.
3420 The function's return value gives the number of insns required. */
3421 static int
3422 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3423 struct four_ints *return_sequence)
3424 {
3425 int best_consecutive_zeros = 0;
3426 int i;
3427 int best_start = 0;
3428 int insns1, insns2;
3429 struct four_ints tmp_sequence;
3430
3431 /* If we aren't targeting ARM, the best place to start is always at
3432 the bottom, otherwise look more closely. */
3433 if (TARGET_ARM)
3434 {
3435 for (i = 0; i < 32; i += 2)
3436 {
3437 int consecutive_zeros = 0;
3438
3439 if (!(val & (3 << i)))
3440 {
3441 while ((i < 32) && !(val & (3 << i)))
3442 {
3443 consecutive_zeros += 2;
3444 i += 2;
3445 }
3446 if (consecutive_zeros > best_consecutive_zeros)
3447 {
3448 best_consecutive_zeros = consecutive_zeros;
3449 best_start = i - consecutive_zeros;
3450 }
3451 i -= 2;
3452 }
3453 }
3454 }
3455
3456 /* So long as it won't require any more insns to do so, it's
3457 desirable to emit a small constant (in bits 0...9) in the last
3458 insn. This way there is more chance that it can be combined with
3459 a later addressing insn to form a pre-indexed load or store
3460 operation. Consider:
3461
3462 *((volatile int *)0xe0000100) = 1;
3463 *((volatile int *)0xe0000110) = 2;
3464
3465 We want this to wind up as:
3466
3467 mov rA, #0xe0000000
3468 mov rB, #1
3469 str rB, [rA, #0x100]
3470 mov rB, #2
3471 str rB, [rA, #0x110]
3472
3473 rather than having to synthesize both large constants from scratch.
3474
3475 Therefore, we calculate how many insns would be required to emit
3476 the constant starting from `best_start', and also starting from
3477 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3478 yield a shorter sequence, we may as well use zero. */
3479 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3480 if (best_start != 0
3481 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3482 {
3483 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3484 if (insns2 <= insns1)
3485 {
3486 *return_sequence = tmp_sequence;
3487 insns1 = insns2;
3488 }
3489 }
3490
3491 return insns1;
3492 }
3493
3494 /* As for optimal_immediate_sequence, but starting at bit-position I. */
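/* For example (illustrative), in ARM state VAL = 0x89ABCDEF starting at the
   MSB is split into the four immediates 0x89000000, 0x00AB0000, 0x0000CD00
   and 0x000000EF, and the function returns 4.  */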
3495 static int
3496 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3497 struct four_ints *return_sequence, int i)
3498 {
3499 int remainder = val & 0xffffffff;
3500 int insns = 0;
3501
3502 /* Try and find a way of doing the job in either two or three
3503 instructions.
3504
3505 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3506 location. We start at position I. This may be the MSB, or
3507 optimal_immediate_sequence may have positioned it at the largest block
3508 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3509 wrapping around to the top of the word when we drop off the bottom.
3510 In the worst case this code should produce no more than four insns.
3511
3512 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3513 constants, shifted to any arbitrary location. We should always start
3514 at the MSB. */
3515 do
3516 {
3517 int end;
3518 unsigned int b1, b2, b3, b4;
3519 unsigned HOST_WIDE_INT result;
3520 int loc;
3521
3522 gcc_assert (insns < 4);
3523
3524 if (i <= 0)
3525 i += 32;
3526
3527 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3528 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3529 {
3530 loc = i;
3531 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3532 /* We can use addw/subw for the last 12 bits. */
3533 result = remainder;
3534 else
3535 {
3536 /* Use an 8-bit shifted/rotated immediate. */
3537 end = i - 8;
3538 if (end < 0)
3539 end += 32;
3540 result = remainder & ((0x0ff << end)
3541 | ((i < end) ? (0xff >> (32 - end))
3542 : 0));
3543 i -= 8;
3544 }
3545 }
3546 else
3547 {
3548 /* Arm allows rotates by a multiple of two. Thumb-2 allows
3549 arbitrary shifts. */
3550 i -= TARGET_ARM ? 2 : 1;
3551 continue;
3552 }
3553
3554 /* Next, see if we can do a better job with a thumb2 replicated
3555 constant.
3556
3557 We do it this way around to catch the cases like 0x01F001E0 where
3558 two 8-bit immediates would work, but a replicated constant would
3559 make it worse.
3560
3561 TODO: 16-bit constants that don't clear all the bits, but still win.
3562 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3563 if (TARGET_THUMB2)
3564 {
3565 b1 = (remainder & 0xff000000) >> 24;
3566 b2 = (remainder & 0x00ff0000) >> 16;
3567 b3 = (remainder & 0x0000ff00) >> 8;
3568 b4 = remainder & 0xff;
3569
3570 if (loc > 24)
3571 {
3572 /* The 8-bit immediate already found clears b1 (and maybe b2),
3573 but must leave b3 and b4 alone. */
3574
3575 /* First try to find a 32-bit replicated constant that clears
3576 almost everything. We can assume that we can't do it in one,
3577 or else we wouldn't be here. */
3578 unsigned int tmp = b1 & b2 & b3 & b4;
3579 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3580 + (tmp << 24);
3581 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3582 + (tmp == b3) + (tmp == b4);
3583 if (tmp
3584 && (matching_bytes >= 3
3585 || (matching_bytes == 2
3586 && const_ok_for_op (remainder & ~tmp2, code))))
3587 {
3588 /* At least 3 of the bytes match, and the fourth has at
3589 least as many bits set, or two of the bytes match
3590 and it will only require one more insn to finish. */
3591 result = tmp2;
3592 i = tmp != b1 ? 32
3593 : tmp != b2 ? 24
3594 : tmp != b3 ? 16
3595 : 8;
3596 }
3597
3598 /* Second, try to find a 16-bit replicated constant that can
3599 leave three of the bytes clear. If b2 or b4 is already
3600 zero, then we can. If the 8-bit from above would not
3601 clear b2 anyway, then we still win. */
3602 else if (b1 == b3 && (!b2 || !b4
3603 || (remainder & 0x00ff0000 & ~result)))
3604 {
3605 result = remainder & 0xff00ff00;
3606 i = 24;
3607 }
3608 }
3609 else if (loc > 16)
3610 {
3611 /* The 8-bit immediate already found clears b2 (and maybe b3)
3612 and we don't get here unless b1 is already clear, but it will
3613 leave b4 unchanged. */
3614
3615 /* If we can clear b2 and b4 at once, then we win, since the
3616 8-bits couldn't possibly reach that far. */
3617 if (b2 == b4)
3618 {
3619 result = remainder & 0x00ff00ff;
3620 i = 16;
3621 }
3622 }
3623 }
3624
3625 return_sequence->i[insns++] = result;
3626 remainder &= ~result;
3627
3628 if (code == SET || code == MINUS)
3629 code = PLUS;
3630 }
3631 while (remainder);
3632
3633 return insns;
3634 }
3635
3636 /* Emit an instruction with the indicated PATTERN. If COND is
3637 non-NULL, conditionalize the execution of the instruction on COND
3638 being true. */
3639
3640 static void
3641 emit_constant_insn (rtx cond, rtx pattern)
3642 {
3643 if (cond)
3644 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3645 emit_insn (pattern);
3646 }
3647
3648 /* As above, but extra parameter GENERATE which, if clear, suppresses
3649 RTL generation. */
3650
3651 static int
3652 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
3653 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3654 int generate)
3655 {
3656 int can_invert = 0;
3657 int can_negate = 0;
3658 int final_invert = 0;
3659 int i;
3660 int set_sign_bit_copies = 0;
3661 int clear_sign_bit_copies = 0;
3662 int clear_zero_bit_copies = 0;
3663 int set_zero_bit_copies = 0;
3664 int insns = 0, neg_insns, inv_insns;
3665 unsigned HOST_WIDE_INT temp1, temp2;
3666 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3667 struct four_ints *immediates;
3668 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3669
3670 /* Find out which operations are safe for a given CODE. Also do a quick
3671 check for degenerate cases; these can occur when DImode operations
3672 are split. */
3673 switch (code)
3674 {
3675 case SET:
3676 can_invert = 1;
3677 break;
3678
3679 case PLUS:
3680 can_negate = 1;
3681 break;
3682
3683 case IOR:
3684 if (remainder == 0xffffffff)
3685 {
3686 if (generate)
3687 emit_constant_insn (cond,
3688 gen_rtx_SET (VOIDmode, target,
3689 GEN_INT (ARM_SIGN_EXTEND (val))));
3690 return 1;
3691 }
3692
3693 if (remainder == 0)
3694 {
3695 if (reload_completed && rtx_equal_p (target, source))
3696 return 0;
3697
3698 if (generate)
3699 emit_constant_insn (cond,
3700 gen_rtx_SET (VOIDmode, target, source));
3701 return 1;
3702 }
3703 break;
3704
3705 case AND:
3706 if (remainder == 0)
3707 {
3708 if (generate)
3709 emit_constant_insn (cond,
3710 gen_rtx_SET (VOIDmode, target, const0_rtx));
3711 return 1;
3712 }
3713 if (remainder == 0xffffffff)
3714 {
3715 if (reload_completed && rtx_equal_p (target, source))
3716 return 0;
3717 if (generate)
3718 emit_constant_insn (cond,
3719 gen_rtx_SET (VOIDmode, target, source));
3720 return 1;
3721 }
3722 can_invert = 1;
3723 break;
3724
3725 case XOR:
3726 if (remainder == 0)
3727 {
3728 if (reload_completed && rtx_equal_p (target, source))
3729 return 0;
3730 if (generate)
3731 emit_constant_insn (cond,
3732 gen_rtx_SET (VOIDmode, target, source));
3733 return 1;
3734 }
3735
3736 if (remainder == 0xffffffff)
3737 {
3738 if (generate)
3739 emit_constant_insn (cond,
3740 gen_rtx_SET (VOIDmode, target,
3741 gen_rtx_NOT (mode, source)));
3742 return 1;
3743 }
3744 final_invert = 1;
3745 break;
3746
3747 case MINUS:
3748 /* We treat MINUS as (val - source), since (source - val) is always
3749 passed as (source + (-val)). */
3750 if (remainder == 0)
3751 {
3752 if (generate)
3753 emit_constant_insn (cond,
3754 gen_rtx_SET (VOIDmode, target,
3755 gen_rtx_NEG (mode, source)));
3756 return 1;
3757 }
3758 if (const_ok_for_arm (val))
3759 {
3760 if (generate)
3761 emit_constant_insn (cond,
3762 gen_rtx_SET (VOIDmode, target,
3763 gen_rtx_MINUS (mode, GEN_INT (val),
3764 source)));
3765 return 1;
3766 }
3767
3768 break;
3769
3770 default:
3771 gcc_unreachable ();
3772 }
3773
3774 /* If we can do it in one insn get out quickly. */
3775 if (const_ok_for_op (val, code))
3776 {
3777 if (generate)
3778 emit_constant_insn (cond,
3779 gen_rtx_SET (VOIDmode, target,
3780 (source
3781 ? gen_rtx_fmt_ee (code, mode, source,
3782 GEN_INT (val))
3783 : GEN_INT (val))));
3784 return 1;
3785 }
3786
3787 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
3788 insn. */
3789 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
3790 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
3791 {
3792 if (generate)
3793 {
3794 if (mode == SImode && i == 16)
3795 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
3796 smaller insn. */
3797 emit_constant_insn (cond,
3798 gen_zero_extendhisi2
3799 (target, gen_lowpart (HImode, source)));
3800 else
3801 /* Extz only supports SImode, but we can coerce the operands
3802 into that mode. */
3803 emit_constant_insn (cond,
3804 gen_extzv_t2 (gen_lowpart (SImode, target),
3805 gen_lowpart (SImode, source),
3806 GEN_INT (i), const0_rtx));
3807 }
3808
3809 return 1;
3810 }
3811
3812 /* Calculate a few attributes that may be useful for specific
3813 optimizations. */
3814 /* Count number of leading zeros. */
3815 for (i = 31; i >= 0; i--)
3816 {
3817 if ((remainder & (1 << i)) == 0)
3818 clear_sign_bit_copies++;
3819 else
3820 break;
3821 }
3822
3823 /* Count number of leading 1's. */
3824 for (i = 31; i >= 0; i--)
3825 {
3826 if ((remainder & (1 << i)) != 0)
3827 set_sign_bit_copies++;
3828 else
3829 break;
3830 }
3831
3832 /* Count number of trailing zeros. */
3833 for (i = 0; i <= 31; i++)
3834 {
3835 if ((remainder & (1 << i)) == 0)
3836 clear_zero_bit_copies++;
3837 else
3838 break;
3839 }
3840
3841 /* Count number of trailing 1's. */
3842 for (i = 0; i <= 31; i++)
3843 {
3844 if ((remainder & (1 << i)) != 0)
3845 set_zero_bit_copies++;
3846 else
3847 break;
3848 }
3849
3850 switch (code)
3851 {
3852 case SET:
3853 /* See if we can do this by sign_extending a constant that is known
3854 to be negative. This is a good way of doing it, since the shift
3855 may well merge into a subsequent insn. */
3856 if (set_sign_bit_copies > 1)
3857 {
3858 if (const_ok_for_arm
3859 (temp1 = ARM_SIGN_EXTEND (remainder
3860 << (set_sign_bit_copies - 1))))
3861 {
3862 if (generate)
3863 {
3864 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3865 emit_constant_insn (cond,
3866 gen_rtx_SET (VOIDmode, new_src,
3867 GEN_INT (temp1)));
3868 emit_constant_insn (cond,
3869 gen_ashrsi3 (target, new_src,
3870 GEN_INT (set_sign_bit_copies - 1)));
3871 }
3872 return 2;
3873 }
3874 /* For an inverted constant, we will need to set the low bits,
3875 these will be shifted out of harm's way. */
3876 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
3877 if (const_ok_for_arm (~temp1))
3878 {
3879 if (generate)
3880 {
3881 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3882 emit_constant_insn (cond,
3883 gen_rtx_SET (VOIDmode, new_src,
3884 GEN_INT (temp1)));
3885 emit_constant_insn (cond,
3886 gen_ashrsi3 (target, new_src,
3887 GEN_INT (set_sign_bit_copies - 1)));
3888 }
3889 return 2;
3890 }
3891 }
3892
3893 /* See if we can calculate the value as the difference between two
3894 valid immediates. */
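/* For example (illustrative), 0x00FFFFF4 is not a valid immediate, but it
   equals 0x01000000 - 0xC, both of which are, so it can be loaded as
       mov  rD, #0x01000000
       sub  rD, rD, #12  */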
3895 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
3896 {
3897 int topshift = clear_sign_bit_copies & ~1;
3898
3899 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
3900 & (0xff000000 >> topshift));
3901
3902 /* If temp1 is zero, then that means the 9 most significant
3903 bits of remainder were 1 and we've caused it to overflow.
3904 When topshift is 0 we don't need to do anything since we
3905 can borrow from 'bit 32'. */
3906 if (temp1 == 0 && topshift != 0)
3907 temp1 = 0x80000000 >> (topshift - 1);
3908
3909 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
3910
3911 if (const_ok_for_arm (temp2))
3912 {
3913 if (generate)
3914 {
3915 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
3916 emit_constant_insn (cond,
3917 gen_rtx_SET (VOIDmode, new_src,
3918 GEN_INT (temp1)));
3919 emit_constant_insn (cond,
3920 gen_addsi3 (target, new_src,
3921 GEN_INT (-temp2)));
3922 }
3923
3924 return 2;
3925 }
3926 }
3927
3928 /* See if we can generate this by setting the bottom (or the top)
3929 16 bits, and then shifting these into the other half of the
3930 word. We only look for the simplest cases, to do more would cost
3931 too much. Be careful, however, not to generate this when the
3932 alternative would take fewer insns. */
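/* For example (illustrative), in ARM state 0x01010101 can be built by
   first synthesizing 0x0101 (two insns) and then
       orr  rD, rD, rD, lsl #16
   giving three instructions rather than four separate 8-bit immediates.  */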
3933 if (val & 0xffff0000)
3934 {
3935 temp1 = remainder & 0xffff0000;
3936 temp2 = remainder & 0x0000ffff;
3937
3938 /* Overlaps outside this range are best done using other methods. */
3939 for (i = 9; i < 24; i++)
3940 {
3941 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
3942 && !const_ok_for_arm (temp2))
3943 {
3944 rtx new_src = (subtargets
3945 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3946 : target);
3947 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
3948 source, subtargets, generate);
3949 source = new_src;
3950 if (generate)
3951 emit_constant_insn
3952 (cond,
3953 gen_rtx_SET
3954 (VOIDmode, target,
3955 gen_rtx_IOR (mode,
3956 gen_rtx_ASHIFT (mode, source,
3957 GEN_INT (i)),
3958 source)));
3959 return insns + 1;
3960 }
3961 }
3962
3963 /* Don't duplicate cases already considered. */
3964 for (i = 17; i < 24; i++)
3965 {
3966 if (((temp1 | (temp1 >> i)) == remainder)
3967 && !const_ok_for_arm (temp1))
3968 {
3969 rtx new_src = (subtargets
3970 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
3971 : target);
3972 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
3973 source, subtargets, generate);
3974 source = new_src;
3975 if (generate)
3976 emit_constant_insn
3977 (cond,
3978 gen_rtx_SET (VOIDmode, target,
3979 gen_rtx_IOR
3980 (mode,
3981 gen_rtx_LSHIFTRT (mode, source,
3982 GEN_INT (i)),
3983 source)));
3984 return insns + 1;
3985 }
3986 }
3987 }
3988 break;
3989
3990 case IOR:
3991 case XOR:
3992 /* If we have IOR or XOR, and the constant can be loaded in a
3993 single instruction, and we can find a temporary to put it in,
3994 then this can be done in two instructions instead of 3-4. */
3995 if (subtargets
3996 /* TARGET can't be NULL if SUBTARGETS is 0 */
3997 || (reload_completed && !reg_mentioned_p (target, source)))
3998 {
3999 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4000 {
4001 if (generate)
4002 {
4003 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4004
4005 emit_constant_insn (cond,
4006 gen_rtx_SET (VOIDmode, sub,
4007 GEN_INT (val)));
4008 emit_constant_insn (cond,
4009 gen_rtx_SET (VOIDmode, target,
4010 gen_rtx_fmt_ee (code, mode,
4011 source, sub)));
4012 }
4013 return 2;
4014 }
4015 }
4016
4017 if (code == XOR)
4018 break;
4019
4020 /* Convert.
4021 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4022 followed by 0s, e.g. 0xfff00000)
4023 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4024
4025 This can be done in 2 instructions by using shifts with mov or mvn.
4026 e.g. for
4027 x = x | 0xfff00000;
4028 we generate:
4029 mvn r0, r0, asl #12
4030 mvn r0, r0, lsr #12 */
4031 if (set_sign_bit_copies > 8
4032 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4033 {
4034 if (generate)
4035 {
4036 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4037 rtx shift = GEN_INT (set_sign_bit_copies);
4038
4039 emit_constant_insn
4040 (cond,
4041 gen_rtx_SET (VOIDmode, sub,
4042 gen_rtx_NOT (mode,
4043 gen_rtx_ASHIFT (mode,
4044 source,
4045 shift))));
4046 emit_constant_insn
4047 (cond,
4048 gen_rtx_SET (VOIDmode, target,
4049 gen_rtx_NOT (mode,
4050 gen_rtx_LSHIFTRT (mode, sub,
4051 shift))));
4052 }
4053 return 2;
4054 }
4055
4056 /* Convert
4057 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4058 to
4059 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4060
4061 E.g. for r0 = r0 | 0xfff
4062 mvn r0, r0, lsr #12
4063 mvn r0, r0, asl #12
4064
4065 */
4066 if (set_zero_bit_copies > 8
4067 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4068 {
4069 if (generate)
4070 {
4071 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4072 rtx shift = GEN_INT (set_zero_bit_copies);
4073
4074 emit_constant_insn
4075 (cond,
4076 gen_rtx_SET (VOIDmode, sub,
4077 gen_rtx_NOT (mode,
4078 gen_rtx_LSHIFTRT (mode,
4079 source,
4080 shift))));
4081 emit_constant_insn
4082 (cond,
4083 gen_rtx_SET (VOIDmode, target,
4084 gen_rtx_NOT (mode,
4085 gen_rtx_ASHIFT (mode, sub,
4086 shift))));
4087 }
4088 return 2;
4089 }
4090
4091 /* This will never be reached for Thumb2 because orn is a valid
4092 instruction. This is for Thumb1 and the ARM 32 bit cases.
4093
4094 x = y | constant (such that ~constant is a valid constant)
4095 Transform this to
4096 x = ~(~y & ~constant).
4097 */
4098 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4099 {
4100 if (generate)
4101 {
4102 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4103 emit_constant_insn (cond,
4104 gen_rtx_SET (VOIDmode, sub,
4105 gen_rtx_NOT (mode, source)));
4106 source = sub;
4107 if (subtargets)
4108 sub = gen_reg_rtx (mode);
4109 emit_constant_insn (cond,
4110 gen_rtx_SET (VOIDmode, sub,
4111 gen_rtx_AND (mode, source,
4112 GEN_INT (temp1))));
4113 emit_constant_insn (cond,
4114 gen_rtx_SET (VOIDmode, target,
4115 gen_rtx_NOT (mode, sub)));
4116 }
4117 return 3;
4118 }
4119 break;
4120
4121 case AND:
4122 /* See if two shifts will do 2 or more insn's worth of work. */
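/* For example (illustrative), on an ARMv5-class core x & 0x7FFF has no
   single AND or BIC immediate encoding, but can be done as
       mov  rD, rS, lsl #17
       mov  rD, rD, lsr #17  */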
4123 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4124 {
4125 HOST_WIDE_INT shift_mask = ((0xffffffff
4126 << (32 - clear_sign_bit_copies))
4127 & 0xffffffff);
4128
4129 if ((remainder | shift_mask) != 0xffffffff)
4130 {
4131 if (generate)
4132 {
4133 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4134 insns = arm_gen_constant (AND, mode, cond,
4135 remainder | shift_mask,
4136 new_src, source, subtargets, 1);
4137 source = new_src;
4138 }
4139 else
4140 {
4141 rtx targ = subtargets ? NULL_RTX : target;
4142 insns = arm_gen_constant (AND, mode, cond,
4143 remainder | shift_mask,
4144 targ, source, subtargets, 0);
4145 }
4146 }
4147
4148 if (generate)
4149 {
4150 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4151 rtx shift = GEN_INT (clear_sign_bit_copies);
4152
4153 emit_insn (gen_ashlsi3 (new_src, source, shift));
4154 emit_insn (gen_lshrsi3 (target, new_src, shift));
4155 }
4156
4157 return insns + 2;
4158 }
4159
4160 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4161 {
4162 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4163
4164 if ((remainder | shift_mask) != 0xffffffff)
4165 {
4166 if (generate)
4167 {
4168 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4169
4170 insns = arm_gen_constant (AND, mode, cond,
4171 remainder | shift_mask,
4172 new_src, source, subtargets, 1);
4173 source = new_src;
4174 }
4175 else
4176 {
4177 rtx targ = subtargets ? NULL_RTX : target;
4178
4179 insns = arm_gen_constant (AND, mode, cond,
4180 remainder | shift_mask,
4181 targ, source, subtargets, 0);
4182 }
4183 }
4184
4185 if (generate)
4186 {
4187 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4188 rtx shift = GEN_INT (clear_zero_bit_copies);
4189
4190 emit_insn (gen_lshrsi3 (new_src, source, shift));
4191 emit_insn (gen_ashlsi3 (target, new_src, shift));
4192 }
4193
4194 return insns + 2;
4195 }
4196
4197 break;
4198
4199 default:
4200 break;
4201 }
4202
4203 /* Calculate what the instruction sequences would be if we generated it
4204 normally, negated, or inverted. */
4205 if (code == AND)
4206 /* AND cannot be split into multiple insns, so invert and use BIC. */
4207 insns = 99;
4208 else
4209 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4210
4211 if (can_negate)
4212 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4213 &neg_immediates);
4214 else
4215 neg_insns = 99;
4216
4217 if (can_invert || final_invert)
4218 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4219 &inv_immediates);
4220 else
4221 inv_insns = 99;
4222
4223 immediates = &pos_immediates;
4224
4225 /* Is the negated immediate sequence more efficient? */
4226 if (neg_insns < insns && neg_insns <= inv_insns)
4227 {
4228 insns = neg_insns;
4229 immediates = &neg_immediates;
4230 }
4231 else
4232 can_negate = 0;
4233
4234 /* Is the inverted immediate sequence more efficient?
4235 We must allow for an extra NOT instruction for XOR operations, although
4236 there is some chance that the final 'mvn' will get optimized later. */
4237 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4238 {
4239 insns = inv_insns;
4240 immediates = &inv_immediates;
4241 }
4242 else
4243 {
4244 can_invert = 0;
4245 final_invert = 0;
4246 }
4247
4248 /* Now output the chosen sequence as instructions. */
4249 if (generate)
4250 {
4251 for (i = 0; i < insns; i++)
4252 {
4253 rtx new_src, temp1_rtx;
4254
4255 temp1 = immediates->i[i];
4256
4257 if (code == SET || code == MINUS)
4258 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4259 else if ((final_invert || i < (insns - 1)) && subtargets)
4260 new_src = gen_reg_rtx (mode);
4261 else
4262 new_src = target;
4263
4264 if (can_invert)
4265 temp1 = ~temp1;
4266 else if (can_negate)
4267 temp1 = -temp1;
4268
4269 temp1 = trunc_int_for_mode (temp1, mode);
4270 temp1_rtx = GEN_INT (temp1);
4271
4272 if (code == SET)
4273 ;
4274 else if (code == MINUS)
4275 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4276 else
4277 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4278
4279 emit_constant_insn (cond,
4280 gen_rtx_SET (VOIDmode, new_src,
4281 temp1_rtx));
4282 source = new_src;
4283
4284 if (code == SET)
4285 {
4286 can_negate = can_invert;
4287 can_invert = 0;
4288 code = PLUS;
4289 }
4290 else if (code == MINUS)
4291 code = PLUS;
4292 }
4293 }
4294
4295 if (final_invert)
4296 {
4297 if (generate)
4298 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4299 gen_rtx_NOT (mode, source)));
4300 insns++;
4301 }
4302
4303 return insns;
4304 }
4305
4306 /* Canonicalize a comparison so that we are more likely to recognize it.
4307 This can be done for a few constant compares, where we can make the
4308 immediate value easier to load. */
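/* For example (illustrative), (x <= 0x3FF) uses a constant with no valid
   immediate encoding, so it is rewritten as (x < 0x400), which has one.  */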
4309
4310 static void
4311 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4312 bool op0_preserve_value)
4313 {
4314 enum machine_mode mode;
4315 unsigned HOST_WIDE_INT i, maxval;
4316
4317 mode = GET_MODE (*op0);
4318 if (mode == VOIDmode)
4319 mode = GET_MODE (*op1);
4320
4321 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4322
4323 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4324 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4325 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4326 for GTU/LEU in Thumb mode. */
4327 if (mode == DImode)
4328 {
4329 rtx tem;
4330
4331 if (*code == GT || *code == LE
4332 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4333 {
4334 /* Missing comparison. First try to use an available
4335 comparison. */
4336 if (CONST_INT_P (*op1))
4337 {
4338 i = INTVAL (*op1);
4339 switch (*code)
4340 {
4341 case GT:
4342 case LE:
4343 if (i != maxval
4344 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4345 {
4346 *op1 = GEN_INT (i + 1);
4347 *code = *code == GT ? GE : LT;
4348 return;
4349 }
4350 break;
4351 case GTU:
4352 case LEU:
4353 if (i != ~((unsigned HOST_WIDE_INT) 0)
4354 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4355 {
4356 *op1 = GEN_INT (i + 1);
4357 *code = *code == GTU ? GEU : LTU;
4358 return;
4359 }
4360 break;
4361 default:
4362 gcc_unreachable ();
4363 }
4364 }
4365
4366 /* If that did not work, reverse the condition. */
4367 if (!op0_preserve_value)
4368 {
4369 tem = *op0;
4370 *op0 = *op1;
4371 *op1 = tem;
4372 *code = (int)swap_condition ((enum rtx_code)*code);
4373 }
4374 }
4375 return;
4376 }
4377
4378 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4379 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4380 to facilitate possible combining with a cmp into 'ands'. */
4381 if (mode == SImode
4382 && GET_CODE (*op0) == ZERO_EXTEND
4383 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4384 && GET_MODE (XEXP (*op0, 0)) == QImode
4385 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4386 && subreg_lowpart_p (XEXP (*op0, 0))
4387 && *op1 == const0_rtx)
4388 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4389 GEN_INT (255));
4390
4391 /* Comparisons smaller than DImode. Only adjust comparisons against
4392 an out-of-range constant. */
4393 if (!CONST_INT_P (*op1)
4394 || const_ok_for_arm (INTVAL (*op1))
4395 || const_ok_for_arm (- INTVAL (*op1)))
4396 return;
4397
4398 i = INTVAL (*op1);
4399
4400 switch (*code)
4401 {
4402 case EQ:
4403 case NE:
4404 return;
4405
4406 case GT:
4407 case LE:
4408 if (i != maxval
4409 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4410 {
4411 *op1 = GEN_INT (i + 1);
4412 *code = *code == GT ? GE : LT;
4413 return;
4414 }
4415 break;
4416
4417 case GE:
4418 case LT:
4419 if (i != ~maxval
4420 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4421 {
4422 *op1 = GEN_INT (i - 1);
4423 *code = *code == GE ? GT : LE;
4424 return;
4425 }
4426 break;
4427
4428 case GTU:
4429 case LEU:
4430 if (i != ~((unsigned HOST_WIDE_INT) 0)
4431 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4432 {
4433 *op1 = GEN_INT (i + 1);
4434 *code = *code == GTU ? GEU : LTU;
4435 return;
4436 }
4437 break;
4438
4439 case GEU:
4440 case LTU:
4441 if (i != 0
4442 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4443 {
4444 *op1 = GEN_INT (i - 1);
4445 *code = *code == GEU ? GTU : LEU;
4446 return;
4447 }
4448 break;
4449
4450 default:
4451 gcc_unreachable ();
4452 }
4453 }
4454
4455
4456 /* Define how to find the value returned by a function. */
4457
4458 static rtx
4459 arm_function_value(const_tree type, const_tree func,
4460 bool outgoing ATTRIBUTE_UNUSED)
4461 {
4462 enum machine_mode mode;
4463 int unsignedp ATTRIBUTE_UNUSED;
4464 rtx r ATTRIBUTE_UNUSED;
4465
4466 mode = TYPE_MODE (type);
4467
4468 if (TARGET_AAPCS_BASED)
4469 return aapcs_allocate_return_reg (mode, type, func);
4470
4471 /* Promote integer types. */
4472 if (INTEGRAL_TYPE_P (type))
4473 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4474
4475 /* Promote small structs returned in a register to full-word size
4476 for big-endian AAPCS. */
4477 if (arm_return_in_msb (type))
4478 {
4479 HOST_WIDE_INT size = int_size_in_bytes (type);
4480 if (size % UNITS_PER_WORD != 0)
4481 {
4482 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4483 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4484 }
4485 }
4486
4487 return arm_libcall_value_1 (mode);
4488 }
4489
4490 /* libcall hashtable helpers. */
4491
4492 struct libcall_hasher : typed_noop_remove <rtx_def>
4493 {
4494 typedef rtx_def value_type;
4495 typedef rtx_def compare_type;
4496 static inline hashval_t hash (const value_type *);
4497 static inline bool equal (const value_type *, const compare_type *);
4498 static inline void remove (value_type *);
4499 };
4500
4501 inline bool
4502 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4503 {
4504 return rtx_equal_p (p1, p2);
4505 }
4506
4507 inline hashval_t
4508 libcall_hasher::hash (const value_type *p1)
4509 {
4510 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4511 }
4512
4513 typedef hash_table <libcall_hasher> libcall_table_type;
4514
4515 static void
4516 add_libcall (libcall_table_type htab, rtx libcall)
4517 {
4518 *htab.find_slot (libcall, INSERT) = libcall;
4519 }
4520
4521 static bool
4522 arm_libcall_uses_aapcs_base (const_rtx libcall)
4523 {
4524 static bool init_done = false;
4525 static libcall_table_type libcall_htab;
4526
4527 if (!init_done)
4528 {
4529 init_done = true;
4530
4531 libcall_htab.create (31);
4532 add_libcall (libcall_htab,
4533 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4534 add_libcall (libcall_htab,
4535 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4536 add_libcall (libcall_htab,
4537 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4538 add_libcall (libcall_htab,
4539 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4540
4541 add_libcall (libcall_htab,
4542 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4543 add_libcall (libcall_htab,
4544 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4545 add_libcall (libcall_htab,
4546 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4547 add_libcall (libcall_htab,
4548 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4549
4550 add_libcall (libcall_htab,
4551 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4552 add_libcall (libcall_htab,
4553 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4554 add_libcall (libcall_htab,
4555 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4556 add_libcall (libcall_htab,
4557 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4558 add_libcall (libcall_htab,
4559 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4560 add_libcall (libcall_htab,
4561 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4562 add_libcall (libcall_htab,
4563 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4564 add_libcall (libcall_htab,
4565 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4566
4567 /* Values from double-precision helper functions are returned in core
4568 registers if the selected core only supports single-precision
4569 arithmetic, even if we are using the hard-float ABI. The same is
4570 true for single-precision helpers, but we will never be using the
4571 hard-float ABI on a CPU which doesn't support single-precision
4572 operations in hardware. */
4573 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4574 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4575 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4576 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4577 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4578 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4579 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4580 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4581 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4582 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4583 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4584 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4585 SFmode));
4586 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4587 DFmode));
4588 }
4589
4590 return libcall && libcall_htab.find (libcall) != NULL;
4591 }
4592
4593 static rtx
4594 arm_libcall_value_1 (enum machine_mode mode)
4595 {
4596 if (TARGET_AAPCS_BASED)
4597 return aapcs_libcall_value (mode);
4598 else if (TARGET_IWMMXT_ABI
4599 && arm_vector_mode_supported_p (mode))
4600 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4601 else
4602 return gen_rtx_REG (mode, ARG_REGISTER (1));
4603 }
4604
4605 /* Define how to find the value returned by a library function
4606 assuming the value has mode MODE. */
4607
4608 static rtx
4609 arm_libcall_value (enum machine_mode mode, const_rtx libcall)
4610 {
4611 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4612 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4613 {
4614 /* The following libcalls return their result in integer registers,
4615 even though they return a floating point value. */
4616 if (arm_libcall_uses_aapcs_base (libcall))
4617 return gen_rtx_REG (mode, ARG_REGISTER(1));
4618
4619 }
4620
4621 return arm_libcall_value_1 (mode);
4622 }
4623
4624 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4625
4626 static bool
4627 arm_function_value_regno_p (const unsigned int regno)
4628 {
4629 if (regno == ARG_REGISTER (1)
4630 || (TARGET_32BIT
4631 && TARGET_AAPCS_BASED
4632 && TARGET_VFP
4633 && TARGET_HARD_FLOAT
4634 && regno == FIRST_VFP_REGNUM)
4635 || (TARGET_IWMMXT_ABI
4636 && regno == FIRST_IWMMXT_REGNUM))
4637 return true;
4638
4639 return false;
4640 }
4641
4642 /* Determine the amount of memory needed to store the possible return
4643 registers of an untyped call. */
4644 int
4645 arm_apply_result_size (void)
4646 {
4647 int size = 16;
4648
4649 if (TARGET_32BIT)
4650 {
4651 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4652 size += 32;
4653 if (TARGET_IWMMXT_ABI)
4654 size += 8;
4655 }
4656
4657 return size;
4658 }
4659
4660 /* Decide whether TYPE should be returned in memory (true)
4661 or in a register (false). FNTYPE is the type of the function making
4662 the call. */
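/* For example (illustrative), under AAPCS a struct containing a single int
   is returned in r0, a struct of two ints (8 bytes) is returned in memory,
   and a homogeneous aggregate of up to four floats can be returned in VFP
   registers (hence not in memory) when the VFP variant of the PCS applies
   and the call is not variadic.  */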
4663 static bool
4664 arm_return_in_memory (const_tree type, const_tree fntype)
4665 {
4666 HOST_WIDE_INT size;
4667
4668 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4669
4670 if (TARGET_AAPCS_BASED)
4671 {
4672 /* Simple, non-aggregate types (i.e. not including vectors and
4673 complex) are always returned in a register (or registers).
4674 We don't care about which register here, so we can short-cut
4675 some of the detail. */
4676 if (!AGGREGATE_TYPE_P (type)
4677 && TREE_CODE (type) != VECTOR_TYPE
4678 && TREE_CODE (type) != COMPLEX_TYPE)
4679 return false;
4680
4681 /* Any return value that is no larger than one word can be
4682 returned in r0. */
4683 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4684 return false;
4685
4686 /* Check any available co-processors to see if they accept the
4687 type as a register candidate (VFP, for example, can return
4688 some aggregates in consecutive registers). These aren't
4689 available if the call is variadic. */
4690 if (aapcs_select_return_coproc (type, fntype) >= 0)
4691 return false;
4692
4693 /* Vector values should be returned using ARM registers, not
4694 memory (unless they're over 16 bytes, which will break since
4695 we only have four call-clobbered registers to play with). */
4696 if (TREE_CODE (type) == VECTOR_TYPE)
4697 return (size < 0 || size > (4 * UNITS_PER_WORD));
4698
4699 /* The rest go in memory. */
4700 return true;
4701 }
4702
4703 if (TREE_CODE (type) == VECTOR_TYPE)
4704 return (size < 0 || size > (4 * UNITS_PER_WORD));
4705
4706 if (!AGGREGATE_TYPE_P (type) &&
4707 (TREE_CODE (type) != VECTOR_TYPE))
4708 /* All simple types are returned in registers. */
4709 return false;
4710
4711 if (arm_abi != ARM_ABI_APCS)
4712 {
4713 /* ATPCS and later return aggregate types in memory only if they are
4714 larger than a word (or are variable size). */
4715 return (size < 0 || size > UNITS_PER_WORD);
4716 }
4717
4718 /* For the arm-wince targets we choose to be compatible with Microsoft's
4719 ARM and Thumb compilers, which always return aggregates in memory. */
4720 #ifndef ARM_WINCE
4721 /* All structures/unions bigger than one word are returned in memory.
4722 Also catch the case where int_size_in_bytes returns -1. In this case
4723 the aggregate is either huge or of variable size, and in either case
4724 we will want to return it via memory and not in a register. */
4725 if (size < 0 || size > UNITS_PER_WORD)
4726 return true;
4727
4728 if (TREE_CODE (type) == RECORD_TYPE)
4729 {
4730 tree field;
4731
4732 /* For a struct the APCS says that we only return in a register
4733 if the type is 'integer like' and every addressable element
4734 has an offset of zero. For practical purposes this means
4735 that the structure can have at most one non bit-field element
4736 and that this element must be the first one in the structure. */
4737
4738 /* Find the first field, ignoring non FIELD_DECL things which will
4739 have been created by C++. */
4740 for (field = TYPE_FIELDS (type);
4741 field && TREE_CODE (field) != FIELD_DECL;
4742 field = DECL_CHAIN (field))
4743 continue;
4744
4745 if (field == NULL)
4746 return false; /* An empty structure. Allowed by an extension to ANSI C. */
4747
4748 /* Check that the first field is valid for returning in a register. */
4749
4750 /* ... Floats are not allowed */
4751 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4752 return true;
4753
4754 /* ... Aggregates that are not themselves valid for returning in
4755 a register are not allowed. */
4756 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4757 return true;
4758
4759 /* Now check the remaining fields, if any. Only bitfields are allowed,
4760 since they are not addressable. */
4761 for (field = DECL_CHAIN (field);
4762 field;
4763 field = DECL_CHAIN (field))
4764 {
4765 if (TREE_CODE (field) != FIELD_DECL)
4766 continue;
4767
4768 if (!DECL_BIT_FIELD_TYPE (field))
4769 return true;
4770 }
4771
4772 return false;
4773 }
4774
4775 if (TREE_CODE (type) == UNION_TYPE)
4776 {
4777 tree field;
4778
4779 /* Unions can be returned in registers if every element is
4780 integral, or can be returned in an integer register. */
4781 for (field = TYPE_FIELDS (type);
4782 field;
4783 field = DECL_CHAIN (field))
4784 {
4785 if (TREE_CODE (field) != FIELD_DECL)
4786 continue;
4787
4788 if (FLOAT_TYPE_P (TREE_TYPE (field)))
4789 return true;
4790
4791 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
4792 return true;
4793 }
4794
4795 return false;
4796 }
4797 #endif /* not ARM_WINCE */
4798
4799 /* Return all other types in memory. */
4800 return true;
4801 }
4802
4803 const struct pcs_attribute_arg
4804 {
4805 const char *arg;
4806 enum arm_pcs value;
4807 } pcs_attribute_args[] =
4808 {
4809 {"aapcs", ARM_PCS_AAPCS},
4810 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
4811 #if 0
4812 /* We could recognize these, but changes would be needed elsewhere
4813 * to implement them. */
4814 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
4815 {"atpcs", ARM_PCS_ATPCS},
4816 {"apcs", ARM_PCS_APCS},
4817 #endif
4818 {NULL, ARM_PCS_UNKNOWN}
4819 };
4820
4821 static enum arm_pcs
4822 arm_pcs_from_attribute (tree attr)
4823 {
4824 const struct pcs_attribute_arg *ptr;
4825 const char *arg;
4826
4827 /* Get the value of the argument. */
4828 if (TREE_VALUE (attr) == NULL_TREE
4829 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
4830 return ARM_PCS_UNKNOWN;
4831
4832 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
4833
4834 /* Check it against the list of known arguments. */
4835 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
4836 if (streq (arg, ptr->arg))
4837 return ptr->value;
4838
4839 /* An unrecognized PCS attribute argument. */
4840 return ARM_PCS_UNKNOWN;
4841 }
4842
4843 /* Get the PCS variant to use for this call. TYPE is the function's type
4844 specification, DECL is the specific declaration. DECL may be null if
4845 the call could be indirect or if this is a library call. */
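/* For example (illustrative), a declaration such as
       double f (double) __attribute__((pcs("aapcs")));
   requests the base AAPCS variant for F, so its arguments and return value
   use core registers even when the default is the VFP variant.  */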
4846 static enum arm_pcs
4847 arm_get_pcs_model (const_tree type, const_tree decl)
4848 {
4849 bool user_convention = false;
4850 enum arm_pcs user_pcs = arm_pcs_default;
4851 tree attr;
4852
4853 gcc_assert (type);
4854
4855 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
4856 if (attr)
4857 {
4858 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
4859 user_convention = true;
4860 }
4861
4862 if (TARGET_AAPCS_BASED)
4863 {
4864 /* Detect varargs functions. These always use the base rules
4865 (no argument is ever a candidate for a co-processor
4866 register). */
4867 bool base_rules = stdarg_p (type);
4868
4869 if (user_convention)
4870 {
4871 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
4872 sorry ("non-AAPCS derived PCS variant");
4873 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
4874 error ("variadic functions must use the base AAPCS variant");
4875 }
4876
4877 if (base_rules)
4878 return ARM_PCS_AAPCS;
4879 else if (user_convention)
4880 return user_pcs;
4881 else if (decl && flag_unit_at_a_time)
4882 {
4883 /* Local functions never leak outside this compilation unit,
4884 so we are free to use whatever conventions are
4885 appropriate. */
4886 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
4887 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4888 if (i && i->local)
4889 return ARM_PCS_AAPCS_LOCAL;
4890 }
4891 }
4892 else if (user_convention && user_pcs != arm_pcs_default)
4893 sorry ("PCS variant");
4894
4895 /* For everything else we use the target's default. */
4896 return arm_pcs_default;
4897 }
4898
4899
4900 static void
4901 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
4902 const_tree fntype ATTRIBUTE_UNUSED,
4903 rtx libcall ATTRIBUTE_UNUSED,
4904 const_tree fndecl ATTRIBUTE_UNUSED)
4905 {
4906 /* Record the unallocated VFP registers. */
4907 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
4908 pcum->aapcs_vfp_reg_alloc = 0;
4909 }
4910
4911 /* Walk down the type tree of TYPE counting consecutive base elements.
4912 If *MODEP is VOIDmode, then set it to the first valid floating point
4913 type. If a non-floating point type is found, or if a floating point
4914 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
4915 otherwise return the count in the sub-tree. */
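/* For example (illustrative), struct { double x, y; } gives a count of 2
   with *MODEP set to DFmode, float[4] gives 4 with *MODEP set to SFmode,
   and struct { float f; double d; } gives -1 because its elements do not
   share a single mode.  */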
4916 static int
4917 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
4918 {
4919 enum machine_mode mode;
4920 HOST_WIDE_INT size;
4921
4922 switch (TREE_CODE (type))
4923 {
4924 case REAL_TYPE:
4925 mode = TYPE_MODE (type);
4926 if (mode != DFmode && mode != SFmode)
4927 return -1;
4928
4929 if (*modep == VOIDmode)
4930 *modep = mode;
4931
4932 if (*modep == mode)
4933 return 1;
4934
4935 break;
4936
4937 case COMPLEX_TYPE:
4938 mode = TYPE_MODE (TREE_TYPE (type));
4939 if (mode != DFmode && mode != SFmode)
4940 return -1;
4941
4942 if (*modep == VOIDmode)
4943 *modep = mode;
4944
4945 if (*modep == mode)
4946 return 2;
4947
4948 break;
4949
4950 case VECTOR_TYPE:
4951 /* Use V2SImode and V4SImode as representatives of all 64-bit
4952 and 128-bit vector types, whether or not those modes are
4953 supported with the present options. */
4954 size = int_size_in_bytes (type);
4955 switch (size)
4956 {
4957 case 8:
4958 mode = V2SImode;
4959 break;
4960 case 16:
4961 mode = V4SImode;
4962 break;
4963 default:
4964 return -1;
4965 }
4966
4967 if (*modep == VOIDmode)
4968 *modep = mode;
4969
4970 /* Vector modes are considered to be opaque: two vectors are
4971 equivalent for the purposes of being homogeneous aggregates
4972 if they are the same size. */
4973 if (*modep == mode)
4974 return 1;
4975
4976 break;
4977
4978 case ARRAY_TYPE:
4979 {
4980 int count;
4981 tree index = TYPE_DOMAIN (type);
4982
4983 /* Can't handle incomplete types. */
4984 if (!COMPLETE_TYPE_P (type))
4985 return -1;
4986
4987 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
4988 if (count == -1
4989 || !index
4990 || !TYPE_MAX_VALUE (index)
4991 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
4992 || !TYPE_MIN_VALUE (index)
4993 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
4994 || count < 0)
4995 return -1;
4996
4997 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
4998 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
4999
5000 /* There must be no padding. */
5001 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5002 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5003 != count * GET_MODE_BITSIZE (*modep)))
5004 return -1;
5005
5006 return count;
5007 }
5008
5009 case RECORD_TYPE:
5010 {
5011 int count = 0;
5012 int sub_count;
5013 tree field;
5014
5015 /* Can't handle incomplete types. */
5016 if (!COMPLETE_TYPE_P (type))
5017 return -1;
5018
5019 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5020 {
5021 if (TREE_CODE (field) != FIELD_DECL)
5022 continue;
5023
5024 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5025 if (sub_count < 0)
5026 return -1;
5027 count += sub_count;
5028 }
5029
5030 /* There must be no padding. */
5031 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5032 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5033 != count * GET_MODE_BITSIZE (*modep)))
5034 return -1;
5035
5036 return count;
5037 }
5038
5039 case UNION_TYPE:
5040 case QUAL_UNION_TYPE:
5041 {
5042 /* These aren't very interesting except in a degenerate case. */
5043 int count = 0;
5044 int sub_count;
5045 tree field;
5046
5047 /* Can't handle incomplete types. */
5048 if (!COMPLETE_TYPE_P (type))
5049 return -1;
5050
5051 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5052 {
5053 if (TREE_CODE (field) != FIELD_DECL)
5054 continue;
5055
5056 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5057 if (sub_count < 0)
5058 return -1;
5059 count = count > sub_count ? count : sub_count;
5060 }
5061
5062 /* There must be no padding. */
5063 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
5064 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
5065 != count * GET_MODE_BITSIZE (*modep)))
5066 return -1;
5067
5068 return count;
5069 }
5070
5071 default:
5072 break;
5073 }
5074
5075 return -1;
5076 }
5077
5078 /* Return true if PCS_VARIANT should use VFP registers. */
5079 static bool
5080 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5081 {
5082 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5083 {
5084 static bool seen_thumb1_vfp = false;
5085
5086 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5087 {
5088 sorry ("Thumb-1 hard-float VFP ABI");
5089 /* sorry() is not immediately fatal, so only display this once. */
5090 seen_thumb1_vfp = true;
5091 }
5092
5093 return true;
5094 }
5095
5096 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5097 return false;
5098
5099 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5100 (TARGET_VFP_DOUBLE || !is_double));
5101 }
5102
5103 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5104 suitable for passing or returning in VFP registers for the PCS
5105 variant selected. If it is, then *BASE_MODE is updated to contain
5106 a machine mode describing each element of the argument's type and
5107 *COUNT to hold the number of such elements. */
5108 static bool
5109 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5110 enum machine_mode mode, const_tree type,
5111 enum machine_mode *base_mode, int *count)
5112 {
5113 enum machine_mode new_mode = VOIDmode;
5114
5115 /* If we have the type information, prefer that to working things
5116 out from the mode. */
5117 if (type)
5118 {
5119 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5120
5121 if (ag_count > 0 && ag_count <= 4)
5122 *count = ag_count;
5123 else
5124 return false;
5125 }
5126 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5127 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5128 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5129 {
5130 *count = 1;
5131 new_mode = mode;
5132 }
5133 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5134 {
5135 *count = 2;
5136 new_mode = (mode == DCmode ? DFmode : SFmode);
5137 }
5138 else
5139 return false;
5140
5141
5142 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5143 return false;
5144
5145 *base_mode = new_mode;
5146 return true;
5147 }
5148
5149 static bool
5150 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5151 enum machine_mode mode, const_tree type)
5152 {
5153 int count ATTRIBUTE_UNUSED;
5154 enum machine_mode ag_mode ATTRIBUTE_UNUSED;
5155
5156 if (!use_vfp_abi (pcs_variant, false))
5157 return false;
5158 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5159 &ag_mode, &count);
5160 }
5161
5162 static bool
5163 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5164 const_tree type)
5165 {
5166 if (!use_vfp_abi (pcum->pcs_variant, false))
5167 return false;
5168
5169 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5170 &pcum->aapcs_vfp_rmode,
5171 &pcum->aapcs_vfp_rcount);
5172 }
5173
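/* Allocate the lowest-numbered block of free VFP registers large enough for
   the argument described by PCUM, recording the allocation in
   aapcs_vfp_reg_alloc and aapcs_reg.  This gives the AAPCS back-filling
   behaviour: for example (illustrative), after a float (s0) and a double
   (d1, i.e. s2-s3), a further float argument is placed in the still-free
   s1.  */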
5174 static bool
5175 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5176 const_tree type ATTRIBUTE_UNUSED)
5177 {
5178 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5179 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5180 int regno;
5181
5182 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5183 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5184 {
5185 pcum->aapcs_vfp_reg_alloc = mask << regno;
5186 if (mode == BLKmode
5187 || (mode == TImode && ! TARGET_NEON)
5188 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5189 {
5190 int i;
5191 int rcount = pcum->aapcs_vfp_rcount;
5192 int rshift = shift;
5193 enum machine_mode rmode = pcum->aapcs_vfp_rmode;
5194 rtx par;
5195 if (!TARGET_NEON)
5196 {
5197 /* Avoid using unsupported vector modes. */
5198 if (rmode == V2SImode)
5199 rmode = DImode;
5200 else if (rmode == V4SImode)
5201 {
5202 rmode = DImode;
5203 rcount *= 2;
5204 rshift /= 2;
5205 }
5206 }
5207 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5208 for (i = 0; i < rcount; i++)
5209 {
5210 rtx tmp = gen_rtx_REG (rmode,
5211 FIRST_VFP_REGNUM + regno + i * rshift);
5212 tmp = gen_rtx_EXPR_LIST
5213 (VOIDmode, tmp,
5214 GEN_INT (i * GET_MODE_SIZE (rmode)));
5215 XVECEXP (par, 0, i) = tmp;
5216 }
5217
5218 pcum->aapcs_reg = par;
5219 }
5220 else
5221 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5222 return true;
5223 }
5224 return false;
5225 }
5226
5227 static rtx
5228 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5229 enum machine_mode mode,
5230 const_tree type ATTRIBUTE_UNUSED)
5231 {
5232 if (!use_vfp_abi (pcs_variant, false))
5233 return NULL;
5234
5235 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5236 {
5237 int count;
5238 enum machine_mode ag_mode;
5239 int i;
5240 rtx par;
5241 int shift;
5242
5243 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5244 &ag_mode, &count);
5245
5246 if (!TARGET_NEON)
5247 {
5248 if (ag_mode == V2SImode)
5249 ag_mode = DImode;
5250 else if (ag_mode == V4SImode)
5251 {
5252 ag_mode = DImode;
5253 count *= 2;
5254 }
5255 }
5256 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5257 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5258 for (i = 0; i < count; i++)
5259 {
5260 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5261 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5262 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5263 XVECEXP (par, 0, i) = tmp;
5264 }
5265
5266 return par;
5267 }
5268
5269 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5270 }
5271
5272 static void
5273 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
5274 enum machine_mode mode ATTRIBUTE_UNUSED,
5275 const_tree type ATTRIBUTE_UNUSED)
5276 {
5277 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5278 pcum->aapcs_vfp_reg_alloc = 0;
5279 return;
5280 }
5281
5282 #define AAPCS_CP(X) \
5283 { \
5284 aapcs_ ## X ## _cum_init, \
5285 aapcs_ ## X ## _is_call_candidate, \
5286 aapcs_ ## X ## _allocate, \
5287 aapcs_ ## X ## _is_return_candidate, \
5288 aapcs_ ## X ## _allocate_return_reg, \
5289 aapcs_ ## X ## _advance \
5290 }
5291
5292 /* Table of co-processors that can be used to pass arguments in
5293 registers. Ideally no argument should be a candidate for more than
5294 one co-processor table entry, but the table is processed in order
5295 and stops after the first match. If that entry then fails to put
5296 the argument into a co-processor register, the argument will go on
5297 the stack. */
5298 static struct
5299 {
5300 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5301 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5302
5303 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5304 BLKmode) is a candidate for this co-processor's registers; this
5305 function should ignore any position-dependent state in
5306 CUMULATIVE_ARGS and only use call-type dependent information. */
5307 bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5308
5309 /* Return true if the argument does get a co-processor register; if
5310 so, it should set aapcs_reg to an RTX for the allocated register,
5311 in the form required as a return value from FUNCTION_ARG. */
5312 bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5313
5314 /* Return true if a result of mode MODE (or type TYPE if MODE is
5315 BLKmode) can be returned in this co-processor's registers. */
5316 bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
5317
5318 /* Allocate and return an RTX element to hold the return value of a
5319 call; this routine must not fail and will only be called if
5320 is_return_candidate returned true with the same parameters. */
5321 rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
5322
5323 /* Finish processing this argument and prepare to start processing
5324 the next one. */
5325 void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
5326 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5327 {
5328 AAPCS_CP(vfp)
5329 };
5330
5331 #undef AAPCS_CP
5332
5333 static int
5334 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5335 const_tree type)
5336 {
5337 int i;
5338
5339 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5340 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5341 return i;
5342
5343 return -1;
5344 }
5345
5346 static int
5347 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5348 {
5349 /* We aren't passed a decl, so we can't check that a call is local.
5350 However, it isn't clear that that would be a win anyway, since it
5351 might limit some tail-calling opportunities. */
5352 enum arm_pcs pcs_variant;
5353
5354 if (fntype)
5355 {
5356 const_tree fndecl = NULL_TREE;
5357
5358 if (TREE_CODE (fntype) == FUNCTION_DECL)
5359 {
5360 fndecl = fntype;
5361 fntype = TREE_TYPE (fntype);
5362 }
5363
5364 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5365 }
5366 else
5367 pcs_variant = arm_pcs_default;
5368
5369 if (pcs_variant != ARM_PCS_AAPCS)
5370 {
5371 int i;
5372
5373 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5374 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5375 TYPE_MODE (type),
5376 type))
5377 return i;
5378 }
5379 return -1;
5380 }
5381
5382 static rtx
5383 aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
5384 const_tree fntype)
5385 {
5386 /* We aren't passed a decl, so we can't check that a call is local.
5387 However, it isn't clear that that would be a win anyway, since it
5388 might limit some tail-calling opportunities. */
5389 enum arm_pcs pcs_variant;
5390 int unsignedp ATTRIBUTE_UNUSED;
5391
5392 if (fntype)
5393 {
5394 const_tree fndecl = NULL_TREE;
5395
5396 if (TREE_CODE (fntype) == FUNCTION_DECL)
5397 {
5398 fndecl = fntype;
5399 fntype = TREE_TYPE (fntype);
5400 }
5401
5402 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5403 }
5404 else
5405 pcs_variant = arm_pcs_default;
5406
5407 /* Promote integer types. */
5408 if (type && INTEGRAL_TYPE_P (type))
5409 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5410
5411 if (pcs_variant != ARM_PCS_AAPCS)
5412 {
5413 int i;
5414
5415 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5416 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5417 type))
5418 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5419 mode, type);
5420 }
5421
5422 /* Promote small structs returned in a register to full-word size
5423 for big-endian AAPCS. */
5424 if (type && arm_return_in_msb (type))
5425 {
5426 HOST_WIDE_INT size = int_size_in_bytes (type);
5427 if (size % UNITS_PER_WORD != 0)
5428 {
5429 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5430 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5431 }
5432 }
5433
5434 return gen_rtx_REG (mode, R0_REGNUM);
5435 }
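
/* Illustrative example (editorial comment, not part of the original
   code): on a big-endian AAPCS target a small aggregate such as

     struct s { char c[3]; } f (void);

   is returned in r0.  Its size (3 bytes) is not a multiple of
   UNITS_PER_WORD, so the code above rounds it up to 4 bytes and picks
   SImode, placing the value at the most significant end of the
   register as the MSB-return convention requires.  */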
5436
5437 static rtx
5438 aapcs_libcall_value (enum machine_mode mode)
5439 {
5440 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5441 && GET_MODE_SIZE (mode) <= 4)
5442 mode = SImode;
5443
5444 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5445 }
5446
5447 /* Lay out a function argument using the AAPCS rules. The rule
5448 numbers referred to here are those in the AAPCS. */
5449 static void
5450 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
5451 const_tree type, bool named)
5452 {
5453 int nregs, nregs2;
5454 int ncrn;
5455
5456 /* We only need to do this once per argument. */
5457 if (pcum->aapcs_arg_processed)
5458 return;
5459
5460 pcum->aapcs_arg_processed = true;
5461
5462 /* Special case: if named is false then we are handling an incoming
5463 anonymous argument which is on the stack. */
5464 if (!named)
5465 return;
5466
5467 /* Is this a potential co-processor register candidate? */
5468 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5469 {
5470 int slot = aapcs_select_call_coproc (pcum, mode, type);
5471 pcum->aapcs_cprc_slot = slot;
5472
5473 /* We don't have to apply any of the rules from part B of the
5474 preparation phase, these are handled elsewhere in the
5475 compiler. */
5476
5477 if (slot >= 0)
5478 {
5479 /* A co-processor register candidate goes either in its own
5480 class of registers or on the stack. */
5481 if (!pcum->aapcs_cprc_failed[slot])
5482 {
5483 /* C1.cp - Try to allocate the argument to co-processor
5484 registers. */
5485 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5486 return;
5487
5488 /* C2.cp - Put the argument on the stack and note that we
5489 can't assign any more candidates in this slot. We also
5490 need to note that we have allocated stack space, so that
5491 we won't later try to split a non-cprc candidate between
5492 core registers and the stack. */
5493 pcum->aapcs_cprc_failed[slot] = true;
5494 pcum->can_split = false;
5495 }
5496
5497 /* We didn't get a register, so this argument goes on the
5498 stack. */
5499 gcc_assert (pcum->can_split == false);
5500 return;
5501 }
5502 }
5503
5504 /* C3 - For double-word aligned arguments, round the NCRN up to the
5505 next even number. */
5506 ncrn = pcum->aapcs_ncrn;
5507 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5508 ncrn++;
5509
5510 nregs = ARM_NUM_REGS2 (mode, type);
5511
5512 /* Sigh, this test should really assert that nregs > 0, but a GCC
5513 extension allows empty structs and then gives them zero size; it
5514 then allows such a structure to be passed by value. For some of
5515 the code below we have to pretend that such an argument has
5516 non-zero size so that we 'locate' it correctly either in
5517 registers or on the stack. */
5518 gcc_assert (nregs >= 0);
5519
5520 nregs2 = nregs ? nregs : 1;
5521
5522 /* C4 - Argument fits entirely in core registers. */
5523 if (ncrn + nregs2 <= NUM_ARG_REGS)
5524 {
5525 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5526 pcum->aapcs_next_ncrn = ncrn + nregs;
5527 return;
5528 }
5529
5530 /* C5 - Some core registers left and there are no arguments already
5531 on the stack: split this argument between the remaining core
5532 registers and the stack. */
5533 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5534 {
5535 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5536 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5537 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5538 return;
5539 }
5540
5541 /* C6 - NCRN is set to 4. */
5542 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5543
5544 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
5545 return;
5546 }
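
/* Worked example (editorial comment, not part of the original code),
   assuming the usual four core argument registers r0-r3
   (NUM_ARG_REGS == 4) and the soft-float base AAPCS:

     void f (int a, double b, int c);

   A is allocated to r0 (rule C4).  B needs doubleword alignment, so
   the NCRN is rounded up from 1 to 2 (rule C3) and B occupies r2-r3
   (rule C4).  No core registers remain for C, so it goes on the stack
   (rules C6-C8).  Under the VFP variant of the AAPCS, B would instead
   be picked up by the co-processor code above and passed in d0.  */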
5547
5548 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5549 for a call to a function whose data type is FNTYPE.
5550 For a library call, FNTYPE is NULL. */
5551 void
5552 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5553 rtx libname,
5554 tree fndecl ATTRIBUTE_UNUSED)
5555 {
5556 /* Determine the calling convention variant to use. */
5557 if (fntype)
5558 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5559 else
5560 pcum->pcs_variant = arm_pcs_default;
5561
5562 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5563 {
5564 if (arm_libcall_uses_aapcs_base (libname))
5565 pcum->pcs_variant = ARM_PCS_AAPCS;
5566
5567 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5568 pcum->aapcs_reg = NULL_RTX;
5569 pcum->aapcs_partial = 0;
5570 pcum->aapcs_arg_processed = false;
5571 pcum->aapcs_cprc_slot = -1;
5572 pcum->can_split = true;
5573
5574 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5575 {
5576 int i;
5577
5578 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5579 {
5580 pcum->aapcs_cprc_failed[i] = false;
5581 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5582 }
5583 }
5584 return;
5585 }
5586
5587 /* Legacy ABIs */
5588
5589 /* On the ARM, the offset starts at 0. */
5590 pcum->nregs = 0;
5591 pcum->iwmmxt_nregs = 0;
5592 pcum->can_split = true;
5593
5594 /* Varargs vectors are treated the same as long long.
5595 named_count avoids having to change the way arm handles 'named'. */
5596 pcum->named_count = 0;
5597 pcum->nargs = 0;
5598
5599 if (TARGET_REALLY_IWMMXT && fntype)
5600 {
5601 tree fn_arg;
5602
5603 for (fn_arg = TYPE_ARG_TYPES (fntype);
5604 fn_arg;
5605 fn_arg = TREE_CHAIN (fn_arg))
5606 pcum->named_count += 1;
5607
5608 if (! pcum->named_count)
5609 pcum->named_count = INT_MAX;
5610 }
5611 }
5612
5613 /* Return true if we use LRA instead of reload pass. */
5614 static bool
5615 arm_lra_p (void)
5616 {
5617 return arm_lra_flag;
5618 }
5619
5620 /* Return true if mode/type need doubleword alignment. */
5621 static bool
5622 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
5623 {
5624 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5625 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5626 }
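
/* For example (editorial comment, not part of the original code): on
   an AAPCS target PARM_BOUNDARY is 32 bits, while DImode and DFmode
   are 64-bit aligned, so 'long long', 'double' and any type declared
   with __attribute__ ((aligned (8))) need doubleword alignment; 'int'
   and 'float' do not.  */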
5627
5628
5629 /* Determine where to put an argument to a function.
5630 Value is zero to push the argument on the stack,
5631 or a hard register in which to store the argument.
5632
5633 MODE is the argument's machine mode.
5634 TYPE is the data type of the argument (as a tree).
5635 This is null for libcalls where that information may
5636 not be available.
5637 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5638 the preceding args and about the function being called.
5639 NAMED is nonzero if this argument is a named parameter
5640 (otherwise it is an extra parameter matching an ellipsis).
5641
5642 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5643 other arguments are passed on the stack. If (NAMED == 0) (which happens
5644 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5645 defined), say it is passed on the stack (function_prologue will
5646 indeed make it live on the stack if necessary). */
5647
5648 static rtx
5649 arm_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
5650 const_tree type, bool named)
5651 {
5652 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5653 int nregs;
5654
5655 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5656 a call insn (op3 of a call_value insn). */
5657 if (mode == VOIDmode)
5658 return const0_rtx;
5659
5660 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5661 {
5662 aapcs_layout_arg (pcum, mode, type, named);
5663 return pcum->aapcs_reg;
5664 }
5665
5666 /* Varargs vectors are treated the same as long long.
5667 named_count avoids having to change the way arm handles 'named'. */
5668 if (TARGET_IWMMXT_ABI
5669 && arm_vector_mode_supported_p (mode)
5670 && pcum->named_count > pcum->nargs + 1)
5671 {
5672 if (pcum->iwmmxt_nregs <= 9)
5673 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5674 else
5675 {
5676 pcum->can_split = false;
5677 return NULL_RTX;
5678 }
5679 }
5680
5681 /* Put doubleword aligned quantities in even register pairs. */
5682 if (pcum->nregs & 1
5683 && ARM_DOUBLEWORD_ALIGN
5684 && arm_needs_doubleword_align (mode, type))
5685 pcum->nregs++;
5686
5687 /* Only allow splitting an arg between regs and memory if all preceding
5688 args were allocated to regs. For args passed by reference we only count
5689 the reference pointer. */
5690 if (pcum->can_split)
5691 nregs = 1;
5692 else
5693 nregs = ARM_NUM_REGS2 (mode, type);
5694
5695 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5696 return NULL_RTX;
5697
5698 return gen_rtx_REG (mode, pcum->nregs);
5699 }
5700
5701 static unsigned int
5702 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
5703 {
5704 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5705 ? DOUBLEWORD_ALIGNMENT
5706 : PARM_BOUNDARY);
5707 }
5708
5709 static int
5710 arm_arg_partial_bytes (cumulative_args_t pcum_v, enum machine_mode mode,
5711 tree type, bool named)
5712 {
5713 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5714 int nregs = pcum->nregs;
5715
5716 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5717 {
5718 aapcs_layout_arg (pcum, mode, type, named);
5719 return pcum->aapcs_partial;
5720 }
5721
5722 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
5723 return 0;
5724
5725 if (NUM_ARG_REGS > nregs
5726 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
5727 && pcum->can_split)
5728 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
5729
5730 return 0;
5731 }
5732
5733 /* Update the data in PCUM to advance over an argument
5734 of mode MODE and data type TYPE.
5735 (TYPE is null for libcalls where that information may not be available.) */
5736
5737 static void
5738 arm_function_arg_advance (cumulative_args_t pcum_v, enum machine_mode mode,
5739 const_tree type, bool named)
5740 {
5741 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5742
5743 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5744 {
5745 aapcs_layout_arg (pcum, mode, type, named);
5746
5747 if (pcum->aapcs_cprc_slot >= 0)
5748 {
5749 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
5750 type);
5751 pcum->aapcs_cprc_slot = -1;
5752 }
5753
5754 /* Generic stuff. */
5755 pcum->aapcs_arg_processed = false;
5756 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
5757 pcum->aapcs_reg = NULL_RTX;
5758 pcum->aapcs_partial = 0;
5759 }
5760 else
5761 {
5762 pcum->nargs += 1;
5763 if (arm_vector_mode_supported_p (mode)
5764 && pcum->named_count > pcum->nargs
5765 && TARGET_IWMMXT_ABI)
5766 pcum->iwmmxt_nregs += 1;
5767 else
5768 pcum->nregs += ARM_NUM_REGS2 (mode, type);
5769 }
5770 }
5771
5772 /* Variable sized types are passed by reference. This is a GCC
5773 extension to the ARM ABI. */
5774
5775 static bool
5776 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
5777 enum machine_mode mode ATTRIBUTE_UNUSED,
5778 const_tree type, bool named ATTRIBUTE_UNUSED)
5779 {
5780 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
5781 }
5782 \f
5783 /* Encode the current state of the #pragma [no_]long_calls. */
5784 typedef enum
5785 {
5786 OFF, /* No #pragma [no_]long_calls is in effect. */
5787 LONG, /* #pragma long_calls is in effect. */
5788 SHORT /* #pragma no_long_calls is in effect. */
5789 } arm_pragma_enum;
5790
5791 static arm_pragma_enum arm_pragma_long_calls = OFF;
5792
5793 void
5794 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5795 {
5796 arm_pragma_long_calls = LONG;
5797 }
5798
5799 void
5800 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5801 {
5802 arm_pragma_long_calls = SHORT;
5803 }
5804
5805 void
5806 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
5807 {
5808 arm_pragma_long_calls = OFF;
5809 }
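
/* Example of the pragmas handled above, as they would appear in user
   source (editorial illustration only):

     #pragma long_calls
     void far_away (void);   // calls to far_away use a long call
     #pragma no_long_calls
     void near_by (void);    // calls to near_by use a plain BL
     #pragma long_calls_off
     void other (void);      // back to the command-line default
*/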
5810 \f
5811 /* Handle an attribute requiring a FUNCTION_DECL;
5812 arguments as in struct attribute_spec.handler. */
5813 static tree
5814 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
5815 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5816 {
5817 if (TREE_CODE (*node) != FUNCTION_DECL)
5818 {
5819 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5820 name);
5821 *no_add_attrs = true;
5822 }
5823
5824 return NULL_TREE;
5825 }
5826
5827 /* Handle an "interrupt" or "isr" attribute;
5828 arguments as in struct attribute_spec.handler. */
5829 static tree
5830 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
5831 bool *no_add_attrs)
5832 {
5833 if (DECL_P (*node))
5834 {
5835 if (TREE_CODE (*node) != FUNCTION_DECL)
5836 {
5837 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5838 name);
5839 *no_add_attrs = true;
5840 }
5841 /* FIXME: the argument if any is checked for type attributes;
5842 should it be checked for decl ones? */
5843 }
5844 else
5845 {
5846 if (TREE_CODE (*node) == FUNCTION_TYPE
5847 || TREE_CODE (*node) == METHOD_TYPE)
5848 {
5849 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
5850 {
5851 warning (OPT_Wattributes, "%qE attribute ignored",
5852 name);
5853 *no_add_attrs = true;
5854 }
5855 }
5856 else if (TREE_CODE (*node) == POINTER_TYPE
5857 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
5858 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
5859 && arm_isr_value (args) != ARM_FT_UNKNOWN)
5860 {
5861 *node = build_variant_type_copy (*node);
5862 TREE_TYPE (*node) = build_type_attribute_variant
5863 (TREE_TYPE (*node),
5864 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
5865 *no_add_attrs = true;
5866 }
5867 else
5868 {
5869 /* Possibly pass this attribute on from the type to a decl. */
5870 if (flags & ((int) ATTR_FLAG_DECL_NEXT
5871 | (int) ATTR_FLAG_FUNCTION_NEXT
5872 | (int) ATTR_FLAG_ARRAY_NEXT))
5873 {
5874 *no_add_attrs = true;
5875 return tree_cons (name, args, NULL_TREE);
5876 }
5877 else
5878 {
5879 warning (OPT_Wattributes, "%qE attribute ignored",
5880 name);
5881 }
5882 }
5883 }
5884
5885 return NULL_TREE;
5886 }
5887
5888 /* Handle a "pcs" attribute; arguments as in struct
5889 attribute_spec.handler. */
5890 static tree
5891 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
5892 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
5893 {
5894 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
5895 {
5896 warning (OPT_Wattributes, "%qE attribute ignored", name);
5897 *no_add_attrs = true;
5898 }
5899 return NULL_TREE;
5900 }
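
/* Example (editorial illustration only): the "pcs" attribute selects
   the procedure call standard for an individual function type, e.g.

     double f (double) __attribute__ ((pcs ("aapcs")));

   forces the soft-float AAPCS variant for calls to f even when the
   rest of the code uses the VFP variant; "aapcs-vfp" selects the
   reverse.  Any other string hits the warning above and is ignored.  */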
5901
5902 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
5903 /* Handle the "notshared" attribute. This attribute is another way of
5904 requesting hidden visibility. ARM's compiler supports
5905 "__declspec(notshared)"; we support the same thing via an
5906 attribute. */
5907
5908 static tree
5909 arm_handle_notshared_attribute (tree *node,
5910 tree name ATTRIBUTE_UNUSED,
5911 tree args ATTRIBUTE_UNUSED,
5912 int flags ATTRIBUTE_UNUSED,
5913 bool *no_add_attrs)
5914 {
5915 tree decl = TYPE_NAME (*node);
5916
5917 if (decl)
5918 {
5919 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
5920 DECL_VISIBILITY_SPECIFIED (decl) = 1;
5921 *no_add_attrs = false;
5922 }
5923 return NULL_TREE;
5924 }
5925 #endif
5926
5927 /* Return 0 if the attributes for two types are incompatible, 1 if they
5928 are compatible, and 2 if they are nearly compatible (which causes a
5929 warning to be generated). */
5930 static int
5931 arm_comp_type_attributes (const_tree type1, const_tree type2)
5932 {
5933 int l1, l2, s1, s2;
5934
5935 /* Check for mismatch of non-default calling convention. */
5936 if (TREE_CODE (type1) != FUNCTION_TYPE)
5937 return 1;
5938
5939 /* Check for mismatched call attributes. */
5940 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
5941 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
5942 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
5943 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
5944
5945 /* Only bother to check if an attribute is defined. */
5946 if (l1 | l2 | s1 | s2)
5947 {
5948 /* If one type has an attribute, the other must have the same attribute. */
5949 if ((l1 != l2) || (s1 != s2))
5950 return 0;
5951
5952 /* Disallow mixed attributes. */
5953 if ((l1 & s2) || (l2 & s1))
5954 return 0;
5955 }
5956
5957 /* Check for mismatched ISR attribute. */
5958 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
5959 if (! l1)
5960 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
5961 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
5962 if (! l2)
5963 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
5964 if (l1 != l2)
5965 return 0;
5966
5967 return 1;
5968 }
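
/* Example (editorial illustration only):

     void f (void) __attribute__ ((long_call));
     void g (void) __attribute__ ((short_call));

   f and g have incompatible function types under the check above, so
   assigning &f to a pointer declared with g's type is diagnosed; the
   mixed long_call/short_call test is what rejects it.  */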
5969
5970 /* Assign default attributes to a newly defined type. This is used to
5971 set short_call/long_call attributes for function types of
5972 functions defined inside corresponding #pragma scopes. */
5973 static void
5974 arm_set_default_type_attributes (tree type)
5975 {
5976 /* Add __attribute__ ((long_call)) to all functions when inside
5977 #pragma long_calls, or __attribute__ ((short_call)) when inside
5978 #pragma no_long_calls. */
5979 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
5980 {
5981 tree type_attr_list, attr_name;
5982 type_attr_list = TYPE_ATTRIBUTES (type);
5983
5984 if (arm_pragma_long_calls == LONG)
5985 attr_name = get_identifier ("long_call");
5986 else if (arm_pragma_long_calls == SHORT)
5987 attr_name = get_identifier ("short_call");
5988 else
5989 return;
5990
5991 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
5992 TYPE_ATTRIBUTES (type) = type_attr_list;
5993 }
5994 }
5995 \f
5996 /* Return true if DECL is known to be linked into section SECTION. */
5997
5998 static bool
5999 arm_function_in_section_p (tree decl, section *section)
6000 {
6001 /* We can only be certain about functions defined in the same
6002 compilation unit. */
6003 if (!TREE_STATIC (decl))
6004 return false;
6005
6006 /* Make sure that SYMBOL always binds to the definition in this
6007 compilation unit. */
6008 if (!targetm.binds_local_p (decl))
6009 return false;
6010
6011 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6012 if (!DECL_SECTION_NAME (decl))
6013 {
6014 /* Make sure that we will not create a unique section for DECL. */
6015 if (flag_function_sections || DECL_ONE_ONLY (decl))
6016 return false;
6017 }
6018
6019 return function_section (decl) == section;
6020 }
6021
6022 /* Return nonzero if a 32-bit "long_call" should be generated for
6023 a call from the current function to DECL. We generate a long_call
6024 if the function:
6025
6026 a. has an __attribute__((long_call))
6027 or b. is within the scope of a #pragma long_calls
6028 or c. the -mlong-calls command line switch has been specified
6029
6030 However we do not generate a long call if the function:
6031
6032 d. has an __attribute__ ((short_call))
6033 or e. is inside the scope of a #pragma no_long_calls
6034 or f. is defined in the same section as the current function. */
6035
6036 bool
6037 arm_is_long_call_p (tree decl)
6038 {
6039 tree attrs;
6040
6041 if (!decl)
6042 return TARGET_LONG_CALLS;
6043
6044 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6045 if (lookup_attribute ("short_call", attrs))
6046 return false;
6047
6048 /* For "f", be conservative, and only cater for cases in which the
6049 whole of the current function is placed in the same section. */
6050 if (!flag_reorder_blocks_and_partition
6051 && TREE_CODE (decl) == FUNCTION_DECL
6052 && arm_function_in_section_p (decl, current_function_section ()))
6053 return false;
6054
6055 if (lookup_attribute ("long_call", attrs))
6056 return true;
6057
6058 return TARGET_LONG_CALLS;
6059 }
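
/* Example (editorial illustration only): with the rules above,

     void far_func (void) __attribute__ ((long_call));

   makes every call to far_func use a long call sequence even without
   -mlong-calls, while __attribute__ ((short_call)) or a definition of
   the callee in the same section as the caller suppresses it.  */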
6060
6061 /* Return nonzero if it is ok to make a tail-call to DECL. */
6062 static bool
6063 arm_function_ok_for_sibcall (tree decl, tree exp)
6064 {
6065 unsigned long func_type;
6066
6067 if (cfun->machine->sibcall_blocked)
6068 return false;
6069
6070 /* Never tailcall something if we are generating code for Thumb-1. */
6071 if (TARGET_THUMB1)
6072 return false;
6073
6074 /* The PIC register is live on entry to VxWorks PLT entries, so we
6075 must make the call before restoring the PIC register. */
6076 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6077 return false;
6078
6079 /* Cannot tail-call to long calls, since these are out of range of
6080 a branch instruction. */
6081 if (decl && arm_is_long_call_p (decl))
6082 return false;
6083
6084 /* If we are interworking and the function is not declared static
6085 then we can't tail-call it unless we know that it exists in this
6086 compilation unit (since it might be a Thumb routine). */
6087 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6088 && !TREE_ASM_WRITTEN (decl))
6089 return false;
6090
6091 func_type = arm_current_func_type ();
6092 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6093 if (IS_INTERRUPT (func_type))
6094 return false;
6095
6096 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6097 {
6098 /* Check that the return value locations are the same. For
6099 example that we aren't returning a value from the sibling in
6100 a VFP register but then need to transfer it to a core
6101 register. */
6102 rtx a, b;
6103
6104 a = arm_function_value (TREE_TYPE (exp), decl, false);
6105 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6106 cfun->decl, false);
6107 if (!rtx_equal_p (a, b))
6108 return false;
6109 }
6110
6111 /* Never tailcall if function may be called with a misaligned SP. */
6112 if (IS_STACKALIGN (func_type))
6113 return false;
6114
6115 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6116 references should become a NOP. Don't convert such calls into
6117 sibling calls. */
6118 if (TARGET_AAPCS_BASED
6119 && arm_abi == ARM_ABI_AAPCS
6120 && decl
6121 && DECL_WEAK (decl))
6122 return false;
6123
6124 /* Everything else is ok. */
6125 return true;
6126 }
6127
6128 \f
6129 /* Addressing mode support functions. */
6130
6131 /* Return nonzero if X is a legitimate immediate operand when compiling
6132 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6133 int
6134 legitimate_pic_operand_p (rtx x)
6135 {
6136 if (GET_CODE (x) == SYMBOL_REF
6137 || (GET_CODE (x) == CONST
6138 && GET_CODE (XEXP (x, 0)) == PLUS
6139 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6140 return 0;
6141
6142 return 1;
6143 }
6144
6145 /* Record that the current function needs a PIC register. Initialize
6146 cfun->machine->pic_reg if we have not already done so. */
6147
6148 static void
6149 require_pic_register (void)
6150 {
6151 /* A lot of the logic here is made obscure by the fact that this
6152 routine gets called as part of the rtx cost estimation process.
6153 We don't want those calls to affect any assumptions about the real
6154 function; and further, we can't call entry_of_function() until we
6155 start the real expansion process. */
6156 if (!crtl->uses_pic_offset_table)
6157 {
6158 gcc_assert (can_create_pseudo_p ());
6159 if (arm_pic_register != INVALID_REGNUM
6160 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6161 {
6162 if (!cfun->machine->pic_reg)
6163 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6164
6165 /* Play games to avoid marking the function as needing pic
6166 if we are being called as part of the cost-estimation
6167 process. */
6168 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6169 crtl->uses_pic_offset_table = 1;
6170 }
6171 else
6172 {
6173 rtx seq, insn;
6174
6175 if (!cfun->machine->pic_reg)
6176 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6177
6178 /* Play games to avoid marking the function as needing pic
6179 if we are being called as part of the cost-estimation
6180 process. */
6181 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6182 {
6183 crtl->uses_pic_offset_table = 1;
6184 start_sequence ();
6185
6186 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6187 && arm_pic_register > LAST_LO_REGNUM)
6188 emit_move_insn (cfun->machine->pic_reg,
6189 gen_rtx_REG (Pmode, arm_pic_register));
6190 else
6191 arm_load_pic_register (0UL);
6192
6193 seq = get_insns ();
6194 end_sequence ();
6195
6196 for (insn = seq; insn; insn = NEXT_INSN (insn))
6197 if (INSN_P (insn))
6198 INSN_LOCATION (insn) = prologue_location;
6199
6200 /* We can be called during expansion of PHI nodes, where
6201 we can't yet emit instructions directly in the final
6202 insn stream. Queue the insns on the entry edge, they will
6203 be committed after everything else is expanded. */
6204 insert_insn_on_edge (seq,
6205 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6206 }
6207 }
6208 }
6209 }
6210
6211 rtx
6212 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
6213 {
6214 if (GET_CODE (orig) == SYMBOL_REF
6215 || GET_CODE (orig) == LABEL_REF)
6216 {
6217 rtx insn;
6218
6219 if (reg == 0)
6220 {
6221 gcc_assert (can_create_pseudo_p ());
6222 reg = gen_reg_rtx (Pmode);
6223 }
6224
6225 /* VxWorks does not impose a fixed gap between segments; the run-time
6226 gap can be different from the object-file gap. We therefore can't
6227 use GOTOFF unless we are absolutely sure that the symbol is in the
6228 same segment as the GOT. Unfortunately, the flexibility of linker
6229 scripts means that we can't be sure of that in general, so assume
6230 that GOTOFF is never valid on VxWorks. */
6231 if ((GET_CODE (orig) == LABEL_REF
6232 || (GET_CODE (orig) == SYMBOL_REF &&
6233 SYMBOL_REF_LOCAL_P (orig)))
6234 && NEED_GOT_RELOC
6235 && arm_pic_data_is_text_relative)
6236 insn = arm_pic_static_addr (orig, reg);
6237 else
6238 {
6239 rtx pat;
6240 rtx mem;
6241
6242 /* If this function doesn't have a pic register, create one now. */
6243 require_pic_register ();
6244
6245 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6246
6247 /* Make the MEM as close to a constant as possible. */
6248 mem = SET_SRC (pat);
6249 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6250 MEM_READONLY_P (mem) = 1;
6251 MEM_NOTRAP_P (mem) = 1;
6252
6253 insn = emit_insn (pat);
6254 }
6255
6256 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6257 by loop. */
6258 set_unique_reg_note (insn, REG_EQUAL, orig);
6259
6260 return reg;
6261 }
6262 else if (GET_CODE (orig) == CONST)
6263 {
6264 rtx base, offset;
6265
6266 if (GET_CODE (XEXP (orig, 0)) == PLUS
6267 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6268 return orig;
6269
6270 /* Handle the case where we have: const (UNSPEC_TLS). */
6271 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6272 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6273 return orig;
6274
6275 /* Handle the case where we have:
6276 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6277 CONST_INT. */
6278 if (GET_CODE (XEXP (orig, 0)) == PLUS
6279 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6280 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6281 {
6282 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6283 return orig;
6284 }
6285
6286 if (reg == 0)
6287 {
6288 gcc_assert (can_create_pseudo_p ());
6289 reg = gen_reg_rtx (Pmode);
6290 }
6291
6292 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6293
6294 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6295 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6296 base == reg ? 0 : reg);
6297
6298 if (CONST_INT_P (offset))
6299 {
6300 /* The base register doesn't really matter, we only want to
6301 test the index for the appropriate mode. */
6302 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6303 {
6304 gcc_assert (can_create_pseudo_p ());
6305 offset = force_reg (Pmode, offset);
6306 }
6307
6308 if (CONST_INT_P (offset))
6309 return plus_constant (Pmode, base, INTVAL (offset));
6310 }
6311
6312 if (GET_MODE_SIZE (mode) > 4
6313 && (GET_MODE_CLASS (mode) == MODE_INT
6314 || TARGET_SOFT_FLOAT))
6315 {
6316 emit_insn (gen_addsi3 (reg, base, offset));
6317 return reg;
6318 }
6319
6320 return gen_rtx_PLUS (Pmode, base, offset);
6321 }
6322
6323 return orig;
6324 }
6325
6326
6327 /* Find a spare register to use during the prolog of a function. */
6328
6329 static int
6330 thumb_find_work_register (unsigned long pushed_regs_mask)
6331 {
6332 int reg;
6333
6334 /* Check the argument registers first as these are call-used. The
6335 register allocation order means that sometimes r3 might be used
6336 but earlier argument registers might not, so check them all. */
6337 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6338 if (!df_regs_ever_live_p (reg))
6339 return reg;
6340
6341 /* Before going on to check the call-saved registers we can try a couple
6342 more ways of deducing that r3 is available. The first is when we are
6343 pushing anonymous arguments onto the stack and we have less than 4
6344 registers worth of fixed arguments (*). In this case r3 will be part of
6345 the variable argument list and so we can be sure that it will be
6346 pushed right at the start of the function. Hence it will be available
6347 for the rest of the prologue.
6348 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6349 if (cfun->machine->uses_anonymous_args
6350 && crtl->args.pretend_args_size > 0)
6351 return LAST_ARG_REGNUM;
6352
6353 /* The other case is when we have fixed arguments but less than 4 registers
6354 worth. In this case r3 might be used in the body of the function, but
6355 it is not being used to convey an argument into the function. In theory
6356 we could just check crtl->args.size to see how many bytes are
6357 being passed in argument registers, but it seems that it is unreliable.
6358 Sometimes it will have the value 0 when in fact arguments are being
6359 passed. (See testcase execute/20021111-1.c for an example). So we also
6360 check the args_info.nregs field. The problem with this field is
6361 that it makes no allowances for arguments that are passed to the
6362 function but which are not used. Hence we could miss an opportunity
6363 when a function has an unused argument in r3. But it is better to be
6364 safe than to be sorry. */
6365 if (! cfun->machine->uses_anonymous_args
6366 && crtl->args.size >= 0
6367 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6368 && (TARGET_AAPCS_BASED
6369 ? crtl->args.info.aapcs_ncrn < 4
6370 : crtl->args.info.nregs < 4))
6371 return LAST_ARG_REGNUM;
6372
6373 /* Otherwise look for a call-saved register that is going to be pushed. */
6374 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6375 if (pushed_regs_mask & (1 << reg))
6376 return reg;
6377
6378 if (TARGET_THUMB2)
6379 {
6380 /* Thumb-2 can use high regs. */
6381 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6382 if (pushed_regs_mask & (1 << reg))
6383 return reg;
6384 }
6385 /* Something went wrong - thumb_compute_save_reg_mask()
6386 should have arranged for a suitable register to be pushed. */
6387 gcc_unreachable ();
6388 }
6389
6390 static GTY(()) int pic_labelno;
6391
6392 /* Generate code to load the PIC register. In Thumb mode a spare low
6393 register from SAVED_REGS is used as a scratch when one is needed. */
6394
6395 void
6396 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6397 {
6398 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6399
6400 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6401 return;
6402
6403 gcc_assert (flag_pic);
6404
6405 pic_reg = cfun->machine->pic_reg;
6406 if (TARGET_VXWORKS_RTP)
6407 {
6408 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6409 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6410 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6411
6412 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6413
6414 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6415 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6416 }
6417 else
6418 {
6419 /* We use an UNSPEC rather than a LABEL_REF because this label
6420 never appears in the code stream. */
6421
6422 labelno = GEN_INT (pic_labelno++);
6423 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6424 l1 = gen_rtx_CONST (VOIDmode, l1);
6425
6426 /* On the ARM the PC register contains 'dot + 8' at the time of the
6427 addition, on the Thumb it is 'dot + 4'. */
6428 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6429 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6430 UNSPEC_GOTSYM_OFF);
6431 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6432
6433 if (TARGET_32BIT)
6434 {
6435 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6436 }
6437 else /* TARGET_THUMB1 */
6438 {
6439 if (arm_pic_register != INVALID_REGNUM
6440 && REGNO (pic_reg) > LAST_LO_REGNUM)
6441 {
6442 /* We will have pushed the pic register, so we should always be
6443 able to find a work register. */
6444 pic_tmp = gen_rtx_REG (SImode,
6445 thumb_find_work_register (saved_regs));
6446 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6447 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6448 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6449 }
6450 else if (arm_pic_register != INVALID_REGNUM
6451 && arm_pic_register > LAST_LO_REGNUM
6452 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6453 {
6454 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6455 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6456 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6457 }
6458 else
6459 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6460 }
6461 }
6462
6463 /* Need to emit this whether or not we obey regdecls,
6464 since setjmp/longjmp can cause life info to screw up. */
6465 emit_use (pic_reg);
6466 }
6467
6468 /* Generate code to load the address of a static var when flag_pic is set. */
6469 static rtx
6470 arm_pic_static_addr (rtx orig, rtx reg)
6471 {
6472 rtx l1, labelno, offset_rtx, insn;
6473
6474 gcc_assert (flag_pic);
6475
6476 /* We use an UNSPEC rather than a LABEL_REF because this label
6477 never appears in the code stream. */
6478 labelno = GEN_INT (pic_labelno++);
6479 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6480 l1 = gen_rtx_CONST (VOIDmode, l1);
6481
6482 /* On the ARM the PC register contains 'dot + 8' at the time of the
6483 addition, on the Thumb it is 'dot + 4'. */
6484 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6485 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6486 UNSPEC_SYMBOL_OFFSET);
6487 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6488
6489 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6490 return insn;
6491 }
6492
6493 /* Return nonzero if X is valid as an ARM state addressing register. */
6494 static int
6495 arm_address_register_rtx_p (rtx x, int strict_p)
6496 {
6497 int regno;
6498
6499 if (!REG_P (x))
6500 return 0;
6501
6502 regno = REGNO (x);
6503
6504 if (strict_p)
6505 return ARM_REGNO_OK_FOR_BASE_P (regno);
6506
6507 return (regno <= LAST_ARM_REGNUM
6508 || regno >= FIRST_PSEUDO_REGISTER
6509 || regno == FRAME_POINTER_REGNUM
6510 || regno == ARG_POINTER_REGNUM);
6511 }
6512
6513 /* Return TRUE if this rtx is the difference of a symbol and a label,
6514 and will reduce to a PC-relative relocation in the object file.
6515 Expressions like this can be left alone when generating PIC, rather
6516 than forced through the GOT. */
6517 static int
6518 pcrel_constant_p (rtx x)
6519 {
6520 if (GET_CODE (x) == MINUS)
6521 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6522
6523 return FALSE;
6524 }
6525
6526 /* Return true if X will surely end up in an index register after next
6527 splitting pass. */
6528 static bool
6529 will_be_in_index_register (const_rtx x)
6530 {
6531 /* arm.md: calculate_pic_address will split this into a register. */
6532 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6533 }
6534
6535 /* Return nonzero if X is a valid ARM state address operand. */
6536 int
6537 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
6538 int strict_p)
6539 {
6540 bool use_ldrd;
6541 enum rtx_code code = GET_CODE (x);
6542
6543 if (arm_address_register_rtx_p (x, strict_p))
6544 return 1;
6545
6546 use_ldrd = (TARGET_LDRD
6547 && (mode == DImode
6548 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6549
6550 if (code == POST_INC || code == PRE_DEC
6551 || ((code == PRE_INC || code == POST_DEC)
6552 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6553 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6554
6555 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6556 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6557 && GET_CODE (XEXP (x, 1)) == PLUS
6558 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6559 {
6560 rtx addend = XEXP (XEXP (x, 1), 1);
6561
6562 /* Don't allow ldrd post-increment by register because it's hard
6563 to fix up invalid register choices. */
6564 if (use_ldrd
6565 && GET_CODE (x) == POST_MODIFY
6566 && REG_P (addend))
6567 return 0;
6568
6569 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6570 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6571 }
6572
6573 /* After reload constants split into minipools will have addresses
6574 from a LABEL_REF. */
6575 else if (reload_completed
6576 && (code == LABEL_REF
6577 || (code == CONST
6578 && GET_CODE (XEXP (x, 0)) == PLUS
6579 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6580 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6581 return 1;
6582
6583 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6584 return 0;
6585
6586 else if (code == PLUS)
6587 {
6588 rtx xop0 = XEXP (x, 0);
6589 rtx xop1 = XEXP (x, 1);
6590
6591 return ((arm_address_register_rtx_p (xop0, strict_p)
6592 && ((CONST_INT_P (xop1)
6593 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6594 || (!strict_p && will_be_in_index_register (xop1))))
6595 || (arm_address_register_rtx_p (xop1, strict_p)
6596 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6597 }
6598
6599 #if 0
6600 /* Reload currently can't handle MINUS, so disable this for now */
6601 else if (GET_CODE (x) == MINUS)
6602 {
6603 rtx xop0 = XEXP (x, 0);
6604 rtx xop1 = XEXP (x, 1);
6605
6606 return (arm_address_register_rtx_p (xop0, strict_p)
6607 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6608 }
6609 #endif
6610
6611 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6612 && code == SYMBOL_REF
6613 && CONSTANT_POOL_ADDRESS_P (x)
6614 && ! (flag_pic
6615 && symbol_mentioned_p (get_pool_constant (x))
6616 && ! pcrel_constant_p (get_pool_constant (x))))
6617 return 1;
6618
6619 return 0;
6620 }
6621
6622 /* Return nonzero if X is a valid Thumb-2 address operand. */
6623 static int
6624 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6625 {
6626 bool use_ldrd;
6627 enum rtx_code code = GET_CODE (x);
6628
6629 if (arm_address_register_rtx_p (x, strict_p))
6630 return 1;
6631
6632 use_ldrd = (TARGET_LDRD
6633 && (mode == DImode
6634 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6635
6636 if (code == POST_INC || code == PRE_DEC
6637 || ((code == PRE_INC || code == POST_DEC)
6638 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6639 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6640
6641 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6642 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6643 && GET_CODE (XEXP (x, 1)) == PLUS
6644 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6645 {
6646 /* Thumb-2 only has autoincrement by constant. */
6647 rtx addend = XEXP (XEXP (x, 1), 1);
6648 HOST_WIDE_INT offset;
6649
6650 if (!CONST_INT_P (addend))
6651 return 0;
6652
6653 offset = INTVAL (addend);
6654 if (GET_MODE_SIZE (mode) <= 4)
6655 return (offset > -256 && offset < 256);
6656
6657 return (use_ldrd && offset > -1024 && offset < 1024
6658 && (offset & 3) == 0);
6659 }
6660
6661 /* After reload constants split into minipools will have addresses
6662 from a LABEL_REF. */
6663 else if (reload_completed
6664 && (code == LABEL_REF
6665 || (code == CONST
6666 && GET_CODE (XEXP (x, 0)) == PLUS
6667 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6668 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6669 return 1;
6670
6671 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6672 return 0;
6673
6674 else if (code == PLUS)
6675 {
6676 rtx xop0 = XEXP (x, 0);
6677 rtx xop1 = XEXP (x, 1);
6678
6679 return ((arm_address_register_rtx_p (xop0, strict_p)
6680 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6681 || (!strict_p && will_be_in_index_register (xop1))))
6682 || (arm_address_register_rtx_p (xop1, strict_p)
6683 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6684 }
6685
6686 /* Normally we can assign constant values to target registers without
6687 the help of the constant pool. But there are cases where we have to
6688 use the constant pool, for example:
6689 1) assigning a label to a register;
6690 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6691
6692 A constant pool access of the form:
6693 (set (reg r0) (mem (symbol_ref (".LC0"))))
6694 will cause the use of the literal pool (later, in function arm_reorg).
6695 So here we mark such a format as invalid; the compiler will then
6696 adjust it into:
6697 (set (reg r0) (symbol_ref (".LC0")))
6698 (set (reg r0) (mem (reg r0))).
6699 No extra register is required, and (mem (reg r0)) won't cause the use
6700 of literal pools. */
6701 else if (arm_disable_literal_pool && code == SYMBOL_REF
6702 && CONSTANT_POOL_ADDRESS_P (x))
6703 return 0;
6704
6705 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6706 && code == SYMBOL_REF
6707 && CONSTANT_POOL_ADDRESS_P (x)
6708 && ! (flag_pic
6709 && symbol_mentioned_p (get_pool_constant (x))
6710 && ! pcrel_constant_p (get_pool_constant (x))))
6711 return 1;
6712
6713 return 0;
6714 }
6715
6716 /* Return nonzero if INDEX is valid for an address index operand in
6717 ARM state. */
6718 static int
6719 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
6720 int strict_p)
6721 {
6722 HOST_WIDE_INT range;
6723 enum rtx_code code = GET_CODE (index);
6724
6725 /* Standard coprocessor addressing modes. */
6726 if (TARGET_HARD_FLOAT
6727 && TARGET_VFP
6728 && (mode == SFmode || mode == DFmode))
6729 return (code == CONST_INT && INTVAL (index) < 1024
6730 && INTVAL (index) > -1024
6731 && (INTVAL (index) & 3) == 0);
6732
6733 /* For quad modes, we restrict the constant offset to be slightly less
6734 than what the instruction format permits. We do this because for
6735 quad mode moves, we will actually decompose them into two separate
6736 double-mode reads or writes. INDEX must therefore be a valid
6737 (double-mode) offset and so should INDEX+8. */
6738 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6739 return (code == CONST_INT
6740 && INTVAL (index) < 1016
6741 && INTVAL (index) > -1024
6742 && (INTVAL (index) & 3) == 0);
6743
6744 /* We have no such constraint on double mode offsets, so we permit the
6745 full range of the instruction format. */
6746 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6747 return (code == CONST_INT
6748 && INTVAL (index) < 1024
6749 && INTVAL (index) > -1024
6750 && (INTVAL (index) & 3) == 0);
6751
6752 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6753 return (code == CONST_INT
6754 && INTVAL (index) < 1024
6755 && INTVAL (index) > -1024
6756 && (INTVAL (index) & 3) == 0);
6757
6758 if (arm_address_register_rtx_p (index, strict_p)
6759 && (GET_MODE_SIZE (mode) <= 4))
6760 return 1;
6761
6762 if (mode == DImode || mode == DFmode)
6763 {
6764 if (code == CONST_INT)
6765 {
6766 HOST_WIDE_INT val = INTVAL (index);
6767
6768 if (TARGET_LDRD)
6769 return val > -256 && val < 256;
6770 else
6771 return val > -4096 && val < 4092;
6772 }
6773
6774 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
6775 }
6776
6777 if (GET_MODE_SIZE (mode) <= 4
6778 && ! (arm_arch4
6779 && (mode == HImode
6780 || mode == HFmode
6781 || (mode == QImode && outer == SIGN_EXTEND))))
6782 {
6783 if (code == MULT)
6784 {
6785 rtx xiop0 = XEXP (index, 0);
6786 rtx xiop1 = XEXP (index, 1);
6787
6788 return ((arm_address_register_rtx_p (xiop0, strict_p)
6789 && power_of_two_operand (xiop1, SImode))
6790 || (arm_address_register_rtx_p (xiop1, strict_p)
6791 && power_of_two_operand (xiop0, SImode)));
6792 }
6793 else if (code == LSHIFTRT || code == ASHIFTRT
6794 || code == ASHIFT || code == ROTATERT)
6795 {
6796 rtx op = XEXP (index, 1);
6797
6798 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6799 && CONST_INT_P (op)
6800 && INTVAL (op) > 0
6801 && INTVAL (op) <= 31);
6802 }
6803 }
6804
6805 /* For ARM v4 we may be doing a sign-extend operation during the
6806 load. */
6807 if (arm_arch4)
6808 {
6809 if (mode == HImode
6810 || mode == HFmode
6811 || (outer == SIGN_EXTEND && mode == QImode))
6812 range = 256;
6813 else
6814 range = 4096;
6815 }
6816 else
6817 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
6818
6819 return (code == CONST_INT
6820 && INTVAL (index) < range
6821 && INTVAL (index) > -range);
6822 }
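
/* A few addresses accepted by the checks above, written as assembly
   operands (editorial illustration only; the offsets shown lie inside
   the ranges tested above):

     [r1, #4]           SImode/QImode, immediate offset up to +/-4095
     [r1, r2, lsl #2]   SImode/QImode, scaled register index
     [r1, #-200]        HImode on ARMv4+, ldrh/ldrsh range is +/-255
     [r1, #252]         DImode with TARGET_LDRD, range is +/-255
     [r1, #1020]        SFmode/DFmode with VFP, word aligned, +/-1020  */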
6823
6824 /* Return true if OP is a valid index scaling factor for Thumb-2 address
6825 index operand. i.e. 1, 2, 4 or 8. */
6826 static bool
6827 thumb2_index_mul_operand (rtx op)
6828 {
6829 HOST_WIDE_INT val;
6830
6831 if (!CONST_INT_P (op))
6832 return false;
6833
6834 val = INTVAL (op);
6835 return (val == 1 || val == 2 || val == 4 || val == 8);
6836 }
6837
6838 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
6839 static int
6840 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
6841 {
6842 enum rtx_code code = GET_CODE (index);
6843
6844 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
6845 /* Standard coprocessor addressing modes. */
6846 if (TARGET_HARD_FLOAT
6847 && TARGET_VFP
6848 && (mode == SFmode || mode == DFmode))
6849 return (code == CONST_INT && INTVAL (index) < 1024
6850 /* Thumb-2 allows only > -256 index range for its core register
6851 load/stores. Since we allow SF/DF in core registers, we have
6852 to use the intersection between -256~4096 (core) and -1024~1024
6853 (coprocessor). */
6854 && INTVAL (index) > -256
6855 && (INTVAL (index) & 3) == 0);
6856
6857 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
6858 {
6859 /* For DImode assume values will usually live in core regs
6860 and only allow LDRD addressing modes. */
6861 if (!TARGET_LDRD || mode != DImode)
6862 return (code == CONST_INT
6863 && INTVAL (index) < 1024
6864 && INTVAL (index) > -1024
6865 && (INTVAL (index) & 3) == 0);
6866 }
6867
6868 /* For quad modes, we restrict the constant offset to be slightly less
6869 than what the instruction format permits. We do this because for
6870 quad mode moves, we will actually decompose them into two separate
6871 double-mode reads or writes. INDEX must therefore be a valid
6872 (double-mode) offset and so should INDEX+8. */
6873 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
6874 return (code == CONST_INT
6875 && INTVAL (index) < 1016
6876 && INTVAL (index) > -1024
6877 && (INTVAL (index) & 3) == 0);
6878
6879 /* We have no such constraint on double mode offsets, so we permit the
6880 full range of the instruction format. */
6881 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
6882 return (code == CONST_INT
6883 && INTVAL (index) < 1024
6884 && INTVAL (index) > -1024
6885 && (INTVAL (index) & 3) == 0);
6886
6887 if (arm_address_register_rtx_p (index, strict_p)
6888 && (GET_MODE_SIZE (mode) <= 4))
6889 return 1;
6890
6891 if (mode == DImode || mode == DFmode)
6892 {
6893 if (code == CONST_INT)
6894 {
6895 HOST_WIDE_INT val = INTVAL (index);
6896 /* ??? Can we assume ldrd for thumb2? */
6897 /* Thumb-2 ldrd only has reg+const addressing modes. */
6898 /* ldrd supports offsets of +-1020.
6899 However the ldr fallback does not. */
6900 return val > -256 && val < 256 && (val & 3) == 0;
6901 }
6902 else
6903 return 0;
6904 }
6905
6906 if (code == MULT)
6907 {
6908 rtx xiop0 = XEXP (index, 0);
6909 rtx xiop1 = XEXP (index, 1);
6910
6911 return ((arm_address_register_rtx_p (xiop0, strict_p)
6912 && thumb2_index_mul_operand (xiop1))
6913 || (arm_address_register_rtx_p (xiop1, strict_p)
6914 && thumb2_index_mul_operand (xiop0)));
6915 }
6916 else if (code == ASHIFT)
6917 {
6918 rtx op = XEXP (index, 1);
6919
6920 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
6921 && CONST_INT_P (op)
6922 && INTVAL (op) > 0
6923 && INTVAL (op) <= 3);
6924 }
6925
6926 return (code == CONST_INT
6927 && INTVAL (index) < 4096
6928 && INTVAL (index) > -256);
6929 }
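
/* Corresponding Thumb-2 examples (editorial illustration only):

     [r1, #4095]        SImode, positive 12-bit immediate
     [r1, #-255]        SImode, negative 8-bit immediate
     [r1, r2, lsl #3]   register index, shift amount 1 to 3
     [r1, #252]         DImode (ldrd), +/-255 and word aligned  */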
6930
6931 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
6932 static int
6933 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
6934 {
6935 int regno;
6936
6937 if (!REG_P (x))
6938 return 0;
6939
6940 regno = REGNO (x);
6941
6942 if (strict_p)
6943 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
6944
6945 return (regno <= LAST_LO_REGNUM
6946 || regno > LAST_VIRTUAL_REGISTER
6947 || regno == FRAME_POINTER_REGNUM
6948 || (GET_MODE_SIZE (mode) >= 4
6949 && (regno == STACK_POINTER_REGNUM
6950 || regno >= FIRST_PSEUDO_REGISTER
6951 || x == hard_frame_pointer_rtx
6952 || x == arg_pointer_rtx)));
6953 }
6954
6955 /* Return nonzero if x is a legitimate index register. This is the case
6956 for any base register that can access a QImode object. */
6957 inline static int
6958 thumb1_index_register_rtx_p (rtx x, int strict_p)
6959 {
6960 return thumb1_base_register_rtx_p (x, QImode, strict_p);
6961 }
6962
6963 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
6964
6965 The AP may be eliminated to either the SP or the FP, so we use the
6966 least common denominator, e.g. SImode, and offsets from 0 to 64.
6967
6968 ??? Verify whether the above is the right approach.
6969
6970 ??? Also, the FP may be eliminated to the SP, so perhaps that
6971 needs special handling also.
6972
6973 ??? Look at how the mips16 port solves this problem. It probably uses
6974 better ways to solve some of these problems.
6975
6976 Although it is not incorrect, we don't accept QImode and HImode
6977 addresses based on the frame pointer or arg pointer until the
6978 reload pass starts. This is so that eliminating such addresses
6979 into stack based ones won't produce impossible code. */
6980 int
6981 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
6982 {
6983 /* ??? Not clear if this is right. Experiment. */
6984 if (GET_MODE_SIZE (mode) < 4
6985 && !(reload_in_progress || reload_completed)
6986 && (reg_mentioned_p (frame_pointer_rtx, x)
6987 || reg_mentioned_p (arg_pointer_rtx, x)
6988 || reg_mentioned_p (virtual_incoming_args_rtx, x)
6989 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
6990 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
6991 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
6992 return 0;
6993
6994 /* Accept any base register. SP only in SImode or larger. */
6995 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
6996 return 1;
6997
6998 /* This is PC relative data before arm_reorg runs. */
6999 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7000 && GET_CODE (x) == SYMBOL_REF
7001 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7002 return 1;
7003
7004 /* This is PC relative data after arm_reorg runs. */
7005 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7006 && reload_completed
7007 && (GET_CODE (x) == LABEL_REF
7008 || (GET_CODE (x) == CONST
7009 && GET_CODE (XEXP (x, 0)) == PLUS
7010 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7011 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7012 return 1;
7013
7014 /* Post-inc indexing is only supported for SImode and larger. */
7015 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7016 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7017 return 1;
7018
7019 else if (GET_CODE (x) == PLUS)
7020 {
7021 /* REG+REG address can be any two index registers. */
7022 /* We disallow FRAME+REG addressing since we know that FRAME
7023 will be replaced with STACK, and SP relative addressing only
7024 permits SP+OFFSET. */
7025 if (GET_MODE_SIZE (mode) <= 4
7026 && XEXP (x, 0) != frame_pointer_rtx
7027 && XEXP (x, 1) != frame_pointer_rtx
7028 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7029 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7030 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7031 return 1;
7032
7033 /* REG+const has 5-7 bit offset for non-SP registers. */
7034 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7035 || XEXP (x, 0) == arg_pointer_rtx)
7036 && CONST_INT_P (XEXP (x, 1))
7037 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7038 return 1;
7039
7040 /* REG+const has 10-bit offset for SP, but only SImode and
7041 larger are supported. */
7042 /* ??? Should probably check for DI/DFmode overflow here
7043 just like GO_IF_LEGITIMATE_OFFSET does. */
7044 else if (REG_P (XEXP (x, 0))
7045 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7046 && GET_MODE_SIZE (mode) >= 4
7047 && CONST_INT_P (XEXP (x, 1))
7048 && INTVAL (XEXP (x, 1)) >= 0
7049 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7050 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7051 return 1;
7052
7053 else if (REG_P (XEXP (x, 0))
7054 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7055 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7056 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7057 && REGNO (XEXP (x, 0))
7058 <= LAST_VIRTUAL_POINTER_REGISTER))
7059 && GET_MODE_SIZE (mode) >= 4
7060 && CONST_INT_P (XEXP (x, 1))
7061 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7062 return 1;
7063 }
7064
7065 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7066 && GET_MODE_SIZE (mode) == 4
7067 && GET_CODE (x) == SYMBOL_REF
7068 && CONSTANT_POOL_ADDRESS_P (x)
7069 && ! (flag_pic
7070 && symbol_mentioned_p (get_pool_constant (x))
7071 && ! pcrel_constant_p (get_pool_constant (x))))
7072 return 1;
7073
7074 return 0;
7075 }
7076
7077 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7078 instruction of mode MODE. */
7079 int
7080 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
7081 {
7082 switch (GET_MODE_SIZE (mode))
7083 {
7084 case 1:
7085 return val >= 0 && val < 32;
7086
7087 case 2:
7088 return val >= 0 && val < 64 && (val & 1) == 0;
7089
7090 default:
7091 return (val >= 0
7092 && (val + GET_MODE_SIZE (mode)) <= 128
7093 && (val & 3) == 0);
7094 }
7095 }
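/* For instance, with the ranges above a QImode access accepts offsets
   0..31, an HImode access accepts even offsets up to 62, and an SImode
   access accepts word-aligned offsets up to 124 (since val plus the mode
   size must not exceed 128).  */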
7096
7097 bool
7098 arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
7099 {
7100 if (TARGET_ARM)
7101 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7102 else if (TARGET_THUMB2)
7103 return thumb2_legitimate_address_p (mode, x, strict_p);
7104 else /* if (TARGET_THUMB1) */
7105 return thumb1_legitimate_address_p (mode, x, strict_p);
7106 }
7107
7108 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7109
7110 Given an rtx X being reloaded into a reg required to be
7111 in class CLASS, return the class of reg to actually use.
7112 In general this is just CLASS, but for the Thumb core registers and
7113 immediate constants we prefer a LO_REGS class or a subset. */
7114
7115 static reg_class_t
7116 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7117 {
7118 if (TARGET_32BIT)
7119 return rclass;
7120 else
7121 {
7122 if (rclass == GENERAL_REGS)
7123 return LO_REGS;
7124 else
7125 return rclass;
7126 }
7127 }
7128
7129 /* Build the SYMBOL_REF for __tls_get_addr. */
7130
7131 static GTY(()) rtx tls_get_addr_libfunc;
7132
7133 static rtx
7134 get_tls_get_addr (void)
7135 {
7136 if (!tls_get_addr_libfunc)
7137 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7138 return tls_get_addr_libfunc;
7139 }
7140
7141 rtx
7142 arm_load_tp (rtx target)
7143 {
7144 if (!target)
7145 target = gen_reg_rtx (SImode);
7146
7147 if (TARGET_HARD_TP)
7148 {
7149 /* Can return in any reg. */
7150 emit_insn (gen_load_tp_hard (target));
7151 }
7152 else
7153 {
7154 /* Always returned in r0. Immediately copy the result into a pseudo,
7155 otherwise other uses of r0 (e.g. setting up function arguments) may
7156 clobber the value. */
7157
7158 rtx tmp;
7159
7160 emit_insn (gen_load_tp_soft ());
7161
7162 tmp = gen_rtx_REG (SImode, 0);
7163 emit_move_insn (target, tmp);
7164 }
7165 return target;
7166 }
7167
7168 static rtx
7169 load_tls_operand (rtx x, rtx reg)
7170 {
7171 rtx tmp;
7172
7173 if (reg == NULL_RTX)
7174 reg = gen_reg_rtx (SImode);
7175
7176 tmp = gen_rtx_CONST (SImode, x);
7177
7178 emit_move_insn (reg, tmp);
7179
7180 return reg;
7181 }
7182
7183 static rtx
7184 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7185 {
7186 rtx insns, label, labelno, sum;
7187
7188 gcc_assert (reloc != TLS_DESCSEQ);
7189 start_sequence ();
7190
7191 labelno = GEN_INT (pic_labelno++);
7192 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7193 label = gen_rtx_CONST (VOIDmode, label);
7194
7195 sum = gen_rtx_UNSPEC (Pmode,
7196 gen_rtvec (4, x, GEN_INT (reloc), label,
7197 GEN_INT (TARGET_ARM ? 8 : 4)),
7198 UNSPEC_TLS);
7199 reg = load_tls_operand (sum, reg);
7200
7201 if (TARGET_ARM)
7202 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7203 else
7204 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7205
7206 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7207 LCT_PURE, /* LCT_CONST? */
7208 Pmode, 1, reg, Pmode);
7209
7210 insns = get_insns ();
7211 end_sequence ();
7212
7213 return insns;
7214 }
7215
7216 static rtx
7217 arm_tls_descseq_addr (rtx x, rtx reg)
7218 {
7219 rtx labelno = GEN_INT (pic_labelno++);
7220 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7221 rtx sum = gen_rtx_UNSPEC (Pmode,
7222 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7223 gen_rtx_CONST (VOIDmode, label),
7224 GEN_INT (!TARGET_ARM)),
7225 UNSPEC_TLS);
7226 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7227
7228 emit_insn (gen_tlscall (x, labelno));
7229 if (!reg)
7230 reg = gen_reg_rtx (SImode);
7231 else
7232 gcc_assert (REGNO (reg) != 0);
7233
7234 emit_move_insn (reg, reg0);
7235
7236 return reg;
7237 }
7238
7239 rtx
7240 legitimize_tls_address (rtx x, rtx reg)
7241 {
7242 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7243 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7244
7245 switch (model)
7246 {
7247 case TLS_MODEL_GLOBAL_DYNAMIC:
7248 if (TARGET_GNU2_TLS)
7249 {
7250 reg = arm_tls_descseq_addr (x, reg);
7251
7252 tp = arm_load_tp (NULL_RTX);
7253
7254 dest = gen_rtx_PLUS (Pmode, tp, reg);
7255 }
7256 else
7257 {
7258 /* Original scheme */
7259 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7260 dest = gen_reg_rtx (Pmode);
7261 emit_libcall_block (insns, dest, ret, x);
7262 }
7263 return dest;
7264
7265 case TLS_MODEL_LOCAL_DYNAMIC:
7266 if (TARGET_GNU2_TLS)
7267 {
7268 reg = arm_tls_descseq_addr (x, reg);
7269
7270 tp = arm_load_tp (NULL_RTX);
7271
7272 dest = gen_rtx_PLUS (Pmode, tp, reg);
7273 }
7274 else
7275 {
7276 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7277
7278 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7279 share the LDM result with other LD model accesses. */
7280 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7281 UNSPEC_TLS);
7282 dest = gen_reg_rtx (Pmode);
7283 emit_libcall_block (insns, dest, ret, eqv);
7284
7285 /* Load the addend. */
7286 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7287 GEN_INT (TLS_LDO32)),
7288 UNSPEC_TLS);
7289 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7290 dest = gen_rtx_PLUS (Pmode, dest, addend);
7291 }
7292 return dest;
7293
7294 case TLS_MODEL_INITIAL_EXEC:
7295 labelno = GEN_INT (pic_labelno++);
7296 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7297 label = gen_rtx_CONST (VOIDmode, label);
7298 sum = gen_rtx_UNSPEC (Pmode,
7299 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7300 GEN_INT (TARGET_ARM ? 8 : 4)),
7301 UNSPEC_TLS);
7302 reg = load_tls_operand (sum, reg);
7303
7304 if (TARGET_ARM)
7305 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7306 else if (TARGET_THUMB2)
7307 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7308 else
7309 {
7310 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7311 emit_move_insn (reg, gen_const_mem (SImode, reg));
7312 }
7313
7314 tp = arm_load_tp (NULL_RTX);
7315
7316 return gen_rtx_PLUS (Pmode, tp, reg);
7317
7318 case TLS_MODEL_LOCAL_EXEC:
7319 tp = arm_load_tp (NULL_RTX);
7320
7321 reg = gen_rtx_UNSPEC (Pmode,
7322 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7323 UNSPEC_TLS);
7324 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7325
7326 return gen_rtx_PLUS (Pmode, tp, reg);
7327
7328 default:
7329 abort ();
7330 }
7331 }
7332
7333 /* Try machine-dependent ways of modifying an illegitimate address
7334 to be legitimate. If we find one, return the new, valid address. */
7335 rtx
7336 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7337 {
7338 if (arm_tls_referenced_p (x))
7339 {
7340 rtx addend = NULL;
7341
7342 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7343 {
7344 addend = XEXP (XEXP (x, 0), 1);
7345 x = XEXP (XEXP (x, 0), 0);
7346 }
7347
7348 if (GET_CODE (x) != SYMBOL_REF)
7349 return x;
7350
7351 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7352
7353 x = legitimize_tls_address (x, NULL_RTX);
7354
7355 if (addend)
7356 {
7357 x = gen_rtx_PLUS (SImode, x, addend);
7358 orig_x = x;
7359 }
7360 else
7361 return x;
7362 }
7363
7364 if (!TARGET_ARM)
7365 {
7366 /* TODO: legitimize_address for Thumb2. */
7367 if (TARGET_THUMB2)
7368 return x;
7369 return thumb_legitimize_address (x, orig_x, mode);
7370 }
7371
7372 if (GET_CODE (x) == PLUS)
7373 {
7374 rtx xop0 = XEXP (x, 0);
7375 rtx xop1 = XEXP (x, 1);
7376
7377 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7378 xop0 = force_reg (SImode, xop0);
7379
7380 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7381 && !symbol_mentioned_p (xop1))
7382 xop1 = force_reg (SImode, xop1);
7383
7384 if (ARM_BASE_REGISTER_RTX_P (xop0)
7385 && CONST_INT_P (xop1))
7386 {
7387 HOST_WIDE_INT n, low_n;
7388 rtx base_reg, val;
7389 n = INTVAL (xop1);
7390
7391 /* VFP addressing modes actually allow greater offsets, but for
7392 now we just stick with the lowest common denominator. */
7393 if (mode == DImode
7394 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7395 {
7396 low_n = n & 0x0f;
7397 n &= ~0x0f;
7398 if (low_n > 4)
7399 {
7400 n += 16;
7401 low_n -= 16;
7402 }
7403 }
7404 else
7405 {
7406 low_n = ((mode) == TImode ? 0
7407 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7408 n -= low_n;
7409 }
7410
7411 base_reg = gen_reg_rtx (SImode);
7412 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7413 emit_move_insn (base_reg, val);
7414 x = plus_constant (Pmode, base_reg, low_n);
7415 }
7416 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7417 x = gen_rtx_PLUS (SImode, xop0, xop1);
7418 }
7419
7420 /* XXX We don't allow MINUS any more -- see comment in
7421 arm_legitimate_address_outer_p (). */
7422 else if (GET_CODE (x) == MINUS)
7423 {
7424 rtx xop0 = XEXP (x, 0);
7425 rtx xop1 = XEXP (x, 1);
7426
7427 if (CONSTANT_P (xop0))
7428 xop0 = force_reg (SImode, xop0);
7429
7430 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7431 xop1 = force_reg (SImode, xop1);
7432
7433 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7434 x = gen_rtx_MINUS (SImode, xop0, xop1);
7435 }
7436
7437 /* Make sure to take full advantage of the pre-indexed addressing mode
7438 with absolute addresses, which often allows the base register to be
7439 factorized across multiple adjacent memory references and might even
7440 allow the minipool to be avoided entirely. */
7441 else if (CONST_INT_P (x) && optimize > 0)
7442 {
7443 unsigned int bits;
7444 HOST_WIDE_INT mask, base, index;
7445 rtx base_reg;
7446
7447 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7448 use an 8-bit index. So let's use a 12-bit index for SImode only and
7449 hope that arm_gen_constant will enable ldrb to use more bits. */
7450 bits = (mode == SImode) ? 12 : 8;
7451 mask = (1 << bits) - 1;
7452 base = INTVAL (x) & ~mask;
7453 index = INTVAL (x) & mask;
7454 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7455 {
7456 /* It'll most probably be more efficient to generate the base
7457 with more bits set and use a negative index instead. */
7458 base |= mask;
7459 index -= mask;
7460 }
7461 base_reg = force_reg (SImode, GEN_INT (base));
7462 x = plus_constant (Pmode, base_reg, index);
7463 }
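      /* As an illustration (the register numbers here are arbitrary): an
	 SImode access at the absolute address 0x1234 is split into base
	 0x1000 and index 0x234, giving roughly
	     mov     rN, #0x1000
	     ldr     r0, [rN, #0x234]
	 while for an address such as 0xfffff004, where the base would need
	 many set bits, the heuristic above flips to base 0xffffffff (a
	 single MVN) with the negative index -4091.  */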
7464
7465 if (flag_pic)
7466 {
7467 /* We need to find and carefully transform any SYMBOL and LABEL
7468 references; so go back to the original address expression. */
7469 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7470
7471 if (new_x != orig_x)
7472 x = new_x;
7473 }
7474
7475 return x;
7476 }
7477
7478
7479 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7480 to be legitimate. If we find one, return the new, valid address. */
7481 rtx
7482 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
7483 {
7484 if (GET_CODE (x) == PLUS
7485 && CONST_INT_P (XEXP (x, 1))
7486 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7487 || INTVAL (XEXP (x, 1)) < 0))
7488 {
7489 rtx xop0 = XEXP (x, 0);
7490 rtx xop1 = XEXP (x, 1);
7491 HOST_WIDE_INT offset = INTVAL (xop1);
7492
7493 /* Try to fold the offset into a biasing of the base register and
7494 then offsetting that. Don't do this when optimizing for space
7495 since it can cause too many CSEs. */
7496 if (optimize_size && offset >= 0
7497 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7498 {
7499 HOST_WIDE_INT delta;
7500
7501 if (offset >= 256)
7502 delta = offset - (256 - GET_MODE_SIZE (mode));
7503 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7504 delta = 31 * GET_MODE_SIZE (mode);
7505 else
7506 delta = offset & (~31 * GET_MODE_SIZE (mode));
7507
7508 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7509 NULL_RTX);
7510 x = plus_constant (Pmode, xop0, delta);
7511 }
7512 else if (offset < 0 && offset > -256)
7513 /* Small negative offsets are best done with a subtract before the
7514 dereference; forcing these into a register normally takes two
7515 instructions. */
7516 x = force_operand (x, NULL_RTX);
7517 else
7518 {
7519 /* For the remaining cases, force the constant into a register. */
7520 xop1 = force_reg (SImode, xop1);
7521 x = gen_rtx_PLUS (SImode, xop0, xop1);
7522 }
7523 }
7524 else if (GET_CODE (x) == PLUS
7525 && s_register_operand (XEXP (x, 1), SImode)
7526 && !s_register_operand (XEXP (x, 0), SImode))
7527 {
7528 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7529
7530 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7531 }
7532
7533 if (flag_pic)
7534 {
7535 /* We need to find and carefully transform any SYMBOL and LABEL
7536 references; so go back to the original address expression. */
7537 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7538
7539 if (new_x != orig_x)
7540 x = new_x;
7541 }
7542
7543 return x;
7544 }
7545
7546 bool
7547 arm_legitimize_reload_address (rtx *p,
7548 enum machine_mode mode,
7549 int opnum, int type,
7550 int ind_levels ATTRIBUTE_UNUSED)
7551 {
7552 /* We must recognize output that we have already generated ourselves. */
7553 if (GET_CODE (*p) == PLUS
7554 && GET_CODE (XEXP (*p, 0)) == PLUS
7555 && REG_P (XEXP (XEXP (*p, 0), 0))
7556 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7557 && CONST_INT_P (XEXP (*p, 1)))
7558 {
7559 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7560 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7561 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7562 return true;
7563 }
7564
7565 if (GET_CODE (*p) == PLUS
7566 && REG_P (XEXP (*p, 0))
7567 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7568 /* If the base register is equivalent to a constant, let the generic
7569 code handle it. Otherwise we will run into problems if a future
7570 reload pass decides to rematerialize the constant. */
7571 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7572 && CONST_INT_P (XEXP (*p, 1)))
7573 {
7574 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7575 HOST_WIDE_INT low, high;
7576
7577 /* Detect coprocessor load/stores. */
7578 bool coproc_p = ((TARGET_HARD_FLOAT
7579 && TARGET_VFP
7580 && (mode == SFmode || mode == DFmode))
7581 || (TARGET_REALLY_IWMMXT
7582 && VALID_IWMMXT_REG_MODE (mode))
7583 || (TARGET_NEON
7584 && (VALID_NEON_DREG_MODE (mode)
7585 || VALID_NEON_QREG_MODE (mode))));
7586
7587 /* For some conditions, bail out when the lower two bits are unaligned. */
7588 if ((val & 0x3) != 0
7589 /* Coprocessor load/store indexes are 8 bits + '00' appended. */
7590 && (coproc_p
7591 /* For DI, and DF under soft-float: */
7592 || ((mode == DImode || mode == DFmode)
7593 /* Without ldrd, we use stm/ldm, which does not
7594 fare well with unaligned bits. */
7595 && (! TARGET_LDRD
7596 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7597 || TARGET_THUMB2))))
7598 return false;
7599
7600 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7601 where the (reg+high) part gets turned into a reload add insn,
7602 we try to decompose the index into high/low values that can often
7603 also lead to better reload CSE.
7604 For example:
7605 ldr r0, [r2, #4100] // Offset too large
7606 ldr r1, [r2, #4104] // Offset too large
7607
7608 is best reloaded as:
7609 add t1, r2, #4096
7610 ldr r0, [t1, #4]
7611 add t2, r2, #4096
7612 ldr r1, [t2, #8]
7613
7614 which post-reload CSE can simplify in most cases to eliminate the
7615 second add instruction:
7616 add t1, r2, #4096
7617 ldr r0, [t1, #4]
7618 ldr r1, [t1, #8]
7619
7620 The idea here is that we want to split out the bits of the constant
7621 as a mask, rather than as subtracting the maximum offset that the
7622 respective type of load/store used can handle.
7623
7624 A negative low part can still be useful even if the overall offset is
7625 positive; sometimes this leads to an immediate that can be constructed
7626 with fewer instructions.
7627 For example:
7628 ldr r0, [r2, #0x3FFFFC]
7629
7630 This is best reloaded as:
7631 add t1, r2, #0x400000
7632 ldr r0, [t1, #-4]
7633
7634 The trick for spotting this for a load insn with N bits of offset
7635 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7636 negative offset that is going to make bit N and all the bits below
7637 it become zero in the remainder part.
7638
7639 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7640 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7641 used in most cases of ARM load/store instructions. */
7642
7643 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7644 (((VAL) & ((1 << (N)) - 1)) \
7645 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7646 : 0)
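/* For example, SIGN_MAG_LOW_ADDR_BITS (0x3ffffc, 12) yields -4: the low
   12 bits are non-zero, so the low 13 bits (0x1ffc) are sign-adjusted
   around bit 12, i.e. 0xffc - 0x1000 = -4.  The high part then becomes
   0x3ffffc - (-4) = 0x400000, matching the example above.  */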
7647
7648 if (coproc_p)
7649 {
7650 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7651
7652 /* NEON quad-word load/stores are made of two double-word accesses,
7653 so the valid index range is reduced by 8. Treat as 9-bit range if
7654 we go over it. */
7655 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7656 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7657 }
7658 else if (GET_MODE_SIZE (mode) == 8)
7659 {
7660 if (TARGET_LDRD)
7661 low = (TARGET_THUMB2
7662 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7663 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7664 else
7665 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7666 to access doublewords. The supported load/store offsets are
7667 -8, -4, and 4, which we try to produce here. */
7668 low = ((val & 0xf) ^ 0x8) - 0x8;
7669 }
7670 else if (GET_MODE_SIZE (mode) < 8)
7671 {
7672 /* NEON element load/stores do not have an offset. */
7673 if (TARGET_NEON_FP16 && mode == HFmode)
7674 return false;
7675
7676 if (TARGET_THUMB2)
7677 {
7678 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7679 Try the wider 12-bit range first, and re-try if the result
7680 is out of range. */
7681 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7682 if (low < -255)
7683 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7684 }
7685 else
7686 {
7687 if (mode == HImode || mode == HFmode)
7688 {
7689 if (arm_arch4)
7690 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7691 else
7692 {
7693 /* The storehi/movhi_bytes fallbacks can use only
7694 [-4094,+4094] of the full ldrb/strb index range. */
7695 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7696 if (low == 4095 || low == -4095)
7697 return false;
7698 }
7699 }
7700 else
7701 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7702 }
7703 }
7704 else
7705 return false;
7706
7707 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7708 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7709 - (unsigned HOST_WIDE_INT) 0x80000000);
7710 /* Check for overflow or zero */
7711 if (low == 0 || high == 0 || (high + low != val))
7712 return false;
7713
7714 /* Reload the high part into a base reg; leave the low part
7715 in the mem.
7716 Note that replacing this gen_rtx_PLUS with plus_constant is
7717 wrong in this case because we rely on the
7718 (plus (plus reg c1) c2) structure being preserved so that
7719 XEXP (*p, 0) in push_reload below uses the correct term. */
7720 *p = gen_rtx_PLUS (GET_MODE (*p),
7721 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
7722 GEN_INT (high)),
7723 GEN_INT (low));
7724 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7725 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7726 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7727 return true;
7728 }
7729
7730 return false;
7731 }
7732
7733 rtx
7734 thumb_legitimize_reload_address (rtx *x_p,
7735 enum machine_mode mode,
7736 int opnum, int type,
7737 int ind_levels ATTRIBUTE_UNUSED)
7738 {
7739 rtx x = *x_p;
7740
7741 if (GET_CODE (x) == PLUS
7742 && GET_MODE_SIZE (mode) < 4
7743 && REG_P (XEXP (x, 0))
7744 && XEXP (x, 0) == stack_pointer_rtx
7745 && CONST_INT_P (XEXP (x, 1))
7746 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7747 {
7748 rtx orig_x = x;
7749
7750 x = copy_rtx (x);
7751 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7752 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7753 return x;
7754 }
7755
7756 /* If both registers are hi-regs, then it's better to reload the
7757 entire expression rather than each register individually. That
7758 only requires one reload register rather than two. */
7759 if (GET_CODE (x) == PLUS
7760 && REG_P (XEXP (x, 0))
7761 && REG_P (XEXP (x, 1))
7762 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
7763 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
7764 {
7765 rtx orig_x = x;
7766
7767 x = copy_rtx (x);
7768 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
7769 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
7770 return x;
7771 }
7772
7773 return NULL;
7774 }
7775
7776 /* Test for various thread-local symbols. */
7777
7778 /* Helper for arm_tls_referenced_p. */
7779
7780 static int
7781 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7782 {
7783 if (GET_CODE (*x) == SYMBOL_REF)
7784 return SYMBOL_REF_TLS_MODEL (*x) != 0;
7785
7786 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
7787 TLS offsets, not real symbol references. */
7788 if (GET_CODE (*x) == UNSPEC
7789 && XINT (*x, 1) == UNSPEC_TLS)
7790 return -1;
7791
7792 return 0;
7793 }
7794
7795 /* Return TRUE if X contains any TLS symbol references. */
7796
7797 bool
7798 arm_tls_referenced_p (rtx x)
7799 {
7800 if (! TARGET_HAVE_TLS)
7801 return false;
7802
7803 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
7804 }
7805
7806 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
7807
7808 On the ARM, allow any integer (invalid ones are removed later by insn
7809 patterns), nice doubles and symbol_refs which refer to the function's
7810 constant pool XXX.
7811
7812 When generating PIC, allow anything. */
7813
7814 static bool
7815 arm_legitimate_constant_p_1 (enum machine_mode mode, rtx x)
7816 {
7817 /* At present, we have no support for Neon structure constants, so forbid
7818 them here. It might be possible to handle simple cases like 0 and -1
7819 in future. */
7820 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
7821 return false;
7822
7823 return flag_pic || !label_mentioned_p (x);
7824 }
7825
7826 static bool
7827 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7828 {
7829 return (CONST_INT_P (x)
7830 || CONST_DOUBLE_P (x)
7831 || CONSTANT_ADDRESS_P (x)
7832 || flag_pic);
7833 }
7834
7835 static bool
7836 arm_legitimate_constant_p (enum machine_mode mode, rtx x)
7837 {
7838 return (!arm_cannot_force_const_mem (mode, x)
7839 && (TARGET_32BIT
7840 ? arm_legitimate_constant_p_1 (mode, x)
7841 : thumb_legitimate_constant_p (mode, x)));
7842 }
7843
7844 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7845
7846 static bool
7847 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7848 {
7849 rtx base, offset;
7850
7851 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
7852 {
7853 split_const (x, &base, &offset);
7854 if (GET_CODE (base) == SYMBOL_REF
7855 && !offset_within_block_p (base, INTVAL (offset)))
7856 return true;
7857 }
7858 return arm_tls_referenced_p (x);
7859 }
7860 \f
7861 #define REG_OR_SUBREG_REG(X) \
7862 (REG_P (X) \
7863 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
7864
7865 #define REG_OR_SUBREG_RTX(X) \
7866 (REG_P (X) ? (X) : SUBREG_REG (X))
7867
7868 static inline int
7869 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
7870 {
7871 enum machine_mode mode = GET_MODE (x);
7872 int total, words;
7873
7874 switch (code)
7875 {
7876 case ASHIFT:
7877 case ASHIFTRT:
7878 case LSHIFTRT:
7879 case ROTATERT:
7880 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
7881
7882 case PLUS:
7883 case MINUS:
7884 case COMPARE:
7885 case NEG:
7886 case NOT:
7887 return COSTS_N_INSNS (1);
7888
7889 case MULT:
7890 if (CONST_INT_P (XEXP (x, 1)))
7891 {
7892 int cycles = 0;
7893 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
7894
7895 while (i)
7896 {
7897 i >>= 2;
7898 cycles++;
7899 }
7900 return COSTS_N_INSNS (2) + cycles;
7901 }
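	  /* For example, a constant multiplier of 100 is shifted
	     100 -> 25 -> 6 -> 1 -> 0 by the loop above, i.e. four
	     iterations, so the multiply is costed at
	     COSTS_N_INSNS (2) + 4.  */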
7902 return COSTS_N_INSNS (1) + 16;
7903
7904 case SET:
7905 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
7906 the mode. */
7907 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
7908 return (COSTS_N_INSNS (words)
7909 + 4 * ((MEM_P (SET_SRC (x)))
7910 + MEM_P (SET_DEST (x))));
7911
7912 case CONST_INT:
7913 if (outer == SET)
7914 {
7915 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
7916 return 0;
7917 if (thumb_shiftable_const (INTVAL (x)))
7918 return COSTS_N_INSNS (2);
7919 return COSTS_N_INSNS (3);
7920 }
7921 else if ((outer == PLUS || outer == COMPARE)
7922 && INTVAL (x) < 256 && INTVAL (x) > -256)
7923 return 0;
7924 else if ((outer == IOR || outer == XOR || outer == AND)
7925 && INTVAL (x) < 256 && INTVAL (x) >= -256)
7926 return COSTS_N_INSNS (1);
7927 else if (outer == AND)
7928 {
7929 int i;
7930 /* This duplicates the tests in the andsi3 expander. */
7931 for (i = 9; i <= 31; i++)
7932 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
7933 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
7934 return COSTS_N_INSNS (2);
7935 }
7936 else if (outer == ASHIFT || outer == ASHIFTRT
7937 || outer == LSHIFTRT)
7938 return 0;
7939 return COSTS_N_INSNS (2);
7940
7941 case CONST:
7942 case CONST_DOUBLE:
7943 case LABEL_REF:
7944 case SYMBOL_REF:
7945 return COSTS_N_INSNS (3);
7946
7947 case UDIV:
7948 case UMOD:
7949 case DIV:
7950 case MOD:
7951 return 100;
7952
7953 case TRUNCATE:
7954 return 99;
7955
7956 case AND:
7957 case XOR:
7958 case IOR:
7959 /* XXX guess. */
7960 return 8;
7961
7962 case MEM:
7963 /* XXX another guess. */
7964 /* Memory costs quite a lot for the first word, but subsequent words
7965 load at the equivalent of a single insn each. */
7966 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7967 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7968 ? 4 : 0));
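      /* For example, under this formula a DImode access (two words on
	 Thumb-1) costs 10 + 4 = 14, plus a further 4 if it is a
	 constant-pool reference.  */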
7969
7970 case IF_THEN_ELSE:
7971 /* XXX a guess. */
7972 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7973 return 14;
7974 return 2;
7975
7976 case SIGN_EXTEND:
7977 case ZERO_EXTEND:
7978 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
7979 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
7980
7981 if (mode == SImode)
7982 return total;
7983
7984 if (arm_arch6)
7985 return total + COSTS_N_INSNS (1);
7986
7987 /* Assume a two-shift sequence. Increase the cost slightly so
7988 we prefer actual shifts over an extend operation. */
7989 return total + 1 + COSTS_N_INSNS (2);
7990
7991 default:
7992 return 99;
7993 }
7994 }
7995
7996 static inline bool
7997 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
7998 {
7999 enum machine_mode mode = GET_MODE (x);
8000 enum rtx_code subcode;
8001 rtx operand;
8002 enum rtx_code code = GET_CODE (x);
8003 *total = 0;
8004
8005 switch (code)
8006 {
8007 case MEM:
8008 /* Memory costs quite a lot for the first word, but subsequent words
8009 load at the equivalent of a single insn each. */
8010 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8011 return true;
8012
8013 case DIV:
8014 case MOD:
8015 case UDIV:
8016 case UMOD:
8017 if (TARGET_HARD_FLOAT && mode == SFmode)
8018 *total = COSTS_N_INSNS (2);
8019 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8020 *total = COSTS_N_INSNS (4);
8021 else
8022 *total = COSTS_N_INSNS (20);
8023 return false;
8024
8025 case ROTATE:
8026 if (REG_P (XEXP (x, 1)))
8027 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8028 else if (!CONST_INT_P (XEXP (x, 1)))
8029 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8030
8031 /* Fall through */
8032 case ROTATERT:
8033 if (mode != SImode)
8034 {
8035 *total += COSTS_N_INSNS (4);
8036 return true;
8037 }
8038
8039 /* Fall through */
8040 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8041 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8042 if (mode == DImode)
8043 {
8044 *total += COSTS_N_INSNS (3);
8045 return true;
8046 }
8047
8048 *total += COSTS_N_INSNS (1);
8049 /* Increase the cost of complex shifts because they aren't any faster
8050 and they reduce dual-issue opportunities. */
8051 if (arm_tune_cortex_a9
8052 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8053 ++*total;
8054
8055 return true;
8056
8057 case MINUS:
8058 if (mode == DImode)
8059 {
8060 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8061 if (CONST_INT_P (XEXP (x, 0))
8062 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8063 {
8064 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8065 return true;
8066 }
8067
8068 if (CONST_INT_P (XEXP (x, 1))
8069 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8070 {
8071 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8072 return true;
8073 }
8074
8075 return false;
8076 }
8077
8078 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8079 {
8080 if (TARGET_HARD_FLOAT
8081 && (mode == SFmode
8082 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8083 {
8084 *total = COSTS_N_INSNS (1);
8085 if (CONST_DOUBLE_P (XEXP (x, 0))
8086 && arm_const_double_rtx (XEXP (x, 0)))
8087 {
8088 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8089 return true;
8090 }
8091
8092 if (CONST_DOUBLE_P (XEXP (x, 1))
8093 && arm_const_double_rtx (XEXP (x, 1)))
8094 {
8095 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8096 return true;
8097 }
8098
8099 return false;
8100 }
8101 *total = COSTS_N_INSNS (20);
8102 return false;
8103 }
8104
8105 *total = COSTS_N_INSNS (1);
8106 if (CONST_INT_P (XEXP (x, 0))
8107 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8108 {
8109 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8110 return true;
8111 }
8112
8113 subcode = GET_CODE (XEXP (x, 1));
8114 if (subcode == ASHIFT || subcode == ASHIFTRT
8115 || subcode == LSHIFTRT
8116 || subcode == ROTATE || subcode == ROTATERT)
8117 {
8118 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8119 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8120 return true;
8121 }
8122
8123 /* A shift as a part of RSB costs no more than RSB itself. */
8124 if (GET_CODE (XEXP (x, 0)) == MULT
8125 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8126 {
8127 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8128 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8129 return true;
8130 }
8131
8132 if (subcode == MULT
8133 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8134 {
8135 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8136 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8137 return true;
8138 }
8139
8140 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8141 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8142 {
8143 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8144 if (REG_P (XEXP (XEXP (x, 1), 0))
8145 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8146 *total += COSTS_N_INSNS (1);
8147
8148 return true;
8149 }
8150
8151 /* Fall through */
8152
8153 case PLUS:
8154 if (code == PLUS && arm_arch6 && mode == SImode
8155 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8156 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8157 {
8158 *total = COSTS_N_INSNS (1);
8159 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8160 0, speed);
8161 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8162 return true;
8163 }
8164
8165 /* MLA: All arguments must be registers. We filter out
8166 multiplication by a power of two, so that we fall through to
8167 the code below. */
8168 if (GET_CODE (XEXP (x, 0)) == MULT
8169 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8170 {
8171 /* The cost comes from the cost of the multiply. */
8172 return false;
8173 }
8174
8175 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8176 {
8177 if (TARGET_HARD_FLOAT
8178 && (mode == SFmode
8179 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8180 {
8181 *total = COSTS_N_INSNS (1);
8182 if (CONST_DOUBLE_P (XEXP (x, 1))
8183 && arm_const_double_rtx (XEXP (x, 1)))
8184 {
8185 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8186 return true;
8187 }
8188
8189 return false;
8190 }
8191
8192 *total = COSTS_N_INSNS (20);
8193 return false;
8194 }
8195
8196 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8197 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8198 {
8199 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8200 if (REG_P (XEXP (XEXP (x, 0), 0))
8201 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8202 *total += COSTS_N_INSNS (1);
8203 return true;
8204 }
8205
8206 /* Fall through */
8207
8208 case AND: case XOR: case IOR:
8209
8210 /* Normally the frame registers will be split into reg+const during
8211 reload, so it is a bad idea to combine them with other instructions,
8212 since then they might not be moved outside of loops. As a compromise
8213 we allow integration with ops that have a constant as their second
8214 operand. */
8215 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8216 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8217 && !CONST_INT_P (XEXP (x, 1)))
8218 *total = COSTS_N_INSNS (1);
8219
8220 if (mode == DImode)
8221 {
8222 *total += COSTS_N_INSNS (2);
8223 if (CONST_INT_P (XEXP (x, 1))
8224 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8225 {
8226 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8227 return true;
8228 }
8229
8230 return false;
8231 }
8232
8233 *total += COSTS_N_INSNS (1);
8234 if (CONST_INT_P (XEXP (x, 1))
8235 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8236 {
8237 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8238 return true;
8239 }
8240 subcode = GET_CODE (XEXP (x, 0));
8241 if (subcode == ASHIFT || subcode == ASHIFTRT
8242 || subcode == LSHIFTRT
8243 || subcode == ROTATE || subcode == ROTATERT)
8244 {
8245 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8246 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8247 return true;
8248 }
8249
8250 if (subcode == MULT
8251 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8252 {
8253 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8254 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8255 return true;
8256 }
8257
8258 if (subcode == UMIN || subcode == UMAX
8259 || subcode == SMIN || subcode == SMAX)
8260 {
8261 *total = COSTS_N_INSNS (3);
8262 return true;
8263 }
8264
8265 return false;
8266
8267 case MULT:
8268 /* This should have been handled by the CPU specific routines. */
8269 gcc_unreachable ();
8270
8271 case TRUNCATE:
8272 if (arm_arch3m && mode == SImode
8273 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8274 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8275 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8276 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8277 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8278 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8279 {
8280 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8281 return true;
8282 }
8283 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8284 return false;
8285
8286 case NEG:
8287 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8288 {
8289 if (TARGET_HARD_FLOAT
8290 && (mode == SFmode
8291 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8292 {
8293 *total = COSTS_N_INSNS (1);
8294 return false;
8295 }
8296 *total = COSTS_N_INSNS (2);
8297 return false;
8298 }
8299
8300 /* Fall through */
8301 case NOT:
8302 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8303 if (mode == SImode && code == NOT)
8304 {
8305 subcode = GET_CODE (XEXP (x, 0));
8306 if (subcode == ASHIFT || subcode == ASHIFTRT
8307 || subcode == LSHIFTRT
8308 || subcode == ROTATE || subcode == ROTATERT
8309 || (subcode == MULT
8310 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8311 {
8312 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8313 /* Register shifts cost an extra cycle. */
8314 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8315 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8316 subcode, 1, speed);
8317 return true;
8318 }
8319 }
8320
8321 return false;
8322
8323 case IF_THEN_ELSE:
8324 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8325 {
8326 *total = COSTS_N_INSNS (4);
8327 return true;
8328 }
8329
8330 operand = XEXP (x, 0);
8331
8332 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8333 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8334 && REG_P (XEXP (operand, 0))
8335 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8336 *total += COSTS_N_INSNS (1);
8337 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8338 + rtx_cost (XEXP (x, 2), code, 2, speed));
8339 return true;
8340
8341 case NE:
8342 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8343 {
8344 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8345 return true;
8346 }
8347 goto scc_insn;
8348
8349 case GE:
8350 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8351 && mode == SImode && XEXP (x, 1) == const0_rtx)
8352 {
8353 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8354 return true;
8355 }
8356 goto scc_insn;
8357
8358 case LT:
8359 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8360 && mode == SImode && XEXP (x, 1) == const0_rtx)
8361 {
8362 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8363 return true;
8364 }
8365 goto scc_insn;
8366
8367 case EQ:
8368 case GT:
8369 case LE:
8370 case GEU:
8371 case LTU:
8372 case GTU:
8373 case LEU:
8374 case UNORDERED:
8375 case ORDERED:
8376 case UNEQ:
8377 case UNGE:
8378 case UNLT:
8379 case UNGT:
8380 case UNLE:
8381 scc_insn:
8382 /* SCC insns. If the comparison has already been performed, they
8383 cost 2 instructions; otherwise they need an additional comparison
8384 before them. */
8385 *total = COSTS_N_INSNS (2);
8386 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8387 {
8388 return true;
8389 }
8390
8391 /* Fall through */
8392 case COMPARE:
8393 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8394 {
8395 *total = 0;
8396 return true;
8397 }
8398
8399 *total += COSTS_N_INSNS (1);
8400 if (CONST_INT_P (XEXP (x, 1))
8401 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8402 {
8403 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8404 return true;
8405 }
8406
8407 subcode = GET_CODE (XEXP (x, 0));
8408 if (subcode == ASHIFT || subcode == ASHIFTRT
8409 || subcode == LSHIFTRT
8410 || subcode == ROTATE || subcode == ROTATERT)
8411 {
8412 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8413 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8414 return true;
8415 }
8416
8417 if (subcode == MULT
8418 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8419 {
8420 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8421 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8422 return true;
8423 }
8424
8425 return false;
8426
8427 case UMIN:
8428 case UMAX:
8429 case SMIN:
8430 case SMAX:
8431 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8432 if (!CONST_INT_P (XEXP (x, 1))
8433 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8434 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8435 return true;
8436
8437 case ABS:
8438 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8439 {
8440 if (TARGET_HARD_FLOAT
8441 && (mode == SFmode
8442 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8443 {
8444 *total = COSTS_N_INSNS (1);
8445 return false;
8446 }
8447 *total = COSTS_N_INSNS (20);
8448 return false;
8449 }
8450 *total = COSTS_N_INSNS (1);
8451 if (mode == DImode)
8452 *total += COSTS_N_INSNS (3);
8453 return false;
8454
8455 case SIGN_EXTEND:
8456 case ZERO_EXTEND:
8457 *total = 0;
8458 if (GET_MODE_CLASS (mode) == MODE_INT)
8459 {
8460 rtx op = XEXP (x, 0);
8461 enum machine_mode opmode = GET_MODE (op);
8462
8463 if (mode == DImode)
8464 *total += COSTS_N_INSNS (1);
8465
8466 if (opmode != SImode)
8467 {
8468 if (MEM_P (op))
8469 {
8470 /* If !arm_arch4, we use one of the extendhisi2_mem
8471 or movhi_bytes patterns for HImode. For a QImode
8472 sign extension, we first zero-extend from memory
8473 and then perform a shift sequence. */
8474 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8475 *total += COSTS_N_INSNS (2);
8476 }
8477 else if (arm_arch6)
8478 *total += COSTS_N_INSNS (1);
8479
8480 /* We don't have the necessary insn, so we need to perform some
8481 other operation. */
8482 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8483 /* An and with constant 255. */
8484 *total += COSTS_N_INSNS (1);
8485 else
8486 /* A shift sequence. Increase costs slightly to avoid
8487 combining two shifts into an extend operation. */
8488 *total += COSTS_N_INSNS (2) + 1;
8489 }
8490
8491 return false;
8492 }
8493
8494 switch (GET_MODE (XEXP (x, 0)))
8495 {
8496 case V8QImode:
8497 case V4HImode:
8498 case V2SImode:
8499 case V4QImode:
8500 case V2HImode:
8501 *total = COSTS_N_INSNS (1);
8502 return false;
8503
8504 default:
8505 gcc_unreachable ();
8506 }
8507 gcc_unreachable ();
8508
8509 case ZERO_EXTRACT:
8510 case SIGN_EXTRACT:
8511 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8512 return true;
8513
8514 case CONST_INT:
8515 if (const_ok_for_arm (INTVAL (x))
8516 || const_ok_for_arm (~INTVAL (x)))
8517 *total = COSTS_N_INSNS (1);
8518 else
8519 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8520 INTVAL (x), NULL_RTX,
8521 NULL_RTX, 0, 0));
8522 return true;
8523
8524 case CONST:
8525 case LABEL_REF:
8526 case SYMBOL_REF:
8527 *total = COSTS_N_INSNS (3);
8528 return true;
8529
8530 case HIGH:
8531 *total = COSTS_N_INSNS (1);
8532 return true;
8533
8534 case LO_SUM:
8535 *total = COSTS_N_INSNS (1);
8536 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8537 return true;
8538
8539 case CONST_DOUBLE:
8540 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8541 && (mode == SFmode || !TARGET_VFP_SINGLE))
8542 *total = COSTS_N_INSNS (1);
8543 else
8544 *total = COSTS_N_INSNS (4);
8545 return true;
8546
8547 case SET:
8548 /* The vec_extract patterns accept memory operands that require an
8549 address reload. Account for the cost of that reload to give the
8550 auto-inc-dec pass an incentive to try to replace them. */
8551 if (TARGET_NEON && MEM_P (SET_DEST (x))
8552 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8553 {
8554 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8555 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8556 *total += COSTS_N_INSNS (1);
8557 return true;
8558 }
8559 /* Likewise for the vec_set patterns. */
8560 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8561 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8562 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8563 {
8564 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8565 *total = rtx_cost (mem, code, 0, speed);
8566 if (!neon_vector_mem_operand (mem, 2, true))
8567 *total += COSTS_N_INSNS (1);
8568 return true;
8569 }
8570 return false;
8571
8572 case UNSPEC:
8573 /* We cost this as highly as a memory access so that it can be
8574 hoisted out of loops. */
8575 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8576 {
8577 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8578 }
8579 return true;
8580
8581 case CONST_VECTOR:
8582 if (TARGET_NEON
8583 && TARGET_HARD_FLOAT
8584 && outer == SET
8585 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8586 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8587 *total = COSTS_N_INSNS (1);
8588 else
8589 *total = COSTS_N_INSNS (4);
8590 return true;
8591
8592 default:
8593 *total = COSTS_N_INSNS (4);
8594 return false;
8595 }
8596 }
8597
8598 /* Estimates the size cost of thumb1 instructions.
8599 For now most of the code is copied from thumb1_rtx_costs. We need more
8600 fine-grained tuning when we have more related test cases. */
8601 static inline int
8602 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8603 {
8604 enum machine_mode mode = GET_MODE (x);
8605 int words;
8606
8607 switch (code)
8608 {
8609 case ASHIFT:
8610 case ASHIFTRT:
8611 case LSHIFTRT:
8612 case ROTATERT:
8613 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8614
8615 case PLUS:
8616 case MINUS:
8617 /* Thumb-1 needs two instructions to fulfill the shiftadd/shiftsub0/shiftsub1
8618 patterns defined by RTL expansion, especially for the expansion of
8619 multiplication. */
8620 if ((GET_CODE (XEXP (x, 0)) == MULT
8621 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8622 || (GET_CODE (XEXP (x, 1)) == MULT
8623 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8624 return COSTS_N_INSNS (2);
8625 /* Fall through on purpose for normal RTX. */
8626 case COMPARE:
8627 case NEG:
8628 case NOT:
8629 return COSTS_N_INSNS (1);
8630
8631 case MULT:
8632 if (CONST_INT_P (XEXP (x, 1)))
8633 {
8634 /* The Thumb-1 mul instruction can't operate on a constant. We must load
8635 it into a register first. */
8636 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8637 return COSTS_N_INSNS (1) + const_size;
8638 }
8639 return COSTS_N_INSNS (1);
8640
8641 case SET:
8642 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8643 the mode. */
8644 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8645 return (COSTS_N_INSNS (words)
8646 + 4 * ((MEM_P (SET_SRC (x)))
8647 + MEM_P (SET_DEST (x))));
8648
8649 case CONST_INT:
8650 if (outer == SET)
8651 {
8652 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8653 return COSTS_N_INSNS (1);
8654 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8655 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8656 return COSTS_N_INSNS (2);
8657 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8658 if (thumb_shiftable_const (INTVAL (x)))
8659 return COSTS_N_INSNS (2);
8660 return COSTS_N_INSNS (3);
8661 }
8662 else if ((outer == PLUS || outer == COMPARE)
8663 && INTVAL (x) < 256 && INTVAL (x) > -256)
8664 return 0;
8665 else if ((outer == IOR || outer == XOR || outer == AND)
8666 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8667 return COSTS_N_INSNS (1);
8668 else if (outer == AND)
8669 {
8670 int i;
8671 /* This duplicates the tests in the andsi3 expander. */
8672 for (i = 9; i <= 31; i++)
8673 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8674 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8675 return COSTS_N_INSNS (2);
8676 }
8677 else if (outer == ASHIFT || outer == ASHIFTRT
8678 || outer == LSHIFTRT)
8679 return 0;
8680 return COSTS_N_INSNS (2);
8681
8682 case CONST:
8683 case CONST_DOUBLE:
8684 case LABEL_REF:
8685 case SYMBOL_REF:
8686 return COSTS_N_INSNS (3);
8687
8688 case UDIV:
8689 case UMOD:
8690 case DIV:
8691 case MOD:
8692 return 100;
8693
8694 case TRUNCATE:
8695 return 99;
8696
8697 case AND:
8698 case XOR:
8699 case IOR:
8700 /* XXX guess. */
8701 return 8;
8702
8703 case MEM:
8704 /* XXX another guess. */
8705 /* Memory costs quite a lot for the first word, but subsequent words
8706 load at the equivalent of a single insn each. */
8707 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8708 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8709 ? 4 : 0));
8710
8711 case IF_THEN_ELSE:
8712 /* XXX a guess. */
8713 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8714 return 14;
8715 return 2;
8716
8717 case ZERO_EXTEND:
8718 /* XXX still guessing. */
8719 switch (GET_MODE (XEXP (x, 0)))
8720 {
8721 case QImode:
8722 return (1 + (mode == DImode ? 4 : 0)
8723 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8724
8725 case HImode:
8726 return (4 + (mode == DImode ? 4 : 0)
8727 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8728
8729 case SImode:
8730 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8731
8732 default:
8733 return 99;
8734 }
8735
8736 default:
8737 return 99;
8738 }
8739 }
8740
8741 /* RTX costs when optimizing for size. */
8742 static bool
8743 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8744 int *total)
8745 {
8746 enum machine_mode mode = GET_MODE (x);
8747 if (TARGET_THUMB1)
8748 {
8749 *total = thumb1_size_rtx_costs (x, code, outer_code);
8750 return true;
8751 }
8752
8753 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
8754 switch (code)
8755 {
8756 case MEM:
8757 /* A memory access costs 1 insn if the mode is small or the address is
8758 a single register; otherwise it costs one insn per word. */
8759 if (REG_P (XEXP (x, 0)))
8760 *total = COSTS_N_INSNS (1);
8761 else if (flag_pic
8762 && GET_CODE (XEXP (x, 0)) == PLUS
8763 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
8764 /* This will be split into two instructions.
8765 See arm.md:calculate_pic_address. */
8766 *total = COSTS_N_INSNS (2);
8767 else
8768 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8769 return true;
8770
8771 case DIV:
8772 case MOD:
8773 case UDIV:
8774 case UMOD:
8775 /* Needs a libcall, so it costs about this. */
8776 *total = COSTS_N_INSNS (2);
8777 return false;
8778
8779 case ROTATE:
8780 if (mode == SImode && REG_P (XEXP (x, 1)))
8781 {
8782 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
8783 return true;
8784 }
8785 /* Fall through */
8786 case ROTATERT:
8787 case ASHIFT:
8788 case LSHIFTRT:
8789 case ASHIFTRT:
8790 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
8791 {
8792 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
8793 return true;
8794 }
8795 else if (mode == SImode)
8796 {
8797 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
8798 /* Slightly disparage register shifts, but not by much. */
8799 if (!CONST_INT_P (XEXP (x, 1)))
8800 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
8801 return true;
8802 }
8803
8804 /* Needs a libcall. */
8805 *total = COSTS_N_INSNS (2);
8806 return false;
8807
8808 case MINUS:
8809 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8810 && (mode == SFmode || !TARGET_VFP_SINGLE))
8811 {
8812 *total = COSTS_N_INSNS (1);
8813 return false;
8814 }
8815
8816 if (mode == SImode)
8817 {
8818 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
8819 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
8820
8821 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
8822 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
8823 || subcode1 == ROTATE || subcode1 == ROTATERT
8824 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
8825 || subcode1 == ASHIFTRT)
8826 {
8827 /* It's just the cost of the two operands. */
8828 *total = 0;
8829 return false;
8830 }
8831
8832 *total = COSTS_N_INSNS (1);
8833 return false;
8834 }
8835
8836 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8837 return false;
8838
8839 case PLUS:
8840 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8841 && (mode == SFmode || !TARGET_VFP_SINGLE))
8842 {
8843 *total = COSTS_N_INSNS (1);
8844 return false;
8845 }
8846
8847 /* A shift as a part of ADD costs nothing. */
8848 if (GET_CODE (XEXP (x, 0)) == MULT
8849 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8850 {
8851 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
8852 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
8853 *total += rtx_cost (XEXP (x, 1), code, 1, false);
8854 return true;
8855 }
8856
8857 /* Fall through */
8858 case AND: case XOR: case IOR:
8859 if (mode == SImode)
8860 {
8861 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
8862
8863 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
8864 || subcode == LSHIFTRT || subcode == ASHIFTRT
8865 || (code == AND && subcode == NOT))
8866 {
8867 /* It's just the cost of the two operands. */
8868 *total = 0;
8869 return false;
8870 }
8871 }
8872
8873 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8874 return false;
8875
8876 case MULT:
8877 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8878 return false;
8879
8880 case NEG:
8881 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8882 && (mode == SFmode || !TARGET_VFP_SINGLE))
8883 {
8884 *total = COSTS_N_INSNS (1);
8885 return false;
8886 }
8887
8888 /* Fall through */
8889 case NOT:
8890 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8891
8892 return false;
8893
8894 case IF_THEN_ELSE:
8895 *total = 0;
8896 return false;
8897
8898 case COMPARE:
8899 if (cc_register (XEXP (x, 0), VOIDmode))
8900 *total = 0;
8901 else
8902 *total = COSTS_N_INSNS (1);
8903 return false;
8904
8905 case ABS:
8906 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
8907 && (mode == SFmode || !TARGET_VFP_SINGLE))
8908 *total = COSTS_N_INSNS (1);
8909 else
8910 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
8911 return false;
8912
8913 case SIGN_EXTEND:
8914 case ZERO_EXTEND:
8915 return arm_rtx_costs_1 (x, outer_code, total, 0);
8916
8917 case CONST_INT:
8918 if (const_ok_for_arm (INTVAL (x)))
8919 /* A multiplication by a constant requires another instruction
8920 to load the constant to a register. */
8921 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
8922 ? 1 : 0);
8923 else if (const_ok_for_arm (~INTVAL (x)))
8924 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
8925 else if (const_ok_for_arm (-INTVAL (x)))
8926 {
8927 if (outer_code == COMPARE || outer_code == PLUS
8928 || outer_code == MINUS)
8929 *total = 0;
8930 else
8931 *total = COSTS_N_INSNS (1);
8932 }
8933 else
8934 *total = COSTS_N_INSNS (2);
8935 return true;
8936
8937 case CONST:
8938 case LABEL_REF:
8939 case SYMBOL_REF:
8940 *total = COSTS_N_INSNS (2);
8941 return true;
8942
8943 case CONST_DOUBLE:
8944 *total = COSTS_N_INSNS (4);
8945 return true;
8946
8947 case CONST_VECTOR:
8948 if (TARGET_NEON
8949 && TARGET_HARD_FLOAT
8950 && outer_code == SET
8951 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8952 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8953 *total = COSTS_N_INSNS (1);
8954 else
8955 *total = COSTS_N_INSNS (4);
8956 return true;
8957
8958 case HIGH:
8959 case LO_SUM:
8960 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
8961 cost of these slightly. */
8962 *total = COSTS_N_INSNS (1) + 1;
8963 return true;
8964
8965 case SET:
8966 return false;
8967
8968 default:
8969 if (mode != VOIDmode)
8970 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8971 else
8972 *total = COSTS_N_INSNS (4); /* Who knows? */
8973 return false;
8974 }
8975 }
8976
8977 /* Helper function for arm_rtx_costs. If the operand is a valid shift
8978 operand, then return the operand that is being shifted. If the shift
8979 is not by a constant, then set SHIFT_REG to point to the operand.
8980 Return NULL if OP is not a shifter operand. */
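/* For example, given (mult:SI (reg X) (const_int 4)) this returns
   (reg X), since a multiply by 4 is a shift left by two; given
   (lshiftrt:SI (reg X) (reg Y)) it returns (reg X) and sets *SHIFT_REG
   to (reg Y).  */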
8981 static rtx
8982 shifter_op_p (rtx op, rtx *shift_reg)
8983 {
8984 enum rtx_code code = GET_CODE (op);
8985
8986 if (code == MULT && CONST_INT_P (XEXP (op, 1))
8987 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
8988 return XEXP (op, 0);
8989 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
8990 return XEXP (op, 0);
8991 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
8992 || code == ASHIFTRT)
8993 {
8994 if (!CONST_INT_P (XEXP (op, 1)))
8995 *shift_reg = XEXP (op, 1);
8996 return XEXP (op, 0);
8997 }
8998
8999 return NULL;
9000 }
9001
9002 static bool
9003 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9004 {
9005 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9006 gcc_assert (GET_CODE (x) == UNSPEC);
9007
9008 switch (XINT (x, 1))
9009 {
9010 case UNSPEC_UNALIGNED_LOAD:
9011 /* We can only do unaligned loads into the integer unit, and we can't
9012 use LDM or LDRD. */
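/* For illustration: an unaligned DImode load is therefore costed as two
word-sized loads (ARM_NUM_REGS (DImode) == 2) plus, when optimizing for
speed, two per-word load costs and a single unaligned-access penalty
from the cost tables.  */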
9013 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9014 if (speed_p)
9015 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9016 + extra_cost->ldst.load_unaligned);
9017
9018 #ifdef NOT_YET
9019 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9020 ADDR_SPACE_GENERIC, speed_p);
9021 #endif
9022 return true;
9023
9024 case UNSPEC_UNALIGNED_STORE:
9025 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9026 if (speed_p)
9027 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9028 + extra_cost->ldst.store_unaligned);
9029
9030 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9031 #ifdef NOT_YET
9032 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9033 ADDR_SPACE_GENERIC, speed_p);
9034 #endif
9035 return true;
9036
9037 case UNSPEC_VRINTZ:
9038 case UNSPEC_VRINTP:
9039 case UNSPEC_VRINTM:
9040 case UNSPEC_VRINTR:
9041 case UNSPEC_VRINTX:
9042 case UNSPEC_VRINTA:
9043 *cost = COSTS_N_INSNS (1);
9044 if (speed_p)
9045 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9046
9047 return true;
9048 default:
9049 *cost = COSTS_N_INSNS (2);
9050 break;
9051 }
9052 return false;
9053 }
9054
9055 /* Cost of a libcall. We assume one insn per argument, an amount for the
9056 call (one insn for -Os) and then one for processing the result. */
9057 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
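/* For example, with speed_p true a two-operand libcall is costed as
LIBCALL_COST (2) == COSTS_N_INSNS (20), which lines up with the flat
COSTS_N_INSNS (20) used by the older per-core cost routines below; at
-Os it is only COSTS_N_INSNS (4).  */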
9058
9059 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9060 do \
9061 { \
9062 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9063 if (shift_op != NULL \
9064 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9065 { \
9066 if (shift_reg) \
9067 { \
9068 if (speed_p) \
9069 *cost += extra_cost->alu.arith_shift_reg; \
9070 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9071 } \
9072 else if (speed_p) \
9073 *cost += extra_cost->alu.arith_shift; \
9074 \
9075 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9076 + rtx_cost (XEXP (x, 1 - IDX), \
9077 OP, 1, speed_p)); \
9078 return true; \
9079 } \
9080 } \
9081 while (0);
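/* HANDLE_NARROW_SHIFT_ARITH is only used below for QImode/HImode PLUS
and MINUS; IDX selects which operand is checked for a left shift, e.g.
HANDLE_NARROW_SHIFT_ARITH (MINUS, 1) looks at the second operand of a
narrow subtraction.  */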
9082
9083 /* RTX costs. Make an estimate of the cost of executing the operation
9084 X, which is contained with an operation with code OUTER_CODE.
9085 SPEED_P indicates whether the cost desired is the performance cost,
9086 or the size cost. The estimate is stored in COST and the return
9087 value is TRUE if the cost calculation is final, or FALSE if the
9088 caller should recurse through the operands of X to add additional
9089 costs.
9090
9091 We currently make no attempt to model the size savings of Thumb-2
9092 16-bit instructions. At the normal points in compilation where
9093 this code is called we have no measure of whether the condition
9094 flags are live or not, and thus no realistic way to determine what
9095 the size will eventually be. */
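/* As a rough guide to the return-value convention: a plain SImode
register-register PLUS sets *cost to COSTS_N_INSNS (1) and returns
false so that the caller recurses into the two operands, whereas fully
matched patterns such as an add-with-shift fold the operand costs in
via rtx_cost and return true.  */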
9096 static bool
9097 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9098 const struct cpu_cost_table *extra_cost,
9099 int *cost, bool speed_p)
9100 {
9101 enum machine_mode mode = GET_MODE (x);
9102
9103 if (TARGET_THUMB1)
9104 {
9105 if (speed_p)
9106 *cost = thumb1_rtx_costs (x, code, outer_code);
9107 else
9108 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9109 return true;
9110 }
9111
9112 switch (code)
9113 {
9114 case SET:
9115 *cost = 0;
9116 /* SET RTXs don't have a mode so we get it from the destination. */
9117 mode = GET_MODE (SET_DEST (x));
9118
9119 if (REG_P (SET_SRC (x))
9120 && REG_P (SET_DEST (x)))
9121 {
9122 /* Assume that most copies can be done with a single insn,
9123 unless we don't have HW FP, in which case everything
9124 larger than word mode will require two insns. */
9125 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9126 && GET_MODE_SIZE (mode) > 4)
9127 || mode == DImode)
9128 ? 2 : 1);
9129 /* Conditional register moves can be encoded
9130 in 16 bits in Thumb mode. */
9131 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9132 *cost >>= 1;
9133
9134 return true;
9135 }
9136
9137 if (CONST_INT_P (SET_SRC (x)))
9138 {
9139 /* Handle CONST_INT here, since the value doesn't have a mode
9140 and we would otherwise be unable to work out the true cost. */
9141 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9142 outer_code = SET;
9143 /* Slightly lower the cost of setting a core reg to a constant.
9144 This helps break up chains and allows for better scheduling. */
9145 if (REG_P (SET_DEST (x))
9146 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9147 *cost -= 1;
9148 x = SET_SRC (x);
9149 /* Immediate moves with an immediate in the range [0, 255] can be
9150 encoded in 16 bits in Thumb mode. */
9151 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9152 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9153 *cost >>= 1;
9154 goto const_int_cost;
9155 }
9156
9157 return false;
9158
9159 case MEM:
9160 /* A memory access costs 1 insn if the mode is small, or the address is
9161 a single register, otherwise it costs one insn per word. */
9162 if (REG_P (XEXP (x, 0)))
9163 *cost = COSTS_N_INSNS (1);
9164 else if (flag_pic
9165 && GET_CODE (XEXP (x, 0)) == PLUS
9166 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9167 /* This will be split into two instructions.
9168 See arm.md:calculate_pic_address. */
9169 *cost = COSTS_N_INSNS (2);
9170 else
9171 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9172
9173 /* For speed optimizations, add the costs of the address and
9174 accessing memory. */
9175 if (speed_p)
9176 #ifdef NOT_YET
9177 *cost += (extra_cost->ldst.load
9178 + arm_address_cost (XEXP (x, 0), mode,
9179 ADDR_SPACE_GENERIC, speed_p));
9180 #else
9181 *cost += extra_cost->ldst.load;
9182 #endif
9183 return true;
9184
9185 case PARALLEL:
9186 {
9187 /* Calculations of LDM costs are complex. We assume an initial cost
9188 (ldm_1st) which will load the number of registers mentioned in
9189 ldm_regs_per_insn_1st registers; then each additional
9190 ldm_regs_per_insn_subsequent registers cost one more insn. The
9191 formula for N regs is thus:
9192
9193 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9194 + ldm_regs_per_insn_subsequent - 1)
9195 / ldm_regs_per_insn_subsequent).
9196
9197 Additional costs may also be added for addressing. A similar
9198 formula is used for STM. */
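/* Worked example (purely illustrative figures): with
ldm_regs_per_insn_1st == 3 and ldm_regs_per_insn_subsequent == 2, a
7-register LDM is costed as
ldm_1st + COSTS_N_INSNS ((MAX (7 - 3, 0) + 2 - 1) / 2)
= ldm_1st + COSTS_N_INSNS (2).  */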
9199
9200 bool is_ldm = load_multiple_operation (x, SImode);
9201 bool is_stm = store_multiple_operation (x, SImode);
9202
9203 *cost = COSTS_N_INSNS (1);
9204
9205 if (is_ldm || is_stm)
9206 {
9207 if (speed_p)
9208 {
9209 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9210 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9211 ? extra_cost->ldst.ldm_regs_per_insn_1st
9212 : extra_cost->ldst.stm_regs_per_insn_1st;
9213 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9214 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9215 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9216
9217 *cost += regs_per_insn_1st
9218 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9219 + regs_per_insn_sub - 1)
9220 / regs_per_insn_sub);
9221 return true;
9222 }
9223
9224 }
9225 return false;
9226 }
9227 case DIV:
9228 case UDIV:
9229 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9230 && (mode == SFmode || !TARGET_VFP_SINGLE))
9231 *cost = COSTS_N_INSNS (speed_p
9232 ? extra_cost->fp[mode != SFmode].div : 1);
9233 else if (mode == SImode && TARGET_IDIV)
9234 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9235 else
9236 *cost = LIBCALL_COST (2);
9237 return false; /* All arguments must be in registers. */
9238
9239 case MOD:
9240 case UMOD:
9241 *cost = LIBCALL_COST (2);
9242 return false; /* All arguments must be in registers. */
9243
9244 case ROTATE:
9245 if (mode == SImode && REG_P (XEXP (x, 1)))
9246 {
9247 *cost = (COSTS_N_INSNS (2)
9248 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9249 if (speed_p)
9250 *cost += extra_cost->alu.shift_reg;
9251 return true;
9252 }
9253 /* Fall through */
9254 case ROTATERT:
9255 case ASHIFT:
9256 case LSHIFTRT:
9257 case ASHIFTRT:
9258 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9259 {
9260 *cost = (COSTS_N_INSNS (3)
9261 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9262 if (speed_p)
9263 *cost += 2 * extra_cost->alu.shift;
9264 return true;
9265 }
9266 else if (mode == SImode)
9267 {
9268 *cost = (COSTS_N_INSNS (1)
9269 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9270 /* Slightly disparage register shifts at -Os, but not by much. */
9271 if (!CONST_INT_P (XEXP (x, 1)))
9272 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9273 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9274 return true;
9275 }
9276 else if (GET_MODE_CLASS (mode) == MODE_INT
9277 && GET_MODE_SIZE (mode) < 4)
9278 {
9279 if (code == ASHIFT)
9280 {
9281 *cost = (COSTS_N_INSNS (1)
9282 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9283 /* Slightly disparage register shifts at -Os, but not by
9284 much. */
9285 if (!CONST_INT_P (XEXP (x, 1)))
9286 *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
9287 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9288 }
9289 else if (code == LSHIFTRT || code == ASHIFTRT)
9290 {
9291 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9292 {
9293 /* Can use SBFX/UBFX. */
9294 *cost = COSTS_N_INSNS (1);
9295 if (speed_p)
9296 *cost += extra_cost->alu.bfx;
9297 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9298 }
9299 else
9300 {
9301 *cost = COSTS_N_INSNS (2);
9302 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9303 if (speed_p)
9304 {
9305 if (CONST_INT_P (XEXP (x, 1)))
9306 *cost += 2 * extra_cost->alu.shift;
9307 else
9308 *cost += (extra_cost->alu.shift
9309 + extra_cost->alu.shift_reg);
9310 }
9311 else
9312 /* Slightly disparage register shifts. */
9313 *cost += !CONST_INT_P (XEXP (x, 1));
9314 }
9315 }
9316 else /* Rotates. */
9317 {
9318 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9319 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9320 if (speed_p)
9321 {
9322 if (CONST_INT_P (XEXP (x, 1)))
9323 *cost += (2 * extra_cost->alu.shift
9324 + extra_cost->alu.log_shift);
9325 else
9326 *cost += (extra_cost->alu.shift
9327 + extra_cost->alu.shift_reg
9328 + extra_cost->alu.log_shift_reg);
9329 }
9330 }
9331 return true;
9332 }
9333
9334 *cost = LIBCALL_COST (2);
9335 return false;
9336
9337 case MINUS:
9338 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9339 && (mode == SFmode || !TARGET_VFP_SINGLE))
9340 {
9341 *cost = COSTS_N_INSNS (1);
9342 if (GET_CODE (XEXP (x, 0)) == MULT
9343 || GET_CODE (XEXP (x, 1)) == MULT)
9344 {
9345 rtx mul_op0, mul_op1, sub_op;
9346
9347 if (speed_p)
9348 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9349
9350 if (GET_CODE (XEXP (x, 0)) == MULT)
9351 {
9352 mul_op0 = XEXP (XEXP (x, 0), 0);
9353 mul_op1 = XEXP (XEXP (x, 0), 1);
9354 sub_op = XEXP (x, 1);
9355 }
9356 else
9357 {
9358 mul_op0 = XEXP (XEXP (x, 1), 0);
9359 mul_op1 = XEXP (XEXP (x, 1), 1);
9360 sub_op = XEXP (x, 0);
9361 }
9362
9363 /* The first operand of the multiply may be optionally
9364 negated. */
9365 if (GET_CODE (mul_op0) == NEG)
9366 mul_op0 = XEXP (mul_op0, 0);
9367
9368 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9369 + rtx_cost (mul_op1, code, 0, speed_p)
9370 + rtx_cost (sub_op, code, 0, speed_p));
9371
9372 return true;
9373 }
9374
9375 if (speed_p)
9376 *cost += extra_cost->fp[mode != SFmode].addsub;
9377 return false;
9378 }
9379
9380 if (mode == SImode)
9381 {
9382 rtx shift_by_reg = NULL;
9383 rtx shift_op;
9384 rtx non_shift_op;
9385
9386 *cost = COSTS_N_INSNS (1);
9387
9388 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9389 if (shift_op == NULL)
9390 {
9391 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9392 non_shift_op = XEXP (x, 0);
9393 }
9394 else
9395 non_shift_op = XEXP (x, 1);
9396
9397 if (shift_op != NULL)
9398 {
9399 if (shift_by_reg != NULL)
9400 {
9401 if (speed_p)
9402 *cost += extra_cost->alu.arith_shift_reg;
9403 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9404 }
9405 else if (speed_p)
9406 *cost += extra_cost->alu.arith_shift;
9407
9408 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9409 + rtx_cost (non_shift_op, code, 0, speed_p));
9410 return true;
9411 }
9412
9413 if (arm_arch_thumb2
9414 && GET_CODE (XEXP (x, 1)) == MULT)
9415 {
9416 /* MLS. */
9417 if (speed_p)
9418 *cost += extra_cost->mult[0].add;
9419 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9420 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9421 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9422 return true;
9423 }
9424
9425 if (CONST_INT_P (XEXP (x, 0)))
9426 {
9427 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9428 INTVAL (XEXP (x, 0)), NULL_RTX,
9429 NULL_RTX, 1, 0);
9430 *cost = COSTS_N_INSNS (insns);
9431 if (speed_p)
9432 *cost += insns * extra_cost->alu.arith;
9433 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9434 return true;
9435 }
9436
9437 return false;
9438 }
9439
9440 if (GET_MODE_CLASS (mode) == MODE_INT
9441 && GET_MODE_SIZE (mode) < 4)
9442 {
9443 rtx shift_op, shift_reg;
9444 shift_reg = NULL;
9445
9446 /* We check both sides of the MINUS for shifter operands since,
9447 unlike PLUS, it's not commutative. */
9448
9449 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9450 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9451
9452 /* Slightly disparage, as we might need to widen the result. */
9453 *cost = 1 + COSTS_N_INSNS (1);
9454 if (speed_p)
9455 *cost += extra_cost->alu.arith;
9456
9457 if (CONST_INT_P (XEXP (x, 0)))
9458 {
9459 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9460 return true;
9461 }
9462
9463 return false;
9464 }
9465
9466 if (mode == DImode)
9467 {
9468 *cost = COSTS_N_INSNS (2);
9469
9470 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9471 {
9472 rtx op1 = XEXP (x, 1);
9473
9474 if (speed_p)
9475 *cost += 2 * extra_cost->alu.arith;
9476
9477 if (GET_CODE (op1) == ZERO_EXTEND)
9478 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9479 else
9480 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9481 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9482 0, speed_p);
9483 return true;
9484 }
9485 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9486 {
9487 if (speed_p)
9488 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9489 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9490 0, speed_p)
9491 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9492 return true;
9493 }
9494 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9495 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9496 {
9497 if (speed_p)
9498 *cost += (extra_cost->alu.arith
9499 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9500 ? extra_cost->alu.arith
9501 : extra_cost->alu.arith_shift));
9502 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9503 + rtx_cost (XEXP (XEXP (x, 1), 0),
9504 GET_CODE (XEXP (x, 1)), 0, speed_p));
9505 return true;
9506 }
9507
9508 if (speed_p)
9509 *cost += 2 * extra_cost->alu.arith;
9510 return false;
9511 }
9512
9513 /* Vector mode? */
9514
9515 *cost = LIBCALL_COST (2);
9516 return false;
9517
9518 case PLUS:
9519 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9520 && (mode == SFmode || !TARGET_VFP_SINGLE))
9521 {
9522 *cost = COSTS_N_INSNS (1);
9523 if (GET_CODE (XEXP (x, 0)) == MULT)
9524 {
9525 rtx mul_op0, mul_op1, add_op;
9526
9527 if (speed_p)
9528 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9529
9530 mul_op0 = XEXP (XEXP (x, 0), 0);
9531 mul_op1 = XEXP (XEXP (x, 0), 1);
9532 add_op = XEXP (x, 1);
9533
9534 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9535 + rtx_cost (mul_op1, code, 0, speed_p)
9536 + rtx_cost (add_op, code, 0, speed_p));
9537
9538 return true;
9539 }
9540
9541 if (speed_p)
9542 *cost += extra_cost->fp[mode != SFmode].addsub;
9543 return false;
9544 }
9545 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9546 {
9547 *cost = LIBCALL_COST (2);
9548 return false;
9549 }
9550
9551 /* Narrow modes can be synthesized in SImode, but the range
9552 of useful sub-operations is limited. Check for shift operations
9553 on one of the operands. Only left shifts can be used in the
9554 narrow modes. */
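/* For example, an HImode (a << 2) + b is handled here as an SImode add
with a shifted operand; a right shift in a narrow mode gets no such
treatment and falls through to the generic costing below.  */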
9555 if (GET_MODE_CLASS (mode) == MODE_INT
9556 && GET_MODE_SIZE (mode) < 4)
9557 {
9558 rtx shift_op, shift_reg;
9559 shift_reg = NULL;
9560
9561 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9562
9563 if (CONST_INT_P (XEXP (x, 1)))
9564 {
9565 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9566 INTVAL (XEXP (x, 1)), NULL_RTX,
9567 NULL_RTX, 1, 0);
9568 *cost = COSTS_N_INSNS (insns);
9569 if (speed_p)
9570 *cost += insns * extra_cost->alu.arith;
9571 /* Slightly penalize a narrow operation as the result may
9572 need widening. */
9573 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9574 return true;
9575 }
9576
9577 /* Slightly penalize a narrow operation as the result may
9578 need widening. */
9579 *cost = 1 + COSTS_N_INSNS (1);
9580 if (speed_p)
9581 *cost += extra_cost->alu.arith;
9582
9583 return false;
9584 }
9585
9586 if (mode == SImode)
9587 {
9588 rtx shift_op, shift_reg;
9589
9590 *cost = COSTS_N_INSNS (1);
9591 if (TARGET_INT_SIMD
9592 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9593 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9594 {
9595 /* UXTA[BH] or SXTA[BH]. */
9596 if (speed_p)
9597 *cost += extra_cost->alu.extend_arith;
9598 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9599 speed_p)
9600 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9601 return true;
9602 }
9603
9604 shift_reg = NULL;
9605 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9606 if (shift_op != NULL)
9607 {
9608 if (shift_reg)
9609 {
9610 if (speed_p)
9611 *cost += extra_cost->alu.arith_shift_reg;
9612 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9613 }
9614 else if (speed_p)
9615 *cost += extra_cost->alu.arith_shift;
9616
9617 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9618 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9619 return true;
9620 }
9621 if (GET_CODE (XEXP (x, 0)) == MULT)
9622 {
9623 rtx mul_op = XEXP (x, 0);
9624
9625 *cost = COSTS_N_INSNS (1);
9626
9627 if (TARGET_DSP_MULTIPLY
9628 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9629 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9630 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9631 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9632 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9633 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9634 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9635 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9636 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9637 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9638 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9639 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9640 == 16))))))
9641 {
9642 /* SMLA[BT][BT]. */
9643 if (speed_p)
9644 *cost += extra_cost->mult[0].extend_add;
9645 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9646 SIGN_EXTEND, 0, speed_p)
9647 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9648 SIGN_EXTEND, 0, speed_p)
9649 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9650 return true;
9651 }
9652
9653 if (speed_p)
9654 *cost += extra_cost->mult[0].add;
9655 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9656 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9657 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9658 return true;
9659 }
9660 if (CONST_INT_P (XEXP (x, 1)))
9661 {
9662 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9663 INTVAL (XEXP (x, 1)), NULL_RTX,
9664 NULL_RTX, 1, 0);
9665 *cost = COSTS_N_INSNS (insns);
9666 if (speed_p)
9667 *cost += insns * extra_cost->alu.arith;
9668 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9669 return true;
9670 }
9671 return false;
9672 }
9673
9674 if (mode == DImode)
9675 {
9676 if (arm_arch3m
9677 && GET_CODE (XEXP (x, 0)) == MULT
9678 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9679 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9680 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9681 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9682 {
9683 *cost = COSTS_N_INSNS (1);
9684 if (speed_p)
9685 *cost += extra_cost->mult[1].extend_add;
9686 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9687 ZERO_EXTEND, 0, speed_p)
9688 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9689 ZERO_EXTEND, 0, speed_p)
9690 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9691 return true;
9692 }
9693
9694 *cost = COSTS_N_INSNS (2);
9695
9696 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9697 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9698 {
9699 if (speed_p)
9700 *cost += (extra_cost->alu.arith
9701 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9702 ? extra_cost->alu.arith
9703 : extra_cost->alu.arith_shift));
9704
9705 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9706 speed_p)
9707 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9708 return true;
9709 }
9710
9711 if (speed_p)
9712 *cost += 2 * extra_cost->alu.arith;
9713 return false;
9714 }
9715
9716 /* Vector mode? */
9717 *cost = LIBCALL_COST (2);
9718 return false;
9719
9720 case AND: case XOR: case IOR:
9721 if (mode == SImode)
9722 {
9723 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9724 rtx op0 = XEXP (x, 0);
9725 rtx shift_op, shift_reg;
9726
9727 *cost = COSTS_N_INSNS (1);
9728
9729 if (subcode == NOT
9730 && (code == AND
9731 || (code == IOR && TARGET_THUMB2)))
9732 op0 = XEXP (op0, 0);
9733
9734 shift_reg = NULL;
9735 shift_op = shifter_op_p (op0, &shift_reg);
9736 if (shift_op != NULL)
9737 {
9738 if (shift_reg)
9739 {
9740 if (speed_p)
9741 *cost += extra_cost->alu.log_shift_reg;
9742 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9743 }
9744 else if (speed_p)
9745 *cost += extra_cost->alu.log_shift;
9746
9747 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9748 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9749 return true;
9750 }
9751
9752 if (CONST_INT_P (XEXP (x, 1)))
9753 {
9754 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9755 INTVAL (XEXP (x, 1)), NULL_RTX,
9756 NULL_RTX, 1, 0);
9757
9758 *cost = COSTS_N_INSNS (insns);
9759 if (speed_p)
9760 *cost += insns * extra_cost->alu.logical;
9761 *cost += rtx_cost (op0, code, 0, speed_p);
9762 return true;
9763 }
9764
9765 if (speed_p)
9766 *cost += extra_cost->alu.logical;
9767 *cost += (rtx_cost (op0, code, 0, speed_p)
9768 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9769 return true;
9770 }
9771
9772 if (mode == DImode)
9773 {
9774 rtx op0 = XEXP (x, 0);
9775 enum rtx_code subcode = GET_CODE (op0);
9776
9777 *cost = COSTS_N_INSNS (2);
9778
9779 if (subcode == NOT
9780 && (code == AND
9781 || (code == IOR && TARGET_THUMB2)))
9782 op0 = XEXP (op0, 0);
9783
9784 if (GET_CODE (op0) == ZERO_EXTEND)
9785 {
9786 if (speed_p)
9787 *cost += 2 * extra_cost->alu.logical;
9788
9789 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
9790 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9791 return true;
9792 }
9793 else if (GET_CODE (op0) == SIGN_EXTEND)
9794 {
9795 if (speed_p)
9796 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9797
9798 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
9799 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
9800 return true;
9801 }
9802
9803 if (speed_p)
9804 *cost += 2 * extra_cost->alu.logical;
9805
9806 return true;
9807 }
9808 /* Vector mode? */
9809
9810 *cost = LIBCALL_COST (2);
9811 return false;
9812
9813 case MULT:
9814 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9815 && (mode == SFmode || !TARGET_VFP_SINGLE))
9816 {
9817 rtx op0 = XEXP (x, 0);
9818
9819 *cost = COSTS_N_INSNS (1);
9820
9821 if (GET_CODE (op0) == NEG)
9822 op0 = XEXP (op0, 0);
9823
9824 if (speed_p)
9825 *cost += extra_cost->fp[mode != SFmode].mult;
9826
9827 *cost += (rtx_cost (op0, MULT, 0, speed_p)
9828 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
9829 return true;
9830 }
9831 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9832 {
9833 *cost = LIBCALL_COST (2);
9834 return false;
9835 }
9836
9837 if (mode == SImode)
9838 {
9839 *cost = COSTS_N_INSNS (1);
9840 if (TARGET_DSP_MULTIPLY
9841 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9842 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9843 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9844 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9845 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
9846 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
9847 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
9848 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
9849 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
9850 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
9851 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
9852 && (INTVAL (XEXP (XEXP (x, 1), 1))
9853 == 16))))))
9854 {
9855 /* SMUL[TB][TB]. */
9856 if (speed_p)
9857 *cost += extra_cost->mult[0].extend;
9858 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
9859 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
9860 return true;
9861 }
9862 if (speed_p)
9863 *cost += extra_cost->mult[0].simple;
9864 return false;
9865 }
9866
9867 if (mode == DImode)
9868 {
9869 if (arm_arch3m
9870 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9871 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
9872 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
9873 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
9874 {
9875 *cost = COSTS_N_INSNS (1);
9876 if (speed_p)
9877 *cost += extra_cost->mult[1].extend;
9878 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
9879 ZERO_EXTEND, 0, speed_p)
9880 + rtx_cost (XEXP (XEXP (x, 1), 0),
9881 ZERO_EXTEND, 0, speed_p));
9882 return true;
9883 }
9884
9885 *cost = LIBCALL_COST (2);
9886 return false;
9887 }
9888
9889 /* Vector mode? */
9890 *cost = LIBCALL_COST (2);
9891 return false;
9892
9893 case NEG:
9894 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9895 && (mode == SFmode || !TARGET_VFP_SINGLE))
9896 {
9897 *cost = COSTS_N_INSNS (1);
9898 if (speed_p)
9899 *cost += extra_cost->fp[mode != SFmode].neg;
9900
9901 return false;
9902 }
9903 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9904 {
9905 *cost = LIBCALL_COST (1);
9906 return false;
9907 }
9908
9909 if (mode == SImode)
9910 {
9911 if (GET_CODE (XEXP (x, 0)) == ABS)
9912 {
9913 *cost = COSTS_N_INSNS (2);
9914 /* Assume the non-flag-changing variant. */
9915 if (speed_p)
9916 *cost += (extra_cost->alu.log_shift
9917 + extra_cost->alu.arith_shift);
9918 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
9919 return true;
9920 }
9921
9922 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
9923 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
9924 {
9925 *cost = COSTS_N_INSNS (2);
9926 /* No extra cost for MOV imm and MVN imm. */
9927 /* If the comparison op is using the flags, there's no further
9928 cost, otherwise we need to add the cost of the comparison. */
9929 if (!(REG_P (XEXP (XEXP (x, 0), 0))
9930 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
9931 && XEXP (XEXP (x, 0), 1) == const0_rtx))
9932 {
9933 *cost += (COSTS_N_INSNS (1)
9934 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
9935 speed_p)
9936 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
9937 speed_p));
9938 if (speed_p)
9939 *cost += extra_cost->alu.arith;
9940 }
9941 return true;
9942 }
9943 *cost = COSTS_N_INSNS (1);
9944 if (speed_p)
9945 *cost += extra_cost->alu.arith;
9946 return false;
9947 }
9948
9949 if (GET_MODE_CLASS (mode) == MODE_INT
9950 && GET_MODE_SIZE (mode) < 4)
9951 {
9952 /* Slightly disparage, as we might need an extend operation. */
9953 *cost = 1 + COSTS_N_INSNS (1);
9954 if (speed_p)
9955 *cost += extra_cost->alu.arith;
9956 return false;
9957 }
9958
9959 if (mode == DImode)
9960 {
9961 *cost = COSTS_N_INSNS (2);
9962 if (speed_p)
9963 *cost += 2 * extra_cost->alu.arith;
9964 return false;
9965 }
9966
9967 /* Vector mode? */
9968 *cost = LIBCALL_COST (1);
9969 return false;
9970
9971 case NOT:
9972 if (mode == SImode)
9973 {
9974 rtx shift_op;
9975 rtx shift_reg = NULL;
9976
9977 *cost = COSTS_N_INSNS (1);
9978 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9979
9980 if (shift_op)
9981 {
9982 if (shift_reg != NULL)
9983 {
9984 if (speed_p)
9985 *cost += extra_cost->alu.log_shift_reg;
9986 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9987 }
9988 else if (speed_p)
9989 *cost += extra_cost->alu.log_shift;
9990 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
9991 return true;
9992 }
9993
9994 if (speed_p)
9995 *cost += extra_cost->alu.logical;
9996 return false;
9997 }
9998 if (mode == DImode)
9999 {
10000 *cost = COSTS_N_INSNS (2);
10001 return false;
10002 }
10003
10004 /* Vector mode? */
10005
10006 *cost = LIBCALL_COST (1);
10007 return false;
10008
10009 case IF_THEN_ELSE:
10010 {
10011 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10012 {
10013 *cost = COSTS_N_INSNS (4);
10014 return true;
10015 }
10016 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10017 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10018
10019 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10020 /* Assume that if one arm of the if_then_else is a register,
10021 that it will be tied with the result and eliminate the
10022 conditional insn. */
10023 if (REG_P (XEXP (x, 1)))
10024 *cost += op2cost;
10025 else if (REG_P (XEXP (x, 2)))
10026 *cost += op1cost;
10027 else
10028 {
10029 if (speed_p)
10030 {
10031 if (extra_cost->alu.non_exec_costs_exec)
10032 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10033 else
10034 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10035 }
10036 else
10037 *cost += op1cost + op2cost;
10038 }
10039 }
10040 return true;
10041
10042 case COMPARE:
10043 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10044 *cost = 0;
10045 else
10046 {
10047 enum machine_mode op0mode;
10048 /* We'll mostly assume that the cost of a compare is the cost of the
10049 LHS. However, there are some notable exceptions. */
10050
10051 /* Floating point compares are never done as side-effects. */
10052 op0mode = GET_MODE (XEXP (x, 0));
10053 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10054 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10055 {
10056 *cost = COSTS_N_INSNS (1);
10057 if (speed_p)
10058 *cost += extra_cost->fp[op0mode != SFmode].compare;
10059
10060 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10061 {
10062 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10063 return true;
10064 }
10065
10066 return false;
10067 }
10068 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10069 {
10070 *cost = LIBCALL_COST (2);
10071 return false;
10072 }
10073
10074 /* DImode compares normally take two insns. */
10075 if (op0mode == DImode)
10076 {
10077 *cost = COSTS_N_INSNS (2);
10078 if (speed_p)
10079 *cost += 2 * extra_cost->alu.arith;
10080 return false;
10081 }
10082
10083 if (op0mode == SImode)
10084 {
10085 rtx shift_op;
10086 rtx shift_reg;
10087
10088 if (XEXP (x, 1) == const0_rtx
10089 && !(REG_P (XEXP (x, 0))
10090 || (GET_CODE (XEXP (x, 0)) == SUBREG
10091 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10092 {
10093 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10094
10095 /* Multiply operations that set the flags are often
10096 significantly more expensive. */
10097 if (speed_p
10098 && GET_CODE (XEXP (x, 0)) == MULT
10099 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10100 *cost += extra_cost->mult[0].flag_setting;
10101
10102 if (speed_p
10103 && GET_CODE (XEXP (x, 0)) == PLUS
10104 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10105 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10106 0), 1), mode))
10107 *cost += extra_cost->mult[0].flag_setting;
10108 return true;
10109 }
10110
10111 shift_reg = NULL;
10112 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10113 if (shift_op != NULL)
10114 {
10115 *cost = COSTS_N_INSNS (1);
10116 if (shift_reg != NULL)
10117 {
10118 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10119 if (speed_p)
10120 *cost += extra_cost->alu.arith_shift_reg;
10121 }
10122 else if (speed_p)
10123 *cost += extra_cost->alu.arith_shift;
10124 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10125 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10126 return true;
10127 }
10128
10129 *cost = COSTS_N_INSNS (1);
10130 if (speed_p)
10131 *cost += extra_cost->alu.arith;
10132 if (CONST_INT_P (XEXP (x, 1))
10133 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10134 {
10135 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10136 return true;
10137 }
10138 return false;
10139 }
10140
10141 /* Vector mode? */
10142
10143 *cost = LIBCALL_COST (2);
10144 return false;
10145 }
10146 return true;
10147
10148 case EQ:
10149 case NE:
10150 case LT:
10151 case LE:
10152 case GT:
10153 case GE:
10154 case LTU:
10155 case LEU:
10156 case GEU:
10157 case GTU:
10158 case ORDERED:
10159 case UNORDERED:
10160 case UNEQ:
10161 case UNLE:
10162 case UNLT:
10163 case UNGE:
10164 case UNGT:
10165 case LTGT:
10166 if (outer_code == SET)
10167 {
10168 /* Is it a store-flag operation? */
10169 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10170 && XEXP (x, 1) == const0_rtx)
10171 {
10172 /* Thumb also needs an IT insn. */
10173 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10174 return true;
10175 }
10176 if (XEXP (x, 1) == const0_rtx)
10177 {
10178 switch (code)
10179 {
10180 case LT:
10181 /* LSR Rd, Rn, #31. */
10182 *cost = COSTS_N_INSNS (1);
10183 if (speed_p)
10184 *cost += extra_cost->alu.shift;
10185 break;
10186
10187 case EQ:
10188 /* RSBS T1, Rn, #0
10189 ADC Rd, Rn, T1. */
10190
10191 case NE:
10192 /* SUBS T1, Rn, #1
10193 SBC Rd, Rn, T1. */
10194 *cost = COSTS_N_INSNS (2);
10195 break;
10196
10197 case LE:
10198 /* RSBS T1, Rn, Rn, LSR #31
10199 ADC Rd, Rn, T1. */
10200 *cost = COSTS_N_INSNS (2);
10201 if (speed_p)
10202 *cost += extra_cost->alu.arith_shift;
10203 break;
10204
10205 case GT:
10206 /* RSB Rd, Rn, Rn, ASR #1
10207 LSR Rd, Rd, #31. */
10208 *cost = COSTS_N_INSNS (2);
10209 if (speed_p)
10210 *cost += (extra_cost->alu.arith_shift
10211 + extra_cost->alu.shift);
10212 break;
10213
10214 case GE:
10215 /* ASR Rd, Rn, #31
10216 ADD Rd, Rn, #1. */
10217 *cost = COSTS_N_INSNS (2);
10218 if (speed_p)
10219 *cost += extra_cost->alu.shift;
10220 break;
10221
10222 default:
10223 /* Remaining cases are either meaningless or would take
10224 three insns anyway. */
10225 *cost = COSTS_N_INSNS (3);
10226 break;
10227 }
10228 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10229 return true;
10230 }
10231 else
10232 {
10233 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10234 if (CONST_INT_P (XEXP (x, 1))
10235 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10236 {
10237 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10238 return true;
10239 }
10240
10241 return false;
10242 }
10243 }
10244 /* Not directly inside a set. If it involves the condition code
10245 register it must be the condition for a branch, cond_exec or
10246 I_T_E operation. Since the comparison is performed elsewhere
10247 this is just the control part which has no additional
10248 cost. */
10249 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10250 && XEXP (x, 1) == const0_rtx)
10251 {
10252 *cost = 0;
10253 return true;
10254 }
10255 return false;
10256
10257 case ABS:
10258 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10259 && (mode == SFmode || !TARGET_VFP_SINGLE))
10260 {
10261 *cost = COSTS_N_INSNS (1);
10262 if (speed_p)
10263 *cost += extra_cost->fp[mode != SFmode].neg;
10264
10265 return false;
10266 }
10267 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10268 {
10269 *cost = LIBCALL_COST (1);
10270 return false;
10271 }
10272
10273 if (mode == SImode)
10274 {
10275 *cost = COSTS_N_INSNS (1);
10276 if (speed_p)
10277 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10278 return false;
10279 }
10280 /* Vector mode? */
10281 *cost = LIBCALL_COST (1);
10282 return false;
10283
10284 case SIGN_EXTEND:
10285 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10286 && MEM_P (XEXP (x, 0)))
10287 {
10288 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10289
10290 if (mode == DImode)
10291 *cost += COSTS_N_INSNS (1);
10292
10293 if (!speed_p)
10294 return true;
10295
10296 if (GET_MODE (XEXP (x, 0)) == SImode)
10297 *cost += extra_cost->ldst.load;
10298 else
10299 *cost += extra_cost->ldst.load_sign_extend;
10300
10301 if (mode == DImode)
10302 *cost += extra_cost->alu.shift;
10303
10304 return true;
10305 }
10306
10307 /* Widening from less than 32-bits requires an extend operation. */
10308 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10309 {
10310 /* We have SXTB/SXTH. */
10311 *cost = COSTS_N_INSNS (1);
10312 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10313 if (speed_p)
10314 *cost += extra_cost->alu.extend;
10315 }
10316 else if (GET_MODE (XEXP (x, 0)) != SImode)
10317 {
10318 /* Needs two shifts. */
10319 *cost = COSTS_N_INSNS (2);
10320 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10321 if (speed_p)
10322 *cost += 2 * extra_cost->alu.shift;
10323 }
10324
10325 /* Widening beyond 32-bits requires one more insn. */
10326 if (mode == DImode)
10327 {
10328 *cost += COSTS_N_INSNS (1);
10329 if (speed_p)
10330 *cost += extra_cost->alu.shift;
10331 }
10332
10333 return true;
10334
10335 case ZERO_EXTEND:
10336 if ((arm_arch4
10337 || GET_MODE (XEXP (x, 0)) == SImode
10338 || GET_MODE (XEXP (x, 0)) == QImode)
10339 && MEM_P (XEXP (x, 0)))
10340 {
10341 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10342
10343 if (mode == DImode)
10344 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10345
10346 return true;
10347 }
10348
10349 /* Widening from less than 32-bits requires an extend operation. */
10350 if (GET_MODE (XEXP (x, 0)) == QImode)
10351 {
10352 /* UXTB can be a shorter instruction in Thumb2, but it might
10353 be slower than the AND Rd, Rn, #255 alternative. When
10354 optimizing for speed it should never be slower to use
10355 AND, and we don't really model 16-bit vs 32-bit insns
10356 here. */
10357 *cost = COSTS_N_INSNS (1);
10358 if (speed_p)
10359 *cost += extra_cost->alu.logical;
10360 }
10361 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10362 {
10363 /* We have UXTB/UXTH. */
10364 *cost = COSTS_N_INSNS (1);
10365 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10366 if (speed_p)
10367 *cost += extra_cost->alu.extend;
10368 }
10369 else if (GET_MODE (XEXP (x, 0)) != SImode)
10370 {
10371 /* Needs two shifts. It's marginally preferable to use
10372 shifts rather than two BIC instructions as the second
10373 shift may merge with a subsequent insn as a shifter
10374 op. */
10375 *cost = COSTS_N_INSNS (2);
10376 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10377 if (speed_p)
10378 *cost += 2 * extra_cost->alu.shift;
10379 }
10380 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10381 *cost = COSTS_N_INSNS (1);
10382
10383 /* Widening beyond 32-bits requires one more insn. */
10384 if (mode == DImode)
10385 {
10386 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10387 }
10388
10389 return true;
10390
10391 case CONST_INT:
10392 *cost = 0;
10393 /* CONST_INT has no mode, so we cannot tell for sure how many
10394 insns are really going to be needed. The best we can do is
10395 look at the value passed. If it fits in SImode, then assume
10396 that's the mode it will be used for. Otherwise assume it
10397 will be used in DImode. */
10398 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10399 mode = SImode;
10400 else
10401 mode = DImode;
10402
10403 /* Avoid blowing up in arm_gen_constant (). */
10404 if (!(outer_code == PLUS
10405 || outer_code == AND
10406 || outer_code == IOR
10407 || outer_code == XOR
10408 || outer_code == MINUS))
10409 outer_code = SET;
10410
10411 const_int_cost:
10412 if (mode == SImode)
10413 {
10414 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10415 INTVAL (x), NULL, NULL,
10416 0, 0));
10417 /* Extra costs? */
10418 }
10419 else
10420 {
10421 *cost += COSTS_N_INSNS (arm_gen_constant
10422 (outer_code, SImode, NULL,
10423 trunc_int_for_mode (INTVAL (x), SImode),
10424 NULL, NULL, 0, 0)
10425 + arm_gen_constant (outer_code, SImode, NULL,
10426 INTVAL (x) >> 32, NULL,
10427 NULL, 0, 0));
10428 /* Extra costs? */
10429 }
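/* For instance, a DImode value such as 0x0000000100000001 splits into
two SImode constants of 1, each needing a single instruction, so it is
costed as COSTS_N_INSNS (2) here (on top of any cost already
accumulated above).  */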
10430
10431 return true;
10432
10433 case CONST:
10434 case LABEL_REF:
10435 case SYMBOL_REF:
10436 if (speed_p)
10437 {
10438 if (arm_arch_thumb2 && !flag_pic)
10439 *cost = COSTS_N_INSNS (2);
10440 else
10441 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10442 }
10443 else
10444 *cost = COSTS_N_INSNS (2);
10445
10446 if (flag_pic)
10447 {
10448 *cost += COSTS_N_INSNS (1);
10449 if (speed_p)
10450 *cost += extra_cost->alu.arith;
10451 }
10452
10453 return true;
10454
10455 case CONST_FIXED:
10456 *cost = COSTS_N_INSNS (4);
10457 /* Fixme. */
10458 return true;
10459
10460 case CONST_DOUBLE:
10461 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10462 && (mode == SFmode || !TARGET_VFP_SINGLE))
10463 {
10464 if (vfp3_const_double_rtx (x))
10465 {
10466 *cost = COSTS_N_INSNS (1);
10467 if (speed_p)
10468 *cost += extra_cost->fp[mode == DFmode].fpconst;
10469 return true;
10470 }
10471
10472 if (speed_p)
10473 {
10474 *cost = COSTS_N_INSNS (1);
10475 if (mode == DFmode)
10476 *cost += extra_cost->ldst.loadd;
10477 else
10478 *cost += extra_cost->ldst.loadf;
10479 }
10480 else
10481 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10482
10483 return true;
10484 }
10485 *cost = COSTS_N_INSNS (4);
10486 return true;
10487
10488 case CONST_VECTOR:
10489 /* Fixme. */
10490 if (TARGET_NEON
10491 && TARGET_HARD_FLOAT
10492 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10493 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10494 *cost = COSTS_N_INSNS (1);
10495 else
10496 *cost = COSTS_N_INSNS (4);
10497 return true;
10498
10499 case HIGH:
10500 case LO_SUM:
10501 *cost = COSTS_N_INSNS (1);
10502 /* When optimizing for size, we prefer constant pool entries to
10503 MOVW/MOVT pairs, so bump the cost of these slightly. */
10504 if (!speed_p)
10505 *cost += 1;
10506 return true;
10507
10508 case CLZ:
10509 *cost = COSTS_N_INSNS (1);
10510 if (speed_p)
10511 *cost += extra_cost->alu.clz;
10512 return false;
10513
10514 case SMIN:
10515 if (XEXP (x, 1) == const0_rtx)
10516 {
10517 *cost = COSTS_N_INSNS (1);
10518 if (speed_p)
10519 *cost += extra_cost->alu.log_shift;
10520 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10521 return true;
10522 }
10523 /* Fall through. */
10524 case SMAX:
10525 case UMIN:
10526 case UMAX:
10527 *cost = COSTS_N_INSNS (2);
10528 return false;
10529
10530 case TRUNCATE:
10531 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10532 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10533 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10534 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10535 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10536 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10537 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10538 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10539 == ZERO_EXTEND))))
10540 {
10541 *cost = COSTS_N_INSNS (1);
10542 if (speed_p)
10543 *cost += extra_cost->mult[1].extend;
10544 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10545 speed_p)
10546 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10547 0, speed_p));
10548 return true;
10549 }
10550 *cost = LIBCALL_COST (1);
10551 return false;
10552
10553 case UNSPEC:
10554 return arm_unspec_cost (x, outer_code, speed_p, cost);
10555
10556 case PC:
10557 /* Reading the PC is like reading any other register. Writing it
10558 is more expensive, but we take that into account elsewhere. */
10559 *cost = 0;
10560 return true;
10561
10562 case ZERO_EXTRACT:
10563 /* TODO: Simple zero_extract of bottom bits using AND. */
10564 /* Fall through. */
10565 case SIGN_EXTRACT:
10566 if (arm_arch6
10567 && mode == SImode
10568 && CONST_INT_P (XEXP (x, 1))
10569 && CONST_INT_P (XEXP (x, 2)))
10570 {
10571 *cost = COSTS_N_INSNS (1);
10572 if (speed_p)
10573 *cost += extra_cost->alu.bfx;
10574 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10575 return true;
10576 }
10577 /* Without UBFX/SBFX, need to resort to shift operations. */
10578 *cost = COSTS_N_INSNS (2);
10579 if (speed_p)
10580 *cost += 2 * extra_cost->alu.shift;
10581 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10582 return true;
10583
10584 case FLOAT_EXTEND:
10585 if (TARGET_HARD_FLOAT)
10586 {
10587 *cost = COSTS_N_INSNS (1);
10588 if (speed_p)
10589 *cost += extra_cost->fp[mode == DFmode].widen;
10590 if (!TARGET_FPU_ARMV8
10591 && GET_MODE (XEXP (x, 0)) == HFmode)
10592 {
10593 /* Pre v8, widening HF->DF is a two-step process, first
10594 widening to SFmode. */
10595 *cost += COSTS_N_INSNS (1);
10596 if (speed_p)
10597 *cost += extra_cost->fp[0].widen;
10598 }
10599 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10600 return true;
10601 }
10602
10603 *cost = LIBCALL_COST (1);
10604 return false;
10605
10606 case FLOAT_TRUNCATE:
10607 if (TARGET_HARD_FLOAT)
10608 {
10609 *cost = COSTS_N_INSNS (1);
10610 if (speed_p)
10611 *cost += extra_cost->fp[mode == DFmode].narrow;
10612 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10613 return true;
10614 /* Vector modes? */
10615 }
10616 *cost = LIBCALL_COST (1);
10617 return false;
10618
10619 case FIX:
10620 case UNSIGNED_FIX:
10621 if (TARGET_HARD_FLOAT)
10622 {
10623 if (GET_MODE_CLASS (mode) == MODE_INT)
10624 {
10625 *cost = COSTS_N_INSNS (1);
10626 if (speed_p)
10627 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10628 /* Strip off the 'cost' of rounding towards zero. */
10629 if (GET_CODE (XEXP (x, 0)) == FIX)
10630 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10631 else
10632 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10633 /* ??? Increase the cost to deal with transferring from
10634 FP -> CORE registers? */
10635 return true;
10636 }
10637 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10638 && TARGET_FPU_ARMV8)
10639 {
10640 *cost = COSTS_N_INSNS (1);
10641 if (speed_p)
10642 *cost += extra_cost->fp[mode == DFmode].roundint;
10643 return false;
10644 }
10645 /* Vector costs? */
10646 }
10647 *cost = LIBCALL_COST (1);
10648 return false;
10649
10650 case FLOAT:
10651 case UNSIGNED_FLOAT:
10652 if (TARGET_HARD_FLOAT)
10653 {
10654 /* ??? Increase the cost to deal with transferring from CORE
10655 -> FP registers? */
10656 *cost = COSTS_N_INSNS (1);
10657 if (speed_p)
10658 *cost += extra_cost->fp[mode == DFmode].fromint;
10659 return false;
10660 }
10661 *cost = LIBCALL_COST (1);
10662 return false;
10663
10664 case CALL:
10665 *cost = COSTS_N_INSNS (1);
10666 return true;
10667
10668 case ASM_OPERANDS:
10669 /* Just a guess. Cost one insn per input. */
10670 *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));
10671 return true;
10672
10673 default:
10674 if (mode != VOIDmode)
10675 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10676 else
10677 *cost = COSTS_N_INSNS (4); /* Who knows? */
10678 return false;
10679 }
10680 }
10681
10682 #undef HANDLE_NARROW_SHIFT_ARITH
10683
10684 /* RTX costs. Dispatch to the appropriate cost routine for the current tuning, for both speed and size. */
10685 static bool
10686 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
10687 int *total, bool speed)
10688 {
10689 bool result;
10690
10691 if (TARGET_OLD_RTX_COSTS
10692 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
10693 {
10694 /* Old way. (Deprecated.) */
10695 if (!speed)
10696 result = arm_size_rtx_costs (x, (enum rtx_code) code,
10697 (enum rtx_code) outer_code, total);
10698 else
10699 result = current_tune->rtx_costs (x, (enum rtx_code) code,
10700 (enum rtx_code) outer_code, total,
10701 speed);
10702 }
10703 else
10704 {
10705 /* New way. */
10706 if (current_tune->insn_extra_cost)
10707 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10708 (enum rtx_code) outer_code,
10709 current_tune->insn_extra_cost,
10710 total, speed);
10711 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
10712 && current_tune->insn_extra_cost == NULL */
10713 else
10714 result = arm_new_rtx_costs (x, (enum rtx_code) code,
10715 (enum rtx_code) outer_code,
10716 &generic_extra_costs, total, speed);
10717 }
10718
10719 if (dump_file && (dump_flags & TDF_DETAILS))
10720 {
10721 print_rtl_single (dump_file, x);
10722 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10723 *total, result ? "final" : "partial");
10724 }
10725 return result;
10726 }
10727
10728 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
10729 supported on any "slowmul" cores, so it can be ignored. */
10730
10731 static bool
10732 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10733 int *total, bool speed)
10734 {
10735 enum machine_mode mode = GET_MODE (x);
10736
10737 if (TARGET_THUMB)
10738 {
10739 *total = thumb1_rtx_costs (x, code, outer_code);
10740 return true;
10741 }
10742
10743 switch (code)
10744 {
10745 case MULT:
10746 if (GET_MODE_CLASS (mode) == MODE_FLOAT
10747 || mode == DImode)
10748 {
10749 *total = COSTS_N_INSNS (20);
10750 return false;
10751 }
10752
10753 if (CONST_INT_P (XEXP (x, 1)))
10754 {
10755 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10756 & (unsigned HOST_WIDE_INT) 0xffffffff);
10757 int cost, const_ok = const_ok_for_arm (i);
10758 int j, booth_unit_size;
10759
10760 /* Tune as appropriate. */
10761 cost = const_ok ? 4 : 8;
10762 booth_unit_size = 2;
10763 for (j = 0; i && j < 32; j += booth_unit_size)
10764 {
10765 i >>= booth_unit_size;
10766 cost++;
10767 }
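/* E.g. a multiplier of 0xff (which is const_ok_for_arm) starts at a
base cost of 4 and needs four 2-bit Booth steps, giving
COSTS_N_INSNS (8).  */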
10768
10769 *total = COSTS_N_INSNS (cost);
10770 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
10771 return true;
10772 }
10773
10774 *total = COSTS_N_INSNS (20);
10775 return false;
10776
10777 default:
10778 return arm_rtx_costs_1 (x, outer_code, total, speed);
10779 }
10780 }
10781
10782
10783 /* RTX cost for cores with a fast multiply unit (M variants). */
10784
10785 static bool
10786 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10787 int *total, bool speed)
10788 {
10789 enum machine_mode mode = GET_MODE (x);
10790
10791 if (TARGET_THUMB1)
10792 {
10793 *total = thumb1_rtx_costs (x, code, outer_code);
10794 return true;
10795 }
10796
10797 /* ??? should thumb2 use different costs? */
10798 switch (code)
10799 {
10800 case MULT:
10801 /* There is no point basing this on the tuning, since it is always the
10802 fast variant if it exists at all. */
10803 if (mode == DImode
10804 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10805 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10806 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10807 {
10808 *total = COSTS_N_INSNS (2);
10809 return false;
10810 }
10811
10812
10813 if (mode == DImode)
10814 {
10815 *total = COSTS_N_INSNS (5);
10816 return false;
10817 }
10818
10819 if (CONST_INT_P (XEXP (x, 1)))
10820 {
10821 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
10822 & (unsigned HOST_WIDE_INT) 0xffffffff);
10823 int cost, const_ok = const_ok_for_arm (i);
10824 int j, booth_unit_size;
10825
10826 /* Tune as appropriate. */
10827 cost = const_ok ? 4 : 8;
10828 booth_unit_size = 8;
10829 for (j = 0; i && j < 32; j += booth_unit_size)
10830 {
10831 i >>= booth_unit_size;
10832 cost++;
10833 }
10834
10835 *total = COSTS_N_INSNS (cost);
10836 return false;
10837 }
10838
10839 if (mode == SImode)
10840 {
10841 *total = COSTS_N_INSNS (4);
10842 return false;
10843 }
10844
10845 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10846 {
10847 if (TARGET_HARD_FLOAT
10848 && (mode == SFmode
10849 || (mode == DFmode && !TARGET_VFP_SINGLE)))
10850 {
10851 *total = COSTS_N_INSNS (1);
10852 return false;
10853 }
10854 }
10855
10856 /* Requires a lib call */
10857 *total = COSTS_N_INSNS (20);
10858 return false;
10859
10860 default:
10861 return arm_rtx_costs_1 (x, outer_code, total, speed);
10862 }
10863 }
10864
10865
10866 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
10867 so it can be ignored. */
10868
10869 static bool
10870 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10871 int *total, bool speed)
10872 {
10873 enum machine_mode mode = GET_MODE (x);
10874
10875 if (TARGET_THUMB)
10876 {
10877 *total = thumb1_rtx_costs (x, code, outer_code);
10878 return true;
10879 }
10880
10881 switch (code)
10882 {
10883 case COMPARE:
10884 if (GET_CODE (XEXP (x, 0)) != MULT)
10885 return arm_rtx_costs_1 (x, outer_code, total, speed);
10886
10887 /* A COMPARE of a MULT is slow on XScale; the muls instruction
10888 will stall until the multiplication is complete. */
10889 *total = COSTS_N_INSNS (3);
10890 return false;
10891
10892 case MULT:
10893 /* There is no point basing this on the tuning, since it is always the
10894 fast variant if it exists at all. */
10895 if (mode == DImode
10896 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10897 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10898 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10899 {
10900 *total = COSTS_N_INSNS (2);
10901 return false;
10902 }
10903
10904
10905 if (mode == DImode)
10906 {
10907 *total = COSTS_N_INSNS (5);
10908 return false;
10909 }
10910
10911 if (CONST_INT_P (XEXP (x, 1)))
10912 {
10913 /* If operand 1 is a constant we can more accurately
10914 calculate the cost of the multiply. The multiplier can
10915 retire 15 bits on the first cycle and a further 12 on the
10916 second. We do, of course, have to load the constant into
10917 a register first. */
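/* For example, a multiplier such as 0x4000 fits in the low 15 bits and
keeps the base overhead of one cycle, while something like 0x12345678
has significant bits above bit 14 and above bit 26 and is costed at
three cycles by the checks below.  */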
10918 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
10919 /* There's a general overhead of one cycle. */
10920 int cost = 1;
10921 unsigned HOST_WIDE_INT masked_const;
10922
10923 if (i & 0x80000000)
10924 i = ~i;
10925
10926 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
10927
10928 masked_const = i & 0xffff8000;
10929 if (masked_const != 0)
10930 {
10931 cost++;
10932 masked_const = i & 0xf8000000;
10933 if (masked_const != 0)
10934 cost++;
10935 }
10936 *total = COSTS_N_INSNS (cost);
10937 return false;
10938 }
10939
10940 if (mode == SImode)
10941 {
10942 *total = COSTS_N_INSNS (3);
10943 return false;
10944 }
10945
10946 /* Requires a lib call */
10947 *total = COSTS_N_INSNS (20);
10948 return false;
10949
10950 default:
10951 return arm_rtx_costs_1 (x, outer_code, total, speed);
10952 }
10953 }
10954
10955
10956 /* RTX costs for 9e (and later) cores. */
10957
10958 static bool
10959 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
10960 int *total, bool speed)
10961 {
10962 enum machine_mode mode = GET_MODE (x);
10963
10964 if (TARGET_THUMB1)
10965 {
10966 switch (code)
10967 {
10968 case MULT:
10969 *total = COSTS_N_INSNS (3);
10970 return true;
10971
10972 default:
10973 *total = thumb1_rtx_costs (x, code, outer_code);
10974 return true;
10975 }
10976 }
10977
10978 switch (code)
10979 {
10980 case MULT:
10981 /* There is no point basing this on the tuning, since it is always the
10982 fast variant if it exists at all. */
10983 if (mode == DImode
10984 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
10985 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10986 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10987 {
10988 *total = COSTS_N_INSNS (2);
10989 return false;
10990 }
10991
10992
10993 if (mode == DImode)
10994 {
10995 *total = COSTS_N_INSNS (5);
10996 return false;
10997 }
10998
10999 if (mode == SImode)
11000 {
11001 *total = COSTS_N_INSNS (2);
11002 return false;
11003 }
11004
11005 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11006 {
11007 if (TARGET_HARD_FLOAT
11008 && (mode == SFmode
11009 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11010 {
11011 *total = COSTS_N_INSNS (1);
11012 return false;
11013 }
11014 }
11015
11016 *total = COSTS_N_INSNS (20);
11017 return false;
11018
11019 default:
11020 return arm_rtx_costs_1 (x, outer_code, total, speed);
11021 }
11022 }
11023 /* All address computations that can be done are free, but rtx cost returns
11024 the same for practically all of them. So we weight the different types
11025 of address here in the order (most pref first):
11026 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11027 static inline int
11028 arm_arm_address_cost (rtx x)
11029 {
11030 enum rtx_code c = GET_CODE (x);
11031
11032 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11033 return 0;
11034 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11035 return 10;
11036
11037 if (c == PLUS)
11038 {
11039 if (CONST_INT_P (XEXP (x, 1)))
11040 return 2;
11041
11042 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11043 return 3;
11044
11045 return 4;
11046 }
11047
11048 return 6;
11049 }
11050
11051 static inline int
11052 arm_thumb_address_cost (rtx x)
11053 {
11054 enum rtx_code c = GET_CODE (x);
11055
11056 if (c == REG)
11057 return 1;
11058 if (c == PLUS
11059 && REG_P (XEXP (x, 0))
11060 && CONST_INT_P (XEXP (x, 1)))
11061 return 1;
11062
11063 return 2;
11064 }
11065
11066 static int
11067 arm_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11068 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11069 {
11070 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11071 }
11072
11073 /* Adjust cost hook for XScale. */
11074 static bool
11075 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11076 {
11077 /* Some true dependencies can have a higher cost depending
11078 on precisely how certain input operands are used. */
11079 if (REG_NOTE_KIND(link) == 0
11080 && recog_memoized (insn) >= 0
11081 && recog_memoized (dep) >= 0)
11082 {
11083 int shift_opnum = get_attr_shift (insn);
11084 enum attr_type attr_type = get_attr_type (dep);
11085
11086 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11087 operand for INSN. If we have a shifted input operand and the
11088 instruction we depend on is another ALU instruction, then we may
11089 have to account for an additional stall. */
11090 if (shift_opnum != 0
11091 && (attr_type == TYPE_ALU_SHIFT_IMM
11092 || attr_type == TYPE_ALUS_SHIFT_IMM
11093 || attr_type == TYPE_LOGIC_SHIFT_IMM
11094 || attr_type == TYPE_LOGICS_SHIFT_IMM
11095 || attr_type == TYPE_ALU_SHIFT_REG
11096 || attr_type == TYPE_ALUS_SHIFT_REG
11097 || attr_type == TYPE_LOGIC_SHIFT_REG
11098 || attr_type == TYPE_LOGICS_SHIFT_REG
11099 || attr_type == TYPE_MOV_SHIFT
11100 || attr_type == TYPE_MVN_SHIFT
11101 || attr_type == TYPE_MOV_SHIFT_REG
11102 || attr_type == TYPE_MVN_SHIFT_REG))
11103 {
11104 rtx shifted_operand;
11105 int opno;
11106
11107 /* Get the shifted operand. */
11108 extract_insn (insn);
11109 shifted_operand = recog_data.operand[shift_opnum];
11110
11111 /* Iterate over all the operands in DEP. If we write an operand
11112 that overlaps with SHIFTED_OPERAND, then we have to increase the
11113 cost of this dependency. */
11114 extract_insn (dep);
11115 preprocess_constraints ();
11116 for (opno = 0; opno < recog_data.n_operands; opno++)
11117 {
11118 /* We can ignore strict inputs. */
11119 if (recog_data.operand_type[opno] == OP_IN)
11120 continue;
11121
11122 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11123 shifted_operand))
11124 {
11125 *cost = 2;
11126 return false;
11127 }
11128 }
11129 }
11130 }
11131 return true;
11132 }
11133
11134 /* Adjust cost hook for Cortex A9. */
11135 static bool
11136 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11137 {
11138 switch (REG_NOTE_KIND (link))
11139 {
11140 case REG_DEP_ANTI:
11141 *cost = 0;
11142 return false;
11143
11144 case REG_DEP_TRUE:
11145 case REG_DEP_OUTPUT:
11146 if (recog_memoized (insn) >= 0
11147 && recog_memoized (dep) >= 0)
11148 {
11149 if (GET_CODE (PATTERN (insn)) == SET)
11150 {
11151 if (GET_MODE_CLASS
11152 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11153 || GET_MODE_CLASS
11154 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11155 {
11156 enum attr_type attr_type_insn = get_attr_type (insn);
11157 enum attr_type attr_type_dep = get_attr_type (dep);
11158
11159 /* By default all dependencies of the form
11160 s0 = s0 <op> s1
11161 s0 = s0 <op> s2
11162 have an extra latency of 1 cycle because
11163 of the input and output dependency in this
11164 case. However, this gets modeled as a true
11165 dependency and hence all these checks. */
11166 if (REG_P (SET_DEST (PATTERN (insn)))
11167 && REG_P (SET_DEST (PATTERN (dep)))
11168 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11169 SET_DEST (PATTERN (dep))))
11170 {
11171 /* FMACS is a special case where the dependent
11172 instruction can be issued 3 cycles before
11173 the normal latency in case of an output
11174 dependency. */
11175 if ((attr_type_insn == TYPE_FMACS
11176 || attr_type_insn == TYPE_FMACD)
11177 && (attr_type_dep == TYPE_FMACS
11178 || attr_type_dep == TYPE_FMACD))
11179 {
11180 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11181 *cost = insn_default_latency (dep) - 3;
11182 else
11183 *cost = insn_default_latency (dep);
11184 return false;
11185 }
11186 else
11187 {
11188 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11189 *cost = insn_default_latency (dep) + 1;
11190 else
11191 *cost = insn_default_latency (dep);
11192 }
11193 return false;
11194 }
11195 }
11196 }
11197 }
11198 break;
11199
11200 default:
11201 gcc_unreachable ();
11202 }
11203
11204 return true;
11205 }
11206
11207 /* Adjust cost hook for FA726TE. */
11208 static bool
11209 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
11210 {
11211 /* For FA726TE, a true dependency on CPSR (i.e. a condition-setting insn
11212 followed by a predicated one) has a penalty of 3. */
11213 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11214 && recog_memoized (insn) >= 0
11215 && recog_memoized (dep) >= 0
11216 && get_attr_conds (dep) == CONDS_SET)
11217 {
11218 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11219 if (get_attr_conds (insn) == CONDS_USE
11220 && get_attr_type (insn) != TYPE_BRANCH)
11221 {
11222 *cost = 3;
11223 return false;
11224 }
11225
11226 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11227 || get_attr_conds (insn) == CONDS_USE)
11228 {
11229 *cost = 0;
11230 return false;
11231 }
11232 }
11233
11234 return true;
11235 }
11236
11237 /* Implement TARGET_REGISTER_MOVE_COST.
11238
11239 Moves between VFP_REGS and GENERAL_REGS are a single insn, but such
11240 a move is typically more expensive than a single memory access. We set
11241 the cost to less than that of two memory accesses so that floating
11242 point to integer conversion does not go through memory. */
11243
11244 int
11245 arm_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
11246 reg_class_t from, reg_class_t to)
11247 {
11248 if (TARGET_32BIT)
11249 {
11250 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11251 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11252 return 15;
11253 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11254 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11255 return 4;
11256 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11257 return 20;
11258 else
11259 return 2;
11260 }
11261 else
11262 {
11263 if (from == HI_REGS || to == HI_REGS)
11264 return 4;
11265 else
11266 return 2;
11267 }
11268 }
11269
11270 /* Implement TARGET_MEMORY_MOVE_COST. */
11271
11272 int
11273 arm_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
11274 bool in ATTRIBUTE_UNUSED)
11275 {
11276 if (TARGET_32BIT)
11277 return 10;
11278 else
11279 {
11280 if (GET_MODE_SIZE (mode) < 4)
11281 return 8;
11282 else
11283 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11284 }
11285 }
11286
11287 /* Vectorizer cost model implementation. */
11288
11289 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11290 static int
11291 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11292 tree vectype,
11293 int misalign ATTRIBUTE_UNUSED)
11294 {
11295 unsigned elements;
11296
11297 switch (type_of_cost)
11298 {
11299 case scalar_stmt:
11300 return current_tune->vec_costs->scalar_stmt_cost;
11301
11302 case scalar_load:
11303 return current_tune->vec_costs->scalar_load_cost;
11304
11305 case scalar_store:
11306 return current_tune->vec_costs->scalar_store_cost;
11307
11308 case vector_stmt:
11309 return current_tune->vec_costs->vec_stmt_cost;
11310
11311 case vector_load:
11312 return current_tune->vec_costs->vec_align_load_cost;
11313
11314 case vector_store:
11315 return current_tune->vec_costs->vec_store_cost;
11316
11317 case vec_to_scalar:
11318 return current_tune->vec_costs->vec_to_scalar_cost;
11319
11320 case scalar_to_vec:
11321 return current_tune->vec_costs->scalar_to_vec_cost;
11322
11323 case unaligned_load:
11324 return current_tune->vec_costs->vec_unalign_load_cost;
11325
11326 case unaligned_store:
11327 return current_tune->vec_costs->vec_unalign_store_cost;
11328
11329 case cond_branch_taken:
11330 return current_tune->vec_costs->cond_taken_branch_cost;
11331
11332 case cond_branch_not_taken:
11333 return current_tune->vec_costs->cond_not_taken_branch_cost;
11334
11335 case vec_perm:
11336 case vec_promote_demote:
11337 return current_tune->vec_costs->vec_stmt_cost;
11338
11339 case vec_construct:
11340 elements = TYPE_VECTOR_SUBPARTS (vectype);
11341 return elements / 2 + 1;
11342
11343 default:
11344 gcc_unreachable ();
11345 }
11346 }
11347
11348 /* Implement targetm.vectorize.add_stmt_cost. */
11349
11350 static unsigned
11351 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11352 struct _stmt_vec_info *stmt_info, int misalign,
11353 enum vect_cost_model_location where)
11354 {
11355 unsigned *cost = (unsigned *) data;
11356 unsigned retval = 0;
11357
11358 if (flag_vect_cost_model)
11359 {
11360 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11361 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11362
11363 /* Statements in an inner loop relative to the loop being
11364 vectorized are weighted more heavily. The value here is
11365 arbitrary and could potentially be improved with analysis. */
11366 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11367 count *= 50; /* FIXME. */
11368
11369 retval = (unsigned) (count * stmt_cost);
11370 cost[where] += retval;
11371 }
11372
11373 return retval;
11374 }
11375
11376 /* Return true if and only if this insn can dual-issue only as older. */
11377 static bool
11378 cortexa7_older_only (rtx insn)
11379 {
11380 if (recog_memoized (insn) < 0)
11381 return false;
11382
11383 switch (get_attr_type (insn))
11384 {
11385 case TYPE_ALU_REG:
11386 case TYPE_ALUS_REG:
11387 case TYPE_LOGIC_REG:
11388 case TYPE_LOGICS_REG:
11389 case TYPE_ADC_REG:
11390 case TYPE_ADCS_REG:
11391 case TYPE_ADR:
11392 case TYPE_BFM:
11393 case TYPE_REV:
11394 case TYPE_MVN_REG:
11395 case TYPE_SHIFT_IMM:
11396 case TYPE_SHIFT_REG:
11397 case TYPE_LOAD_BYTE:
11398 case TYPE_LOAD1:
11399 case TYPE_STORE1:
11400 case TYPE_FFARITHS:
11401 case TYPE_FADDS:
11402 case TYPE_FFARITHD:
11403 case TYPE_FADDD:
11404 case TYPE_FMOV:
11405 case TYPE_F_CVT:
11406 case TYPE_FCMPS:
11407 case TYPE_FCMPD:
11408 case TYPE_FCONSTS:
11409 case TYPE_FCONSTD:
11410 case TYPE_FMULS:
11411 case TYPE_FMACS:
11412 case TYPE_FMULD:
11413 case TYPE_FMACD:
11414 case TYPE_FDIVS:
11415 case TYPE_FDIVD:
11416 case TYPE_F_MRC:
11417 case TYPE_F_MRRC:
11418 case TYPE_F_FLAG:
11419 case TYPE_F_LOADS:
11420 case TYPE_F_STORES:
11421 return true;
11422 default:
11423 return false;
11424 }
11425 }
11426
11427 /* Return true if and only if this insn can dual-issue as younger. */
11428 static bool
11429 cortexa7_younger (FILE *file, int verbose, rtx insn)
11430 {
11431 if (recog_memoized (insn) < 0)
11432 {
11433 if (verbose > 5)
11434 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11435 return false;
11436 }
11437
11438 switch (get_attr_type (insn))
11439 {
11440 case TYPE_ALU_IMM:
11441 case TYPE_ALUS_IMM:
11442 case TYPE_LOGIC_IMM:
11443 case TYPE_LOGICS_IMM:
11444 case TYPE_EXTEND:
11445 case TYPE_MVN_IMM:
11446 case TYPE_MOV_IMM:
11447 case TYPE_MOV_REG:
11448 case TYPE_MOV_SHIFT:
11449 case TYPE_MOV_SHIFT_REG:
11450 case TYPE_BRANCH:
11451 case TYPE_CALL:
11452 return true;
11453 default:
11454 return false;
11455 }
11456 }
11457
11458
11459 /* Look for an instruction that can dual issue only as an older
11460 instruction, and move it in front of any instructions that can
11461 dual-issue as younger, while preserving the relative order of all
11462 other instructions in the ready list. This is a heuristic to help
11463 dual-issue in later cycles, by postponing issue of more flexible
11464 instructions. This heuristic may affect dual issue opportunities
11465 in the current cycle. */
11466 static void
11467 cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11468 int clock)
11469 {
11470 int i;
11471 int first_older_only = -1, first_younger = -1;
11472
11473 if (verbose > 5)
11474 fprintf (file,
11475 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11476 clock,
11477 *n_readyp);
11478
11479 /* Traverse the ready list from the head (the instruction to issue
11480 first), looking for the first instruction that can issue as
11481 younger and the first instruction that can dual-issue only as
11482 older. */
11483 for (i = *n_readyp - 1; i >= 0; i--)
11484 {
11485 rtx insn = ready[i];
11486 if (cortexa7_older_only (insn))
11487 {
11488 first_older_only = i;
11489 if (verbose > 5)
11490 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11491 break;
11492 }
11493 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11494 first_younger = i;
11495 }
11496
11497 /* Nothing to reorder because either no younger insn was found or an
11498 insn that can dual-issue only as older appears before any insn that
11499 can dual-issue as younger. */
11500 if (first_younger == -1)
11501 {
11502 if (verbose > 5)
11503 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11504 return;
11505 }
11506
11507 /* Nothing to reorder because no older-only insn in the ready list. */
11508 if (first_older_only == -1)
11509 {
11510 if (verbose > 5)
11511 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11512 return;
11513 }
11514
11515 /* Move first_older_only insn before first_younger. */
11516 if (verbose > 5)
11517 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11518 INSN_UID(ready [first_older_only]),
11519 INSN_UID(ready [first_younger]));
11520 rtx first_older_only_insn = ready [first_older_only];
11521 for (i = first_older_only; i < first_younger; i++)
11522 {
11523 ready[i] = ready[i+1];
11524 }
11525
11526 ready[i] = first_older_only_insn;
11527 return;
11528 }
11529
11530 /* Implement TARGET_SCHED_REORDER. */
11531 static int
11532 arm_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
11533 int clock)
11534 {
11535 switch (arm_tune)
11536 {
11537 case cortexa7:
11538 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11539 break;
11540 default:
11541 /* Do nothing for other cores. */
11542 break;
11543 }
11544
11545 return arm_issue_rate ();
11546 }
11547
11548 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11549 It corrects the value of COST based on the relationship between
11550 INSN and DEP through the dependence LINK. It returns the new
11551 value. There is a per-core adjust_cost hook to adjust scheduler costs
11552 and the per-core hook can choose to completely override the generic
11553 adjust_cost function. Only put bits of code into arm_adjust_cost that
11554 are common across all cores. */
11555 static int
11556 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
11557 {
11558 rtx i_pat, d_pat;
11559
11560 /* When generating Thumb-1 code, we want to place flag-setting operations
11561 close to a conditional branch which depends on them, so that we can
11562 omit the comparison. */
11563 if (TARGET_THUMB1
11564 && REG_NOTE_KIND (link) == 0
11565 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11566 && recog_memoized (dep) >= 0
11567 && get_attr_conds (dep) == CONDS_SET)
11568 return 0;
11569
11570 if (current_tune->sched_adjust_cost != NULL)
11571 {
11572 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11573 return cost;
11574 }
11575
11576 /* XXX Is this strictly true? */
11577 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11578 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11579 return 0;
11580
11581 /* Call insns don't incur a stall, even if they follow a load. */
11582 if (REG_NOTE_KIND (link) == 0
11583 && CALL_P (insn))
11584 return 1;
11585
11586 if ((i_pat = single_set (insn)) != NULL
11587 && MEM_P (SET_SRC (i_pat))
11588 && (d_pat = single_set (dep)) != NULL
11589 && MEM_P (SET_DEST (d_pat)))
11590 {
11591 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11592 /* This is a load after a store; there is no conflict if the load reads
11593 from a cached area. Assume that loads from the stack and from the
11594 constant pool are cached, and that others will miss. This is a
11595 hack. */
11596
11597 if ((GET_CODE (src_mem) == SYMBOL_REF
11598 && CONSTANT_POOL_ADDRESS_P (src_mem))
11599 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11600 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11601 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11602 return 1;
11603 }
11604
11605 return cost;
11606 }
11607
11608 int
11609 arm_max_conditional_execute (void)
11610 {
11611 return max_insns_skipped;
11612 }
11613
11614 static int
11615 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11616 {
11617 if (TARGET_32BIT)
11618 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11619 else
11620 return (optimize > 0) ? 2 : 0;
11621 }
11622
11623 static int
11624 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11625 {
11626 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11627 }
11628
11629 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11630 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11631 sequences of non-executed instructions in IT blocks probably take the same
11632 amount of time as executed instructions (and the IT instruction itself takes
11633 space in icache). This function was experimentally determined to give good
11634 results on a popular embedded benchmark. */
11635
11636 static int
11637 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11638 {
11639 return (TARGET_32BIT && speed_p) ? 1
11640 : arm_default_branch_cost (speed_p, predictable_p);
11641 }
11642
11643 static bool fp_consts_inited = false;
11644
11645 static REAL_VALUE_TYPE value_fp0;
11646
11647 static void
11648 init_fp_table (void)
11649 {
11650 REAL_VALUE_TYPE r;
11651
11652 r = REAL_VALUE_ATOF ("0", DFmode);
11653 value_fp0 = r;
11654 fp_consts_inited = true;
11655 }
11656
11657 /* Return TRUE if rtx X is a valid immediate FP constant. */
11658 int
11659 arm_const_double_rtx (rtx x)
11660 {
11661 REAL_VALUE_TYPE r;
11662
11663 if (!fp_consts_inited)
11664 init_fp_table ();
11665
11666 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11667 if (REAL_VALUE_MINUS_ZERO (r))
11668 return 0;
11669
11670 if (REAL_VALUES_EQUAL (r, value_fp0))
11671 return 1;
11672
11673 return 0;
11674 }
11675
11676 /* VFPv3 has a fairly wide range of representable immediates, formed from
11677 "quarter-precision" floating-point values. These can be evaluated using this
11678 formula (with ^ for exponentiation):
11679
11680 -1^s * n * 2^-r
11681
11682 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11683 16 <= n <= 31 and 0 <= r <= 7.
11684
11685 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11686
11687 - A (most-significant) is the sign bit.
11688 - BCD are the exponent (encoded as r XOR 3).
11689 - EFGH are the mantissa (encoded as n - 16).
11690 */
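
/* A worked example of the encoding above: +1.0 = 16 * 2^-4, so s = 0,
   n = 16 and r = 4, giving ABCDEFGH = 0 111 0000 (A = 0, BCD = 4 XOR 3 = 7,
   EFGH = 16 - 16 = 0), i.e. index 0x70.  */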
11691
11692 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11693 fconst[sd] instruction, or -1 if X isn't suitable. */
11694 static int
11695 vfp3_const_double_index (rtx x)
11696 {
11697 REAL_VALUE_TYPE r, m;
11698 int sign, exponent;
11699 unsigned HOST_WIDE_INT mantissa, mant_hi;
11700 unsigned HOST_WIDE_INT mask;
11701 HOST_WIDE_INT m1, m2;
11702 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11703
11704 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11705 return -1;
11706
11707 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11708
11709 /* We can't represent these things, so detect them first. */
11710 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11711 return -1;
11712
11713 /* Extract sign, exponent and mantissa. */
11714 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11715 r = real_value_abs (&r);
11716 exponent = REAL_EXP (&r);
11717 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11718 highest (sign) bit, with a fixed binary point at bit point_pos.
11719 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11720 bits for the mantissa, this may fail (low bits would be lost). */
11721 real_ldexp (&m, &r, point_pos - exponent);
11722 REAL_VALUE_TO_INT (&m1, &m2, m);
11723 mantissa = m1;
11724 mant_hi = m2;
11725
11726 /* If there are bits set in the low part of the mantissa, we can't
11727 represent this value. */
11728 if (mantissa != 0)
11729 return -1;
11730
11731 /* Now make it so that mantissa contains the most-significant bits, and move
11732 the point_pos to indicate that the least-significant bits have been
11733 discarded. */
11734 point_pos -= HOST_BITS_PER_WIDE_INT;
11735 mantissa = mant_hi;
11736
11737 /* We can permit four significant bits of mantissa only, plus a high bit
11738 which is always 1. */
11739 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
11740 if ((mantissa & mask) != 0)
11741 return -1;
11742
11743 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11744 mantissa >>= point_pos - 5;
11745
11746 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11747 floating-point immediate zero with Neon using an integer-zero load, but
11748 that case is handled elsewhere.) */
11749 if (mantissa == 0)
11750 return -1;
11751
11752 gcc_assert (mantissa >= 16 && mantissa <= 31);
11753
11754 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11755 normalized significands are in the range [1, 2). (Our mantissa is shifted
11756 left 4 places at this point relative to normalized IEEE754 values). GCC
11757 internally uses [0.5, 1) (see real.c), so the exponent returned from
11758 REAL_EXP must be altered. */
11759 exponent = 5 - exponent;
11760
11761 if (exponent < 0 || exponent > 7)
11762 return -1;
11763
11764 /* Sign, mantissa and exponent are now in the correct form to plug into the
11765 formula described in the comment above. */
11766 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11767 }
11768
11769 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11770 int
11771 vfp3_const_double_rtx (rtx x)
11772 {
11773 if (!TARGET_VFP3)
11774 return 0;
11775
11776 return vfp3_const_double_index (x) != -1;
11777 }
11778
11779 /* Recognize immediates which can be used in various Neon instructions. Legal
11780 immediates are described by the following table (for VMVN variants, the
11781 bitwise inverse of the constant shown is recognized. In either case, VMOV
11782 is output and the correct instruction to use for a given constant is chosen
11783 by the assembler). The constant shown is replicated across all elements of
11784 the destination vector.
11785
11786 insn elems variant constant (binary)
11787 ---- ----- ------- -----------------
11788 vmov i32 0 00000000 00000000 00000000 abcdefgh
11789 vmov i32 1 00000000 00000000 abcdefgh 00000000
11790 vmov i32 2 00000000 abcdefgh 00000000 00000000
11791 vmov i32 3 abcdefgh 00000000 00000000 00000000
11792 vmov i16 4 00000000 abcdefgh
11793 vmov i16 5 abcdefgh 00000000
11794 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11795 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11796 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11797 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11798 vmvn i16 10 00000000 abcdefgh
11799 vmvn i16 11 abcdefgh 00000000
11800 vmov i32 12 00000000 00000000 abcdefgh 11111111
11801 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11802 vmov i32 14 00000000 abcdefgh 11111111 11111111
11803 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11804 vmov i8 16 abcdefgh
11805 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11806 eeeeeeee ffffffff gggggggg hhhhhhhh
11807 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11808 vmov f32 19 00000000 00000000 00000000 00000000
11809
11810 For case 18, B = !b. Representable values are exactly those accepted by
11811 vfp3_const_double_index, but are output as floating-point numbers rather
11812 than indices.
11813
11814 For case 19, we will change it to vmov.i32 when assembling.
11815
11816 Variants 0-5 (inclusive) may also be used as immediates for the second
11817 operand of VORR/VBIC instructions.
11818
11819 The INVERSE argument causes the bitwise inverse of the given operand to be
11820 recognized instead (used for recognizing legal immediates for the VAND/VORN
11821 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11822 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11823 output, rather than the real insns vbic/vorr).
11824
11825 INVERSE makes no difference to the recognition of float vectors.
11826
11827 The return value is the variant of immediate as shown in the above table, or
11828 -1 if the given value doesn't match any of the listed patterns.
11829 */
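/* For example (derived from the table above): a V4SImode vector with every
   element equal to 0x000000ab matches variant 0 and can be emitted as a
   vmov.i32 with an 8-bit immediate, while a vector of 0xffffff54 matches
   variant 6 (the VMVN form of 0xab).  */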
11830 static int
11831 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
11832 rtx *modconst, int *elementwidth)
11833 {
11834 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11835 matches = 1; \
11836 for (i = 0; i < idx; i += (STRIDE)) \
11837 if (!(TEST)) \
11838 matches = 0; \
11839 if (matches) \
11840 { \
11841 immtype = (CLASS); \
11842 elsize = (ELSIZE); \
11843 break; \
11844 }
11845
11846 unsigned int i, elsize = 0, idx = 0, n_elts;
11847 unsigned int innersize;
11848 unsigned char bytes[16];
11849 int immtype = -1, matches;
11850 unsigned int invmask = inverse ? 0xff : 0;
11851 bool vector = GET_CODE (op) == CONST_VECTOR;
11852
11853 if (vector)
11854 {
11855 n_elts = CONST_VECTOR_NUNITS (op);
11856 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
11857 }
11858 else
11859 {
11860 n_elts = 1;
11861 if (mode == VOIDmode)
11862 mode = DImode;
11863 innersize = GET_MODE_SIZE (mode);
11864 }
11865
11866 /* Vectors of float constants. */
11867 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11868 {
11869 rtx el0 = CONST_VECTOR_ELT (op, 0);
11870 REAL_VALUE_TYPE r0;
11871
11872 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11873 return -1;
11874
11875 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
11876
11877 for (i = 1; i < n_elts; i++)
11878 {
11879 rtx elt = CONST_VECTOR_ELT (op, i);
11880 REAL_VALUE_TYPE re;
11881
11882 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
11883
11884 if (!REAL_VALUES_EQUAL (r0, re))
11885 return -1;
11886 }
11887
11888 if (modconst)
11889 *modconst = CONST_VECTOR_ELT (op, 0);
11890
11891 if (elementwidth)
11892 *elementwidth = 0;
11893
11894 if (el0 == CONST0_RTX (GET_MODE (el0)))
11895 return 19;
11896 else
11897 return 18;
11898 }
11899
11900 /* Splat vector constant out into a byte vector. */
11901 for (i = 0; i < n_elts; i++)
11902 {
11903 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11904 unsigned HOST_WIDE_INT elpart;
11905 unsigned int part, parts;
11906
11907 if (CONST_INT_P (el))
11908 {
11909 elpart = INTVAL (el);
11910 parts = 1;
11911 }
11912 else if (CONST_DOUBLE_P (el))
11913 {
11914 elpart = CONST_DOUBLE_LOW (el);
11915 parts = 2;
11916 }
11917 else
11918 gcc_unreachable ();
11919
11920 for (part = 0; part < parts; part++)
11921 {
11922 unsigned int byte;
11923 for (byte = 0; byte < innersize; byte++)
11924 {
11925 bytes[idx++] = (elpart & 0xff) ^ invmask;
11926 elpart >>= BITS_PER_UNIT;
11927 }
11928 if (CONST_DOUBLE_P (el))
11929 elpart = CONST_DOUBLE_HIGH (el);
11930 }
11931 }
11932
11933 /* Sanity check. */
11934 gcc_assert (idx == GET_MODE_SIZE (mode));
11935
11936 do
11937 {
11938 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11939 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11940
11941 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11942 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11943
11944 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11945 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11946
11947 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11948 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11949
11950 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11951
11952 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11953
11954 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11955 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11956
11957 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11958 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11959
11960 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11961 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11962
11963 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11964 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11965
11966 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11967
11968 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11969
11970 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11971 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11972
11973 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11974 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11975
11976 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11977 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11978
11979 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11980 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11981
11982 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11983
11984 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11985 && bytes[i] == bytes[(i + 8) % idx]);
11986 }
11987 while (0);
11988
11989 if (immtype == -1)
11990 return -1;
11991
11992 if (elementwidth)
11993 *elementwidth = elsize;
11994
11995 if (modconst)
11996 {
11997 unsigned HOST_WIDE_INT imm = 0;
11998
11999 /* Un-invert bytes of recognized vector, if necessary. */
12000 if (invmask != 0)
12001 for (i = 0; i < idx; i++)
12002 bytes[i] ^= invmask;
12003
12004 if (immtype == 17)
12005 {
12006 /* FIXME: Broken on 32-bit H_W_I hosts. */
12007 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12008
12009 for (i = 0; i < 8; i++)
12010 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12011 << (i * BITS_PER_UNIT);
12012
12013 *modconst = GEN_INT (imm);
12014 }
12015 else
12016 {
12017 unsigned HOST_WIDE_INT imm = 0;
12018
12019 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12020 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12021
12022 *modconst = GEN_INT (imm);
12023 }
12024 }
12025
12026 return immtype;
12027 #undef CHECK
12028 }
12029
12030 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12031 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12032 float elements), and a modified constant (whatever should be output for a
12033 VMOV) in *MODCONST. */
12034
12035 int
12036 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
12037 rtx *modconst, int *elementwidth)
12038 {
12039 rtx tmpconst;
12040 int tmpwidth;
12041 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12042
12043 if (retval == -1)
12044 return 0;
12045
12046 if (modconst)
12047 *modconst = tmpconst;
12048
12049 if (elementwidth)
12050 *elementwidth = tmpwidth;
12051
12052 return 1;
12053 }
12054
12055 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12056 the immediate is valid, write a constant suitable for using as an operand
12057 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12058 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12059
12060 int
12061 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
12062 rtx *modconst, int *elementwidth)
12063 {
12064 rtx tmpconst;
12065 int tmpwidth;
12066 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12067
12068 if (retval < 0 || retval > 5)
12069 return 0;
12070
12071 if (modconst)
12072 *modconst = tmpconst;
12073
12074 if (elementwidth)
12075 *elementwidth = tmpwidth;
12076
12077 return 1;
12078 }
12079
12080 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12081 the immediate is valid, write a constant suitable for using as an operand
12082 to VSHR/VSHL to *MODCONST and the corresponding element width to
12083 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right
12084 shift, since the two have different limitations. */
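
/* For instance, for a V8HImode shift where every element of OP is the
   constant 3, *ELEMENTWIDTH is set to 16; per the checks below, a left
   shift may use counts 0..15 and a right shift counts 1..16.  */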
12085
12086 int
12087 neon_immediate_valid_for_shift (rtx op, enum machine_mode mode,
12088 rtx *modconst, int *elementwidth,
12089 bool isleftshift)
12090 {
12091 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12092 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12093 unsigned HOST_WIDE_INT last_elt = 0;
12094 unsigned HOST_WIDE_INT maxshift;
12095
12096 /* All shift amounts in the vector must be equal integer constants. */
12097 for (i = 0; i < n_elts; i++)
12098 {
12099 rtx el = CONST_VECTOR_ELT (op, i);
12100 unsigned HOST_WIDE_INT elpart;
12101
12102 if (CONST_INT_P (el))
12103 elpart = INTVAL (el);
12104 else if (CONST_DOUBLE_P (el))
12105 return 0;
12106 else
12107 gcc_unreachable ();
12108
12109 if (i != 0 && elpart != last_elt)
12110 return 0;
12111
12112 last_elt = elpart;
12113 }
12114
12115 /* Shift less than element size. */
12116 maxshift = innersize * 8;
12117
12118 if (isleftshift)
12119 {
12120 /* Left shift immediate value can be from 0 to <size>-1. */
12121 if (last_elt >= maxshift)
12122 return 0;
12123 }
12124 else
12125 {
12126 /* Right shift immediate value can be from 1 to <size>. */
12127 if (last_elt == 0 || last_elt > maxshift)
12128 return 0;
12129 }
12130
12131 if (elementwidth)
12132 *elementwidth = innersize * 8;
12133
12134 if (modconst)
12135 *modconst = CONST_VECTOR_ELT (op, 0);
12136
12137 return 1;
12138 }
12139
12140 /* Return a string suitable for output of Neon immediate logic operation
12141 MNEM. */
12142
12143 char *
12144 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
12145 int inverse, int quad)
12146 {
12147 int width, is_valid;
12148 static char templ[40];
12149
12150 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12151
12152 gcc_assert (is_valid != 0);
12153
12154 if (quad)
12155 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12156 else
12157 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12158
12159 return templ;
12160 }
12161
12162 /* Return a string suitable for output of Neon immediate shift operation
12163 (VSHR or VSHL) MNEM. */
12164
12165 char *
12166 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12167 enum machine_mode mode, int quad,
12168 bool isleftshift)
12169 {
12170 int width, is_valid;
12171 static char templ[40];
12172
12173 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12174 gcc_assert (is_valid != 0);
12175
12176 if (quad)
12177 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12178 else
12179 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12180
12181 return templ;
12182 }
12183
12184 /* Output a sequence of pairwise operations to implement a reduction.
12185 NOTE: We do "too much work" here, because pairwise operations work on two
12186 registers-worth of operands in one go. Unfortunately we can't exploit those
12187 extra calculations to do the full operation in fewer steps, I don't think.
12188 Although all vector elements of the result but the first are ignored, we
12189 actually calculate the same result in each of the elements. An alternative
12190 such as initially loading a vector with zero to use as each of the second
12191 operands would use up an additional register and take an extra instruction,
12192 for no particular gain. */
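
/* For example, for a V4SImode reduction PARTS below is 4, so the loop
   emits two pairwise operations (i = 2, then i = 1), the final one
   writing its result into OP0.  */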
12193
12194 void
12195 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
12196 rtx (*reduc) (rtx, rtx, rtx))
12197 {
12198 enum machine_mode inner = GET_MODE_INNER (mode);
12199 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12200 rtx tmpsum = op1;
12201
12202 for (i = parts / 2; i >= 1; i /= 2)
12203 {
12204 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12205 emit_insn (reduc (dest, tmpsum, tmpsum));
12206 tmpsum = dest;
12207 }
12208 }
12209
12210 /* If VALS is a vector constant that can be loaded into a register
12211 using VDUP, generate instructions to do so and return an RTX to
12212 assign to the register. Otherwise return NULL_RTX. */
12213
12214 static rtx
12215 neon_vdup_constant (rtx vals)
12216 {
12217 enum machine_mode mode = GET_MODE (vals);
12218 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12219 int n_elts = GET_MODE_NUNITS (mode);
12220 bool all_same = true;
12221 rtx x;
12222 int i;
12223
12224 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12225 return NULL_RTX;
12226
12227 for (i = 0; i < n_elts; ++i)
12228 {
12229 x = XVECEXP (vals, 0, i);
12230 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12231 all_same = false;
12232 }
12233
12234 if (!all_same)
12235 /* The elements are not all the same. We could handle repeating
12236 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12237 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12238 vdup.i16). */
12239 return NULL_RTX;
12240
12241 /* We can load this constant by using VDUP and a constant in a
12242 single ARM register. This will be cheaper than a vector
12243 load. */
12244
12245 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12246 return gen_rtx_VEC_DUPLICATE (mode, x);
12247 }
12248
12249 /* Generate code to load VALS, which is a PARALLEL containing only
12250 constants (for vec_init) or CONST_VECTOR, efficiently into a
12251 register. Returns an RTX to copy into the register, or NULL_RTX
12252 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12253
12254 rtx
12255 neon_make_constant (rtx vals)
12256 {
12257 enum machine_mode mode = GET_MODE (vals);
12258 rtx target;
12259 rtx const_vec = NULL_RTX;
12260 int n_elts = GET_MODE_NUNITS (mode);
12261 int n_const = 0;
12262 int i;
12263
12264 if (GET_CODE (vals) == CONST_VECTOR)
12265 const_vec = vals;
12266 else if (GET_CODE (vals) == PARALLEL)
12267 {
12268 /* A CONST_VECTOR must contain only CONST_INTs and
12269 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12270 Only store valid constants in a CONST_VECTOR. */
12271 for (i = 0; i < n_elts; ++i)
12272 {
12273 rtx x = XVECEXP (vals, 0, i);
12274 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12275 n_const++;
12276 }
12277 if (n_const == n_elts)
12278 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12279 }
12280 else
12281 gcc_unreachable ();
12282
12283 if (const_vec != NULL
12284 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12285 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12286 return const_vec;
12287 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12288 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12289 pipeline cycle; creating the constant takes one or two ARM
12290 pipeline cycles. */
12291 return target;
12292 else if (const_vec != NULL_RTX)
12293 /* Load from constant pool. On Cortex-A8 this takes two cycles
12294 (for either double or quad vectors). We can not take advantage
12295 of single-cycle VLD1 because we need a PC-relative addressing
12296 mode. */
12297 return const_vec;
12298 else
12299 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12300 We can not construct an initializer. */
12301 return NULL_RTX;
12302 }
12303
12304 /* Initialize vector TARGET to VALS. */
12305
12306 void
12307 neon_expand_vector_init (rtx target, rtx vals)
12308 {
12309 enum machine_mode mode = GET_MODE (target);
12310 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12311 int n_elts = GET_MODE_NUNITS (mode);
12312 int n_var = 0, one_var = -1;
12313 bool all_same = true;
12314 rtx x, mem;
12315 int i;
12316
12317 for (i = 0; i < n_elts; ++i)
12318 {
12319 x = XVECEXP (vals, 0, i);
12320 if (!CONSTANT_P (x))
12321 ++n_var, one_var = i;
12322
12323 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12324 all_same = false;
12325 }
12326
12327 if (n_var == 0)
12328 {
12329 rtx constant = neon_make_constant (vals);
12330 if (constant != NULL_RTX)
12331 {
12332 emit_move_insn (target, constant);
12333 return;
12334 }
12335 }
12336
12337 /* Splat a single non-constant element if we can. */
12338 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12339 {
12340 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12341 emit_insn (gen_rtx_SET (VOIDmode, target,
12342 gen_rtx_VEC_DUPLICATE (mode, x)));
12343 return;
12344 }
12345
12346 /* One field is non-constant. Load constant then overwrite varying
12347 field. This is more efficient than using the stack. */
12348 if (n_var == 1)
12349 {
12350 rtx copy = copy_rtx (vals);
12351 rtx index = GEN_INT (one_var);
12352
12353 /* Load constant part of vector, substitute neighboring value for
12354 varying element. */
12355 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12356 neon_expand_vector_init (target, copy);
12357
12358 /* Insert variable. */
12359 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12360 switch (mode)
12361 {
12362 case V8QImode:
12363 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12364 break;
12365 case V16QImode:
12366 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12367 break;
12368 case V4HImode:
12369 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12370 break;
12371 case V8HImode:
12372 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12373 break;
12374 case V2SImode:
12375 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12376 break;
12377 case V4SImode:
12378 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12379 break;
12380 case V2SFmode:
12381 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12382 break;
12383 case V4SFmode:
12384 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12385 break;
12386 case V2DImode:
12387 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12388 break;
12389 default:
12390 gcc_unreachable ();
12391 }
12392 return;
12393 }
12394
12395 /* Construct the vector in memory one field at a time
12396 and load the whole vector. */
12397 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12398 for (i = 0; i < n_elts; i++)
12399 emit_move_insn (adjust_address_nv (mem, inner_mode,
12400 i * GET_MODE_SIZE (inner_mode)),
12401 XVECEXP (vals, 0, i));
12402 emit_move_insn (target, mem);
12403 }
12404
12405 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12406 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12407 reported source locations are bogus. */
12408
12409 static void
12410 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12411 const char *err)
12412 {
12413 HOST_WIDE_INT lane;
12414
12415 gcc_assert (CONST_INT_P (operand));
12416
12417 lane = INTVAL (operand);
12418
12419 if (lane < low || lane >= high)
12420 error (err);
12421 }
12422
12423 /* Bounds-check lanes. */
12424
12425 void
12426 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12427 {
12428 bounds_check (operand, low, high, "lane out of range");
12429 }
12430
12431 /* Bounds-check constants. */
12432
12433 void
12434 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12435 {
12436 bounds_check (operand, low, high, "constant out of range");
12437 }
12438
12439 HOST_WIDE_INT
12440 neon_element_bits (enum machine_mode mode)
12441 {
12442 if (mode == DImode)
12443 return GET_MODE_BITSIZE (mode);
12444 else
12445 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12446 }
12447
12448 \f
12449 /* Predicates for `match_operand' and `match_operator'. */
12450
12451 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12452 WB is true if full writeback address modes are allowed and is false
12453 if limited writeback address modes (POST_INC and PRE_DEC) are
12454 allowed. */
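
/* In concrete terms (see the checks below): plain (mem (reg)) is always
   accepted, POST_INC and PRE_DEC are accepted regardless of WB, PRE_INC
   and POST_DEC require WB, and (plus (reg) (const_int N)) is accepted
   for word-aligned N with -1024 < N < 1024.  */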
12455
12456 int
12457 arm_coproc_mem_operand (rtx op, bool wb)
12458 {
12459 rtx ind;
12460
12461 /* Reject eliminable registers. */
12462 if (! (reload_in_progress || reload_completed || lra_in_progress)
12463 && ( reg_mentioned_p (frame_pointer_rtx, op)
12464 || reg_mentioned_p (arg_pointer_rtx, op)
12465 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12466 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12467 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12468 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12469 return FALSE;
12470
12471 /* Constants are converted into offsets from labels. */
12472 if (!MEM_P (op))
12473 return FALSE;
12474
12475 ind = XEXP (op, 0);
12476
12477 if (reload_completed
12478 && (GET_CODE (ind) == LABEL_REF
12479 || (GET_CODE (ind) == CONST
12480 && GET_CODE (XEXP (ind, 0)) == PLUS
12481 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12482 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12483 return TRUE;
12484
12485 /* Match: (mem (reg)). */
12486 if (REG_P (ind))
12487 return arm_address_register_rtx_p (ind, 0);
12488
12489 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12490 acceptable in any case (subject to verification by
12491 arm_address_register_rtx_p). We need WB to be true to accept
12492 PRE_INC and POST_DEC. */
12493 if (GET_CODE (ind) == POST_INC
12494 || GET_CODE (ind) == PRE_DEC
12495 || (wb
12496 && (GET_CODE (ind) == PRE_INC
12497 || GET_CODE (ind) == POST_DEC)))
12498 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12499
12500 if (wb
12501 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12502 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12503 && GET_CODE (XEXP (ind, 1)) == PLUS
12504 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12505 ind = XEXP (ind, 1);
12506
12507 /* Match:
12508 (plus (reg)
12509 (const)). */
12510 if (GET_CODE (ind) == PLUS
12511 && REG_P (XEXP (ind, 0))
12512 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12513 && CONST_INT_P (XEXP (ind, 1))
12514 && INTVAL (XEXP (ind, 1)) > -1024
12515 && INTVAL (XEXP (ind, 1)) < 1024
12516 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12517 return TRUE;
12518
12519 return FALSE;
12520 }
12521
12522 /* Return TRUE if OP is a memory operand which we can load or store a vector
12523 to/from. TYPE is one of the following values:
12524 0 - Vector load/store (vldr)
12525 1 - Core registers (ldm)
12526 2 - Element/structure loads (vld1)
12527 */
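/* For example, with TYPE 0 an address of the form (plus (reg) (const_int 8))
   is accepted when the offset is word-aligned and within range, whereas
   TYPE 2 accepts only a plain register or a post-increment address (besides
   the label-based forms allowed after reload).  */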
12528 int
12529 neon_vector_mem_operand (rtx op, int type, bool strict)
12530 {
12531 rtx ind;
12532
12533 /* Reject eliminable registers. */
12534 if (! (reload_in_progress || reload_completed)
12535 && ( reg_mentioned_p (frame_pointer_rtx, op)
12536 || reg_mentioned_p (arg_pointer_rtx, op)
12537 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12538 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12539 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12540 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12541 return !strict;
12542
12543 /* Constants are converted into offsets from labels. */
12544 if (!MEM_P (op))
12545 return FALSE;
12546
12547 ind = XEXP (op, 0);
12548
12549 if (reload_completed
12550 && (GET_CODE (ind) == LABEL_REF
12551 || (GET_CODE (ind) == CONST
12552 && GET_CODE (XEXP (ind, 0)) == PLUS
12553 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12554 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12555 return TRUE;
12556
12557 /* Match: (mem (reg)). */
12558 if (REG_P (ind))
12559 return arm_address_register_rtx_p (ind, 0);
12560
12561 /* Allow post-increment with Neon registers. */
12562 if ((type != 1 && GET_CODE (ind) == POST_INC)
12563 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12564 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12565
12566 /* FIXME: vld1 allows register post-modify. */
12567
12568 /* Match:
12569 (plus (reg)
12570 (const)). */
12571 if (type == 0
12572 && GET_CODE (ind) == PLUS
12573 && REG_P (XEXP (ind, 0))
12574 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12575 && CONST_INT_P (XEXP (ind, 1))
12576 && INTVAL (XEXP (ind, 1)) > -1024
12577 /* For quad modes, we restrict the constant offset to be slightly less
12578 than what the instruction format permits. We have no such constraint
12579 on double mode offsets. (This must match arm_legitimate_index_p.) */
12580 && (INTVAL (XEXP (ind, 1))
12581 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12582 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12583 return TRUE;
12584
12585 return FALSE;
12586 }
12587
12588 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12589 type. */
12590 int
12591 neon_struct_mem_operand (rtx op)
12592 {
12593 rtx ind;
12594
12595 /* Reject eliminable registers. */
12596 if (! (reload_in_progress || reload_completed)
12597 && ( reg_mentioned_p (frame_pointer_rtx, op)
12598 || reg_mentioned_p (arg_pointer_rtx, op)
12599 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12600 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12601 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12602 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12603 return FALSE;
12604
12605 /* Constants are converted into offsets from labels. */
12606 if (!MEM_P (op))
12607 return FALSE;
12608
12609 ind = XEXP (op, 0);
12610
12611 if (reload_completed
12612 && (GET_CODE (ind) == LABEL_REF
12613 || (GET_CODE (ind) == CONST
12614 && GET_CODE (XEXP (ind, 0)) == PLUS
12615 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12616 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12617 return TRUE;
12618
12619 /* Match: (mem (reg)). */
12620 if (REG_P (ind))
12621 return arm_address_register_rtx_p (ind, 0);
12622
12623 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12624 if (GET_CODE (ind) == POST_INC
12625 || GET_CODE (ind) == PRE_DEC)
12626 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12627
12628 return FALSE;
12629 }
12630
12631 /* Return true if X is a register that will be eliminated later on. */
12632 int
12633 arm_eliminable_register (rtx x)
12634 {
12635 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12636 || REGNO (x) == ARG_POINTER_REGNUM
12637 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12638 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12639 }
12640
12641 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12642 coprocessor registers. Otherwise return NO_REGS. */
12643
12644 enum reg_class
12645 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
12646 {
12647 if (mode == HFmode)
12648 {
12649 if (!TARGET_NEON_FP16)
12650 return GENERAL_REGS;
12651 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12652 return NO_REGS;
12653 return GENERAL_REGS;
12654 }
12655
12656 /* The neon move patterns handle all legitimate vector and struct
12657 addresses. */
12658 if (TARGET_NEON
12659 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12660 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12661 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12662 || VALID_NEON_STRUCT_MODE (mode)))
12663 return NO_REGS;
12664
12665 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12666 return NO_REGS;
12667
12668 return GENERAL_REGS;
12669 }
12670
12671 /* Values which must be returned in the most-significant end of the return
12672 register. */
12673
12674 static bool
12675 arm_return_in_msb (const_tree valtype)
12676 {
12677 return (TARGET_AAPCS_BASED
12678 && BYTES_BIG_ENDIAN
12679 && (AGGREGATE_TYPE_P (valtype)
12680 || TREE_CODE (valtype) == COMPLEX_TYPE
12681 || FIXED_POINT_TYPE_P (valtype)));
12682 }
12683
12684 /* Return TRUE if X references a SYMBOL_REF. */
12685 int
12686 symbol_mentioned_p (rtx x)
12687 {
12688 const char * fmt;
12689 int i;
12690
12691 if (GET_CODE (x) == SYMBOL_REF)
12692 return 1;
12693
12694 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12695 are constant offsets, not symbols. */
12696 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12697 return 0;
12698
12699 fmt = GET_RTX_FORMAT (GET_CODE (x));
12700
12701 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12702 {
12703 if (fmt[i] == 'E')
12704 {
12705 int j;
12706
12707 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12708 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12709 return 1;
12710 }
12711 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12712 return 1;
12713 }
12714
12715 return 0;
12716 }
12717
12718 /* Return TRUE if X references a LABEL_REF. */
12719 int
12720 label_mentioned_p (rtx x)
12721 {
12722 const char * fmt;
12723 int i;
12724
12725 if (GET_CODE (x) == LABEL_REF)
12726 return 1;
12727
12728 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12729 instruction, but they are constant offsets, not symbols. */
12730 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12731 return 0;
12732
12733 fmt = GET_RTX_FORMAT (GET_CODE (x));
12734 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12735 {
12736 if (fmt[i] == 'E')
12737 {
12738 int j;
12739
12740 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12741 if (label_mentioned_p (XVECEXP (x, i, j)))
12742 return 1;
12743 }
12744 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12745 return 1;
12746 }
12747
12748 return 0;
12749 }
12750
12751 int
12752 tls_mentioned_p (rtx x)
12753 {
12754 switch (GET_CODE (x))
12755 {
12756 case CONST:
12757 return tls_mentioned_p (XEXP (x, 0));
12758
12759 case UNSPEC:
12760 if (XINT (x, 1) == UNSPEC_TLS)
12761 return 1;
12762
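      /* Fall through.  */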
12763 default:
12764 return 0;
12765 }
12766 }
12767
12768 /* Must not copy any rtx that uses a pc-relative address. */
12769
12770 static int
12771 arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
12772 {
12773 if (GET_CODE (*x) == UNSPEC
12774 && (XINT (*x, 1) == UNSPEC_PIC_BASE
12775 || XINT (*x, 1) == UNSPEC_PIC_UNIFIED))
12776 return 1;
12777 return 0;
12778 }
12779
12780 static bool
12781 arm_cannot_copy_insn_p (rtx insn)
12782 {
12783 /* The tls call insn cannot be copied, as it is paired with a data
12784 word. */
12785 if (recog_memoized (insn) == CODE_FOR_tlscall)
12786 return true;
12787
12788 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
12789 }
12790
12791 enum rtx_code
12792 minmax_code (rtx x)
12793 {
12794 enum rtx_code code = GET_CODE (x);
12795
12796 switch (code)
12797 {
12798 case SMAX:
12799 return GE;
12800 case SMIN:
12801 return LE;
12802 case UMIN:
12803 return LEU;
12804 case UMAX:
12805 return GEU;
12806 default:
12807 gcc_unreachable ();
12808 }
12809 }
12810
12811 /* Match pair of min/max operators that can be implemented via usat/ssat. */
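/* For example (following the checks below): bounds [0, 255] match the
   unsigned case with *MASK = 8, and bounds [-128, 127] match the signed
   case, also with *MASK = 8.  */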
12812
12813 bool
12814 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12815 int *mask, bool *signed_sat)
12816 {
12817 /* The high bound must be a power of two minus one. */
12818 int log = exact_log2 (INTVAL (hi_bound) + 1);
12819 if (log == -1)
12820 return false;
12821
12822 /* The low bound is either zero (for usat) or one less than the
12823 negation of the high bound (for ssat). */
12824 if (INTVAL (lo_bound) == 0)
12825 {
12826 if (mask)
12827 *mask = log;
12828 if (signed_sat)
12829 *signed_sat = false;
12830
12831 return true;
12832 }
12833
12834 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12835 {
12836 if (mask)
12837 *mask = log + 1;
12838 if (signed_sat)
12839 *signed_sat = true;
12840
12841 return true;
12842 }
12843
12844 return false;
12845 }
12846
12847 /* Return 1 if memory locations are adjacent. */
12848 int
12849 adjacent_mem_locations (rtx a, rtx b)
12850 {
12851 /* We don't guarantee to preserve the order of these memory refs. */
12852 if (volatile_refs_p (a) || volatile_refs_p (b))
12853 return 0;
12854
12855 if ((REG_P (XEXP (a, 0))
12856 || (GET_CODE (XEXP (a, 0)) == PLUS
12857 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12858 && (REG_P (XEXP (b, 0))
12859 || (GET_CODE (XEXP (b, 0)) == PLUS
12860 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12861 {
12862 HOST_WIDE_INT val0 = 0, val1 = 0;
12863 rtx reg0, reg1;
12864 int val_diff;
12865
12866 if (GET_CODE (XEXP (a, 0)) == PLUS)
12867 {
12868 reg0 = XEXP (XEXP (a, 0), 0);
12869 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12870 }
12871 else
12872 reg0 = XEXP (a, 0);
12873
12874 if (GET_CODE (XEXP (b, 0)) == PLUS)
12875 {
12876 reg1 = XEXP (XEXP (b, 0), 0);
12877 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12878 }
12879 else
12880 reg1 = XEXP (b, 0);
12881
12882 /* Don't accept any offset that will require multiple
12883 instructions to handle, since this would cause the
12884 arith_adjacentmem pattern to output an overlong sequence. */
12885 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12886 return 0;
12887
12888 /* Don't allow an eliminable register: register elimination can make
12889 the offset too large. */
12890 if (arm_eliminable_register (reg0))
12891 return 0;
12892
12893 val_diff = val1 - val0;
12894
12895 if (arm_ld_sched)
12896 {
12897 /* If the target has load delay slots, then there's no benefit
12898 to using an ldm instruction unless the offset is zero and
12899 we are optimizing for size. */
12900 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12901 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12902 && (val_diff == 4 || val_diff == -4));
12903 }
12904
12905 return ((REGNO (reg0) == REGNO (reg1))
12906 && (val_diff == 4 || val_diff == -4));
12907 }
12908
12909 return 0;
12910 }
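/* For illustration of the test above: with A == (mem (reg r4)) and
   B == (mem (plus (reg r4) (const_int 4))), the two references share
   base register r4 and their offsets differ by exactly 4, so on a core
   without load delay slots this returns nonzero and the
   arith_adjacentmem patterns may fuse the pair; register choice and
   profitability are of course core-dependent.  */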
12911
12912 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12913 for load operations, false for store operations. CONSECUTIVE is true
12914 if the register numbers in the operation must be consecutive in the register
12915 bank. RETURN_PC is true if the value is to be loaded into the PC.
12916 The pattern we are trying to match for load is:
12917 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12918 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12919 :
12920 :
12921 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12922 ]
12923 where
12924 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12925 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12926 3. If consecutive is TRUE, then for kth register being loaded,
12927 REGNO (R_dk) = REGNO (R_d0) + k.
12928 The pattern for store is similar. */
12929 bool
12930 ldm_stm_operation_p (rtx op, bool load, enum machine_mode mode,
12931 bool consecutive, bool return_pc)
12932 {
12933 HOST_WIDE_INT count = XVECLEN (op, 0);
12934 rtx reg, mem, addr;
12935 unsigned regno;
12936 unsigned first_regno;
12937 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12938 rtx elt;
12939 bool addr_reg_in_reglist = false;
12940 bool update = false;
12941 int reg_increment;
12942 int offset_adj;
12943 int regs_per_val;
12944
12945 /* If not in SImode, then registers must be consecutive
12946 (e.g., VLDM instructions for DFmode). */
12947 gcc_assert ((mode == SImode) || consecutive);
12948 /* Setting return_pc for stores is illegal. */
12949 gcc_assert (!return_pc || load);
12950
12951 /* Set up the increments and the regs per val based on the mode. */
12952 reg_increment = GET_MODE_SIZE (mode);
12953 regs_per_val = reg_increment / 4;
12954 offset_adj = return_pc ? 1 : 0;
12955
12956 if (count <= 1
12957 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12958 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12959 return false;
12960
12961 /* Check if this is a write-back. */
12962 elt = XVECEXP (op, 0, offset_adj);
12963 if (GET_CODE (SET_SRC (elt)) == PLUS)
12964 {
12965 i++;
12966 base = 1;
12967 update = true;
12968
12969 /* The offset adjustment must be the number of registers being
12970 popped times the size of a single register. */
12971 if (!REG_P (SET_DEST (elt))
12972 || !REG_P (XEXP (SET_SRC (elt), 0))
12973 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12974 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12975 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12976 ((count - 1 - offset_adj) * reg_increment))
12977 return false;
12978 }
12979
12980 i = i + offset_adj;
12981 base = base + offset_adj;
12982 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12983 success depends on the type: VLDM can do just one reg,
12984 LDM must do at least two. */
12985 if ((count <= i) && (mode == SImode))
12986 return false;
12987
12988 elt = XVECEXP (op, 0, i - 1);
12989 if (GET_CODE (elt) != SET)
12990 return false;
12991
12992 if (load)
12993 {
12994 reg = SET_DEST (elt);
12995 mem = SET_SRC (elt);
12996 }
12997 else
12998 {
12999 reg = SET_SRC (elt);
13000 mem = SET_DEST (elt);
13001 }
13002
13003 if (!REG_P (reg) || !MEM_P (mem))
13004 return false;
13005
13006 regno = REGNO (reg);
13007 first_regno = regno;
13008 addr = XEXP (mem, 0);
13009 if (GET_CODE (addr) == PLUS)
13010 {
13011 if (!CONST_INT_P (XEXP (addr, 1)))
13012 return false;
13013
13014 offset = INTVAL (XEXP (addr, 1));
13015 addr = XEXP (addr, 0);
13016 }
13017
13018 if (!REG_P (addr))
13019 return false;
13020
13021 /* Don't allow SP to be loaded unless it is also the base register. It
13022 guarantees that SP is reset correctly when an LDM instruction
13023 is interrupted. Otherwise, we might end up with a corrupt stack. */
13024 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13025 return false;
13026
13027 for (; i < count; i++)
13028 {
13029 elt = XVECEXP (op, 0, i);
13030 if (GET_CODE (elt) != SET)
13031 return false;
13032
13033 if (load)
13034 {
13035 reg = SET_DEST (elt);
13036 mem = SET_SRC (elt);
13037 }
13038 else
13039 {
13040 reg = SET_SRC (elt);
13041 mem = SET_DEST (elt);
13042 }
13043
13044 if (!REG_P (reg)
13045 || GET_MODE (reg) != mode
13046 || REGNO (reg) <= regno
13047 || (consecutive
13048 && (REGNO (reg) !=
13049 (unsigned int) (first_regno + regs_per_val * (i - base))))
13050 /* Don't allow SP to be loaded unless it is also the base register. It
13051 guarantees that SP is reset correctly when an LDM instruction
13052 is interrupted. Otherwise, we might end up with a corrupt stack. */
13053 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13054 || !MEM_P (mem)
13055 || GET_MODE (mem) != mode
13056 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13057 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13058 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13059 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13060 offset + (i - base) * reg_increment))
13061 && (!REG_P (XEXP (mem, 0))
13062 || offset + (i - base) * reg_increment != 0)))
13063 return false;
13064
13065 regno = REGNO (reg);
13066 if (regno == REGNO (addr))
13067 addr_reg_in_reglist = true;
13068 }
13069
13070 if (load)
13071 {
13072 if (update && addr_reg_in_reglist)
13073 return false;
13074
13075 /* For Thumb-1, the address register is always modified, either by write-back
13076 or by an explicit load. If the pattern does not describe an update,
13077 then the address register must be in the list of loaded registers. */
13078 if (TARGET_THUMB1)
13079 return update || addr_reg_in_reglist;
13080 }
13081
13082 return true;
13083 }
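/* For illustration (register numbers arbitrary), a parallel of the form
     [(set (reg sp) (plus (reg sp) (const_int 8)))
      (set (reg r4) (mem (reg sp)))
      (set (reg r5) (mem (plus (reg sp) (const_int 4))))]
   is accepted with LOAD true, MODE SImode and CONSECUTIVE false: the
   first element is recognized as a write-back of 2 * 4 bytes, and the
   loads use ascending registers with ascending offsets from the base
   register sp, so on ARM/Thumb-2 this corresponds to
   "ldmia sp!, {r4, r5}" (i.e. "pop {r4, r5}").  */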
13084
13085 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13086 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13087 instruction. ADD_OFFSET is nonzero if the base address register needs
13088 to be modified with an add instruction before we can use it. */
13089
13090 static bool
13091 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13092 int nops, HOST_WIDE_INT add_offset)
13093 {
13094 /* For the ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
13095 if the offset isn't small enough. The reason 2 ldrs are faster
13096 is because these ARMs are able to do more than one cache access
13097 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13098 whilst the ARM8 has a double bandwidth cache. This means that
13099 these cores can do both an instruction fetch and a data fetch in
13100 a single cycle, so the trick of calculating the address into a
13101 scratch register (one of the result regs) and then doing a load
13102 multiple actually becomes slower (and no smaller in code size).
13103 That is the transformation
13104
13105 ldr rd1, [rbase + offset]
13106 ldr rd2, [rbase + offset + 4]
13107
13108 to
13109
13110 add rd1, rbase, offset
13111 ldmia rd1, {rd1, rd2}
13112
13113 produces worse code -- '3 cycles + any stalls on rd2' instead of
13114 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13115 access per cycle, the first sequence could never complete in less
13116 than 6 cycles, whereas the ldm sequence would only take 5 and
13117 would make better use of sequential accesses if not hitting the
13118 cache.
13119
13120 We cheat here and test 'arm_ld_sched' which we currently know to
13121 only be true for the ARM8, ARM9 and StrongARM. If this ever
13122 changes, then the test below needs to be reworked. */
13123 if (nops == 2 && arm_ld_sched && add_offset != 0)
13124 return false;
13125
13126 /* XScale has load-store double instructions, but they have stricter
13127 alignment requirements than load-store multiple, so we cannot
13128 use them.
13129
13130 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13131 the pipeline until completion.
13132
13133 NREGS CYCLES
13134 1 3
13135 2 4
13136 3 5
13137 4 6
13138
13139 An ldr instruction takes 1-3 cycles, but does not block the
13140 pipeline.
13141
13142 NREGS CYCLES
13143 1 1-3
13144 2 2-6
13145 3 3-9
13146 4 4-12
13147
13148 Best case ldr will always win. However, the more ldr instructions
13149 we issue, the less likely we are to be able to schedule them well.
13150 Using ldr instructions also increases code size.
13151
13152 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13153 for counts of 3 or 4 regs. */
13154 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13155 return false;
13156 return true;
13157 }
13158
13159 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13160 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13161 an array ORDER which describes the sequence to use when accessing the
13162 offsets that produces an ascending order. In this sequence, each
13163 offset must be larger by exactly 4 than the previous one. ORDER[0]
13164 must have been filled in with the lowest offset by the caller.
13165 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13166 we use to verify that ORDER produces an ascending order of registers.
13167 Return true if it was possible to construct such an order, false if
13168 not. */
13169
13170 static bool
13171 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13172 int *unsorted_regs)
13173 {
13174 int i;
13175 for (i = 1; i < nops; i++)
13176 {
13177 int j;
13178
13179 order[i] = order[i - 1];
13180 for (j = 0; j < nops; j++)
13181 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13182 {
13183 /* We must find exactly one offset that is higher than the
13184 previous one by 4. */
13185 if (order[i] != order[i - 1])
13186 return false;
13187 order[i] = j;
13188 }
13189 if (order[i] == order[i - 1])
13190 return false;
13191 /* The register numbers must be ascending. */
13192 if (unsorted_regs != NULL
13193 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13194 return false;
13195 }
13196 return true;
13197 }
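/* For illustration: given UNSORTED_OFFSETS == {8, 0, 12, 4} and
   ORDER[0] == 1 (the index of the lowest offset), the loop above fills
   ORDER with {1, 3, 0, 2}, visiting the offsets as 0, 4, 8, 12.  If an
   offset were missing or duplicated (say {0, 4, 4, 12}), no unique
   successor would exist and the function would return false.  */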
13198
13199 /* Used to determine in a peephole whether a sequence of load
13200 instructions can be changed into a load-multiple instruction.
13201 NOPS is the number of separate load instructions we are examining. The
13202 first NOPS entries in OPERANDS are the destination registers, the
13203 next NOPS entries are memory operands. If this function is
13204 successful, *BASE is set to the common base register of the memory
13205 accesses; *LOAD_OFFSET is set to the first memory location's offset
13206 from that base register.
13207 REGS is an array filled in with the destination register numbers.
13208 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13209 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13210 the sequence of registers in REGS matches the loads from ascending memory
13211 locations, and the function verifies that the register numbers are
13212 themselves ascending. If CHECK_REGS is false, the register numbers
13213 are stored in the order they are found in the operands. */
13214 static int
13215 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13216 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13217 {
13218 int unsorted_regs[MAX_LDM_STM_OPS];
13219 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13220 int order[MAX_LDM_STM_OPS];
13221 rtx base_reg_rtx = NULL;
13222 int base_reg = -1;
13223 int i, ldm_case;
13224
13225 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13226 easily extended if required. */
13227 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13228
13229 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13230
13231 /* Loop over the operands and check that the memory references are
13232 suitable (i.e. immediate offsets from the same base register). At
13233 the same time, extract the target register, and the memory
13234 offsets. */
13235 for (i = 0; i < nops; i++)
13236 {
13237 rtx reg;
13238 rtx offset;
13239
13240 /* Convert a subreg of a mem into the mem itself. */
13241 if (GET_CODE (operands[nops + i]) == SUBREG)
13242 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13243
13244 gcc_assert (MEM_P (operands[nops + i]));
13245
13246 /* Don't reorder volatile memory references; it doesn't seem worth
13247 looking for the case where the order is ok anyway. */
13248 if (MEM_VOLATILE_P (operands[nops + i]))
13249 return 0;
13250
13251 offset = const0_rtx;
13252
13253 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13254 || (GET_CODE (reg) == SUBREG
13255 && REG_P (reg = SUBREG_REG (reg))))
13256 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13257 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13258 || (GET_CODE (reg) == SUBREG
13259 && REG_P (reg = SUBREG_REG (reg))))
13260 && (CONST_INT_P (offset
13261 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13262 {
13263 if (i == 0)
13264 {
13265 base_reg = REGNO (reg);
13266 base_reg_rtx = reg;
13267 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13268 return 0;
13269 }
13270 else if (base_reg != (int) REGNO (reg))
13271 /* Not addressed from the same base register. */
13272 return 0;
13273
13274 unsorted_regs[i] = (REG_P (operands[i])
13275 ? REGNO (operands[i])
13276 : REGNO (SUBREG_REG (operands[i])));
13277
13278 /* If it isn't an integer register, or if it overwrites the
13279 base register but isn't the last insn in the list, then
13280 we can't do this. */
13281 if (unsorted_regs[i] < 0
13282 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13283 || unsorted_regs[i] > 14
13284 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13285 return 0;
13286
13287 /* Don't allow SP to be loaded unless it is also the base
13288 register. It guarantees that SP is reset correctly when
13289 an LDM instruction is interrupted. Otherwise, we might
13290 end up with a corrupt stack. */
13291 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13292 return 0;
13293
13294 unsorted_offsets[i] = INTVAL (offset);
13295 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13296 order[0] = i;
13297 }
13298 else
13299 /* Not a suitable memory address. */
13300 return 0;
13301 }
13302
13303 /* All the useful information has now been extracted from the
13304 operands into unsorted_regs and unsorted_offsets; additionally,
13305 order[0] has been set to the lowest offset in the list. Sort
13306 the offsets into order, verifying that they are adjacent, and
13307 check that the register numbers are ascending. */
13308 if (!compute_offset_order (nops, unsorted_offsets, order,
13309 check_regs ? unsorted_regs : NULL))
13310 return 0;
13311
13312 if (saved_order)
13313 memcpy (saved_order, order, sizeof order);
13314
13315 if (base)
13316 {
13317 *base = base_reg;
13318
13319 for (i = 0; i < nops; i++)
13320 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13321
13322 *load_offset = unsorted_offsets[order[0]];
13323 }
13324
13325 if (TARGET_THUMB1
13326 && !peep2_reg_dead_p (nops, base_reg_rtx))
13327 return 0;
13328
13329 if (unsorted_offsets[order[0]] == 0)
13330 ldm_case = 1; /* ldmia */
13331 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13332 ldm_case = 2; /* ldmib */
13333 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13334 ldm_case = 3; /* ldmda */
13335 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13336 ldm_case = 4; /* ldmdb */
13337 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13338 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13339 ldm_case = 5;
13340 else
13341 return 0;
13342
13343 if (!multiple_operation_profitable_p (false, nops,
13344 ldm_case == 5
13345 ? unsorted_offsets[order[0]] : 0))
13346 return 0;
13347
13348 return ldm_case;
13349 }
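/* For illustration (registers arbitrary): a peephole presenting
     ldr r0, [r4]
     ldr r1, [r4, #4]
     ldr r2, [r4, #8]
   yields *BASE == r4, *LOAD_OFFSET == 0 and ldm_case 1 (ldmia).  Had
   the lowest offset been 4, ARM mode would give case 2 (ldmib); a
   larger lowest offset that is const_ok_for_arm gives case 5, which
   callers handle by first adding the offset into a scratch register.  */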
13350
13351 /* Used to determine in a peephole whether a sequence of store instructions can
13352 be changed into a store-multiple instruction.
13353 NOPS is the number of separate store instructions we are examining.
13354 NOPS_TOTAL is the total number of instructions recognized by the peephole
13355 pattern.
13356 The first NOPS entries in OPERANDS are the source registers, the next
13357 NOPS entries are memory operands. If this function is successful, *BASE is
13358 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13359 to the first memory location's offset from that base register. REGS is an
13360 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13361 likewise filled with the corresponding rtx's.
13362 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13363 numbers to an ascending order of stores.
13364 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13365 from ascending memory locations, and the function verifies that the register
13366 numbers are themselves ascending. If CHECK_REGS is false, the register
13367 numbers are stored in the order they are found in the operands. */
13368 static int
13369 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13370 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13371 HOST_WIDE_INT *load_offset, bool check_regs)
13372 {
13373 int unsorted_regs[MAX_LDM_STM_OPS];
13374 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13375 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13376 int order[MAX_LDM_STM_OPS];
13377 int base_reg = -1;
13378 rtx base_reg_rtx = NULL;
13379 int i, stm_case;
13380
13381 /* Write back of base register is currently only supported for Thumb 1. */
13382 int base_writeback = TARGET_THUMB1;
13383
13384 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13385 easily extended if required. */
13386 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13387
13388 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13389
13390 /* Loop over the operands and check that the memory references are
13391 suitable (i.e. immediate offsets from the same base register). At
13392 the same time, extract the target register, and the memory
13393 offsets. */
13394 for (i = 0; i < nops; i++)
13395 {
13396 rtx reg;
13397 rtx offset;
13398
13399 /* Convert a subreg of a mem into the mem itself. */
13400 if (GET_CODE (operands[nops + i]) == SUBREG)
13401 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13402
13403 gcc_assert (MEM_P (operands[nops + i]));
13404
13405 /* Don't reorder volatile memory references; it doesn't seem worth
13406 looking for the case where the order is ok anyway. */
13407 if (MEM_VOLATILE_P (operands[nops + i]))
13408 return 0;
13409
13410 offset = const0_rtx;
13411
13412 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13413 || (GET_CODE (reg) == SUBREG
13414 && REG_P (reg = SUBREG_REG (reg))))
13415 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13416 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13417 || (GET_CODE (reg) == SUBREG
13418 && REG_P (reg = SUBREG_REG (reg))))
13419 && (CONST_INT_P (offset
13420 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13421 {
13422 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13423 ? operands[i] : SUBREG_REG (operands[i]));
13424 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13425
13426 if (i == 0)
13427 {
13428 base_reg = REGNO (reg);
13429 base_reg_rtx = reg;
13430 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13431 return 0;
13432 }
13433 else if (base_reg != (int) REGNO (reg))
13434 /* Not addressed from the same base register. */
13435 return 0;
13436
13437 /* If it isn't an integer register, then we can't do this. */
13438 if (unsorted_regs[i] < 0
13439 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13440 /* The effects are unpredictable if the base register is
13441 both updated and stored. */
13442 || (base_writeback && unsorted_regs[i] == base_reg)
13443 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13444 || unsorted_regs[i] > 14)
13445 return 0;
13446
13447 unsorted_offsets[i] = INTVAL (offset);
13448 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13449 order[0] = i;
13450 }
13451 else
13452 /* Not a suitable memory address. */
13453 return 0;
13454 }
13455
13456 /* All the useful information has now been extracted from the
13457 operands into unsorted_regs and unsorted_offsets; additionally,
13458 order[0] has been set to the lowest offset in the list. Sort
13459 the offsets into order, verifying that they are adjacent, and
13460 check that the register numbers are ascending. */
13461 if (!compute_offset_order (nops, unsorted_offsets, order,
13462 check_regs ? unsorted_regs : NULL))
13463 return 0;
13464
13465 if (saved_order)
13466 memcpy (saved_order, order, sizeof order);
13467
13468 if (base)
13469 {
13470 *base = base_reg;
13471
13472 for (i = 0; i < nops; i++)
13473 {
13474 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13475 if (reg_rtxs)
13476 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13477 }
13478
13479 *load_offset = unsorted_offsets[order[0]];
13480 }
13481
13482 if (TARGET_THUMB1
13483 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13484 return 0;
13485
13486 if (unsorted_offsets[order[0]] == 0)
13487 stm_case = 1; /* stmia */
13488 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13489 stm_case = 2; /* stmib */
13490 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13491 stm_case = 3; /* stmda */
13492 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13493 stm_case = 4; /* stmdb */
13494 else
13495 return 0;
13496
13497 if (!multiple_operation_profitable_p (false, nops, 0))
13498 return 0;
13499
13500 return stm_case;
13501 }
13502 \f
13503 /* Routines for use in generating RTL. */
13504
13505 /* Generate a load-multiple instruction. COUNT is the number of loads in
13506 the instruction; REGS and MEMS are arrays containing the operands.
13507 BASEREG is the base register to be used in addressing the memory operands.
13508 WBACK_OFFSET is nonzero if the instruction should update the base
13509 register. */
13510
13511 static rtx
13512 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13513 HOST_WIDE_INT wback_offset)
13514 {
13515 int i = 0, j;
13516 rtx result;
13517
13518 if (!multiple_operation_profitable_p (false, count, 0))
13519 {
13520 rtx seq;
13521
13522 start_sequence ();
13523
13524 for (i = 0; i < count; i++)
13525 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13526
13527 if (wback_offset != 0)
13528 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13529
13530 seq = get_insns ();
13531 end_sequence ();
13532
13533 return seq;
13534 }
13535
13536 result = gen_rtx_PARALLEL (VOIDmode,
13537 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13538 if (wback_offset != 0)
13539 {
13540 XVECEXP (result, 0, 0)
13541 = gen_rtx_SET (VOIDmode, basereg,
13542 plus_constant (Pmode, basereg, wback_offset));
13543 i = 1;
13544 count++;
13545 }
13546
13547 for (j = 0; i < count; i++, j++)
13548 XVECEXP (result, 0, i)
13549 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13550
13551 return result;
13552 }
13553
13554 /* Generate a store-multiple instruction. COUNT is the number of stores in
13555 the instruction; REGS and MEMS are arrays containing the operands.
13556 BASEREG is the base register to be used in addressing the memory operands.
13557 WBACK_OFFSET is nonzero if the instruction should update the base
13558 register. */
13559
13560 static rtx
13561 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13562 HOST_WIDE_INT wback_offset)
13563 {
13564 int i = 0, j;
13565 rtx result;
13566
13567 if (GET_CODE (basereg) == PLUS)
13568 basereg = XEXP (basereg, 0);
13569
13570 if (!multiple_operation_profitable_p (false, count, 0))
13571 {
13572 rtx seq;
13573
13574 start_sequence ();
13575
13576 for (i = 0; i < count; i++)
13577 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13578
13579 if (wback_offset != 0)
13580 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13581
13582 seq = get_insns ();
13583 end_sequence ();
13584
13585 return seq;
13586 }
13587
13588 result = gen_rtx_PARALLEL (VOIDmode,
13589 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13590 if (wback_offset != 0)
13591 {
13592 XVECEXP (result, 0, 0)
13593 = gen_rtx_SET (VOIDmode, basereg,
13594 plus_constant (Pmode, basereg, wback_offset));
13595 i = 1;
13596 count++;
13597 }
13598
13599 for (j = 0; i < count; i++, j++)
13600 XVECEXP (result, 0, i)
13601 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13602
13603 return result;
13604 }
13605
13606 /* Generate either a load-multiple or a store-multiple instruction. This
13607 function can be used in situations where we can start with a single MEM
13608 rtx and adjust its address upwards.
13609 COUNT is the number of operations in the instruction, not counting a
13610 possible update of the base register. REGS is an array containing the
13611 register operands.
13612 BASEREG is the base register to be used in addressing the memory operands,
13613 which are constructed from BASEMEM.
13614 WRITE_BACK specifies whether the generated instruction should include an
13615 update of the base register.
13616 OFFSETP is used to pass an offset to and from this function; this offset
13617 is not used when constructing the address (instead BASEMEM should have an
13618 appropriate offset in its address); it is used only for setting
13619 MEM_OFFSET. It is updated only if WRITE_BACK is true.  */
13620
13621 static rtx
13622 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13623 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13624 {
13625 rtx mems[MAX_LDM_STM_OPS];
13626 HOST_WIDE_INT offset = *offsetp;
13627 int i;
13628
13629 gcc_assert (count <= MAX_LDM_STM_OPS);
13630
13631 if (GET_CODE (basereg) == PLUS)
13632 basereg = XEXP (basereg, 0);
13633
13634 for (i = 0; i < count; i++)
13635 {
13636 rtx addr = plus_constant (Pmode, basereg, i * 4);
13637 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13638 offset += 4;
13639 }
13640
13641 if (write_back)
13642 *offsetp = offset;
13643
13644 if (is_load)
13645 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13646 write_back ? 4 * count : 0);
13647 else
13648 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13649 write_back ? 4 * count : 0);
13650 }
13651
13652 rtx
13653 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13654 rtx basemem, HOST_WIDE_INT *offsetp)
13655 {
13656 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13657 offsetp);
13658 }
13659
13660 rtx
13661 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13662 rtx basemem, HOST_WIDE_INT *offsetp)
13663 {
13664 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13665 offsetp);
13666 }
13667
13668 /* Called from a peephole2 expander to turn a sequence of loads into an
13669 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13670 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13671 is true if we can reorder the registers because they are used commutatively
13672 subsequently.
13673 Returns true iff we could generate a new instruction. */
13674
13675 bool
13676 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13677 {
13678 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13679 rtx mems[MAX_LDM_STM_OPS];
13680 int i, j, base_reg;
13681 rtx base_reg_rtx;
13682 HOST_WIDE_INT offset;
13683 int write_back = FALSE;
13684 int ldm_case;
13685 rtx addr;
13686
13687 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13688 &base_reg, &offset, !sort_regs);
13689
13690 if (ldm_case == 0)
13691 return false;
13692
13693 if (sort_regs)
13694 for (i = 0; i < nops - 1; i++)
13695 for (j = i + 1; j < nops; j++)
13696 if (regs[i] > regs[j])
13697 {
13698 int t = regs[i];
13699 regs[i] = regs[j];
13700 regs[j] = t;
13701 }
13702 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13703
13704 if (TARGET_THUMB1)
13705 {
13706 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13707 gcc_assert (ldm_case == 1 || ldm_case == 5);
13708 write_back = TRUE;
13709 }
13710
13711 if (ldm_case == 5)
13712 {
13713 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13714 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13715 offset = 0;
13716 if (!TARGET_THUMB1)
13717 {
13718 base_reg = regs[0];
13719 base_reg_rtx = newbase;
13720 }
13721 }
13722
13723 for (i = 0; i < nops; i++)
13724 {
13725 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13726 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13727 SImode, addr, 0);
13728 }
13729 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13730 write_back ? offset + i * 4 : 0));
13731 return true;
13732 }
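/* For illustration (registers arbitrary): with SORT_REGS true, the
   two-insn sequence
     ldr r1, [r4, #4]
     ldr r0, [r4]
   is recognized as ldm_case 1; the destination registers are sorted to
   {r0, r1}, the mems are rebuilt in ascending-offset order, and the
   emitted parallel corresponds to "ldmia r4, {r0, r1}", assuming the
   tuning checks above deem a two-register ldm profitable.  */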
13733
13734 /* Called from a peephole2 expander to turn a sequence of stores into an
13735 STM instruction. OPERANDS are the operands found by the peephole matcher;
13736 NOPS indicates how many separate stores we are trying to combine.
13737 Returns true iff we could generate a new instruction. */
13738
13739 bool
13740 gen_stm_seq (rtx *operands, int nops)
13741 {
13742 int i;
13743 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13744 rtx mems[MAX_LDM_STM_OPS];
13745 int base_reg;
13746 rtx base_reg_rtx;
13747 HOST_WIDE_INT offset;
13748 int write_back = FALSE;
13749 int stm_case;
13750 rtx addr;
13751 bool base_reg_dies;
13752
13753 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13754 mem_order, &base_reg, &offset, true);
13755
13756 if (stm_case == 0)
13757 return false;
13758
13759 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13760
13761 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13762 if (TARGET_THUMB1)
13763 {
13764 gcc_assert (base_reg_dies);
13765 write_back = TRUE;
13766 }
13767
13768 if (stm_case == 5)
13769 {
13770 gcc_assert (base_reg_dies);
13771 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13772 offset = 0;
13773 }
13774
13775 addr = plus_constant (Pmode, base_reg_rtx, offset);
13776
13777 for (i = 0; i < nops; i++)
13778 {
13779 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13780 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13781 SImode, addr, 0);
13782 }
13783 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13784 write_back ? offset + i * 4 : 0));
13785 return true;
13786 }
13787
13788 /* Called from a peephole2 expander to turn a sequence of stores that are
13789 preceded by constant loads into an STM instruction. OPERANDS are the
13790 operands found by the peephole matcher; NOPS indicates how many
13791 separate stores we are trying to combine; there are 2 * NOPS
13792 instructions in the peephole.
13793 Returns true iff we could generate a new instruction. */
13794
13795 bool
13796 gen_const_stm_seq (rtx *operands, int nops)
13797 {
13798 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13799 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13800 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13801 rtx mems[MAX_LDM_STM_OPS];
13802 int base_reg;
13803 rtx base_reg_rtx;
13804 HOST_WIDE_INT offset;
13805 int write_back = FALSE;
13806 int stm_case;
13807 rtx addr;
13808 bool base_reg_dies;
13809 int i, j;
13810 HARD_REG_SET allocated;
13811
13812 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13813 mem_order, &base_reg, &offset, false);
13814
13815 if (stm_case == 0)
13816 return false;
13817
13818 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13819
13820 /* If the same register is used more than once, try to find a free
13821 register. */
13822 CLEAR_HARD_REG_SET (allocated);
13823 for (i = 0; i < nops; i++)
13824 {
13825 for (j = i + 1; j < nops; j++)
13826 if (regs[i] == regs[j])
13827 {
13828 rtx t = peep2_find_free_register (0, nops * 2,
13829 TARGET_THUMB1 ? "l" : "r",
13830 SImode, &allocated);
13831 if (t == NULL_RTX)
13832 return false;
13833 reg_rtxs[i] = t;
13834 regs[i] = REGNO (t);
13835 }
13836 }
13837
13838 /* Compute an ordering that maps the register numbers to an ascending
13839 sequence. */
13840 reg_order[0] = 0;
13841 for (i = 0; i < nops; i++)
13842 if (regs[i] < regs[reg_order[0]])
13843 reg_order[0] = i;
13844
13845 for (i = 1; i < nops; i++)
13846 {
13847 int this_order = reg_order[i - 1];
13848 for (j = 0; j < nops; j++)
13849 if (regs[j] > regs[reg_order[i - 1]]
13850 && (this_order == reg_order[i - 1]
13851 || regs[j] < regs[this_order]))
13852 this_order = j;
13853 reg_order[i] = this_order;
13854 }
13855
13856 /* Ensure that registers that must be live after the instruction end
13857 up with the correct value. */
13858 for (i = 0; i < nops; i++)
13859 {
13860 int this_order = reg_order[i];
13861 if ((this_order != mem_order[i]
13862 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13863 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13864 return false;
13865 }
13866
13867 /* Load the constants. */
13868 for (i = 0; i < nops; i++)
13869 {
13870 rtx op = operands[2 * nops + mem_order[i]];
13871 sorted_regs[i] = regs[reg_order[i]];
13872 emit_move_insn (reg_rtxs[reg_order[i]], op);
13873 }
13874
13875 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13876
13877 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13878 if (TARGET_THUMB1)
13879 {
13880 gcc_assert (base_reg_dies);
13881 write_back = TRUE;
13882 }
13883
13884 if (stm_case == 5)
13885 {
13886 gcc_assert (base_reg_dies);
13887 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13888 offset = 0;
13889 }
13890
13891 addr = plus_constant (Pmode, base_reg_rtx, offset);
13892
13893 for (i = 0; i < nops; i++)
13894 {
13895 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13896 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13897 SImode, addr, 0);
13898 }
13899 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13900 write_back ? offset + i * 4 : 0));
13901 return true;
13902 }
13903
13904 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13905 unaligned copies on processors which support unaligned semantics for those
13906 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13907 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13908 An interleave factor of 1 (the minimum) will perform no interleaving.
13909 Load/store multiple are used for aligned addresses where possible. */
13910
13911 static void
13912 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13913 HOST_WIDE_INT length,
13914 unsigned int interleave_factor)
13915 {
13916 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13917 int *regnos = XALLOCAVEC (int, interleave_factor);
13918 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13919 HOST_WIDE_INT i, j;
13920 HOST_WIDE_INT remaining = length, words;
13921 rtx halfword_tmp = NULL, byte_tmp = NULL;
13922 rtx dst, src;
13923 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13924 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13925 HOST_WIDE_INT srcoffset, dstoffset;
13926 HOST_WIDE_INT src_autoinc, dst_autoinc;
13927 rtx mem, addr;
13928
13929 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13930
13931 /* Use hard registers if we have aligned source or destination so we can use
13932 load/store multiple with contiguous registers. */
13933 if (dst_aligned || src_aligned)
13934 for (i = 0; i < interleave_factor; i++)
13935 regs[i] = gen_rtx_REG (SImode, i);
13936 else
13937 for (i = 0; i < interleave_factor; i++)
13938 regs[i] = gen_reg_rtx (SImode);
13939
13940 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13941 src = copy_addr_to_reg (XEXP (srcbase, 0));
13942
13943 srcoffset = dstoffset = 0;
13944
13945 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13946 For copying the last bytes we want to subtract this offset again. */
13947 src_autoinc = dst_autoinc = 0;
13948
13949 for (i = 0; i < interleave_factor; i++)
13950 regnos[i] = i;
13951
13952 /* Copy BLOCK_SIZE_BYTES chunks. */
13953
13954 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13955 {
13956 /* Load words. */
13957 if (src_aligned && interleave_factor > 1)
13958 {
13959 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13960 TRUE, srcbase, &srcoffset));
13961 src_autoinc += UNITS_PER_WORD * interleave_factor;
13962 }
13963 else
13964 {
13965 for (j = 0; j < interleave_factor; j++)
13966 {
13967 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13968 - src_autoinc));
13969 mem = adjust_automodify_address (srcbase, SImode, addr,
13970 srcoffset + j * UNITS_PER_WORD);
13971 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13972 }
13973 srcoffset += block_size_bytes;
13974 }
13975
13976 /* Store words. */
13977 if (dst_aligned && interleave_factor > 1)
13978 {
13979 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13980 TRUE, dstbase, &dstoffset));
13981 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13982 }
13983 else
13984 {
13985 for (j = 0; j < interleave_factor; j++)
13986 {
13987 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13988 - dst_autoinc));
13989 mem = adjust_automodify_address (dstbase, SImode, addr,
13990 dstoffset + j * UNITS_PER_WORD);
13991 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13992 }
13993 dstoffset += block_size_bytes;
13994 }
13995
13996 remaining -= block_size_bytes;
13997 }
13998
13999 /* Copy any whole words left (note these aren't interleaved with any
14000 subsequent halfword/byte load/stores in the interests of simplicity). */
14001
14002 words = remaining / UNITS_PER_WORD;
14003
14004 gcc_assert (words < interleave_factor);
14005
14006 if (src_aligned && words > 1)
14007 {
14008 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14009 &srcoffset));
14010 src_autoinc += UNITS_PER_WORD * words;
14011 }
14012 else
14013 {
14014 for (j = 0; j < words; j++)
14015 {
14016 addr = plus_constant (Pmode, src,
14017 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14018 mem = adjust_automodify_address (srcbase, SImode, addr,
14019 srcoffset + j * UNITS_PER_WORD);
14020 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14021 }
14022 srcoffset += words * UNITS_PER_WORD;
14023 }
14024
14025 if (dst_aligned && words > 1)
14026 {
14027 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14028 &dstoffset));
14029 dst_autoinc += words * UNITS_PER_WORD;
14030 }
14031 else
14032 {
14033 for (j = 0; j < words; j++)
14034 {
14035 addr = plus_constant (Pmode, dst,
14036 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14037 mem = adjust_automodify_address (dstbase, SImode, addr,
14038 dstoffset + j * UNITS_PER_WORD);
14039 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14040 }
14041 dstoffset += words * UNITS_PER_WORD;
14042 }
14043
14044 remaining -= words * UNITS_PER_WORD;
14045
14046 gcc_assert (remaining < 4);
14047
14048 /* Copy a halfword if necessary. */
14049
14050 if (remaining >= 2)
14051 {
14052 halfword_tmp = gen_reg_rtx (SImode);
14053
14054 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14055 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14056 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14057
14058 /* Either write out immediately, or delay until we've loaded the last
14059 byte, depending on interleave factor. */
14060 if (interleave_factor == 1)
14061 {
14062 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14063 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14064 emit_insn (gen_unaligned_storehi (mem,
14065 gen_lowpart (HImode, halfword_tmp)));
14066 halfword_tmp = NULL;
14067 dstoffset += 2;
14068 }
14069
14070 remaining -= 2;
14071 srcoffset += 2;
14072 }
14073
14074 gcc_assert (remaining < 2);
14075
14076 /* Copy last byte. */
14077
14078 if ((remaining & 1) != 0)
14079 {
14080 byte_tmp = gen_reg_rtx (SImode);
14081
14082 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14083 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14084 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14085
14086 if (interleave_factor == 1)
14087 {
14088 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14089 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14090 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14091 byte_tmp = NULL;
14092 dstoffset++;
14093 }
14094
14095 remaining--;
14096 srcoffset++;
14097 }
14098
14099 /* Store last halfword if we haven't done so already. */
14100
14101 if (halfword_tmp)
14102 {
14103 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14104 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14105 emit_insn (gen_unaligned_storehi (mem,
14106 gen_lowpart (HImode, halfword_tmp)));
14107 dstoffset += 2;
14108 }
14109
14110 /* Likewise for last byte. */
14111
14112 if (byte_tmp)
14113 {
14114 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14115 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14116 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14117 dstoffset++;
14118 }
14119
14120 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14121 }
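/* For illustration: copying 14 bytes with INTERLEAVE_FACTOR == 2 and
   both buffers unaligned emits, roughly, two unaligned word loads and
   two unaligned word stores (the 8-byte block), one further unaligned
   ldr/str pair for the next word, an unaligned ldrh for the remaining
   halfword, and finally its strh (the halfword store is deferred when
   the interleave factor is greater than 1).  Register allocation and
   scheduling are of course target-dependent.  */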
14122
14123 /* From mips_adjust_block_mem:
14124
14125 Helper function for doing a loop-based block operation on memory
14126 reference MEM. Each iteration of the loop will operate on LENGTH
14127 bytes of MEM.
14128
14129 Create a new base register for use within the loop and point it to
14130 the start of MEM. Create a new memory reference that uses this
14131 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14132
14133 static void
14134 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14135 rtx *loop_mem)
14136 {
14137 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14138
14139 /* Although the new mem does not refer to a known location,
14140 it does keep up to LENGTH bytes of alignment. */
14141 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14142 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14143 }
14144
14145 /* From mips_block_move_loop:
14146
14147 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14148 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14149 the memory regions do not overlap. */
14150
14151 static void
14152 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14153 unsigned int interleave_factor,
14154 HOST_WIDE_INT bytes_per_iter)
14155 {
14156 rtx label, src_reg, dest_reg, final_src, test;
14157 HOST_WIDE_INT leftover;
14158
14159 leftover = length % bytes_per_iter;
14160 length -= leftover;
14161
14162 /* Create registers and memory references for use within the loop. */
14163 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14164 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14165
14166 /* Calculate the value that SRC_REG should have after the last iteration of
14167 the loop. */
14168 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14169 0, 0, OPTAB_WIDEN);
14170
14171 /* Emit the start of the loop. */
14172 label = gen_label_rtx ();
14173 emit_label (label);
14174
14175 /* Emit the loop body. */
14176 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14177 interleave_factor);
14178
14179 /* Move on to the next block. */
14180 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14181 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14182
14183 /* Emit the loop condition. */
14184 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14185 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14186
14187 /* Mop up any left-over bytes. */
14188 if (leftover)
14189 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14190 }
14191
14192 /* Emit a block move when either the source or destination is unaligned (not
14193 aligned to a four-byte boundary). This may need further tuning depending on
14194 core type, optimize_size setting, etc. */
14195
14196 static int
14197 arm_movmemqi_unaligned (rtx *operands)
14198 {
14199 HOST_WIDE_INT length = INTVAL (operands[2]);
14200
14201 if (optimize_size)
14202 {
14203 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14204 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14205 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14206 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14207 or dst_aligned though: allow more interleaving in those cases since the
14208 resulting code can be smaller. */
14209 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14210 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14211
14212 if (length > 12)
14213 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14214 interleave_factor, bytes_per_iter);
14215 else
14216 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14217 interleave_factor);
14218 }
14219 else
14220 {
14221 /* Note that the loop created by arm_block_move_unaligned_loop may be
14222 subject to loop unrolling, which makes tuning this condition a little
14223 redundant. */
14224 if (length > 32)
14225 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14226 else
14227 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14228 }
14229
14230 return 1;
14231 }
14232
14233 int
14234 arm_gen_movmemqi (rtx *operands)
14235 {
14236 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14237 HOST_WIDE_INT srcoffset, dstoffset;
14238 int i;
14239 rtx src, dst, srcbase, dstbase;
14240 rtx part_bytes_reg = NULL;
14241 rtx mem;
14242
14243 if (!CONST_INT_P (operands[2])
14244 || !CONST_INT_P (operands[3])
14245 || INTVAL (operands[2]) > 64)
14246 return 0;
14247
14248 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14249 return arm_movmemqi_unaligned (operands);
14250
14251 if (INTVAL (operands[3]) & 3)
14252 return 0;
14253
14254 dstbase = operands[0];
14255 srcbase = operands[1];
14256
14257 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14258 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14259
14260 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14261 out_words_to_go = INTVAL (operands[2]) / 4;
14262 last_bytes = INTVAL (operands[2]) & 3;
14263 dstoffset = srcoffset = 0;
14264
14265 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14266 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14267
14268 for (i = 0; in_words_to_go >= 2; i+=4)
14269 {
14270 if (in_words_to_go > 4)
14271 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14272 TRUE, srcbase, &srcoffset));
14273 else
14274 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14275 src, FALSE, srcbase,
14276 &srcoffset));
14277
14278 if (out_words_to_go)
14279 {
14280 if (out_words_to_go > 4)
14281 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14282 TRUE, dstbase, &dstoffset));
14283 else if (out_words_to_go != 1)
14284 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14285 out_words_to_go, dst,
14286 (last_bytes == 0
14287 ? FALSE : TRUE),
14288 dstbase, &dstoffset));
14289 else
14290 {
14291 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14292 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14293 if (last_bytes != 0)
14294 {
14295 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14296 dstoffset += 4;
14297 }
14298 }
14299 }
14300
14301 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14302 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14303 }
14304
14305 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14306 if (out_words_to_go)
14307 {
14308 rtx sreg;
14309
14310 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14311 sreg = copy_to_reg (mem);
14312
14313 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14314 emit_move_insn (mem, sreg);
14315 in_words_to_go--;
14316
14317 gcc_assert (!in_words_to_go); /* Sanity check */
14318 }
14319
14320 if (in_words_to_go)
14321 {
14322 gcc_assert (in_words_to_go > 0);
14323
14324 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14325 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14326 }
14327
14328 gcc_assert (!last_bytes || part_bytes_reg);
14329
14330 if (BYTES_BIG_ENDIAN && last_bytes)
14331 {
14332 rtx tmp = gen_reg_rtx (SImode);
14333
14334 /* The bytes we want are in the top end of the word. */
14335 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14336 GEN_INT (8 * (4 - last_bytes))));
14337 part_bytes_reg = tmp;
14338
14339 while (last_bytes)
14340 {
14341 mem = adjust_automodify_address (dstbase, QImode,
14342 plus_constant (Pmode, dst,
14343 last_bytes - 1),
14344 dstoffset + last_bytes - 1);
14345 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14346
14347 if (--last_bytes)
14348 {
14349 tmp = gen_reg_rtx (SImode);
14350 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14351 part_bytes_reg = tmp;
14352 }
14353 }
14354
14355 }
14356 else
14357 {
14358 if (last_bytes > 1)
14359 {
14360 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14361 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14362 last_bytes -= 2;
14363 if (last_bytes)
14364 {
14365 rtx tmp = gen_reg_rtx (SImode);
14366 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14367 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14368 part_bytes_reg = tmp;
14369 dstoffset += 2;
14370 }
14371 }
14372
14373 if (last_bytes)
14374 {
14375 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14376 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14377 }
14378 }
14379
14380 return 1;
14381 }
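/* For illustration: a word-aligned copy of 8 bytes (operands[2] == 8,
   operands[3] == 4) gives in_words_to_go == out_words_to_go == 2 and
   last_bytes == 0, so a single pass through the loop above emits one
   two-register load-multiple and one two-register store-multiple using
   r0 and r1 from arm_regs_in_sequence, or two ldr/str pairs if
   multiple_operation_profitable_p rejects the ldm/stm form.  */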
14382
14383 /* Helper for gen_movmem_ldrd_strd. Increase the address of the memory rtx
14384 MEM by the size of its mode. */
14385 inline static rtx
14386 next_consecutive_mem (rtx mem)
14387 {
14388 enum machine_mode mode = GET_MODE (mem);
14389 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14390 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14391
14392 return adjust_automodify_address (mem, mode, addr, offset);
14393 }
14394
14395 /* Copy using LDRD/STRD instructions whenever possible.
14396 Returns true upon success. */
14397 bool
14398 gen_movmem_ldrd_strd (rtx *operands)
14399 {
14400 unsigned HOST_WIDE_INT len;
14401 HOST_WIDE_INT align;
14402 rtx src, dst, base;
14403 rtx reg0;
14404 bool src_aligned, dst_aligned;
14405 bool src_volatile, dst_volatile;
14406
14407 gcc_assert (CONST_INT_P (operands[2]));
14408 gcc_assert (CONST_INT_P (operands[3]));
14409
14410 len = UINTVAL (operands[2]);
14411 if (len > 64)
14412 return false;
14413
14414 /* Maximum alignment we can assume for both src and dst buffers. */
14415 align = INTVAL (operands[3]);
14416
14417 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14418 return false;
14419
14420 /* Place src and dst addresses in registers
14421 and update the corresponding mem rtx. */
14422 dst = operands[0];
14423 dst_volatile = MEM_VOLATILE_P (dst);
14424 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14425 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14426 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14427
14428 src = operands[1];
14429 src_volatile = MEM_VOLATILE_P (src);
14430 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14431 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14432 src = adjust_automodify_address (src, VOIDmode, base, 0);
14433
14434 if (!unaligned_access && !(src_aligned && dst_aligned))
14435 return false;
14436
14437 if (src_volatile || dst_volatile)
14438 return false;
14439
14440 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14441 if (!(dst_aligned || src_aligned))
14442 return arm_gen_movmemqi (operands);
14443
14444 src = adjust_address (src, DImode, 0);
14445 dst = adjust_address (dst, DImode, 0);
14446 while (len >= 8)
14447 {
14448 len -= 8;
14449 reg0 = gen_reg_rtx (DImode);
14450 if (src_aligned)
14451 emit_move_insn (reg0, src);
14452 else
14453 emit_insn (gen_unaligned_loaddi (reg0, src));
14454
14455 if (dst_aligned)
14456 emit_move_insn (dst, reg0);
14457 else
14458 emit_insn (gen_unaligned_storedi (dst, reg0));
14459
14460 src = next_consecutive_mem (src);
14461 dst = next_consecutive_mem (dst);
14462 }
14463
14464 gcc_assert (len < 8);
14465 if (len >= 4)
14466 {
14467 /* More than a word but less than a double-word to copy. Copy a word. */
14468 reg0 = gen_reg_rtx (SImode);
14469 src = adjust_address (src, SImode, 0);
14470 dst = adjust_address (dst, SImode, 0);
14471 if (src_aligned)
14472 emit_move_insn (reg0, src);
14473 else
14474 emit_insn (gen_unaligned_loadsi (reg0, src));
14475
14476 if (dst_aligned)
14477 emit_move_insn (dst, reg0);
14478 else
14479 emit_insn (gen_unaligned_storesi (dst, reg0));
14480
14481 src = next_consecutive_mem (src);
14482 dst = next_consecutive_mem (dst);
14483 len -= 4;
14484 }
14485
14486 if (len == 0)
14487 return true;
14488
14489 /* Copy the remaining bytes. */
14490 if (len >= 2)
14491 {
14492 dst = adjust_address (dst, HImode, 0);
14493 src = adjust_address (src, HImode, 0);
14494 reg0 = gen_reg_rtx (SImode);
14495 if (src_aligned)
14496 emit_insn (gen_zero_extendhisi2 (reg0, src));
14497 else
14498 emit_insn (gen_unaligned_loadhiu (reg0, src));
14499
14500 if (dst_aligned)
14501 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14502 else
14503 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14504
14505 src = next_consecutive_mem (src);
14506 dst = next_consecutive_mem (dst);
14507 if (len == 2)
14508 return true;
14509 }
14510
14511 dst = adjust_address (dst, QImode, 0);
14512 src = adjust_address (src, QImode, 0);
14513 reg0 = gen_reg_rtx (QImode);
14514 emit_move_insn (reg0, src);
14515 emit_move_insn (dst, reg0);
14516 return true;
14517 }
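/* For illustration: a 14-byte copy with both buffers known to be
   word-aligned is expanded as one DImode move (typically an ldrd/strd
   pair), one SImode move for the next word, and a zero-extending
   halfword load paired with a halfword store for the final two bytes.
   A side that is not known to be aligned uses the unaligned load/store
   patterns instead, as in the loop above.  */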
14518
14519 /* Select a dominance comparison mode if possible for a test of the general
14520 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14521 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14522 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14523 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14524 In all cases OP will be either EQ or NE, but we don't need to know which
14525 here. If we are unable to support a dominance comparison we return
14526 CC mode. This will then fail to match for the RTL expressions that
14527 generate this call. */
14528 enum machine_mode
14529 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14530 {
14531 enum rtx_code cond1, cond2;
14532 int swapped = 0;
14533
14534 /* Currently we will probably get the wrong result if the individual
14535 comparisons are not simple. This also ensures that it is safe to
14536 reverse a comparison if necessary. */
14537 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14538 != CCmode)
14539 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14540 != CCmode))
14541 return CCmode;
14542
14543 /* The if_then_else variant of this tests the second condition if the
14544 first passes, but is true if the first fails. Reverse the first
14545 condition to get a true "inclusive-or" expression. */
14546 if (cond_or == DOM_CC_NX_OR_Y)
14547 cond1 = reverse_condition (cond1);
14548
14549 /* If the comparisons are not equal, and one doesn't dominate the other,
14550 then we can't do this. */
14551 if (cond1 != cond2
14552 && !comparison_dominates_p (cond1, cond2)
14553 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14554 return CCmode;
14555
14556 if (swapped)
14557 {
14558 enum rtx_code temp = cond1;
14559 cond1 = cond2;
14560 cond2 = temp;
14561 }
14562
14563 switch (cond1)
14564 {
14565 case EQ:
14566 if (cond_or == DOM_CC_X_AND_Y)
14567 return CC_DEQmode;
14568
14569 switch (cond2)
14570 {
14571 case EQ: return CC_DEQmode;
14572 case LE: return CC_DLEmode;
14573 case LEU: return CC_DLEUmode;
14574 case GE: return CC_DGEmode;
14575 case GEU: return CC_DGEUmode;
14576 default: gcc_unreachable ();
14577 }
14578
14579 case LT:
14580 if (cond_or == DOM_CC_X_AND_Y)
14581 return CC_DLTmode;
14582
14583 switch (cond2)
14584 {
14585 case LT:
14586 return CC_DLTmode;
14587 case LE:
14588 return CC_DLEmode;
14589 case NE:
14590 return CC_DNEmode;
14591 default:
14592 gcc_unreachable ();
14593 }
14594
14595 case GT:
14596 if (cond_or == DOM_CC_X_AND_Y)
14597 return CC_DGTmode;
14598
14599 switch (cond2)
14600 {
14601 case GT:
14602 return CC_DGTmode;
14603 case GE:
14604 return CC_DGEmode;
14605 case NE:
14606 return CC_DNEmode;
14607 default:
14608 gcc_unreachable ();
14609 }
14610
14611 case LTU:
14612 if (cond_or == DOM_CC_X_AND_Y)
14613 return CC_DLTUmode;
14614
14615 switch (cond2)
14616 {
14617 case LTU:
14618 return CC_DLTUmode;
14619 case LEU:
14620 return CC_DLEUmode;
14621 case NE:
14622 return CC_DNEmode;
14623 default:
14624 gcc_unreachable ();
14625 }
14626
14627 case GTU:
14628 if (cond_or == DOM_CC_X_AND_Y)
14629 return CC_DGTUmode;
14630
14631 switch (cond2)
14632 {
14633 case GTU:
14634 return CC_DGTUmode;
14635 case GEU:
14636 return CC_DGEUmode;
14637 case NE:
14638 return CC_DNEmode;
14639 default:
14640 gcc_unreachable ();
14641 }
14642
14643 /* The remaining cases only occur when both comparisons are the
14644 same. */
14645 case NE:
14646 gcc_assert (cond1 == cond2);
14647 return CC_DNEmode;
14648
14649 case LE:
14650 gcc_assert (cond1 == cond2);
14651 return CC_DLEmode;
14652
14653 case GE:
14654 gcc_assert (cond1 == cond2);
14655 return CC_DGEmode;
14656
14657 case LEU:
14658 gcc_assert (cond1 == cond2);
14659 return CC_DLEUmode;
14660
14661 case GEU:
14662 gcc_assert (cond1 == cond2);
14663 return CC_DGEUmode;
14664
14665 default:
14666 gcc_unreachable ();
14667 }
14668 }
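/* A worked illustration of the mapping above (an editor's sketch, never
   compiled; EQ_CMP and GE_CMP are assumed to be simple SImode register
   comparisons such as (eq r0 r1) and (ge r2 r3)).  EQ dominates GE, so an
   inclusive-or test yields CC_DGEmode, while asking for DOM_CC_X_AND_Y
   instead would yield CC_DEQmode.  */
#if 0
static enum machine_mode
example_dominance_mode (rtx eq_cmp, rtx ge_cmp)
{
  return arm_select_dominance_cc_mode (eq_cmp, ge_cmp, DOM_CC_X_OR_Y);
}
#endif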
14669
14670 enum machine_mode
14671 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14672 {
14673 /* All floating point compares return CCFP if it is an equality
14674 comparison, and CCFPE otherwise. */
14675 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14676 {
14677 switch (op)
14678 {
14679 case EQ:
14680 case NE:
14681 case UNORDERED:
14682 case ORDERED:
14683 case UNLT:
14684 case UNLE:
14685 case UNGT:
14686 case UNGE:
14687 case UNEQ:
14688 case LTGT:
14689 return CCFPmode;
14690
14691 case LT:
14692 case LE:
14693 case GT:
14694 case GE:
14695 return CCFPEmode;
14696
14697 default:
14698 gcc_unreachable ();
14699 }
14700 }
14701
14702 /* A compare with a shifted operand. Because of canonicalization, the
14703 comparison will have to be swapped when we emit the assembler. */
14704 if (GET_MODE (y) == SImode
14705 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14706 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14707 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14708 || GET_CODE (x) == ROTATERT))
14709 return CC_SWPmode;
14710
14711 /* This operation is performed swapped, but since we only rely on the Z
14712 flag we don't need an additional mode. */
14713 if (GET_MODE (y) == SImode
14714 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14715 && GET_CODE (x) == NEG
14716 && (op == EQ || op == NE))
14717 return CC_Zmode;
14718
14719 /* This is a special case that is used by combine to allow a
14720 comparison of a shifted byte load to be split into a zero-extend
14721 followed by a comparison of the shifted integer (only valid for
14722 equalities and unsigned inequalities). */
14723 if (GET_MODE (x) == SImode
14724 && GET_CODE (x) == ASHIFT
14725 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14726 && GET_CODE (XEXP (x, 0)) == SUBREG
14727 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14728 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14729 && (op == EQ || op == NE
14730 || op == GEU || op == GTU || op == LTU || op == LEU)
14731 && CONST_INT_P (y))
14732 return CC_Zmode;
14733
14734   /* A construct for a conditional compare: if the false arm contains
14735      0, then both conditions must be true; otherwise at least one of
14736      the conditions must be true.  Not all conditions are possible, so
14737      CCmode is returned if it can't be done.  */
14738 if (GET_CODE (x) == IF_THEN_ELSE
14739 && (XEXP (x, 2) == const0_rtx
14740 || XEXP (x, 2) == const1_rtx)
14741 && COMPARISON_P (XEXP (x, 0))
14742 && COMPARISON_P (XEXP (x, 1)))
14743 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14744 INTVAL (XEXP (x, 2)));
14745
14746 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14747 if (GET_CODE (x) == AND
14748 && (op == EQ || op == NE)
14749 && COMPARISON_P (XEXP (x, 0))
14750 && COMPARISON_P (XEXP (x, 1)))
14751 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14752 DOM_CC_X_AND_Y);
14753
14754 if (GET_CODE (x) == IOR
14755 && (op == EQ || op == NE)
14756 && COMPARISON_P (XEXP (x, 0))
14757 && COMPARISON_P (XEXP (x, 1)))
14758 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14759 DOM_CC_X_OR_Y);
14760
14761 /* An operation (on Thumb) where we want to test for a single bit.
14762 This is done by shifting that bit up into the top bit of a
14763 scratch register; we can then branch on the sign bit. */
14764 if (TARGET_THUMB1
14765 && GET_MODE (x) == SImode
14766 && (op == EQ || op == NE)
14767 && GET_CODE (x) == ZERO_EXTRACT
14768 && XEXP (x, 1) == const1_rtx)
14769 return CC_Nmode;
14770
14771   /* For an operation that sets the condition codes as a side-effect, the
14772      V flag is not set correctly, so we can only use comparisons where
14773      this doesn't matter.  (For LT and GE we can use "mi" and "pl"
14774      instead.)  */
14775 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14776 if (GET_MODE (x) == SImode
14777 && y == const0_rtx
14778 && (op == EQ || op == NE || op == LT || op == GE)
14779 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14780 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14781 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14782 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14783 || GET_CODE (x) == LSHIFTRT
14784 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14785 || GET_CODE (x) == ROTATERT
14786 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14787 return CC_NOOVmode;
14788
14789 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14790 return CC_Zmode;
14791
14792 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14793 && GET_CODE (x) == PLUS
14794 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14795 return CC_Cmode;
14796
14797 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14798 {
14799 switch (op)
14800 {
14801 case EQ:
14802 case NE:
14803 /* A DImode comparison against zero can be implemented by
14804 or'ing the two halves together. */
14805 if (y == const0_rtx)
14806 return CC_Zmode;
14807
14808 /* We can do an equality test in three Thumb instructions. */
14809 if (!TARGET_32BIT)
14810 return CC_Zmode;
14811
14812 /* FALLTHROUGH */
14813
14814 case LTU:
14815 case LEU:
14816 case GTU:
14817 case GEU:
14818 /* DImode unsigned comparisons can be implemented by cmp +
14819 cmpeq without a scratch register. Not worth doing in
14820 Thumb-2. */
14821 if (TARGET_32BIT)
14822 return CC_CZmode;
14823
14824 /* FALLTHROUGH */
14825
14826 case LT:
14827 case LE:
14828 case GT:
14829 case GE:
14830 /* DImode signed and unsigned comparisons can be implemented
14831 by cmp + sbcs with a scratch register, but that does not
14832 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14833 gcc_assert (op != EQ && op != NE);
14834 return CC_NCVmode;
14835
14836 default:
14837 gcc_unreachable ();
14838 }
14839 }
14840
14841 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14842 return GET_MODE (x);
14843
14844 return CCmode;
14845 }
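/* A worked illustration of the selection above (an editor's sketch, never
   compiled; A and B are assumed to be SImode REGs built elsewhere).  An
   unsigned overflow check such as "a + b < a" can reach this function as a
   comparison of (plus a b) with a under LTU; only the carry flag of the
   addition is needed, so CC_Cmode is chosen.  */
#if 0
static enum machine_mode
example_overflow_check_mode (rtx a, rtx b)
{
  return arm_select_cc_mode (LTU, gen_rtx_PLUS (SImode, a, b), a);
}
#endif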
14846
14847 /* X and Y are two things to compare using CODE.  Emit the compare insn and
14848    return the rtx for the CC register in the proper mode.  SCRATCH supplies
14849    the SImode scratch register needed by some DImode comparisons after reload.  */
14850 rtx
14851 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14852 {
14853 enum machine_mode mode;
14854 rtx cc_reg;
14855 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14856
14857 /* We might have X as a constant, Y as a register because of the predicates
14858 used for cmpdi. If so, force X to a register here. */
14859 if (dimode_comparison && !REG_P (x))
14860 x = force_reg (DImode, x);
14861
14862 mode = SELECT_CC_MODE (code, x, y);
14863 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14864
14865 if (dimode_comparison
14866 && mode != CC_CZmode)
14867 {
14868 rtx clobber, set;
14869
14870 /* To compare two non-zero values for equality, XOR them and
14871 then compare against zero. Not used for ARM mode; there
14872 CC_CZmode is cheaper. */
14873 if (mode == CC_Zmode && y != const0_rtx)
14874 {
14875 gcc_assert (!reload_completed);
14876 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14877 y = const0_rtx;
14878 }
14879
14880 /* A scratch register is required. */
14881 if (reload_completed)
14882 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14883 else
14884 scratch = gen_rtx_SCRATCH (SImode);
14885
14886 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14887 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
14888 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14889 }
14890 else
14891 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14892
14893 return cc_reg;
14894 }
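/* A worked illustration of the function above (an editor's sketch, never
   compiled; D0 is assumed to be a DImode REG).  Testing a DImode register
   against zero for equality selects CC_Zmode -- per arm_select_cc_mode the
   two halves can simply be ORed together -- and, being a DImode comparison,
   the compare is wrapped in a PARALLEL with a clobber of an SImode scratch;
   before reload NULL_RTX is acceptable for SCRATCH.  */
#if 0
static rtx
example_di_zero_test (rtx d0)
{
  return arm_gen_compare_reg (EQ, d0, const0_rtx, NULL_RTX);
}
#endif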
14895
14896 /* Generate a sequence of insns that will generate the correct return
14897 address mask depending on the physical architecture that the program
14898 is running on. */
14899 rtx
14900 arm_gen_return_addr_mask (void)
14901 {
14902 rtx reg = gen_reg_rtx (Pmode);
14903
14904 emit_insn (gen_return_addr_mask (reg));
14905 return reg;
14906 }
14907
14908 void
14909 arm_reload_in_hi (rtx *operands)
14910 {
14911 rtx ref = operands[1];
14912 rtx base, scratch;
14913 HOST_WIDE_INT offset = 0;
14914
14915 if (GET_CODE (ref) == SUBREG)
14916 {
14917 offset = SUBREG_BYTE (ref);
14918 ref = SUBREG_REG (ref);
14919 }
14920
14921 if (REG_P (ref))
14922 {
14923 /* We have a pseudo which has been spilt onto the stack; there
14924 are two cases here: the first where there is a simple
14925 stack-slot replacement and a second where the stack-slot is
14926 out of range, or is used as a subreg. */
14927 if (reg_equiv_mem (REGNO (ref)))
14928 {
14929 ref = reg_equiv_mem (REGNO (ref));
14930 base = find_replacement (&XEXP (ref, 0));
14931 }
14932 else
14933 /* The slot is out of range, or was dressed up in a SUBREG. */
14934 base = reg_equiv_address (REGNO (ref));
14935 }
14936 else
14937 base = find_replacement (&XEXP (ref, 0));
14938
14939 /* Handle the case where the address is too complex to be offset by 1. */
14940 if (GET_CODE (base) == MINUS
14941 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14942 {
14943 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14944
14945 emit_set_insn (base_plus, base);
14946 base = base_plus;
14947 }
14948 else if (GET_CODE (base) == PLUS)
14949 {
14950 /* The addend must be CONST_INT, or we would have dealt with it above. */
14951 HOST_WIDE_INT hi, lo;
14952
14953 offset += INTVAL (XEXP (base, 1));
14954 base = XEXP (base, 0);
14955
14956 /* Rework the address into a legal sequence of insns. */
14957 /* Valid range for lo is -4095 -> 4095 */
14958 lo = (offset >= 0
14959 ? (offset & 0xfff)
14960 : -((-offset) & 0xfff));
14961
14962       /* Corner case: if lo is the maximum offset, then we would be out of
14963 	 range once we have added the additional 1 below, so bump the msb into
14964 	 the pre-loading insn(s).  */
14965 if (lo == 4095)
14966 lo &= 0x7ff;
14967
14968 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14969 ^ (HOST_WIDE_INT) 0x80000000)
14970 - (HOST_WIDE_INT) 0x80000000);
14971
14972 gcc_assert (hi + lo == offset);
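      /* Worked examples (illustrative numbers only): OFFSET == 4100 splits
	 into LO == 4, HI == 4096; OFFSET == -4100 splits into LO == -4,
	 HI == -4096.  For OFFSET == 4095 the corner case above reduces LO to
	 2047, so that both LO and LO + 1 remain reachable once HI has been
	 added into the base register.  */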
14973
14974 if (hi != 0)
14975 {
14976 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14977
14978 /* Get the base address; addsi3 knows how to handle constants
14979 that require more than one insn. */
14980 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14981 base = base_plus;
14982 offset = lo;
14983 }
14984 }
14985
14986   /* Operands[2] may overlap operands[0] (though it won't overlap
14987      operands[1]); that is why we asked for a DImode reg -- so we can
14988      use the half that does not overlap.  */
14989 if (REGNO (operands[2]) == REGNO (operands[0]))
14990 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14991 else
14992 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14993
14994 emit_insn (gen_zero_extendqisi2 (scratch,
14995 gen_rtx_MEM (QImode,
14996 plus_constant (Pmode, base,
14997 offset))));
14998 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14999 gen_rtx_MEM (QImode,
15000 plus_constant (Pmode, base,
15001 offset + 1))));
15002 if (!BYTES_BIG_ENDIAN)
15003 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15004 gen_rtx_IOR (SImode,
15005 gen_rtx_ASHIFT
15006 (SImode,
15007 gen_rtx_SUBREG (SImode, operands[0], 0),
15008 GEN_INT (8)),
15009 scratch));
15010 else
15011 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15012 gen_rtx_IOR (SImode,
15013 gen_rtx_ASHIFT (SImode, scratch,
15014 GEN_INT (8)),
15015 gen_rtx_SUBREG (SImode, operands[0], 0)));
15016 }
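/* On a little-endian target the sequence emitted above is roughly
   (register names purely illustrative):

	ldrb	scratch, [base, #offset]	@ low byte
	ldrb	out, [base, #offset + 1]	@ high byte
	orr	out, scratch, out, lsl #8

   On a big-endian target it is SCRATCH, holding the byte from the lower
   address, that is shifted left by 8 instead.  */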
15017
15018 /* Handle storing a half-word to memory during reload by synthesizing it as two
15019 byte stores. Take care not to clobber the input values until after we
15020 have moved them somewhere safe. This code assumes that if the DImode
15021 scratch in operands[2] overlaps either the input value or output address
15022 in some way, then that value must die in this insn (we absolutely need
15023 two scratch registers for some corner cases). */
15024 void
15025 arm_reload_out_hi (rtx *operands)
15026 {
15027 rtx ref = operands[0];
15028 rtx outval = operands[1];
15029 rtx base, scratch;
15030 HOST_WIDE_INT offset = 0;
15031
15032 if (GET_CODE (ref) == SUBREG)
15033 {
15034 offset = SUBREG_BYTE (ref);
15035 ref = SUBREG_REG (ref);
15036 }
15037
15038 if (REG_P (ref))
15039 {
15040 /* We have a pseudo which has been spilt onto the stack; there
15041 are two cases here: the first where there is a simple
15042 stack-slot replacement and a second where the stack-slot is
15043 out of range, or is used as a subreg. */
15044 if (reg_equiv_mem (REGNO (ref)))
15045 {
15046 ref = reg_equiv_mem (REGNO (ref));
15047 base = find_replacement (&XEXP (ref, 0));
15048 }
15049 else
15050 /* The slot is out of range, or was dressed up in a SUBREG. */
15051 base = reg_equiv_address (REGNO (ref));
15052 }
15053 else
15054 base = find_replacement (&XEXP (ref, 0));
15055
15056 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15057
15058 /* Handle the case where the address is too complex to be offset by 1. */
15059 if (GET_CODE (base) == MINUS
15060 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15061 {
15062 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15063
15064 /* Be careful not to destroy OUTVAL. */
15065 if (reg_overlap_mentioned_p (base_plus, outval))
15066 {
15067 /* Updating base_plus might destroy outval, see if we can
15068 swap the scratch and base_plus. */
15069 if (!reg_overlap_mentioned_p (scratch, outval))
15070 {
15071 rtx tmp = scratch;
15072 scratch = base_plus;
15073 base_plus = tmp;
15074 }
15075 else
15076 {
15077 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15078
15079 /* Be conservative and copy OUTVAL into the scratch now,
15080 this should only be necessary if outval is a subreg
15081 of something larger than a word. */
15082 /* XXX Might this clobber base? I can't see how it can,
15083 since scratch is known to overlap with OUTVAL, and
15084 must be wider than a word. */
15085 emit_insn (gen_movhi (scratch_hi, outval));
15086 outval = scratch_hi;
15087 }
15088 }
15089
15090 emit_set_insn (base_plus, base);
15091 base = base_plus;
15092 }
15093 else if (GET_CODE (base) == PLUS)
15094 {
15095 /* The addend must be CONST_INT, or we would have dealt with it above. */
15096 HOST_WIDE_INT hi, lo;
15097
15098 offset += INTVAL (XEXP (base, 1));
15099 base = XEXP (base, 0);
15100
15101 /* Rework the address into a legal sequence of insns. */
15102 /* Valid range for lo is -4095 -> 4095 */
15103 lo = (offset >= 0
15104 ? (offset & 0xfff)
15105 : -((-offset) & 0xfff));
15106
15107       /* Corner case: if lo is the maximum offset, then we would be out of
15108 	 range once we have added the additional 1 below, so bump the msb into
15109 	 the pre-loading insn(s).  */
15110 if (lo == 4095)
15111 lo &= 0x7ff;
15112
15113 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15114 ^ (HOST_WIDE_INT) 0x80000000)
15115 - (HOST_WIDE_INT) 0x80000000);
15116
15117 gcc_assert (hi + lo == offset);
15118
15119 if (hi != 0)
15120 {
15121 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15122
15123 /* Be careful not to destroy OUTVAL. */
15124 if (reg_overlap_mentioned_p (base_plus, outval))
15125 {
15126 /* Updating base_plus might destroy outval, see if we
15127 can swap the scratch and base_plus. */
15128 if (!reg_overlap_mentioned_p (scratch, outval))
15129 {
15130 rtx tmp = scratch;
15131 scratch = base_plus;
15132 base_plus = tmp;
15133 }
15134 else
15135 {
15136 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15137
15138 /* Be conservative and copy outval into scratch now,
15139 this should only be necessary if outval is a
15140 subreg of something larger than a word. */
15141 /* XXX Might this clobber base? I can't see how it
15142 can, since scratch is known to overlap with
15143 outval. */
15144 emit_insn (gen_movhi (scratch_hi, outval));
15145 outval = scratch_hi;
15146 }
15147 }
15148
15149 /* Get the base address; addsi3 knows how to handle constants
15150 that require more than one insn. */
15151 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15152 base = base_plus;
15153 offset = lo;
15154 }
15155 }
15156
15157 if (BYTES_BIG_ENDIAN)
15158 {
15159 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15160 plus_constant (Pmode, base,
15161 offset + 1)),
15162 gen_lowpart (QImode, outval)));
15163 emit_insn (gen_lshrsi3 (scratch,
15164 gen_rtx_SUBREG (SImode, outval, 0),
15165 GEN_INT (8)));
15166 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15167 offset)),
15168 gen_lowpart (QImode, scratch)));
15169 }
15170 else
15171 {
15172 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15173 offset)),
15174 gen_lowpart (QImode, outval)));
15175 emit_insn (gen_lshrsi3 (scratch,
15176 gen_rtx_SUBREG (SImode, outval, 0),
15177 GEN_INT (8)));
15178 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15179 plus_constant (Pmode, base,
15180 offset + 1)),
15181 gen_lowpart (QImode, scratch)));
15182 }
15183 }
15184
15185 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15186 (padded to the size of a word) should be passed in a register. */
15187
15188 static bool
15189 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
15190 {
15191 if (TARGET_AAPCS_BASED)
15192 return must_pass_in_stack_var_size (mode, type);
15193 else
15194 return must_pass_in_stack_var_size_or_pad (mode, type);
15195 }
15196
15197
15198 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15199 Return true if an argument passed on the stack should be padded upwards,
15200 i.e. if the least-significant byte has useful data.
15201 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15202 aggregate types are placed in the lowest memory address. */
15203
15204 bool
15205 arm_pad_arg_upward (enum machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15206 {
15207 if (!TARGET_AAPCS_BASED)
15208 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15209
15210 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15211 return false;
15212
15213 return true;
15214 }
15215
15216
15217 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15218 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15219 register has useful data, and return the opposite if the most
15220 significant byte does. */
15221
15222 bool
15223 arm_pad_reg_upward (enum machine_mode mode,
15224 tree type, int first ATTRIBUTE_UNUSED)
15225 {
15226 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15227 {
15228 /* For AAPCS, small aggregates, small fixed-point types,
15229 and small complex types are always padded upwards. */
15230 if (type)
15231 {
15232 if ((AGGREGATE_TYPE_P (type)
15233 || TREE_CODE (type) == COMPLEX_TYPE
15234 || FIXED_POINT_TYPE_P (type))
15235 && int_size_in_bytes (type) <= 4)
15236 return true;
15237 }
15238 else
15239 {
15240 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15241 && GET_MODE_SIZE (mode) <= 4)
15242 return true;
15243 }
15244 }
15245
15246 /* Otherwise, use default padding. */
15247 return !BYTES_BIG_ENDIAN;
15248 }
15249
15250 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15251 assuming that the address in the base register is word aligned. */
15252 bool
15253 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15254 {
15255 HOST_WIDE_INT max_offset;
15256
15257   /* The offset must be a multiple of 4 in Thumb-2 mode.  */
15258 if (TARGET_THUMB2 && ((offset & 3) != 0))
15259 return false;
15260
15261 if (TARGET_THUMB2)
15262 max_offset = 1020;
15263 else if (TARGET_ARM)
15264 max_offset = 255;
15265 else
15266 return false;
15267
15268 return ((offset <= max_offset) && (offset >= -max_offset));
15269 }
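/* A worked illustration of the check above (an editor's sketch, never
   compiled).  On a Thumb-2 target this accepts an offset of 1020 (in range
   and a multiple of four) but rejects 1022 (not a multiple of four) and
   1024 (out of range); in ARM state any offset whose magnitude exceeds 255
   is rejected.  */
#if 0
static bool
example_ldrd_offset_checks (void)
{
  return offset_ok_for_ldrd_strd (1020)
	 && !offset_ok_for_ldrd_strd (1022)
	 && !offset_ok_for_ldrd_strd (1024);
}
#endif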
15270
15271 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15272    Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
15273    Assumes that the address in the base register RN is word aligned.  The
15274    pattern guarantees that both memory accesses use the same base register,
15275    that the offsets are constants within range, and that the gap is 4.  If
15276    reload is complete, also check that the registers are legal.  WBACK says
15277    whether the address is updated; LOAD whether the access is a load or a store.  */
15278 bool
15279 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15280 bool wback, bool load)
15281 {
15282 unsigned int t, t2, n;
15283
15284 if (!reload_completed)
15285 return true;
15286
15287 if (!offset_ok_for_ldrd_strd (offset))
15288 return false;
15289
15290 t = REGNO (rt);
15291 t2 = REGNO (rt2);
15292 n = REGNO (rn);
15293
15294 if ((TARGET_THUMB2)
15295 && ((wback && (n == t || n == t2))
15296 || (t == SP_REGNUM)
15297 || (t == PC_REGNUM)
15298 || (t2 == SP_REGNUM)
15299 || (t2 == PC_REGNUM)
15300 || (!load && (n == PC_REGNUM))
15301 || (load && (t == t2))
15302 /* Triggers Cortex-M3 LDRD errata. */
15303 || (!wback && load && fix_cm3_ldrd && (n == t))))
15304 return false;
15305
15306 if ((TARGET_ARM)
15307 && ((wback && (n == t || n == t2))
15308 || (t2 == PC_REGNUM)
15309 || (t % 2 != 0) /* First destination register is not even. */
15310 || (t2 != t + 1)
15311 /* PC can be used as base register (for offset addressing only),
15312 	 but it is deprecated.  */
15313 || (n == PC_REGNUM)))
15314 return false;
15315
15316 return true;
15317 }
15318
15319 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15320 operand MEM's address contains an immediate offset from the base
15321 register and has no side effects, in which case it sets BASE and
15322 OFFSET accordingly. */
15323 static bool
15324 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15325 {
15326 rtx addr;
15327
15328 gcc_assert (base != NULL && offset != NULL);
15329
15330 /* TODO: Handle more general memory operand patterns, such as
15331 PRE_DEC and PRE_INC. */
15332
15333 if (side_effects_p (mem))
15334 return false;
15335
15336 /* Can't deal with subregs. */
15337 if (GET_CODE (mem) == SUBREG)
15338 return false;
15339
15340 gcc_assert (MEM_P (mem));
15341
15342 *offset = const0_rtx;
15343
15344 addr = XEXP (mem, 0);
15345
15346 /* If addr isn't valid for DImode, then we can't handle it. */
15347 if (!arm_legitimate_address_p (DImode, addr,
15348 reload_in_progress || reload_completed))
15349 return false;
15350
15351 if (REG_P (addr))
15352 {
15353 *base = addr;
15354 return true;
15355 }
15356 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15357 {
15358 *base = XEXP (addr, 0);
15359 *offset = XEXP (addr, 1);
15360 return (REG_P (*base) && CONST_INT_P (*offset));
15361 }
15362
15363 return false;
15364 }
15365
15366 #define SWAP_RTX(x,y) do { rtx tmp = x; x = y; y = tmp; } while (0)
15367
15368 /* Called from a peephole2 to replace two word-size accesses with a
15369 single LDRD/STRD instruction. Returns true iff we can generate a
15370 new instruction sequence. That is, both accesses use the same base
15371 register and the gap between constant offsets is 4. This function
15372 may reorder its operands to match ldrd/strd RTL templates.
15373 OPERANDS are the operands found by the peephole matcher;
15374 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15375    corresponding memory operands.  LOAD indicates whether the access
15376    is a load or a store.  CONST_STORE indicates a store of constant
15377    integer values held in OPERANDS[4,5], and assumes that the pattern
15378    is four insns long, for the purpose of checking dead registers.
15379 COMMUTE indicates that register operands may be reordered. */
15380 bool
15381 gen_operands_ldrd_strd (rtx *operands, bool load,
15382 bool const_store, bool commute)
15383 {
15384 int nops = 2;
15385 HOST_WIDE_INT offsets[2], offset;
15386 rtx base = NULL_RTX;
15387 rtx cur_base, cur_offset, tmp;
15388 int i, gap;
15389 HARD_REG_SET regset;
15390
15391 gcc_assert (!const_store || !load);
15392 /* Check that the memory references are immediate offsets from the
15393 same base register. Extract the base register, the destination
15394 registers, and the corresponding memory offsets. */
15395 for (i = 0; i < nops; i++)
15396 {
15397 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15398 return false;
15399
15400 if (i == 0)
15401 base = cur_base;
15402 else if (REGNO (base) != REGNO (cur_base))
15403 return false;
15404
15405 offsets[i] = INTVAL (cur_offset);
15406 if (GET_CODE (operands[i]) == SUBREG)
15407 {
15408 tmp = SUBREG_REG (operands[i]);
15409 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15410 operands[i] = tmp;
15411 }
15412 }
15413
15414 /* Make sure there is no dependency between the individual loads. */
15415 if (load && REGNO (operands[0]) == REGNO (base))
15416 return false; /* RAW */
15417
15418 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15419 return false; /* WAW */
15420
15421 /* If the same input register is used in both stores
15422 when storing different constants, try to find a free register.
15423 For example, the code
15424 mov r0, 0
15425 str r0, [r2]
15426 mov r0, 1
15427 str r0, [r2, #4]
15428 can be transformed into
15429 mov r1, 0
15430 strd r1, r0, [r2]
15431 in Thumb mode assuming that r1 is free. */
15432 if (const_store
15433 && REGNO (operands[0]) == REGNO (operands[1])
15434 && INTVAL (operands[4]) != INTVAL (operands[5]))
15435 {
15436 if (TARGET_THUMB2)
15437 {
15438 CLEAR_HARD_REG_SET (regset);
15439 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15440 if (tmp == NULL_RTX)
15441 return false;
15442
15443 /* Use the new register in the first load to ensure that
15444 if the original input register is not dead after peephole,
15445 then it will have the correct constant value. */
15446 operands[0] = tmp;
15447 }
15448 else if (TARGET_ARM)
15449 {
15450 	  return false; /* ??? Makes the rest of this block unreachable.  */
15451 int regno = REGNO (operands[0]);
15452 if (!peep2_reg_dead_p (4, operands[0]))
15453 {
15454 /* When the input register is even and is not dead after the
15455 pattern, it has to hold the second constant but we cannot
15456 form a legal STRD in ARM mode with this register as the second
15457 register. */
15458 if (regno % 2 == 0)
15459 return false;
15460
15461 /* Is regno-1 free? */
15462 SET_HARD_REG_SET (regset);
15463 CLEAR_HARD_REG_BIT(regset, regno - 1);
15464 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15465 if (tmp == NULL_RTX)
15466 return false;
15467
15468 operands[0] = tmp;
15469 }
15470 else
15471 {
15472 /* Find a DImode register. */
15473 CLEAR_HARD_REG_SET (regset);
15474 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15475 if (tmp != NULL_RTX)
15476 {
15477 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15478 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15479 }
15480 else
15481 {
15482 /* Can we use the input register to form a DI register? */
15483 SET_HARD_REG_SET (regset);
15484 CLEAR_HARD_REG_BIT(regset,
15485 regno % 2 == 0 ? regno + 1 : regno - 1);
15486 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15487 if (tmp == NULL_RTX)
15488 return false;
15489 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15490 }
15491 }
15492
15493 gcc_assert (operands[0] != NULL_RTX);
15494 gcc_assert (operands[1] != NULL_RTX);
15495 gcc_assert (REGNO (operands[0]) % 2 == 0);
15496 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15497 }
15498 }
15499
15500 /* Make sure the instructions are ordered with lower memory access first. */
15501 if (offsets[0] > offsets[1])
15502 {
15503 gap = offsets[0] - offsets[1];
15504 offset = offsets[1];
15505
15506 /* Swap the instructions such that lower memory is accessed first. */
15507 SWAP_RTX (operands[0], operands[1]);
15508 SWAP_RTX (operands[2], operands[3]);
15509 if (const_store)
15510 SWAP_RTX (operands[4], operands[5]);
15511 }
15512 else
15513 {
15514 gap = offsets[1] - offsets[0];
15515 offset = offsets[0];
15516 }
15517
15518 /* Make sure accesses are to consecutive memory locations. */
15519 if (gap != 4)
15520 return false;
15521
15522 /* Make sure we generate legal instructions. */
15523 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15524 false, load))
15525 return true;
15526
15527   /* In Thumb state the registers are almost unconstrained, so if the check
15528      above failed there is little hope of fixing it by renaming.  */
15529 if (TARGET_THUMB2)
15530 return false;
15531
15532 if (load && commute)
15533 {
15534 /* Try reordering registers. */
15535 SWAP_RTX (operands[0], operands[1]);
15536 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15537 false, load))
15538 return true;
15539 }
15540
15541 if (const_store)
15542 {
15543 /* If input registers are dead after this pattern, they can be
15544 reordered or replaced by other registers that are free in the
15545 current pattern. */
15546 if (!peep2_reg_dead_p (4, operands[0])
15547 || !peep2_reg_dead_p (4, operands[1]))
15548 return false;
15549
15550 /* Try to reorder the input registers. */
15551 /* For example, the code
15552 mov r0, 0
15553 mov r1, 1
15554 str r1, [r2]
15555 str r0, [r2, #4]
15556 can be transformed into
15557 mov r1, 0
15558 mov r0, 1
15559 strd r0, [r2]
15560 */
15561 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15562 false, false))
15563 {
15564 SWAP_RTX (operands[0], operands[1]);
15565 return true;
15566 }
15567
15568 /* Try to find a free DI register. */
15569 CLEAR_HARD_REG_SET (regset);
15570 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15571 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15572 while (true)
15573 {
15574 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15575 if (tmp == NULL_RTX)
15576 return false;
15577
15578 /* DREG must be an even-numbered register in DImode.
15579 Split it into SI registers. */
15580 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15581 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15582 gcc_assert (operands[0] != NULL_RTX);
15583 gcc_assert (operands[1] != NULL_RTX);
15584 gcc_assert (REGNO (operands[0]) % 2 == 0);
15585 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15586
15587 return (operands_ok_ldrd_strd (operands[0], operands[1],
15588 base, offset,
15589 false, load));
15590 }
15591 }
15592
15593 return false;
15594 }
15595 #undef SWAP_RTX
15596
15597
15598
15599 \f
15600 /* Print a symbolic form of X to the debug file, F. */
15601 static void
15602 arm_print_value (FILE *f, rtx x)
15603 {
15604 switch (GET_CODE (x))
15605 {
15606 case CONST_INT:
15607 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15608 return;
15609
15610 case CONST_DOUBLE:
15611 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15612 return;
15613
15614 case CONST_VECTOR:
15615 {
15616 int i;
15617
15618 fprintf (f, "<");
15619 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15620 {
15621 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15622 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15623 fputc (',', f);
15624 }
15625 fprintf (f, ">");
15626 }
15627 return;
15628
15629 case CONST_STRING:
15630 fprintf (f, "\"%s\"", XSTR (x, 0));
15631 return;
15632
15633 case SYMBOL_REF:
15634 fprintf (f, "`%s'", XSTR (x, 0));
15635 return;
15636
15637 case LABEL_REF:
15638 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15639 return;
15640
15641 case CONST:
15642 arm_print_value (f, XEXP (x, 0));
15643 return;
15644
15645 case PLUS:
15646 arm_print_value (f, XEXP (x, 0));
15647 fprintf (f, "+");
15648 arm_print_value (f, XEXP (x, 1));
15649 return;
15650
15651 case PC:
15652 fprintf (f, "pc");
15653 return;
15654
15655 default:
15656 fprintf (f, "????");
15657 return;
15658 }
15659 }
15660 \f
15661 /* Routines for manipulation of the constant pool. */
15662
15663 /* Arm instructions cannot load a large constant directly into a
15664 register; they have to come from a pc relative load. The constant
15665 must therefore be placed in the addressable range of the pc
15666 relative load. Depending on the precise pc relative load
15667 instruction the range is somewhere between 256 bytes and 4k. This
15668 means that we often have to dump a constant inside a function, and
15669 generate code to branch around it.
15670
15671 It is important to minimize this, since the branches will slow
15672 things down and make the code larger.
15673
15674 Normally we can hide the table after an existing unconditional
15675 branch so that there is no interruption of the flow, but in the
15676 worst case the code looks like this:
15677
15678 ldr rn, L1
15679 ...
15680 b L2
15681 align
15682 L1: .long value
15683 L2:
15684 ...
15685
15686 ldr rn, L3
15687 ...
15688 b L4
15689 align
15690 L3: .long value
15691 L4:
15692 ...
15693
15694 We fix this by performing a scan after scheduling, which notices
15695 which instructions need to have their operands fetched from the
15696 constant table and builds the table.
15697
15698 The algorithm starts by building a table of all the constants that
15699 need fixing up and all the natural barriers in the function (places
15700 where a constant table can be dropped without breaking the flow).
15701 For each fixup we note how far the pc-relative replacement will be
15702 able to reach and the offset of the instruction into the function.
15703
15704 Having built the table we then group the fixes together to form
15705 tables that are as large as possible (subject to addressing
15706 constraints) and emit each table of constants after the last
15707 barrier that is within range of all the instructions in the group.
15708 If a group does not contain a barrier, then we forcibly create one
15709 by inserting a jump instruction into the flow. Once the table has
15710 been inserted, the insns are then modified to reference the
15711 relevant entry in the pool.
15712
15713 Possible enhancements to the algorithm (not implemented) are:
15714
15715 1) For some processors and object formats, there may be benefit in
15716 aligning the pools to the start of cache lines; this alignment
15717 would need to be taken into account when calculating addressability
15718 of a pool. */
15719
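/* In rough outline (a reader's summary, not a specification):
   push_minipool_fix and push_minipool_barrier below record the constants
   needing fixing and the natural barriers; add_minipool_forward_ref and
   add_minipool_backward_ref place each fix in a pool subject to its
   reachable range; create_fix_barrier forces a branch-around barrier when
   no natural one is in range; assign_minipool_offsets and dump_minipool
   then lay out and emit the table.  */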
15720 /* These typedefs are located at the start of this file, so that
15721 they can be used in the prototypes there. This comment is to
15722 remind readers of that fact so that the following structures
15723 can be understood more easily.
15724
15725 typedef struct minipool_node Mnode;
15726 typedef struct minipool_fixup Mfix; */
15727
15728 struct minipool_node
15729 {
15730 /* Doubly linked chain of entries. */
15731 Mnode * next;
15732 Mnode * prev;
15733   /* The maximum offset into the code at which this entry can be placed.  While
15734 pushing fixes for forward references, all entries are sorted in order
15735 of increasing max_address. */
15736 HOST_WIDE_INT max_address;
15737 /* Similarly for an entry inserted for a backwards ref. */
15738 HOST_WIDE_INT min_address;
15739 /* The number of fixes referencing this entry. This can become zero
15740 if we "unpush" an entry. In this case we ignore the entry when we
15741 come to emit the code. */
15742 int refcount;
15743 /* The offset from the start of the minipool. */
15744 HOST_WIDE_INT offset;
15745   /* The value in the table.  */
15746 rtx value;
15747 /* The mode of value. */
15748 enum machine_mode mode;
15749 /* The size of the value. With iWMMXt enabled
15750      sizes > 4 also imply an alignment of 8 bytes.  */
15751 int fix_size;
15752 };
15753
15754 struct minipool_fixup
15755 {
15756 Mfix * next;
15757 rtx insn;
15758 HOST_WIDE_INT address;
15759 rtx * loc;
15760 enum machine_mode mode;
15761 int fix_size;
15762 rtx value;
15763 Mnode * minipool;
15764 HOST_WIDE_INT forwards;
15765 HOST_WIDE_INT backwards;
15766 };
15767
15768 /* Fixes less than a word need padding out to a word boundary. */
15769 #define MINIPOOL_FIX_SIZE(mode) \
15770 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
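/* For example, an HImode fix still occupies 4 bytes in the pool, while
   DImode and DFmode fixes occupy 8; a 16-byte case also exists (see
   dump_minipool below).  */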
15771
15772 static Mnode * minipool_vector_head;
15773 static Mnode * minipool_vector_tail;
15774 static rtx minipool_vector_label;
15775 static int minipool_pad;
15776
15777 /* The linked list of all minipool fixes required for this function. */
15778 Mfix * minipool_fix_head;
15779 Mfix * minipool_fix_tail;
15780 /* The fix entry for the current minipool, once it has been placed. */
15781 Mfix * minipool_barrier;
15782
15783 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15784 #define JUMP_TABLES_IN_TEXT_SECTION 0
15785 #endif
15786
15787 static HOST_WIDE_INT
15788 get_jump_table_size (rtx insn)
15789 {
15790   /* ADDR_VECs only take room if read-only data goes into the text
15791 section. */
15792 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15793 {
15794 rtx body = PATTERN (insn);
15795 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15796 HOST_WIDE_INT size;
15797 HOST_WIDE_INT modesize;
15798
15799 modesize = GET_MODE_SIZE (GET_MODE (body));
15800 size = modesize * XVECLEN (body, elt);
15801 switch (modesize)
15802 {
15803 case 1:
15804 /* Round up size of TBB table to a halfword boundary. */
15805 size = (size + 1) & ~(HOST_WIDE_INT)1;
15806 break;
15807 case 2:
15808 /* No padding necessary for TBH. */
15809 break;
15810 case 4:
15811 /* Add two bytes for alignment on Thumb. */
15812 if (TARGET_THUMB)
15813 size += 2;
15814 break;
15815 default:
15816 gcc_unreachable ();
15817 }
15818 return size;
15819 }
15820
15821 return 0;
15822 }
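/* For instance, a TBB dispatch table (modesize 1) with five entries is
   counted as 6 bytes after rounding up to a halfword boundary, and a
   word-sized table on Thumb is charged 2 extra bytes for alignment.  */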
15823
15824 /* Return the maximum amount of padding that will be inserted before
15825 label LABEL. */
15826
15827 static HOST_WIDE_INT
15828 get_label_padding (rtx label)
15829 {
15830 HOST_WIDE_INT align, min_insn_size;
15831
15832 align = 1 << label_to_alignment (label);
15833 min_insn_size = TARGET_THUMB ? 2 : 4;
15834 return align > min_insn_size ? align - min_insn_size : 0;
15835 }
15836
15837 /* Move a minipool fix MP from its current location to before MAX_MP.
15838 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15839 constraints may need updating. */
15840 static Mnode *
15841 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15842 HOST_WIDE_INT max_address)
15843 {
15844 /* The code below assumes these are different. */
15845 gcc_assert (mp != max_mp);
15846
15847 if (max_mp == NULL)
15848 {
15849 if (max_address < mp->max_address)
15850 mp->max_address = max_address;
15851 }
15852 else
15853 {
15854 if (max_address > max_mp->max_address - mp->fix_size)
15855 mp->max_address = max_mp->max_address - mp->fix_size;
15856 else
15857 mp->max_address = max_address;
15858
15859 /* Unlink MP from its current position. Since max_mp is non-null,
15860 mp->prev must be non-null. */
15861 mp->prev->next = mp->next;
15862 if (mp->next != NULL)
15863 mp->next->prev = mp->prev;
15864 else
15865 minipool_vector_tail = mp->prev;
15866
15867 /* Re-insert it before MAX_MP. */
15868 mp->next = max_mp;
15869 mp->prev = max_mp->prev;
15870 max_mp->prev = mp;
15871
15872 if (mp->prev != NULL)
15873 mp->prev->next = mp;
15874 else
15875 minipool_vector_head = mp;
15876 }
15877
15878 /* Save the new entry. */
15879 max_mp = mp;
15880
15881 /* Scan over the preceding entries and adjust their addresses as
15882 required. */
15883 while (mp->prev != NULL
15884 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15885 {
15886 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15887 mp = mp->prev;
15888 }
15889
15890 return max_mp;
15891 }
15892
15893 /* Add a constant to the minipool for a forward reference. Returns the
15894 node added or NULL if the constant will not fit in this pool. */
15895 static Mnode *
15896 add_minipool_forward_ref (Mfix *fix)
15897 {
15898 /* If set, max_mp is the first pool_entry that has a lower
15899 constraint than the one we are trying to add. */
15900 Mnode * max_mp = NULL;
15901 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15902 Mnode * mp;
15903
15904 /* If the minipool starts before the end of FIX->INSN then this FIX
15905 can not be placed into the current pool. Furthermore, adding the
15906 new constant pool entry may cause the pool to start FIX_SIZE bytes
15907 earlier. */
15908 if (minipool_vector_head &&
15909 (fix->address + get_attr_length (fix->insn)
15910 >= minipool_vector_head->max_address - fix->fix_size))
15911 return NULL;
15912
15913 /* Scan the pool to see if a constant with the same value has
15914 already been added. While we are doing this, also note the
15915 location where we must insert the constant if it doesn't already
15916 exist. */
15917 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15918 {
15919 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15920 && fix->mode == mp->mode
15921 && (!LABEL_P (fix->value)
15922 || (CODE_LABEL_NUMBER (fix->value)
15923 == CODE_LABEL_NUMBER (mp->value)))
15924 && rtx_equal_p (fix->value, mp->value))
15925 {
15926 /* More than one fix references this entry. */
15927 mp->refcount++;
15928 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15929 }
15930
15931 /* Note the insertion point if necessary. */
15932 if (max_mp == NULL
15933 && mp->max_address > max_address)
15934 max_mp = mp;
15935
15936       /* If we are inserting an 8-byte aligned quantity and
15937 we have not already found an insertion point, then
15938 make sure that all such 8-byte aligned quantities are
15939 placed at the start of the pool. */
15940 if (ARM_DOUBLEWORD_ALIGN
15941 && max_mp == NULL
15942 && fix->fix_size >= 8
15943 && mp->fix_size < 8)
15944 {
15945 max_mp = mp;
15946 max_address = mp->max_address;
15947 }
15948 }
15949
15950 /* The value is not currently in the minipool, so we need to create
15951 a new entry for it. If MAX_MP is NULL, the entry will be put on
15952 the end of the list since the placement is less constrained than
15953 any existing entry. Otherwise, we insert the new fix before
15954 MAX_MP and, if necessary, adjust the constraints on the other
15955 entries. */
15956 mp = XNEW (Mnode);
15957 mp->fix_size = fix->fix_size;
15958 mp->mode = fix->mode;
15959 mp->value = fix->value;
15960 mp->refcount = 1;
15961 /* Not yet required for a backwards ref. */
15962 mp->min_address = -65536;
15963
15964 if (max_mp == NULL)
15965 {
15966 mp->max_address = max_address;
15967 mp->next = NULL;
15968 mp->prev = minipool_vector_tail;
15969
15970 if (mp->prev == NULL)
15971 {
15972 minipool_vector_head = mp;
15973 minipool_vector_label = gen_label_rtx ();
15974 }
15975 else
15976 mp->prev->next = mp;
15977
15978 minipool_vector_tail = mp;
15979 }
15980 else
15981 {
15982 if (max_address > max_mp->max_address - mp->fix_size)
15983 mp->max_address = max_mp->max_address - mp->fix_size;
15984 else
15985 mp->max_address = max_address;
15986
15987 mp->next = max_mp;
15988 mp->prev = max_mp->prev;
15989 max_mp->prev = mp;
15990 if (mp->prev != NULL)
15991 mp->prev->next = mp;
15992 else
15993 minipool_vector_head = mp;
15994 }
15995
15996 /* Save the new entry. */
15997 max_mp = mp;
15998
15999 /* Scan over the preceding entries and adjust their addresses as
16000 required. */
16001 while (mp->prev != NULL
16002 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16003 {
16004 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16005 mp = mp->prev;
16006 }
16007
16008 return max_mp;
16009 }
16010
16011 static Mnode *
16012 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16013 HOST_WIDE_INT min_address)
16014 {
16015 HOST_WIDE_INT offset;
16016
16017 /* The code below assumes these are different. */
16018 gcc_assert (mp != min_mp);
16019
16020 if (min_mp == NULL)
16021 {
16022 if (min_address > mp->min_address)
16023 mp->min_address = min_address;
16024 }
16025 else
16026 {
16027 /* We will adjust this below if it is too loose. */
16028 mp->min_address = min_address;
16029
16030 /* Unlink MP from its current position. Since min_mp is non-null,
16031 mp->next must be non-null. */
16032 mp->next->prev = mp->prev;
16033 if (mp->prev != NULL)
16034 mp->prev->next = mp->next;
16035 else
16036 minipool_vector_head = mp->next;
16037
16038 /* Reinsert it after MIN_MP. */
16039 mp->prev = min_mp;
16040 mp->next = min_mp->next;
16041 min_mp->next = mp;
16042 if (mp->next != NULL)
16043 mp->next->prev = mp;
16044 else
16045 minipool_vector_tail = mp;
16046 }
16047
16048 min_mp = mp;
16049
16050 offset = 0;
16051 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16052 {
16053 mp->offset = offset;
16054 if (mp->refcount > 0)
16055 offset += mp->fix_size;
16056
16057 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16058 mp->next->min_address = mp->min_address + mp->fix_size;
16059 }
16060
16061 return min_mp;
16062 }
16063
16064 /* Add a constant to the minipool for a backward reference. Returns the
16065 node added or NULL if the constant will not fit in this pool.
16066
16067 Note that the code for insertion for a backwards reference can be
16068 somewhat confusing because the calculated offsets for each fix do
16069 not take into account the size of the pool (which is still under
16070    construction).  */
16071 static Mnode *
16072 add_minipool_backward_ref (Mfix *fix)
16073 {
16074 /* If set, min_mp is the last pool_entry that has a lower constraint
16075 than the one we are trying to add. */
16076 Mnode *min_mp = NULL;
16077 /* This can be negative, since it is only a constraint. */
16078 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16079 Mnode *mp;
16080
16081 /* If we can't reach the current pool from this insn, or if we can't
16082 insert this entry at the end of the pool without pushing other
16083 fixes out of range, then we don't try. This ensures that we
16084 can't fail later on. */
16085 if (min_address >= minipool_barrier->address
16086 || (minipool_vector_tail->min_address + fix->fix_size
16087 >= minipool_barrier->address))
16088 return NULL;
16089
16090 /* Scan the pool to see if a constant with the same value has
16091 already been added. While we are doing this, also note the
16092 location where we must insert the constant if it doesn't already
16093 exist. */
16094 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16095 {
16096 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16097 && fix->mode == mp->mode
16098 && (!LABEL_P (fix->value)
16099 || (CODE_LABEL_NUMBER (fix->value)
16100 == CODE_LABEL_NUMBER (mp->value)))
16101 && rtx_equal_p (fix->value, mp->value)
16102 /* Check that there is enough slack to move this entry to the
16103 end of the table (this is conservative). */
16104 && (mp->max_address
16105 > (minipool_barrier->address
16106 + minipool_vector_tail->offset
16107 + minipool_vector_tail->fix_size)))
16108 {
16109 mp->refcount++;
16110 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16111 }
16112
16113 if (min_mp != NULL)
16114 mp->min_address += fix->fix_size;
16115 else
16116 {
16117 /* Note the insertion point if necessary. */
16118 if (mp->min_address < min_address)
16119 {
16120 	      /* For now, we do not allow the insertion of nodes that require
16121 		 8-byte alignment anywhere but at the start of the pool.  */
16122 if (ARM_DOUBLEWORD_ALIGN
16123 && fix->fix_size >= 8 && mp->fix_size < 8)
16124 return NULL;
16125 else
16126 min_mp = mp;
16127 }
16128 else if (mp->max_address
16129 < minipool_barrier->address + mp->offset + fix->fix_size)
16130 {
16131 /* Inserting before this entry would push the fix beyond
16132 its maximum address (which can happen if we have
16133 re-located a forwards fix); force the new fix to come
16134 after it. */
16135 if (ARM_DOUBLEWORD_ALIGN
16136 && fix->fix_size >= 8 && mp->fix_size < 8)
16137 return NULL;
16138 else
16139 {
16140 min_mp = mp;
16141 min_address = mp->min_address + fix->fix_size;
16142 }
16143 }
16144 /* Do not insert a non-8-byte aligned quantity before 8-byte
16145 aligned quantities. */
16146 else if (ARM_DOUBLEWORD_ALIGN
16147 && fix->fix_size < 8
16148 && mp->fix_size >= 8)
16149 {
16150 min_mp = mp;
16151 min_address = mp->min_address + fix->fix_size;
16152 }
16153 }
16154 }
16155
16156 /* We need to create a new entry. */
16157 mp = XNEW (Mnode);
16158 mp->fix_size = fix->fix_size;
16159 mp->mode = fix->mode;
16160 mp->value = fix->value;
16161 mp->refcount = 1;
16162 mp->max_address = minipool_barrier->address + 65536;
16163
16164 mp->min_address = min_address;
16165
16166 if (min_mp == NULL)
16167 {
16168 mp->prev = NULL;
16169 mp->next = minipool_vector_head;
16170
16171 if (mp->next == NULL)
16172 {
16173 minipool_vector_tail = mp;
16174 minipool_vector_label = gen_label_rtx ();
16175 }
16176 else
16177 mp->next->prev = mp;
16178
16179 minipool_vector_head = mp;
16180 }
16181 else
16182 {
16183 mp->next = min_mp->next;
16184 mp->prev = min_mp;
16185 min_mp->next = mp;
16186
16187 if (mp->next != NULL)
16188 mp->next->prev = mp;
16189 else
16190 minipool_vector_tail = mp;
16191 }
16192
16193 /* Save the new entry. */
16194 min_mp = mp;
16195
16196 if (mp->prev)
16197 mp = mp->prev;
16198 else
16199 mp->offset = 0;
16200
16201 /* Scan over the following entries and adjust their offsets. */
16202 while (mp->next != NULL)
16203 {
16204 if (mp->next->min_address < mp->min_address + mp->fix_size)
16205 mp->next->min_address = mp->min_address + mp->fix_size;
16206
16207 if (mp->refcount)
16208 mp->next->offset = mp->offset + mp->fix_size;
16209 else
16210 mp->next->offset = mp->offset;
16211
16212 mp = mp->next;
16213 }
16214
16215 return min_mp;
16216 }
16217
16218 static void
16219 assign_minipool_offsets (Mfix *barrier)
16220 {
16221 HOST_WIDE_INT offset = 0;
16222 Mnode *mp;
16223
16224 minipool_barrier = barrier;
16225
16226 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16227 {
16228 mp->offset = offset;
16229
16230 if (mp->refcount > 0)
16231 offset += mp->fix_size;
16232 }
16233 }
16234
16235 /* Output the literal table.  */
16236 static void
16237 dump_minipool (rtx scan)
16238 {
16239 Mnode * mp;
16240 Mnode * nmp;
16241 int align64 = 0;
16242
16243 if (ARM_DOUBLEWORD_ALIGN)
16244 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16245 if (mp->refcount > 0 && mp->fix_size >= 8)
16246 {
16247 align64 = 1;
16248 break;
16249 }
16250
16251 if (dump_file)
16252 fprintf (dump_file,
16253 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16254 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16255
16256 scan = emit_label_after (gen_label_rtx (), scan);
16257 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16258 scan = emit_label_after (minipool_vector_label, scan);
16259
16260 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16261 {
16262 if (mp->refcount > 0)
16263 {
16264 if (dump_file)
16265 {
16266 fprintf (dump_file,
16267 ";; Offset %u, min %ld, max %ld ",
16268 (unsigned) mp->offset, (unsigned long) mp->min_address,
16269 (unsigned long) mp->max_address);
16270 arm_print_value (dump_file, mp->value);
16271 fputc ('\n', dump_file);
16272 }
16273
16274 switch (mp->fix_size)
16275 {
16276 #ifdef HAVE_consttable_1
16277 case 1:
16278 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16279 break;
16280
16281 #endif
16282 #ifdef HAVE_consttable_2
16283 case 2:
16284 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16285 break;
16286
16287 #endif
16288 #ifdef HAVE_consttable_4
16289 case 4:
16290 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16291 break;
16292
16293 #endif
16294 #ifdef HAVE_consttable_8
16295 case 8:
16296 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16297 break;
16298
16299 #endif
16300 #ifdef HAVE_consttable_16
16301 case 16:
16302 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16303 break;
16304
16305 #endif
16306 default:
16307 gcc_unreachable ();
16308 }
16309 }
16310
16311 nmp = mp->next;
16312 free (mp);
16313 }
16314
16315 minipool_vector_head = minipool_vector_tail = NULL;
16316 scan = emit_insn_after (gen_consttable_end (), scan);
16317 scan = emit_barrier_after (scan);
16318 }
16319
16320 /* Return the cost of forcibly inserting a barrier after INSN. */
16321 static int
16322 arm_barrier_cost (rtx insn)
16323 {
16324 /* Basing the location of the pool on the loop depth is preferable,
16325 but at the moment, the basic block information seems to be
16326 corrupt by this stage of the compilation. */
16327 int base_cost = 50;
16328 rtx next = next_nonnote_insn (insn);
16329
16330 if (next != NULL && LABEL_P (next))
16331 base_cost -= 20;
16332
16333 switch (GET_CODE (insn))
16334 {
16335 case CODE_LABEL:
16336 /* It will always be better to place the table before the label, rather
16337 than after it. */
16338 return 50;
16339
16340 case INSN:
16341 case CALL_INSN:
16342 return base_cost;
16343
16344 case JUMP_INSN:
16345 return base_cost - 10;
16346
16347 default:
16348 return base_cost + 10;
16349 }
16350 }
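/* A worked example of the heuristic above: a JUMP_INSN whose next non-note
   insn is a CODE_LABEL scores 50 - 20 - 10 = 20, whereas an ordinary INSN
   not followed by a label scores 50, so the former is preferred when
   create_fix_barrier scans for a place to force a barrier.  */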
16351
16352 /* Find the best place in the insn stream in the range
16353 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16354 Create the barrier by inserting a jump and add a new fix entry for
16355 it. */
16356 static Mfix *
16357 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16358 {
16359 HOST_WIDE_INT count = 0;
16360 rtx barrier;
16361 rtx from = fix->insn;
16362 /* The instruction after which we will insert the jump. */
16363 rtx selected = NULL;
16364 int selected_cost;
16365 /* The address at which the jump instruction will be placed. */
16366 HOST_WIDE_INT selected_address;
16367 Mfix * new_fix;
16368 HOST_WIDE_INT max_count = max_address - fix->address;
16369 rtx label = gen_label_rtx ();
16370
16371 selected_cost = arm_barrier_cost (from);
16372 selected_address = fix->address;
16373
16374 while (from && count < max_count)
16375 {
16376 rtx tmp;
16377 int new_cost;
16378
16379 /* This code shouldn't have been called if there was a natural barrier
16380 within range. */
16381 gcc_assert (!BARRIER_P (from));
16382
16383 /* Count the length of this insn. This must stay in sync with the
16384 code that pushes minipool fixes. */
16385 if (LABEL_P (from))
16386 count += get_label_padding (from);
16387 else
16388 count += get_attr_length (from);
16389
16390 /* If there is a jump table, add its length. */
16391 if (tablejump_p (from, NULL, &tmp))
16392 {
16393 count += get_jump_table_size (tmp);
16394
16395 /* Jump tables aren't in a basic block, so base the cost on
16396 the dispatch insn. If we select this location, we will
16397 still put the pool after the table. */
16398 new_cost = arm_barrier_cost (from);
16399
16400 if (count < max_count
16401 && (!selected || new_cost <= selected_cost))
16402 {
16403 selected = tmp;
16404 selected_cost = new_cost;
16405 selected_address = fix->address + count;
16406 }
16407
16408 /* Continue after the dispatch table. */
16409 from = NEXT_INSN (tmp);
16410 continue;
16411 }
16412
16413 new_cost = arm_barrier_cost (from);
16414
16415 if (count < max_count
16416 && (!selected || new_cost <= selected_cost))
16417 {
16418 selected = from;
16419 selected_cost = new_cost;
16420 selected_address = fix->address + count;
16421 }
16422
16423 from = NEXT_INSN (from);
16424 }
16425
16426 /* Make sure that we found a place to insert the jump. */
16427 gcc_assert (selected);
16428
16429 /* Make sure we do not split a call and its corresponding
16430 CALL_ARG_LOCATION note. */
16431 if (CALL_P (selected))
16432 {
16433 rtx next = NEXT_INSN (selected);
16434 if (next && NOTE_P (next)
16435 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16436 selected = next;
16437 }
16438
16439 /* Create a new JUMP_INSN that branches around a barrier. */
16440 from = emit_jump_insn_after (gen_jump (label), selected);
16441 JUMP_LABEL (from) = label;
16442 barrier = emit_barrier_after (from);
16443 emit_label_after (label, barrier);
16444
16445 /* Create a minipool barrier entry for the new barrier. */
16446 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16447 new_fix->insn = barrier;
16448 new_fix->address = selected_address;
16449 new_fix->next = fix->next;
16450 fix->next = new_fix;
16451
16452 return new_fix;
16453 }
16454
16455 /* Record that there is a natural barrier in the insn stream at
16456 ADDRESS. */
16457 static void
16458 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
16459 {
16460 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16461
16462 fix->insn = insn;
16463 fix->address = address;
16464
16465 fix->next = NULL;
16466 if (minipool_fix_head != NULL)
16467 minipool_fix_tail->next = fix;
16468 else
16469 minipool_fix_head = fix;
16470
16471 minipool_fix_tail = fix;
16472 }
16473
16474 /* Record INSN, which will need fixing up to load a value from the
16475 minipool. ADDRESS is the offset of the insn from the start of the
16476 function; LOC is a pointer to the part of the insn which requires
16477 fixing; VALUE is the constant that must be loaded, which is of type
16478 MODE. */
16479 static void
16480 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
16481 enum machine_mode mode, rtx value)
16482 {
16483 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16484
16485 fix->insn = insn;
16486 fix->address = address;
16487 fix->loc = loc;
16488 fix->mode = mode;
16489 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16490 fix->value = value;
16491 fix->forwards = get_attr_pool_range (insn);
16492 fix->backwards = get_attr_neg_pool_range (insn);
16493 fix->minipool = NULL;
16494
16495 /* If an insn doesn't have a range defined for it, then it isn't
16496 expecting to be reworked by this code. Better to stop now than
16497 to generate duff assembly code. */
16498 gcc_assert (fix->forwards || fix->backwards);
16499
16500 /* If an entry requires 8-byte alignment then assume all constant pools
16501 require 4 bytes of padding. Trying to do this later on a per-pool
16502 basis is awkward because existing pool entries have to be modified. */
16503 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16504 minipool_pad = 4;
16505
16506 if (dump_file)
16507 {
16508 fprintf (dump_file,
16509 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16510 GET_MODE_NAME (mode),
16511 INSN_UID (insn), (unsigned long) address,
16512 -1 * (long)fix->backwards, (long)fix->forwards);
16513 arm_print_value (dump_file, fix->value);
16514 fprintf (dump_file, "\n");
16515 }
16516
16517 /* Add it to the chain of fixes. */
16518 fix->next = NULL;
16519
16520 if (minipool_fix_head != NULL)
16521 minipool_fix_tail->next = fix;
16522 else
16523 minipool_fix_head = fix;
16524
16525 minipool_fix_tail = fix;
16526 }
16527
16528 /* Return the maximum allowed cost of synthesizing a 64-bit constant inline,
16529 expressed as a number of insns. Returns 99 if we always want to synthesize
16530 the value (for example when literal pools are disabled). */
16531 int
16532 arm_max_const_double_inline_cost ()
16533 {
16534 /* Let the value get synthesized to avoid the use of literal pools. */
16535 if (arm_disable_literal_pool)
16536 return 99;
16537
16538 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16539 }
16540
16541 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16542 Returns the number of insns needed, or 99 if we don't know how to
16543 do it. */
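/* As an illustration: for the DImode constant 0x0000000100000001 each
32-bit half is a valid ARM immediate, so the cost typically comes out as
1 + 1 = 2 insns, one MOV per half.  */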
16544 int
16545 arm_const_double_inline_cost (rtx val)
16546 {
16547 rtx lowpart, highpart;
16548 enum machine_mode mode;
16549
16550 mode = GET_MODE (val);
16551
16552 if (mode == VOIDmode)
16553 mode = DImode;
16554
16555 gcc_assert (GET_MODE_SIZE (mode) == 8);
16556
16557 lowpart = gen_lowpart (SImode, val);
16558 highpart = gen_highpart_mode (SImode, mode, val);
16559
16560 gcc_assert (CONST_INT_P (lowpart));
16561 gcc_assert (CONST_INT_P (highpart));
16562
16563 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16564 NULL_RTX, NULL_RTX, 0, 0)
16565 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16566 NULL_RTX, NULL_RTX, 0, 0));
16567 }
16568
16569 /* Return true if it is worthwhile to split a 64-bit constant into two
16570 32-bit operations. This is the case if optimizing for size, or
16571 if we have load delay slots, or if one 32-bit part can be done with
16572 a single data operation. */
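/* For example, the DImode constant 0x000000ff00000003 is split even
without load delay slots: its high part (0xff) is a valid ARM immediate
and can be built with a single data operation.  */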
16573 bool
16574 arm_const_double_by_parts (rtx val)
16575 {
16576 enum machine_mode mode = GET_MODE (val);
16577 rtx part;
16578
16579 if (optimize_size || arm_ld_sched)
16580 return true;
16581
16582 if (mode == VOIDmode)
16583 mode = DImode;
16584
16585 part = gen_highpart_mode (SImode, mode, val);
16586
16587 gcc_assert (CONST_INT_P (part));
16588
16589 if (const_ok_for_arm (INTVAL (part))
16590 || const_ok_for_arm (~INTVAL (part)))
16591 return true;
16592
16593 part = gen_lowpart (SImode, val);
16594
16595 gcc_assert (CONST_INT_P (part));
16596
16597 if (const_ok_for_arm (INTVAL (part))
16598 || const_ok_for_arm (~INTVAL (part)))
16599 return true;
16600
16601 return false;
16602 }
16603
16604 /* Return true if it is possible to inline both the high and low parts
16605 of a 64-bit constant into 32-bit data processing instructions. */
16606 bool
16607 arm_const_double_by_immediates (rtx val)
16608 {
16609 enum machine_mode mode = GET_MODE (val);
16610 rtx part;
16611
16612 if (mode == VOIDmode)
16613 mode = DImode;
16614
16615 part = gen_highpart_mode (SImode, mode, val);
16616
16617 gcc_assert (CONST_INT_P (part));
16618
16619 if (!const_ok_for_arm (INTVAL (part)))
16620 return false;
16621
16622 part = gen_lowpart (SImode, val);
16623
16624 gcc_assert (CONST_INT_P (part));
16625
16626 if (!const_ok_for_arm (INTVAL (part)))
16627 return false;
16628
16629 return true;
16630 }
16631
16632 /* Scan INSN and note any of its operands that need fixing.
16633 If DO_PUSHES is false we do not actually push any of the fixups
16634 needed. */
16635 static void
16636 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
16637 {
16638 int opno;
16639
16640 extract_insn (insn);
16641
16642 if (!constrain_operands (1))
16643 fatal_insn_not_found (insn);
16644
16645 if (recog_data.n_alternatives == 0)
16646 return;
16647
16648 /* Fill in recog_op_alt with information about the constraints of
16649 this insn. */
16650 preprocess_constraints ();
16651
16652 for (opno = 0; opno < recog_data.n_operands; opno++)
16653 {
16654 /* Things we need to fix can only occur in inputs. */
16655 if (recog_data.operand_type[opno] != OP_IN)
16656 continue;
16657
16658 /* If this alternative is a memory reference, then any mention
16659 of constants in this alternative is really to fool reload
16660 into allowing us to accept one there. We need to fix them up
16661 now so that we output the right code. */
16662 if (recog_op_alt[opno][which_alternative].memory_ok)
16663 {
16664 rtx op = recog_data.operand[opno];
16665
16666 if (CONSTANT_P (op))
16667 {
16668 if (do_pushes)
16669 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16670 recog_data.operand_mode[opno], op);
16671 }
16672 else if (MEM_P (op)
16673 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16674 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16675 {
16676 if (do_pushes)
16677 {
16678 rtx cop = avoid_constant_pool_reference (op);
16679
16680 /* Casting the address of something to a mode narrower
16681 than a word can cause avoid_constant_pool_reference()
16682 to return the pool reference itself. That's no good to
16683 us here. Let's just hope that we can use the
16684 constant pool value directly. */
16685 if (op == cop)
16686 cop = get_pool_constant (XEXP (op, 0));
16687
16688 push_minipool_fix (insn, address,
16689 recog_data.operand_loc[opno],
16690 recog_data.operand_mode[opno], cop);
16691 }
16692
16693 }
16694 }
16695 }
16696
16697 return;
16698 }
16699
16700 /* Rewrite move insn into subtract of 0 if the condition codes will
16701 be useful in the next conditional jump insn. */
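/* For example, given roughly
     movs  r1, r2
     ...
     cmp   r1, #0
     bne   .L1
the move is rewritten as "subs r1, r2, #0"; its flag-setting form lets the
explicit comparison against zero be dropped later.  */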
16702
16703 static void
16704 thumb1_reorg (void)
16705 {
16706 basic_block bb;
16707
16708 FOR_EACH_BB_FN (bb, cfun)
16709 {
16710 rtx dest, src;
16711 rtx pat, op0, set = NULL;
16712 rtx prev, insn = BB_END (bb);
16713 bool insn_clobbered = false;
16714
16715 while (insn != BB_HEAD (bb) && DEBUG_INSN_P (insn))
16716 insn = PREV_INSN (insn);
16717
16718 /* Find the last cbranchsi4_insn in basic block BB. */
16719 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
16720 continue;
16721
16722 /* Get the register with which we are comparing. */
16723 pat = PATTERN (insn);
16724 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
16725
16726 /* Find the first flag setting insn before INSN in basic block BB. */
16727 gcc_assert (insn != BB_HEAD (bb));
16728 for (prev = PREV_INSN (insn);
16729 (!insn_clobbered
16730 && prev != BB_HEAD (bb)
16731 && (NOTE_P (prev)
16732 || DEBUG_INSN_P (prev)
16733 || ((set = single_set (prev)) != NULL
16734 && get_attr_conds (prev) == CONDS_NOCOND)));
16735 prev = PREV_INSN (prev))
16736 {
16737 if (reg_set_p (op0, prev))
16738 insn_clobbered = true;
16739 }
16740
16741 /* Skip if op0 is clobbered by an insn other than PREV. */
16742 if (insn_clobbered)
16743 continue;
16744
16745 if (!set)
16746 continue;
16747
16748 dest = SET_DEST (set);
16749 src = SET_SRC (set);
16750 if (!low_register_operand (dest, SImode)
16751 || !low_register_operand (src, SImode))
16752 continue;
16753
16754 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
16755 in INSN. Both src and dest of the move insn are checked. */
16756 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
16757 {
16758 dest = copy_rtx (dest);
16759 src = copy_rtx (src);
16760 src = gen_rtx_MINUS (SImode, src, const0_rtx);
16761 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
16762 INSN_CODE (prev) = -1;
16763 /* Set test register in INSN to dest. */
16764 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
16765 INSN_CODE (insn) = -1;
16766 }
16767 }
16768 }
16769
16770 /* Convert instructions to their cc-clobbering variant if possible, since
16771 that allows us to use smaller encodings. */
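/* For example, a three-operand "add r2, r0, r1" needs a 32-bit add.w
encoding while the flags are live; converting it to the flag-setting
"adds r2, r0, r1" (by adding a CC clobber to the pattern) allows a 16-bit
encoding once CC is known to be dead at that point.  */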
16772
16773 static void
16774 thumb2_reorg (void)
16775 {
16776 basic_block bb;
16777 regset_head live;
16778
16779 INIT_REG_SET (&live);
16780
16781 /* We are freeing block_for_insn in the toplev to keep compatibility
16782 with old MDEP_REORGS that are not CFG based. Recompute it now. */
16783 compute_bb_for_insn ();
16784 df_analyze ();
16785
16786 FOR_EACH_BB_FN (bb, cfun)
16787 {
16788 rtx insn;
16789
16790 COPY_REG_SET (&live, DF_LR_OUT (bb));
16791 df_simulate_initialize_backwards (bb, &live);
16792 FOR_BB_INSNS_REVERSE (bb, insn)
16793 {
16794 if (NONJUMP_INSN_P (insn)
16795 && !REGNO_REG_SET_P (&live, CC_REGNUM)
16796 && GET_CODE (PATTERN (insn)) == SET)
16797 {
16798 enum {SKIP, CONV, SWAP_CONV} action = SKIP;
16799 rtx pat = PATTERN (insn);
16800 rtx dst = XEXP (pat, 0);
16801 rtx src = XEXP (pat, 1);
16802 rtx op0 = NULL_RTX, op1 = NULL_RTX;
16803
16804 if (!OBJECT_P (src))
16805 op0 = XEXP (src, 0);
16806
16807 if (BINARY_P (src))
16808 op1 = XEXP (src, 1);
16809
16810 if (low_register_operand (dst, SImode))
16811 {
16812 switch (GET_CODE (src))
16813 {
16814 case PLUS:
16815 /* Adding two registers and storing the result
16816 in the first source is already a 16-bit
16817 operation. */
16818 if (rtx_equal_p (dst, op0)
16819 && register_operand (op1, SImode))
16820 break;
16821
16822 if (low_register_operand (op0, SImode))
16823 {
16824 /* ADDS <Rd>,<Rn>,<Rm> */
16825 if (low_register_operand (op1, SImode))
16826 action = CONV;
16827 /* ADDS <Rdn>,#<imm8> */
16828 /* SUBS <Rdn>,#<imm8> */
16829 else if (rtx_equal_p (dst, op0)
16830 && CONST_INT_P (op1)
16831 && IN_RANGE (INTVAL (op1), -255, 255))
16832 action = CONV;
16833 /* ADDS <Rd>,<Rn>,#<imm3> */
16834 /* SUBS <Rd>,<Rn>,#<imm3> */
16835 else if (CONST_INT_P (op1)
16836 && IN_RANGE (INTVAL (op1), -7, 7))
16837 action = CONV;
16838 }
16839 /* ADCS <Rd>, <Rn> */
16840 else if (GET_CODE (XEXP (src, 0)) == PLUS
16841 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
16842 && low_register_operand (XEXP (XEXP (src, 0), 1),
16843 SImode)
16844 && COMPARISON_P (op1)
16845 && cc_register (XEXP (op1, 0), VOIDmode)
16846 && maybe_get_arm_condition_code (op1) == ARM_CS
16847 && XEXP (op1, 1) == const0_rtx)
16848 action = CONV;
16849 break;
16850
16851 case MINUS:
16852 /* RSBS <Rd>,<Rn>,#0
16853 Not handled here: see NEG below. */
16854 /* SUBS <Rd>,<Rn>,#<imm3>
16855 SUBS <Rdn>,#<imm8>
16856 Not handled here: see PLUS above. */
16857 /* SUBS <Rd>,<Rn>,<Rm> */
16858 if (low_register_operand (op0, SImode)
16859 && low_register_operand (op1, SImode))
16860 action = CONV;
16861 break;
16862
16863 case MULT:
16864 /* MULS <Rdm>,<Rn>,<Rdm>
16865 As an exception to the rule, this is only used
16866 when optimizing for size since MULS is slow on all
16867 known implementations. We do not even want to use
16868 MULS in cold code, if optimizing for speed, so we
16869 test the global flag here. */
16870 if (!optimize_size)
16871 break;
16872 /* else fall through. */
16873 case AND:
16874 case IOR:
16875 case XOR:
16876 /* ANDS <Rdn>,<Rm> */
16877 if (rtx_equal_p (dst, op0)
16878 && low_register_operand (op1, SImode))
16879 action = CONV;
16880 else if (rtx_equal_p (dst, op1)
16881 && low_register_operand (op0, SImode))
16882 action = SWAP_CONV;
16883 break;
16884
16885 case ASHIFTRT:
16886 case ASHIFT:
16887 case LSHIFTRT:
16888 /* ASRS <Rdn>,<Rm> */
16889 /* LSRS <Rdn>,<Rm> */
16890 /* LSLS <Rdn>,<Rm> */
16891 if (rtx_equal_p (dst, op0)
16892 && low_register_operand (op1, SImode))
16893 action = CONV;
16894 /* ASRS <Rd>,<Rm>,#<imm5> */
16895 /* LSRS <Rd>,<Rm>,#<imm5> */
16896 /* LSLS <Rd>,<Rm>,#<imm5> */
16897 else if (low_register_operand (op0, SImode)
16898 && CONST_INT_P (op1)
16899 && IN_RANGE (INTVAL (op1), 0, 31))
16900 action = CONV;
16901 break;
16902
16903 case ROTATERT:
16904 /* RORS <Rdn>,<Rm> */
16905 if (rtx_equal_p (dst, op0)
16906 && low_register_operand (op1, SImode))
16907 action = CONV;
16908 break;
16909
16910 case NOT:
16911 case NEG:
16912 /* MVNS <Rd>,<Rm> */
16913 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
16914 if (low_register_operand (op0, SImode))
16915 action = CONV;
16916 break;
16917
16918 case CONST_INT:
16919 /* MOVS <Rd>,#<imm8> */
16920 if (CONST_INT_P (src)
16921 && IN_RANGE (INTVAL (src), 0, 255))
16922 action = CONV;
16923 break;
16924
16925 case REG:
16926 /* MOVS and MOV<c> with registers have different
16927 encodings, so are not relevant here. */
16928 break;
16929
16930 default:
16931 break;
16932 }
16933 }
16934
16935 if (action != SKIP)
16936 {
16937 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
16938 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
16939 rtvec vec;
16940
16941 if (action == SWAP_CONV)
16942 {
16943 src = copy_rtx (src);
16944 XEXP (src, 0) = op1;
16945 XEXP (src, 1) = op0;
16946 pat = gen_rtx_SET (VOIDmode, dst, src);
16947 vec = gen_rtvec (2, pat, clobber);
16948 }
16949 else /* action == CONV */
16950 vec = gen_rtvec (2, pat, clobber);
16951
16952 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
16953 INSN_CODE (insn) = -1;
16954 }
16955 }
16956
16957 if (NONDEBUG_INSN_P (insn))
16958 df_simulate_one_insn_backwards (bb, insn, &live);
16959 }
16960 }
16961
16962 CLEAR_REG_SET (&live);
16963 }
16964
16965 /* GCC puts the pool in the wrong place for ARM, since we can only
16966 load addresses a limited distance around the pc. We do some
16967 special munging to move the constant pool values to the correct
16968 point in the code. */
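/* For example, a load of a constant that is out of range of the data
processing instructions is rewritten as a PC-relative load from a minipool
entry, and the pool itself is dumped after a suitable barrier within the
addressing range of that load.  */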
16969 static void
16970 arm_reorg (void)
16971 {
16972 rtx insn;
16973 HOST_WIDE_INT address = 0;
16974 Mfix * fix;
16975
16976 if (TARGET_THUMB1)
16977 thumb1_reorg ();
16978 else if (TARGET_THUMB2)
16979 thumb2_reorg ();
16980
16981 /* Ensure all insns that must be split have been split at this point.
16982 Otherwise, the pool placement code below may compute incorrect
16983 insn lengths. Note that when optimizing, all insns have already
16984 been split at this point. */
16985 if (!optimize)
16986 split_all_insns_noflow ();
16987
16988 minipool_fix_head = minipool_fix_tail = NULL;
16989
16990 /* The first insn must always be a note, or the code below won't
16991 scan it properly. */
16992 insn = get_insns ();
16993 gcc_assert (NOTE_P (insn));
16994 minipool_pad = 0;
16995
16996 /* Scan all the insns and record the operands that will need fixing. */
16997 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
16998 {
16999 if (BARRIER_P (insn))
17000 push_minipool_barrier (insn, address);
17001 else if (INSN_P (insn))
17002 {
17003 rtx table;
17004
17005 note_invalid_constants (insn, address, true);
17006 address += get_attr_length (insn);
17007
17008 /* If the insn is a vector jump, add the size of the table
17009 and skip the table. */
17010 if (tablejump_p (insn, NULL, &table))
17011 {
17012 address += get_jump_table_size (table);
17013 insn = table;
17014 }
17015 }
17016 else if (LABEL_P (insn))
17017 /* Add the worst-case padding due to alignment. We don't add
17018 the _current_ padding because the minipool insertions
17019 themselves might change it. */
17020 address += get_label_padding (insn);
17021 }
17022
17023 fix = minipool_fix_head;
17024
17025 /* Now scan the fixups and perform the required changes. */
17026 while (fix)
17027 {
17028 Mfix * ftmp;
17029 Mfix * fdel;
17030 Mfix * last_added_fix;
17031 Mfix * last_barrier = NULL;
17032 Mfix * this_fix;
17033
17034 /* Skip any further barriers before the next fix. */
17035 while (fix && BARRIER_P (fix->insn))
17036 fix = fix->next;
17037
17038 /* No more fixes. */
17039 if (fix == NULL)
17040 break;
17041
17042 last_added_fix = NULL;
17043
17044 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17045 {
17046 if (BARRIER_P (ftmp->insn))
17047 {
17048 if (ftmp->address >= minipool_vector_head->max_address)
17049 break;
17050
17051 last_barrier = ftmp;
17052 }
17053 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17054 break;
17055
17056 last_added_fix = ftmp; /* Keep track of the last fix added. */
17057 }
17058
17059 /* If we found a barrier, drop back to that; any fixes that we
17060 could have reached but come after the barrier will now go in
17061 the next mini-pool. */
17062 if (last_barrier != NULL)
17063 {
17064 /* Reduce the refcount for those fixes that won't go into this
17065 pool after all. */
17066 for (fdel = last_barrier->next;
17067 fdel && fdel != ftmp;
17068 fdel = fdel->next)
17069 {
17070 fdel->minipool->refcount--;
17071 fdel->minipool = NULL;
17072 }
17073
17074 ftmp = last_barrier;
17075 }
17076 else
17077 {
17078 /* ftmp is the first fix that we can't fit into this pool and
17079 there are no natural barriers that we could use. Insert a
17080 new barrier in the code somewhere between the previous
17081 fix and this one, and arrange to jump around it. */
17082 HOST_WIDE_INT max_address;
17083
17084 /* The last item on the list of fixes must be a barrier, so
17085 we can never run off the end of the list of fixes without
17086 last_barrier being set. */
17087 gcc_assert (ftmp);
17088
17089 max_address = minipool_vector_head->max_address;
17090 /* Check that there isn't another fix that is in range that
17091 we couldn't fit into this pool because the pool was
17092 already too large: we need to put the pool before such an
17093 instruction. The pool itself may come just after the
17094 fix because create_fix_barrier also allows space for a
17095 jump instruction. */
17096 if (ftmp->address < max_address)
17097 max_address = ftmp->address + 1;
17098
17099 last_barrier = create_fix_barrier (last_added_fix, max_address);
17100 }
17101
17102 assign_minipool_offsets (last_barrier);
17103
17104 while (ftmp)
17105 {
17106 if (!BARRIER_P (ftmp->insn)
17107 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17108 == NULL))
17109 break;
17110
17111 ftmp = ftmp->next;
17112 }
17113
17114 /* Scan over the fixes we have identified for this pool, fixing them
17115 up and adding the constants to the pool itself. */
17116 for (this_fix = fix; this_fix && ftmp != this_fix;
17117 this_fix = this_fix->next)
17118 if (!BARRIER_P (this_fix->insn))
17119 {
17120 rtx addr
17121 = plus_constant (Pmode,
17122 gen_rtx_LABEL_REF (VOIDmode,
17123 minipool_vector_label),
17124 this_fix->minipool->offset);
17125 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17126 }
17127
17128 dump_minipool (last_barrier->insn);
17129 fix = ftmp;
17130 }
17131
17132 /* From now on we must synthesize any constants that we can't handle
17133 directly. This can happen if the RTL gets split during final
17134 instruction generation. */
17135 after_arm_reorg = 1;
17136
17137 /* Free the minipool memory. */
17138 obstack_free (&minipool_obstack, minipool_startobj);
17139 }
17140 \f
17141 /* Routines to output assembly language. */
17142
17143 /* If the rtx is the correct value then return the string representation of
17144 the number. In this way we can ensure that valid double constants are
17145 generated even when cross-compiling. */
17146 const char *
17147 fp_immediate_constant (rtx x)
17148 {
17149 REAL_VALUE_TYPE r;
17150
17151 if (!fp_consts_inited)
17152 init_fp_table ();
17153
17154 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
17155
17156 gcc_assert (REAL_VALUES_EQUAL (r, value_fp0));
17157 return "0";
17158 }
17159
17160 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
17161 static const char *
17162 fp_const_from_val (REAL_VALUE_TYPE *r)
17163 {
17164 if (!fp_consts_inited)
17165 init_fp_table ();
17166
17167 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17168 return "0";
17169 }
17170
17171 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17172 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17173 insn is in the list, and UPDATE is true iff the list contains an
17174 explicit update of the base register. */
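/* For example, popping r4, r5 and the PC through SP with writeback is
emitted as "pop {r4, r5, pc}" under unified syntax, and as
"ldmfd sp!, {r4, r5, pc}" otherwise.  */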
17175 void
17176 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17177 bool update)
17178 {
17179 int i;
17180 char pattern[100];
17181 int offset;
17182 const char *conditional;
17183 int num_saves = XVECLEN (operands[0], 0);
17184 unsigned int regno;
17185 unsigned int regno_base = REGNO (operands[1]);
17186
17187 offset = 0;
17188 offset += update ? 1 : 0;
17189 offset += return_pc ? 1 : 0;
17190
17191 /* Is the base register in the list? */
17192 for (i = offset; i < num_saves; i++)
17193 {
17194 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17195 /* If SP is in the list, then the base register must be SP. */
17196 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17197 /* If base register is in the list, there must be no explicit update. */
17198 if (regno == regno_base)
17199 gcc_assert (!update);
17200 }
17201
17202 conditional = reverse ? "%?%D0" : "%?%d0";
17203 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17204 {
17205 /* Output pop (not stmfd) because it has a shorter encoding. */
17206 gcc_assert (update);
17207 sprintf (pattern, "pop%s\t{", conditional);
17208 }
17209 else
17210 {
17211 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17212 It's just a convention; their semantics are identical. */
17213 if (regno_base == SP_REGNUM)
17214 sprintf (pattern, "ldm%sfd\t", conditional);
17215 else if (TARGET_UNIFIED_ASM)
17216 sprintf (pattern, "ldmia%s\t", conditional);
17217 else
17218 sprintf (pattern, "ldm%sia\t", conditional);
17219
17220 strcat (pattern, reg_names[regno_base]);
17221 if (update)
17222 strcat (pattern, "!, {");
17223 else
17224 strcat (pattern, ", {");
17225 }
17226
17227 /* Output the first destination register. */
17228 strcat (pattern,
17229 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17230
17231 /* Output the rest of the destination registers. */
17232 for (i = offset + 1; i < num_saves; i++)
17233 {
17234 strcat (pattern, ", ");
17235 strcat (pattern,
17236 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17237 }
17238
17239 strcat (pattern, "}");
17240
17241 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17242 strcat (pattern, "^");
17243
17244 output_asm_insn (pattern, &cond);
17245 }
17246
17247
17248 /* Output the assembly for a store multiple. */
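/* For example, pushing the register pair d8, d9 below the stack pointer
produces something like "fstmfdd sp!, {d8, d9}".  */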
17249
17250 const char *
17251 vfp_output_fstmd (rtx * operands)
17252 {
17253 char pattern[100];
17254 int p;
17255 int base;
17256 int i;
17257
17258 strcpy (pattern, "fstmfdd%?\t%m0!, {%P1");
17259 p = strlen (pattern);
17260
17261 gcc_assert (REG_P (operands[1]));
17262
17263 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17264 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17265 {
17266 p += sprintf (&pattern[p], ", d%d", base + i);
17267 }
17268 strcpy (&pattern[p], "}");
17269
17270 output_asm_insn (pattern, operands);
17271 return "";
17272 }
17273
17274
17275 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17276 number of bytes pushed. */
17277
17278 static int
17279 vfp_emit_fstmd (int base_reg, int count)
17280 {
17281 rtx par;
17282 rtx dwarf;
17283 rtx tmp, reg;
17284 int i;
17285
17286 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly
17287 two register pairs are stored by a store multiple insn. We avoid this
17288 by pushing an extra pair. */
17289 if (count == 2 && !arm_arch6)
17290 {
17291 if (base_reg == LAST_VFP_REGNUM - 3)
17292 base_reg -= 2;
17293 count++;
17294 }
17295
17296 /* FSTMD may not store more than 16 doubleword registers at once. Split
17297 larger stores into multiple parts (up to a maximum of two, in
17298 practice). */
17299 if (count > 16)
17300 {
17301 int saved;
17302 /* NOTE: base_reg is an internal register number, so each D register
17303 counts as 2. */
17304 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17305 saved += vfp_emit_fstmd (base_reg, 16);
17306 return saved;
17307 }
17308
17309 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17310 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17311
17312 reg = gen_rtx_REG (DFmode, base_reg);
17313 base_reg += 2;
17314
17315 XVECEXP (par, 0, 0)
17316 = gen_rtx_SET (VOIDmode,
17317 gen_frame_mem
17318 (BLKmode,
17319 gen_rtx_PRE_MODIFY (Pmode,
17320 stack_pointer_rtx,
17321 plus_constant
17322 (Pmode, stack_pointer_rtx,
17323 - (count * 8)))
17324 ),
17325 gen_rtx_UNSPEC (BLKmode,
17326 gen_rtvec (1, reg),
17327 UNSPEC_PUSH_MULT));
17328
17329 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17330 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17331 RTX_FRAME_RELATED_P (tmp) = 1;
17332 XVECEXP (dwarf, 0, 0) = tmp;
17333
17334 tmp = gen_rtx_SET (VOIDmode,
17335 gen_frame_mem (DFmode, stack_pointer_rtx),
17336 reg);
17337 RTX_FRAME_RELATED_P (tmp) = 1;
17338 XVECEXP (dwarf, 0, 1) = tmp;
17339
17340 for (i = 1; i < count; i++)
17341 {
17342 reg = gen_rtx_REG (DFmode, base_reg);
17343 base_reg += 2;
17344 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17345
17346 tmp = gen_rtx_SET (VOIDmode,
17347 gen_frame_mem (DFmode,
17348 plus_constant (Pmode,
17349 stack_pointer_rtx,
17350 i * 8)),
17351 reg);
17352 RTX_FRAME_RELATED_P (tmp) = 1;
17353 XVECEXP (dwarf, 0, i + 1) = tmp;
17354 }
17355
17356 par = emit_insn (par);
17357 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17358 RTX_FRAME_RELATED_P (par) = 1;
17359
17360 return count * 8;
17361 }
17362
17363 /* Emit a call instruction with pattern PAT. ADDR is the address of
17364 the call target. */
17365
17366 void
17367 arm_emit_call_insn (rtx pat, rtx addr)
17368 {
17369 rtx insn;
17370
17371 insn = emit_call_insn (pat);
17372
17373 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17374 If the call might use such an entry, add a use of the PIC register
17375 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17376 if (TARGET_VXWORKS_RTP
17377 && flag_pic
17378 && GET_CODE (addr) == SYMBOL_REF
17379 && (SYMBOL_REF_DECL (addr)
17380 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17381 : !SYMBOL_REF_LOCAL_P (addr)))
17382 {
17383 require_pic_register ();
17384 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17385 }
17386 }
17387
17388 /* Output a 'call' insn. */
17389 const char *
17390 output_call (rtx *operands)
17391 {
17392 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17393
17394 /* Handle calls to lr using ip (which may be clobbered in the subroutine anyway). */
17395 if (REGNO (operands[0]) == LR_REGNUM)
17396 {
17397 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17398 output_asm_insn ("mov%?\t%0, %|lr", operands);
17399 }
17400
17401 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17402
17403 if (TARGET_INTERWORK || arm_arch4t)
17404 output_asm_insn ("bx%?\t%0", operands);
17405 else
17406 output_asm_insn ("mov%?\t%|pc, %0", operands);
17407
17408 return "";
17409 }
17410
17411 /* Output a 'call' insn that is a reference in memory. This is
17412 disabled for ARMv5, where we prefer a blx instead, because otherwise
17413 there's a significant performance overhead. */
17414 const char *
17415 output_call_mem (rtx *operands)
17416 {
17417 gcc_assert (!arm_arch5);
17418 if (TARGET_INTERWORK)
17419 {
17420 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17421 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17422 output_asm_insn ("bx%?\t%|ip", operands);
17423 }
17424 else if (regno_use_in (LR_REGNUM, operands[0]))
17425 {
17426 /* LR is used in the memory address. We load the address in the
17427 first instruction. It's safe to use IP as the target of the
17428 load since the call will kill it anyway. */
17429 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17430 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17431 if (arm_arch4t)
17432 output_asm_insn ("bx%?\t%|ip", operands);
17433 else
17434 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17435 }
17436 else
17437 {
17438 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17439 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17440 }
17441
17442 return "";
17443 }
17444
17445
17446 /* Output a move from ARM registers to ARM registers of a long double.
17447 OPERANDS[0] is the destination.
17448 OPERANDS[1] is the source. */
17449 const char *
17450 output_mov_long_double_arm_from_arm (rtx *operands)
17451 {
17452 /* We have to be careful here because the two might overlap. */
17453 int dest_start = REGNO (operands[0]);
17454 int src_start = REGNO (operands[1]);
17455 rtx ops[2];
17456 int i;
17457
17458 if (dest_start < src_start)
17459 {
17460 for (i = 0; i < 3; i++)
17461 {
17462 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17463 ops[1] = gen_rtx_REG (SImode, src_start + i);
17464 output_asm_insn ("mov%?\t%0, %1", ops);
17465 }
17466 }
17467 else
17468 {
17469 for (i = 2; i >= 0; i--)
17470 {
17471 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17472 ops[1] = gen_rtx_REG (SImode, src_start + i);
17473 output_asm_insn ("mov%?\t%0, %1", ops);
17474 }
17475 }
17476
17477 return "";
17478 }
17479
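/* Emit a pair of SImode sets that load SRC into DEST.  For a constant this
is the low 16 bits followed, when needed, by an insertion of the high 16
bits (a movw/movt style sequence); otherwise a HIGH/LO_SUM pair is
emitted.  */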
17480 void
17481 arm_emit_movpair (rtx dest, rtx src)
17482 {
17483 /* If the src is an immediate, simplify it. */
17484 if (CONST_INT_P (src))
17485 {
17486 HOST_WIDE_INT val = INTVAL (src);
17487 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17488 if ((val >> 16) & 0x0000ffff)
17489 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17490 GEN_INT (16)),
17491 GEN_INT ((val >> 16) & 0x0000ffff));
17492 return;
17493 }
17494 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17495 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17496 }
17497
17498 /* Output a move between double words. It must be REG<-MEM
17499 or MEM<-REG. */
17500 const char *
17501 output_move_double (rtx *operands, bool emit, int *count)
17502 {
17503 enum rtx_code code0 = GET_CODE (operands[0]);
17504 enum rtx_code code1 = GET_CODE (operands[1]);
17505 rtx otherops[3];
17506 if (count)
17507 *count = 1;
17508
17509 /* The only case when this might happen is when
17510 you are looking at the length of a DImode instruction
17511 that has an invalid constant in it. */
17512 if (code0 == REG && code1 != MEM)
17513 {
17514 gcc_assert (!emit);
17515 *count = 2;
17516 return "";
17517 }
17518
17519 if (code0 == REG)
17520 {
17521 unsigned int reg0 = REGNO (operands[0]);
17522
17523 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17524
17525 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17526
17527 switch (GET_CODE (XEXP (operands[1], 0)))
17528 {
17529 case REG:
17530
17531 if (emit)
17532 {
17533 if (TARGET_LDRD
17534 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
17535 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17536 else
17537 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17538 }
17539 break;
17540
17541 case PRE_INC:
17542 gcc_assert (TARGET_LDRD);
17543 if (emit)
17544 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17545 break;
17546
17547 case PRE_DEC:
17548 if (emit)
17549 {
17550 if (TARGET_LDRD)
17551 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17552 else
17553 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17554 }
17555 break;
17556
17557 case POST_INC:
17558 if (emit)
17559 {
17560 if (TARGET_LDRD)
17561 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17562 else
17563 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17564 }
17565 break;
17566
17567 case POST_DEC:
17568 gcc_assert (TARGET_LDRD);
17569 if (emit)
17570 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17571 break;
17572
17573 case PRE_MODIFY:
17574 case POST_MODIFY:
17575 /* Autoincrement addressing modes should never have overlapping
17576 base and destination registers, and overlapping index registers
17577 are already prohibited, so this doesn't need to worry about
17578 fix_cm3_ldrd. */
17579 otherops[0] = operands[0];
17580 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17581 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17582
17583 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17584 {
17585 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17586 {
17587 /* Registers overlap so split out the increment. */
17588 if (emit)
17589 {
17590 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17591 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17592 }
17593 if (count)
17594 *count = 2;
17595 }
17596 else
17597 {
17598 /* Use a single insn if we can.
17599 FIXME: IWMMXT allows offsets larger than ldrd can
17600 handle; fix these up with a pair of ldr. */
17601 if (TARGET_THUMB2
17602 || !CONST_INT_P (otherops[2])
17603 || (INTVAL (otherops[2]) > -256
17604 && INTVAL (otherops[2]) < 256))
17605 {
17606 if (emit)
17607 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17608 }
17609 else
17610 {
17611 if (emit)
17612 {
17613 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17614 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17615 }
17616 if (count)
17617 *count = 2;
17618
17619 }
17620 }
17621 }
17622 else
17623 {
17624 /* Use a single insn if we can.
17625 FIXME: IWMMXT allows offsets larger than ldrd can handle;
17626 fix these up with a pair of ldr. */
17627 if (TARGET_THUMB2
17628 || !CONST_INT_P (otherops[2])
17629 || (INTVAL (otherops[2]) > -256
17630 && INTVAL (otherops[2]) < 256))
17631 {
17632 if (emit)
17633 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17634 }
17635 else
17636 {
17637 if (emit)
17638 {
17639 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17640 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
17641 }
17642 if (count)
17643 *count = 2;
17644 }
17645 }
17646 break;
17647
17648 case LABEL_REF:
17649 case CONST:
17650 /* We might be able to use ldrd %0, %1 here. However, the range is
17651 different from that of ldr/adr, and it is broken on some ARMv7-M
17652 implementations. */
17653 /* Use the second register of the pair to avoid problematic
17654 overlap. */
17655 otherops[1] = operands[1];
17656 if (emit)
17657 output_asm_insn ("adr%?\t%0, %1", otherops);
17658 operands[1] = otherops[0];
17659 if (emit)
17660 {
17661 if (TARGET_LDRD)
17662 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17663 else
17664 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
17665 }
17666
17667 if (count)
17668 *count = 2;
17669 break;
17670
17671 /* ??? This needs checking for thumb2. */
17672 default:
17673 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
17674 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
17675 {
17676 otherops[0] = operands[0];
17677 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
17678 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
17679
17680 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
17681 {
17682 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17683 {
17684 switch ((int) INTVAL (otherops[2]))
17685 {
17686 case -8:
17687 if (emit)
17688 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
17689 return "";
17690 case -4:
17691 if (TARGET_THUMB2)
17692 break;
17693 if (emit)
17694 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
17695 return "";
17696 case 4:
17697 if (TARGET_THUMB2)
17698 break;
17699 if (emit)
17700 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
17701 return "";
17702 }
17703 }
17704 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
17705 operands[1] = otherops[0];
17706 if (TARGET_LDRD
17707 && (REG_P (otherops[2])
17708 || TARGET_THUMB2
17709 || (CONST_INT_P (otherops[2])
17710 && INTVAL (otherops[2]) > -256
17711 && INTVAL (otherops[2]) < 256)))
17712 {
17713 if (reg_overlap_mentioned_p (operands[0],
17714 otherops[2]))
17715 {
17716 rtx tmp;
17717 /* Swap base and index registers over to
17718 avoid a conflict. */
17719 tmp = otherops[1];
17720 otherops[1] = otherops[2];
17721 otherops[2] = tmp;
17722 }
17723 /* If both registers conflict, it will usually
17724 have been fixed by a splitter. */
17725 if (reg_overlap_mentioned_p (operands[0], otherops[2])
17726 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
17727 {
17728 if (emit)
17729 {
17730 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17731 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
17732 }
17733 if (count)
17734 *count = 2;
17735 }
17736 else
17737 {
17738 otherops[0] = operands[0];
17739 if (emit)
17740 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
17741 }
17742 return "";
17743 }
17744
17745 if (CONST_INT_P (otherops[2]))
17746 {
17747 if (emit)
17748 {
17749 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
17750 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
17751 else
17752 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17753 }
17754 }
17755 else
17756 {
17757 if (emit)
17758 output_asm_insn ("add%?\t%0, %1, %2", otherops);
17759 }
17760 }
17761 else
17762 {
17763 if (emit)
17764 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
17765 }
17766
17767 if (count)
17768 *count = 2;
17769
17770 if (TARGET_LDRD)
17771 return "ldr%(d%)\t%0, [%1]";
17772
17773 return "ldm%(ia%)\t%1, %M0";
17774 }
17775 else
17776 {
17777 otherops[1] = adjust_address (operands[1], SImode, 4);
17778 /* Take care of overlapping base/data reg. */
17779 if (reg_mentioned_p (operands[0], operands[1]))
17780 {
17781 if (emit)
17782 {
17783 output_asm_insn ("ldr%?\t%0, %1", otherops);
17784 output_asm_insn ("ldr%?\t%0, %1", operands);
17785 }
17786 if (count)
17787 *count = 2;
17788
17789 }
17790 else
17791 {
17792 if (emit)
17793 {
17794 output_asm_insn ("ldr%?\t%0, %1", operands);
17795 output_asm_insn ("ldr%?\t%0, %1", otherops);
17796 }
17797 if (count)
17798 *count = 2;
17799 }
17800 }
17801 }
17802 }
17803 else
17804 {
17805 /* Constraints should ensure this. */
17806 gcc_assert (code0 == MEM && code1 == REG);
17807 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
17808 || (TARGET_ARM && TARGET_LDRD));
17809
17810 switch (GET_CODE (XEXP (operands[0], 0)))
17811 {
17812 case REG:
17813 if (emit)
17814 {
17815 if (TARGET_LDRD)
17816 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
17817 else
17818 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
17819 }
17820 break;
17821
17822 case PRE_INC:
17823 gcc_assert (TARGET_LDRD);
17824 if (emit)
17825 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
17826 break;
17827
17828 case PRE_DEC:
17829 if (emit)
17830 {
17831 if (TARGET_LDRD)
17832 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
17833 else
17834 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
17835 }
17836 break;
17837
17838 case POST_INC:
17839 if (emit)
17840 {
17841 if (TARGET_LDRD)
17842 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
17843 else
17844 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
17845 }
17846 break;
17847
17848 case POST_DEC:
17849 gcc_assert (TARGET_LDRD);
17850 if (emit)
17851 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
17852 break;
17853
17854 case PRE_MODIFY:
17855 case POST_MODIFY:
17856 otherops[0] = operands[1];
17857 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
17858 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
17859
17860 /* IWMMXT allows offsets larger than ldrd can handle;
17861 fix these up with a pair of ldr. */
17862 if (!TARGET_THUMB2
17863 && CONST_INT_P (otherops[2])
17864 && (INTVAL (otherops[2]) <= -256
17865 || INTVAL (otherops[2]) >= 256))
17866 {
17867 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17868 {
17869 if (emit)
17870 {
17871 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
17872 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17873 }
17874 if (count)
17875 *count = 2;
17876 }
17877 else
17878 {
17879 if (emit)
17880 {
17881 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
17882 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
17883 }
17884 if (count)
17885 *count = 2;
17886 }
17887 }
17888 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
17889 {
17890 if (emit)
17891 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
17892 }
17893 else
17894 {
17895 if (emit)
17896 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
17897 }
17898 break;
17899
17900 case PLUS:
17901 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
17902 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
17903 {
17904 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
17905 {
17906 case -8:
17907 if (emit)
17908 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
17909 return "";
17910
17911 case -4:
17912 if (TARGET_THUMB2)
17913 break;
17914 if (emit)
17915 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
17916 return "";
17917
17918 case 4:
17919 if (TARGET_THUMB2)
17920 break;
17921 if (emit)
17922 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
17923 return "";
17924 }
17925 }
17926 if (TARGET_LDRD
17927 && (REG_P (otherops[2])
17928 || TARGET_THUMB2
17929 || (CONST_INT_P (otherops[2])
17930 && INTVAL (otherops[2]) > -256
17931 && INTVAL (otherops[2]) < 256)))
17932 {
17933 otherops[0] = operands[1];
17934 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
17935 if (emit)
17936 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
17937 return "";
17938 }
17939 /* Fall through */
17940
17941 default:
17942 otherops[0] = adjust_address (operands[0], SImode, 4);
17943 otherops[1] = operands[1];
17944 if (emit)
17945 {
17946 output_asm_insn ("str%?\t%1, %0", operands);
17947 output_asm_insn ("str%?\t%H1, %0", otherops);
17948 }
17949 if (count)
17950 *count = 2;
17951 }
17952 }
17953
17954 return "";
17955 }
17956
17957 /* Output a move, load or store for quad-word vectors in ARM registers. Only
17958 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
17959
17960 const char *
17961 output_move_quad (rtx *operands)
17962 {
17963 if (REG_P (operands[0]))
17964 {
17965 /* Load, or reg->reg move. */
17966
17967 if (MEM_P (operands[1]))
17968 {
17969 switch (GET_CODE (XEXP (operands[1], 0)))
17970 {
17971 case REG:
17972 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17973 break;
17974
17975 case LABEL_REF:
17976 case CONST:
17977 output_asm_insn ("adr%?\t%0, %1", operands);
17978 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
17979 break;
17980
17981 default:
17982 gcc_unreachable ();
17983 }
17984 }
17985 else
17986 {
17987 rtx ops[2];
17988 int dest, src, i;
17989
17990 gcc_assert (REG_P (operands[1]));
17991
17992 dest = REGNO (operands[0]);
17993 src = REGNO (operands[1]);
17994
17995 /* This seems pretty dumb, but hopefully GCC won't try to do it
17996 very often. */
17997 if (dest < src)
17998 for (i = 0; i < 4; i++)
17999 {
18000 ops[0] = gen_rtx_REG (SImode, dest + i);
18001 ops[1] = gen_rtx_REG (SImode, src + i);
18002 output_asm_insn ("mov%?\t%0, %1", ops);
18003 }
18004 else
18005 for (i = 3; i >= 0; i--)
18006 {
18007 ops[0] = gen_rtx_REG (SImode, dest + i);
18008 ops[1] = gen_rtx_REG (SImode, src + i);
18009 output_asm_insn ("mov%?\t%0, %1", ops);
18010 }
18011 }
18012 }
18013 else
18014 {
18015 gcc_assert (MEM_P (operands[0]));
18016 gcc_assert (REG_P (operands[1]));
18017 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18018
18019 switch (GET_CODE (XEXP (operands[0], 0)))
18020 {
18021 case REG:
18022 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18023 break;
18024
18025 default:
18026 gcc_unreachable ();
18027 }
18028 }
18029
18030 return "";
18031 }
18032
18033 /* Output a VFP load or store instruction. */
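/* For example, an SFmode load from [r0] comes out as "flds s0, [r0]",
while a DFmode store with pre-decrement of the base register uses the
"fstmdbd r0!, {d0}" form.  */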
18034
18035 const char *
18036 output_move_vfp (rtx *operands)
18037 {
18038 rtx reg, mem, addr, ops[2];
18039 int load = REG_P (operands[0]);
18040 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18041 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18042 const char *templ;
18043 char buff[50];
18044 enum machine_mode mode;
18045
18046 reg = operands[!load];
18047 mem = operands[load];
18048
18049 mode = GET_MODE (reg);
18050
18051 gcc_assert (REG_P (reg));
18052 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18053 gcc_assert (mode == SFmode
18054 || mode == DFmode
18055 || mode == SImode
18056 || mode == DImode
18057 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18058 gcc_assert (MEM_P (mem));
18059
18060 addr = XEXP (mem, 0);
18061
18062 switch (GET_CODE (addr))
18063 {
18064 case PRE_DEC:
18065 templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
18066 ops[0] = XEXP (addr, 0);
18067 ops[1] = reg;
18068 break;
18069
18070 case POST_INC:
18071 templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
18072 ops[0] = XEXP (addr, 0);
18073 ops[1] = reg;
18074 break;
18075
18076 default:
18077 templ = "f%s%c%%?\t%%%s0, %%1%s";
18078 ops[0] = reg;
18079 ops[1] = mem;
18080 break;
18081 }
18082
18083 sprintf (buff, templ,
18084 load ? "ld" : "st",
18085 dp ? 'd' : 's',
18086 dp ? "P" : "",
18087 integer_p ? "\t%@ int" : "");
18088 output_asm_insn (buff, ops);
18089
18090 return "";
18091 }
18092
18093 /* Output a Neon double-word or quad-word load or store, or a load
18094 or store for larger structure modes.
18095
18096 WARNING: The ordering of elements is weird in big-endian mode,
18097 because the EABI requires that vectors stored in memory appear
18098 as though they were stored by a VSTM instruction.
18099 GCC RTL defines element ordering based on in-memory order.
18100 This can be different from the architectural ordering of elements
18101 within a NEON register. The intrinsics defined in arm_neon.h use the
18102 NEON register element ordering, not the GCC RTL element ordering.
18103
18104 For example, the in-memory ordering of a big-endian quadword
18105 vector with 16-bit elements when stored from register pair {d0,d1}
18106 will be (lowest address first, d0[N] is NEON register element N):
18107
18108 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18109
18110 When necessary, quadword registers (dN, dN+1) are moved to ARM
18111 registers from rN in the order:
18112
18113 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18114
18115 So that STM/LDM can be used on vectors in ARM registers, and the
18116 same memory layout will result as if VSTM/VLDM were used.
18117
18118 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18119 possible, which allows use of appropriate alignment tags.
18120 Note that the choice of "64" is independent of the actual vector
18121 element size; this size simply ensures that the behavior is
18122 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18123
18124 Due to limitations of those instructions, use of VST1.64/VLD1.64
18125 is not possible if:
18126 - the address contains PRE_DEC, or
18127 - the mode refers to more than 4 double-word registers
18128
18129 In those cases, it would be possible to replace VSTM/VLDM by a
18130 sequence of instructions; this is not currently implemented since
18131 this is not certain to actually improve performance. */
18132
18133 const char *
18134 output_move_neon (rtx *operands)
18135 {
18136 rtx reg, mem, addr, ops[2];
18137 int regno, nregs, load = REG_P (operands[0]);
18138 const char *templ;
18139 char buff[50];
18140 enum machine_mode mode;
18141
18142 reg = operands[!load];
18143 mem = operands[load];
18144
18145 mode = GET_MODE (reg);
18146
18147 gcc_assert (REG_P (reg));
18148 regno = REGNO (reg);
18149 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18150 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18151 || NEON_REGNO_OK_FOR_QUAD (regno));
18152 gcc_assert (VALID_NEON_DREG_MODE (mode)
18153 || VALID_NEON_QREG_MODE (mode)
18154 || VALID_NEON_STRUCT_MODE (mode));
18155 gcc_assert (MEM_P (mem));
18156
18157 addr = XEXP (mem, 0);
18158
18159 /* Strip off const from addresses like (const (plus (...))). */
18160 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18161 addr = XEXP (addr, 0);
18162
18163 switch (GET_CODE (addr))
18164 {
18165 case POST_INC:
18166 /* We have to use vldm / vstm for too-large modes. */
18167 if (nregs > 4)
18168 {
18169 templ = "v%smia%%?\t%%0!, %%h1";
18170 ops[0] = XEXP (addr, 0);
18171 }
18172 else
18173 {
18174 templ = "v%s1.64\t%%h1, %%A0";
18175 ops[0] = mem;
18176 }
18177 ops[1] = reg;
18178 break;
18179
18180 case PRE_DEC:
18181 /* We have to use vldm / vstm in this case, since there is no
18182 pre-decrement form of the vld1 / vst1 instructions. */
18183 templ = "v%smdb%%?\t%%0!, %%h1";
18184 ops[0] = XEXP (addr, 0);
18185 ops[1] = reg;
18186 break;
18187
18188 case POST_MODIFY:
18189 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18190 gcc_unreachable ();
18191
18192 case LABEL_REF:
18193 case PLUS:
18194 {
18195 int i;
18196 int overlap = -1;
18197 for (i = 0; i < nregs; i++)
18198 {
18199 /* We're only using DImode here because it's a convenient size. */
18200 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18201 ops[1] = adjust_address (mem, DImode, 8 * i);
18202 if (reg_overlap_mentioned_p (ops[0], mem))
18203 {
18204 gcc_assert (overlap == -1);
18205 overlap = i;
18206 }
18207 else
18208 {
18209 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18210 output_asm_insn (buff, ops);
18211 }
18212 }
18213 if (overlap != -1)
18214 {
18215 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18216 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18217 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18218 output_asm_insn (buff, ops);
18219 }
18220
18221 return "";
18222 }
18223
18224 default:
18225 /* We have to use vldm / vstm for too-large modes. */
18226 if (nregs > 4)
18227 templ = "v%smia%%?\t%%m0, %%h1";
18228 else
18229 templ = "v%s1.64\t%%h1, %%A0";
18230
18231 ops[0] = mem;
18232 ops[1] = reg;
18233 }
18234
18235 sprintf (buff, templ, load ? "ld" : "st");
18236 output_asm_insn (buff, ops);
18237
18238 return "";
18239 }
18240
18241 /* Compute and return the length of neon_mov<mode>, where <mode> is
18242 one of the VSTRUCT modes: EI, OI, CI or XI. */
18243 int
18244 arm_attr_length_move_neon (rtx insn)
18245 {
18246 rtx reg, mem, addr;
18247 int load;
18248 enum machine_mode mode;
18249
18250 extract_insn_cached (insn);
18251
18252 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18253 {
18254 mode = GET_MODE (recog_data.operand[0]);
18255 switch (mode)
18256 {
18257 case EImode:
18258 case OImode:
18259 return 8;
18260 case CImode:
18261 return 12;
18262 case XImode:
18263 return 16;
18264 default:
18265 gcc_unreachable ();
18266 }
18267 }
18268
18269 load = REG_P (recog_data.operand[0]);
18270 reg = recog_data.operand[!load];
18271 mem = recog_data.operand[load];
18272
18273 gcc_assert (MEM_P (mem));
18274
18275 mode = GET_MODE (reg);
18276 addr = XEXP (mem, 0);
18277
18278 /* Strip off const from addresses like (const (plus (...))). */
18279 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18280 addr = XEXP (addr, 0);
18281
18282 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18283 {
18284 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18285 return insns * 4;
18286 }
18287 else
18288 return 4;
18289 }
18290
18291 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18292 return zero. */
18293
18294 int
18295 arm_address_offset_is_imm (rtx insn)
18296 {
18297 rtx mem, addr;
18298
18299 extract_insn_cached (insn);
18300
18301 if (REG_P (recog_data.operand[0]))
18302 return 0;
18303
18304 mem = recog_data.operand[0];
18305
18306 gcc_assert (MEM_P (mem));
18307
18308 addr = XEXP (mem, 0);
18309
18310 if (REG_P (addr)
18311 || (GET_CODE (addr) == PLUS
18312 && REG_P (XEXP (addr, 0))
18313 && CONST_INT_P (XEXP (addr, 1))))
18314 return 1;
18315 else
18316 return 0;
18317 }
18318
18319 /* Output an ADD r, s, #n where n may be too big for one instruction.
18320 If adding zero and the source and destination registers are the same,
output nothing. */
18321 const char *
18322 output_add_immediate (rtx *operands)
18323 {
18324 HOST_WIDE_INT n = INTVAL (operands[2]);
18325
18326 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18327 {
18328 if (n < 0)
18329 output_multi_immediate (operands,
18330 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18331 -n);
18332 else
18333 output_multi_immediate (operands,
18334 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18335 n);
18336 }
18337
18338 return "";
18339 }
18340
18341 /* Output a multiple immediate operation.
18342 OPERANDS is the vector of operands referred to in the output patterns.
18343 INSTR1 is the output pattern to use for the first constant.
18344 INSTR2 is the output pattern to use for subsequent constants.
18345 IMMED_OP is the index of the constant slot in OPERANDS.
18346 N is the constant value. */
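/* For example, with N == 0x10001 and the usual add patterns this emits
roughly "add r0, r1, #1" for the low chunk followed by
"add r0, r0, #65536" for the high chunk; each step handles one 8-bit
field aligned on an even bit position.  */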
18347 static const char *
18348 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18349 int immed_op, HOST_WIDE_INT n)
18350 {
18351 #if HOST_BITS_PER_WIDE_INT > 32
18352 n &= 0xffffffff;
18353 #endif
18354
18355 if (n == 0)
18356 {
18357 /* Quick and easy output. */
18358 operands[immed_op] = const0_rtx;
18359 output_asm_insn (instr1, operands);
18360 }
18361 else
18362 {
18363 int i;
18364 const char * instr = instr1;
18365
18366 /* Note that n is never zero here (which would give no output). */
18367 for (i = 0; i < 32; i += 2)
18368 {
18369 if (n & (3 << i))
18370 {
18371 operands[immed_op] = GEN_INT (n & (255 << i));
18372 output_asm_insn (instr, operands);
18373 instr = instr2;
18374 i += 6;
18375 }
18376 }
18377 }
18378
18379 return "";
18380 }
18381
18382 /* Return the name of a shifter operation. */
18383 static const char *
18384 arm_shift_nmem (enum rtx_code code)
18385 {
18386 switch (code)
18387 {
18388 case ASHIFT:
18389 return ARM_LSL_NAME;
18390
18391 case ASHIFTRT:
18392 return "asr";
18393
18394 case LSHIFTRT:
18395 return "lsr";
18396
18397 case ROTATERT:
18398 return "ror";
18399
18400 default:
18401 abort ();
18402 }
18403 }
18404
18405 /* Return the appropriate ARM instruction for the operation code.
18406 The returned result should not be overwritten. OP is the rtx of the
18407 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18408 was shifted. */
18409 const char *
18410 arithmetic_instr (rtx op, int shift_first_arg)
18411 {
18412 switch (GET_CODE (op))
18413 {
18414 case PLUS:
18415 return "add";
18416
18417 case MINUS:
18418 return shift_first_arg ? "rsb" : "sub";
18419
18420 case IOR:
18421 return "orr";
18422
18423 case XOR:
18424 return "eor";
18425
18426 case AND:
18427 return "and";
18428
18429 case ASHIFT:
18430 case ASHIFTRT:
18431 case LSHIFTRT:
18432 case ROTATERT:
18433 return arm_shift_nmem (GET_CODE (op));
18434
18435 default:
18436 gcc_unreachable ();
18437 }
18438 }
18439
18440 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18441 for the operation code. The returned result should not be overwritten.
18442 OP is the rtx code of the shift.
18443 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
18444 contains the constant shift amount. */
18445 static const char *
18446 shift_op (rtx op, HOST_WIDE_INT *amountp)
18447 {
18448 const char * mnem;
18449 enum rtx_code code = GET_CODE (op);
18450
18451 switch (code)
18452 {
18453 case ROTATE:
18454 if (!CONST_INT_P (XEXP (op, 1)))
18455 {
18456 output_operand_lossage ("invalid shift operand");
18457 return NULL;
18458 }
18459
18460 code = ROTATERT;
18461 *amountp = 32 - INTVAL (XEXP (op, 1));
18462 mnem = "ror";
18463 break;
18464
18465 case ASHIFT:
18466 case ASHIFTRT:
18467 case LSHIFTRT:
18468 case ROTATERT:
18469 mnem = arm_shift_nmem (code);
18470 if (CONST_INT_P (XEXP (op, 1)))
18471 {
18472 *amountp = INTVAL (XEXP (op, 1));
18473 }
18474 else if (REG_P (XEXP (op, 1)))
18475 {
18476 *amountp = -1;
18477 return mnem;
18478 }
18479 else
18480 {
18481 output_operand_lossage ("invalid shift operand");
18482 return NULL;
18483 }
18484 break;
18485
18486 case MULT:
18487 /* We never have to worry about the amount being other than a
18488 power of 2, since this case can never be reloaded from a reg. */
18489 if (!CONST_INT_P (XEXP (op, 1)))
18490 {
18491 output_operand_lossage ("invalid shift operand");
18492 return NULL;
18493 }
18494
18495 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18496
18497 /* Amount must be a power of two. */
18498 if (*amountp & (*amountp - 1))
18499 {
18500 output_operand_lossage ("invalid shift operand");
18501 return NULL;
18502 }
18503
18504 *amountp = int_log2 (*amountp);
18505 return ARM_LSL_NAME;
18506
18507 default:
18508 output_operand_lossage ("invalid shift operand");
18509 return NULL;
18510 }
18511
18512 /* This is not 100% correct, but follows from the desire to merge
18513 multiplication by a power of 2 with the recognizer for a
18514 shift. >=32 is not a valid shift for "lsl", so we must try to
18515 output a shift that produces the correct arithmetical result.
18516 Using lsr #32 is identical except for the fact that the carry bit
18517 is not set correctly if we set the flags; but we never use the
18518 carry bit from such an operation, so we can ignore that. */
18519 if (code == ROTATERT)
18520 /* Rotate is just modulo 32. */
18521 *amountp &= 31;
18522 else if (*amountp != (*amountp & 31))
18523 {
18524 if (code == ASHIFT)
18525 mnem = "lsr";
18526 *amountp = 32;
18527 }
18528
18529 /* Shifts of 0 are no-ops. */
18530 if (*amountp == 0)
18531 return NULL;
18532
18533 return mnem;
18534 }
18535
18536 /* Return the shift count corresponding to POWER, which must be a power of two. */
18537
18538 static HOST_WIDE_INT
18539 int_log2 (HOST_WIDE_INT power)
18540 {
18541 HOST_WIDE_INT shift = 0;
18542
18543 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18544 {
18545 gcc_assert (shift <= 31);
18546 shift++;
18547 }
18548
18549 return shift;
18550 }
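
As a worked illustration of the MULT case in shift_op together with int_log2 above: a multiply by a power of two is emitted as an LSL by the corresponding bit position. This is a standalone sketch with a hypothetical operand of 8, not code from this file:

#include <assert.h>
#include <stdio.h>

int
main (void)
{
  long long amount = 8;                    /* hypothetical MULT operand */

  /* Power-of-two test used by shift_op before taking the log.  */
  assert ((amount & (amount - 1)) == 0);

  /* Same loop as int_log2: find the single set bit.  */
  int shift = 0;
  while (((1LL << shift) & amount) == 0)
    shift++;

  printf ("lsl #%d\n", shift);             /* prints "lsl #3" */
  return 0;
}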
18551
18552 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18553 because /bin/as is horribly restrictive. The judgement about
18554 whether or not each character is 'printable' (and can be output as
18555 is) or not (and must be printed with an octal escape) must be made
18556 with reference to the *host* character set -- the situation is
18557 similar to that discussed in the comments above pp_c_char in
18558 c-pretty-print.c. */
18559
18560 #define MAX_ASCII_LEN 51
18561
18562 void
18563 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18564 {
18565 int i;
18566 int len_so_far = 0;
18567
18568 fputs ("\t.ascii\t\"", stream);
18569
18570 for (i = 0; i < len; i++)
18571 {
18572 int c = p[i];
18573
18574 if (len_so_far >= MAX_ASCII_LEN)
18575 {
18576 fputs ("\"\n\t.ascii\t\"", stream);
18577 len_so_far = 0;
18578 }
18579
18580 if (ISPRINT (c))
18581 {
18582 if (c == '\\' || c == '\"')
18583 {
18584 putc ('\\', stream);
18585 len_so_far++;
18586 }
18587 putc (c, stream);
18588 len_so_far++;
18589 }
18590 else
18591 {
18592 fprintf (stream, "\\%03o", c);
18593 len_so_far += 4;
18594 }
18595 }
18596
18597 fputs ("\"\n", stream);
18598 }
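
A minimal standalone sketch of the same escaping rule, writing to stdout instead of the assembler stream (it uses the C library's isprint rather than the host-charset ISPRINT used above, and the input string is just an example):

#include <ctype.h>
#include <stdio.h>

int
main (void)
{
  const unsigned char s[] = "say \"hi\"\n";

  fputs ("\t.ascii\t\"", stdout);
  for (int i = 0; s[i] != '\0'; i++)
    {
      if (isprint (s[i]))
        {
          /* Backslashes and double quotes must themselves be escaped.  */
          if (s[i] == '\\' || s[i] == '\"')
            putchar ('\\');
          putchar (s[i]);
        }
      else
        printf ("\\%03o", s[i]);   /* octal escape for non-printables */
    }
  fputs ("\"\n", stdout);          /* emits: .ascii "say \"hi\"\012" */
  return 0;
}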
18599 \f
18600 /* Compute the register save mask for registers 0 through 12
18601 inclusive. This code is used by arm_compute_save_reg_mask. */
18602
18603 static unsigned long
18604 arm_compute_save_reg0_reg12_mask (void)
18605 {
18606 unsigned long func_type = arm_current_func_type ();
18607 unsigned long save_reg_mask = 0;
18608 unsigned int reg;
18609
18610 if (IS_INTERRUPT (func_type))
18611 {
18612 unsigned int max_reg;
18613 /* Interrupt functions must not corrupt any registers,
18614 even call clobbered ones. If this is a leaf function
18615 we can just examine the registers used by the RTL, but
18616 otherwise we have to assume that whatever function is
18617 called might clobber anything, and so we have to save
18618 all the call-clobbered registers as well. */
18619 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18620 /* FIQ handlers have registers r8 - r12 banked, so
18621 we only need to check r0 - r7. Normal ISRs only
18622 bank r14 and r15, so we must check up to r12.
18623 r13 is the stack pointer which is always preserved,
18624 so we do not need to consider it here. */
18625 max_reg = 7;
18626 else
18627 max_reg = 12;
18628
18629 for (reg = 0; reg <= max_reg; reg++)
18630 if (df_regs_ever_live_p (reg)
18631 || (! crtl->is_leaf && call_used_regs[reg]))
18632 save_reg_mask |= (1 << reg);
18633
18634 /* Also save the pic base register if necessary. */
18635 if (flag_pic
18636 && !TARGET_SINGLE_PIC_BASE
18637 && arm_pic_register != INVALID_REGNUM
18638 && crtl->uses_pic_offset_table)
18639 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18640 }
18641 else if (IS_VOLATILE (func_type))
18642 {
18643 /* For noreturn functions we historically omitted register saves
18644 altogether. However, this really messes up debugging. As a
18645 compromise save just the frame pointers. Combined with the link
18646 register saved elsewhere this should be sufficient to get
18647 a backtrace. */
18648 if (frame_pointer_needed)
18649 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18650 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
18651 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18652 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
18653 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
18654 }
18655 else
18656 {
18657 /* In the normal case we only need to save those registers
18658 which are call saved and which are used by this function. */
18659 for (reg = 0; reg <= 11; reg++)
18660 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
18661 save_reg_mask |= (1 << reg);
18662
18663 /* Handle the frame pointer as a special case. */
18664 if (frame_pointer_needed)
18665 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
18666
18667 /* If we aren't loading the PIC register,
18668 don't stack it even though it may be live. */
18669 if (flag_pic
18670 && !TARGET_SINGLE_PIC_BASE
18671 && arm_pic_register != INVALID_REGNUM
18672 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
18673 || crtl->uses_pic_offset_table))
18674 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18675
18676 /* The prologue will copy SP into R0, so save it. */
18677 if (IS_STACKALIGN (func_type))
18678 save_reg_mask |= 1;
18679 }
18680
18681 /* Save registers so the exception handler can modify them. */
18682 if (crtl->calls_eh_return)
18683 {
18684 unsigned int i;
18685
18686 for (i = 0; ; i++)
18687 {
18688 reg = EH_RETURN_DATA_REGNO (i);
18689 if (reg == INVALID_REGNUM)
18690 break;
18691 save_reg_mask |= 1 << reg;
18692 }
18693 }
18694
18695 return save_reg_mask;
18696 }
18697
18698 /* Return true if r3 is live at the start of the function. */
18699
18700 static bool
18701 arm_r3_live_at_start_p (void)
18702 {
18703 /* Just look at cfg info, which is still close enough to correct at this
18704 point. This gives false positives for broken functions that might use
18705 uninitialized data that happens to be allocated in r3, but who cares? */
18706 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
18707 }
18708
18709 /* Compute the number of bytes used to store the static chain register on the
18710 stack, above the stack frame. We need to know this accurately to get the
18711 alignment of the rest of the stack frame correct. */
18712
18713 static int
18714 arm_compute_static_chain_stack_bytes (void)
18715 {
18716 /* See the defining assertion in arm_expand_prologue. */
18717 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
18718 && IS_NESTED (arm_current_func_type ())
18719 && arm_r3_live_at_start_p ()
18720 && crtl->args.pretend_args_size == 0)
18721 return 4;
18722
18723 return 0;
18724 }
18725
18726 /* Compute a bit mask of which registers need to be
18727 saved on the stack for the current function.
18728 This is used by arm_get_frame_offsets, which may add extra registers. */
18729
18730 static unsigned long
18731 arm_compute_save_reg_mask (void)
18732 {
18733 unsigned int save_reg_mask = 0;
18734 unsigned long func_type = arm_current_func_type ();
18735 unsigned int reg;
18736
18737 if (IS_NAKED (func_type))
18738 /* This should never really happen. */
18739 return 0;
18740
18741 /* If we are creating a stack frame, then we must save the frame pointer,
18742 IP (which will hold the old stack pointer), LR and the PC. */
18743 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
18744 save_reg_mask |=
18745 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
18746 | (1 << IP_REGNUM)
18747 | (1 << LR_REGNUM)
18748 | (1 << PC_REGNUM);
18749
18750 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
18751
18752 /* Decide if we need to save the link register.
18753 Interrupt routines have their own banked link register,
18754 so they never need to save it.
18755 Otherwise if we do not use the link register we do not need to save
18756 it. If we are pushing other registers onto the stack however, we
18757 can save an instruction in the epilogue by pushing the link register
18758 now and then popping it back into the PC. This incurs extra memory
18759 accesses though, so we only do it when optimizing for size, and only
18760 if we know that we will not need a fancy return sequence. */
18761 if (df_regs_ever_live_p (LR_REGNUM)
18762 || (save_reg_mask
18763 && optimize_size
18764 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
18765 && !crtl->calls_eh_return))
18766 save_reg_mask |= 1 << LR_REGNUM;
18767
18768 if (cfun->machine->lr_save_eliminated)
18769 save_reg_mask &= ~ (1 << LR_REGNUM);
18770
18771 if (TARGET_REALLY_IWMMXT
18772 && ((bit_count (save_reg_mask)
18773 + ARM_NUM_INTS (crtl->args.pretend_args_size +
18774 arm_compute_static_chain_stack_bytes())
18775 ) % 2) != 0)
18776 {
18777 /* The total number of registers that are going to be pushed
18778 onto the stack is odd. We need to ensure that the stack
18779 is 64-bit aligned before we start to save iWMMXt registers,
18780 and also before we start to create locals. (A local variable
18781 might be a double or long long which we will load/store using
18782 an iWMMXt instruction). Therefore we need to push another
18783 ARM register, so that the stack will be 64-bit aligned. We
18784 try to avoid using the arg registers (r0 - r3) as they might be
18785 used to pass values in a tail call. */
18786 for (reg = 4; reg <= 12; reg++)
18787 if ((save_reg_mask & (1 << reg)) == 0)
18788 break;
18789
18790 if (reg <= 12)
18791 save_reg_mask |= (1 << reg);
18792 else
18793 {
18794 cfun->machine->sibcall_blocked = 1;
18795 save_reg_mask |= (1 << 3);
18796 }
18797 }
18798
18799 /* We may need to push an additional register for use initializing the
18800 PIC base register. */
18801 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
18802 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
18803 {
18804 reg = thumb_find_work_register (1 << 4);
18805 if (!call_used_regs[reg])
18806 save_reg_mask |= (1 << reg);
18807 }
18808
18809 return save_reg_mask;
18810 }
18811
18812
18813 /* Compute a bit mask of which registers need to be
18814 saved on the stack for the current function. */
18815 static unsigned long
18816 thumb1_compute_save_reg_mask (void)
18817 {
18818 unsigned long mask;
18819 unsigned reg;
18820
18821 mask = 0;
18822 for (reg = 0; reg < 12; reg ++)
18823 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
18824 mask |= 1 << reg;
18825
18826 if (flag_pic
18827 && !TARGET_SINGLE_PIC_BASE
18828 && arm_pic_register != INVALID_REGNUM
18829 && crtl->uses_pic_offset_table)
18830 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
18831
18832 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
18833 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
18834 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
18835
18836 /* LR will also be pushed if any lo regs are pushed. */
18837 if (mask & 0xff || thumb_force_lr_save ())
18838 mask |= (1 << LR_REGNUM);
18839
18840 /* Make sure we have a low work register if we need one.
18841 We will need one if we are going to push a high register,
18842 but we are not currently intending to push a low register. */
18843 if ((mask & 0xff) == 0
18844 && ((mask & 0x0f00) || TARGET_BACKTRACE))
18845 {
18846 /* Use thumb_find_work_register to choose which register
18847 we will use. If the register is live then we will
18848 have to push it. Use LAST_LO_REGNUM as our fallback
18849 choice for the register to select. */
18850 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
18851 /* Make sure the register returned by thumb_find_work_register is
18852 not part of the return value. */
18853 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
18854 reg = LAST_LO_REGNUM;
18855
18856 if (! call_used_regs[reg])
18857 mask |= 1 << reg;
18858 }
18859
18860 /* The 504 below is 8 bytes less than 512 because there are two possible
18861 alignment words. We can't tell here if they will be present or not so we
18862 have to play it safe and assume that they are. */
18863 if ((CALLER_INTERWORKING_SLOT_SIZE +
18864 ROUND_UP_WORD (get_frame_size ()) +
18865 crtl->outgoing_args_size) >= 504)
18866 {
18867 /* This is the same as the code in thumb1_expand_prologue() which
18868 determines which register to use for stack decrement. */
18869 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
18870 if (mask & (1 << reg))
18871 break;
18872
18873 if (reg > LAST_LO_REGNUM)
18874 {
18875 /* Make sure we have a register available for stack decrement. */
18876 mask |= 1 << LAST_LO_REGNUM;
18877 }
18878 }
18879
18880 return mask;
18881 }
18882
18883
18884 /* Return the number of bytes required to save VFP registers. */
18885 static int
18886 arm_get_vfp_saved_size (void)
18887 {
18888 unsigned int regno;
18889 int count;
18890 int saved;
18891
18892 saved = 0;
18893 /* Space for saved VFP registers. */
18894 if (TARGET_HARD_FLOAT && TARGET_VFP)
18895 {
18896 count = 0;
18897 for (regno = FIRST_VFP_REGNUM;
18898 regno < LAST_VFP_REGNUM;
18899 regno += 2)
18900 {
18901 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
18902 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
18903 {
18904 if (count > 0)
18905 {
18906 /* Workaround ARM10 VFPr1 bug. */
18907 if (count == 2 && !arm_arch6)
18908 count++;
18909 saved += count * 8;
18910 }
18911 count = 0;
18912 }
18913 else
18914 count++;
18915 }
18916 if (count > 0)
18917 {
18918 if (count == 2 && !arm_arch6)
18919 count++;
18920 saved += count * 8;
18921 }
18922 }
18923 return saved;
18924 }
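
For example, a function that only needs to save d8 and d9 forms a single run of two D-registers; on a pre-ARMv6 core the ARM10 VFPr1 workaround pads that run to three, so 24 rather than 16 bytes are reserved. A tiny sketch of that arithmetic (the register choice and the pre-ARMv6 setting are hypothetical):

#include <stdio.h>

int
main (void)
{
  int count = 2;         /* contiguous run of D-registers to save: d8, d9 */
  int arm_arch6 = 0;     /* assumption: pre-ARMv6 core */

  /* Same adjustment as arm_get_vfp_saved_size's ARM10 VFPr1 workaround.  */
  if (count == 2 && !arm_arch6)
    count++;

  printf ("%d bytes\n", count * 8);   /* prints "24 bytes" */
  return 0;
}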
18925
18926
18927 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
18928 everything bar the final return instruction. If SIMPLE_RETURN is true,
18929 then do not output the epilogue, because it has already been emitted in RTL. */
18930 const char *
18931 output_return_instruction (rtx operand, bool really_return, bool reverse,
18932 bool simple_return)
18933 {
18934 char conditional[10];
18935 char instr[100];
18936 unsigned reg;
18937 unsigned long live_regs_mask;
18938 unsigned long func_type;
18939 arm_stack_offsets *offsets;
18940
18941 func_type = arm_current_func_type ();
18942
18943 if (IS_NAKED (func_type))
18944 return "";
18945
18946 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
18947 {
18948 /* If this function was declared non-returning, and we have
18949 found a tail call, then we have to trust that the called
18950 function won't return. */
18951 if (really_return)
18952 {
18953 rtx ops[2];
18954
18955 /* Otherwise, trap an attempted return by aborting. */
18956 ops[0] = operand;
18957 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
18958 : "abort");
18959 assemble_external_libcall (ops[1]);
18960 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
18961 }
18962
18963 return "";
18964 }
18965
18966 gcc_assert (!cfun->calls_alloca || really_return);
18967
18968 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
18969
18970 cfun->machine->return_used_this_function = 1;
18971
18972 offsets = arm_get_frame_offsets ();
18973 live_regs_mask = offsets->saved_regs_mask;
18974
18975 if (!simple_return && live_regs_mask)
18976 {
18977 const char * return_reg;
18978
18979 /* If we do not have any special requirements for function exit
18980 (e.g. interworking) then we can load the return address
18981 directly into the PC. Otherwise we must load it into LR. */
18982 if (really_return
18983 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
18984 return_reg = reg_names[PC_REGNUM];
18985 else
18986 return_reg = reg_names[LR_REGNUM];
18987
18988 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
18989 {
18990 /* There are three possible reasons for the IP register
18991 being saved: 1) a stack frame was created, in which case
18992 IP contains the old stack pointer, or 2) an ISR routine
18993 corrupted it, or 3) it was saved to align the stack on
18994 iWMMXt. In case 1, restore IP into SP, otherwise just
18995 restore IP. */
18996 if (frame_pointer_needed)
18997 {
18998 live_regs_mask &= ~ (1 << IP_REGNUM);
18999 live_regs_mask |= (1 << SP_REGNUM);
19000 }
19001 else
19002 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19003 }
19004
19005 /* On some ARM architectures it is faster to use LDR rather than
19006 LDM to load a single register. On other architectures, the
19007 cost is the same. In 26 bit mode, or for exception handlers,
19008 we have to use LDM to load the PC so that the CPSR is also
19009 restored. */
19010 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19011 if (live_regs_mask == (1U << reg))
19012 break;
19013
19014 if (reg <= LAST_ARM_REGNUM
19015 && (reg != LR_REGNUM
19016 || ! really_return
19017 || ! IS_INTERRUPT (func_type)))
19018 {
19019 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19020 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19021 }
19022 else
19023 {
19024 char *p;
19025 int first = 1;
19026
19027 /* Generate the load multiple instruction to restore the
19028 registers. Note we can get here, even if
19029 frame_pointer_needed is true, but only if sp already
19030 points to the base of the saved core registers. */
19031 if (live_regs_mask & (1 << SP_REGNUM))
19032 {
19033 unsigned HOST_WIDE_INT stack_adjust;
19034
19035 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19036 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19037
19038 if (stack_adjust && arm_arch5 && TARGET_ARM)
19039 if (TARGET_UNIFIED_ASM)
19040 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19041 else
19042 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19043 else
19044 {
19045 /* If we can't use ldmib (SA110 bug),
19046 then try to pop r3 instead. */
19047 if (stack_adjust)
19048 live_regs_mask |= 1 << 3;
19049
19050 if (TARGET_UNIFIED_ASM)
19051 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19052 else
19053 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19054 }
19055 }
19056 else
19057 if (TARGET_UNIFIED_ASM)
19058 sprintf (instr, "pop%s\t{", conditional);
19059 else
19060 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19061
19062 p = instr + strlen (instr);
19063
19064 for (reg = 0; reg <= SP_REGNUM; reg++)
19065 if (live_regs_mask & (1 << reg))
19066 {
19067 int l = strlen (reg_names[reg]);
19068
19069 if (first)
19070 first = 0;
19071 else
19072 {
19073 memcpy (p, ", ", 2);
19074 p += 2;
19075 }
19076
19077 memcpy (p, "%|", 2);
19078 memcpy (p + 2, reg_names[reg], l);
19079 p += l + 2;
19080 }
19081
19082 if (live_regs_mask & (1 << LR_REGNUM))
19083 {
19084 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19085 /* If returning from an interrupt, restore the CPSR. */
19086 if (IS_INTERRUPT (func_type))
19087 strcat (p, "^");
19088 }
19089 else
19090 strcpy (p, "}");
19091 }
19092
19093 output_asm_insn (instr, & operand);
19094
19095 /* See if we need to generate an extra instruction to
19096 perform the actual function return. */
19097 if (really_return
19098 && func_type != ARM_FT_INTERWORKED
19099 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19100 {
19101 /* The return has already been handled
19102 by loading the LR into the PC. */
19103 return "";
19104 }
19105 }
19106
19107 if (really_return)
19108 {
19109 switch ((int) ARM_FUNC_TYPE (func_type))
19110 {
19111 case ARM_FT_ISR:
19112 case ARM_FT_FIQ:
19113 /* ??? This is wrong for unified assembly syntax. */
19114 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19115 break;
19116
19117 case ARM_FT_INTERWORKED:
19118 sprintf (instr, "bx%s\t%%|lr", conditional);
19119 break;
19120
19121 case ARM_FT_EXCEPTION:
19122 /* ??? This is wrong for unified assembly syntax. */
19123 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19124 break;
19125
19126 default:
19127 /* Use bx if it's available. */
19128 if (arm_arch5 || arm_arch4t)
19129 sprintf (instr, "bx%s\t%%|lr", conditional);
19130 else
19131 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19132 break;
19133 }
19134
19135 output_asm_insn (instr, & operand);
19136 }
19137
19138 return "";
19139 }
19140
19141 /* Write the function name into the code section, directly preceding
19142 the function prologue.
19143
19144 Code will be output similar to this:
19145 t0
19146 .ascii "arm_poke_function_name", 0
19147 .align
19148 t1
19149 .word 0xff000000 + (t1 - t0)
19150 arm_poke_function_name
19151 mov ip, sp
19152 stmfd sp!, {fp, ip, lr, pc}
19153 sub fp, ip, #4
19154
19155 When performing a stack backtrace, code can inspect the value
19156 of 'pc' stored at 'fp' + 0. If the trace function then looks
19157 at location pc - 12 and the top 8 bits are set, then we know
19158 that there is a function name embedded immediately preceding this
19159 location, whose length is given by ((pc[-3]) & 0x00ffffff).
19160
19161 We assume that pc is declared as a pointer to an unsigned long.
19162
19163 It is of no benefit to output the function name if we are assembling
19164 a leaf function. These function types will not contain a stack
19165 backtrace structure, therefore it is not possible to determine the
19166 function name. */
19167 void
19168 arm_poke_function_name (FILE *stream, const char *name)
19169 {
19170 unsigned long alignlength;
19171 unsigned long length;
19172 rtx x;
19173
19174 length = strlen (name) + 1;
19175 alignlength = ROUND_UP_WORD (length);
19176
19177 ASM_OUTPUT_ASCII (stream, name, length);
19178 ASM_OUTPUT_ALIGN (stream, 2);
19179 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19180 assemble_aligned_integer (UNITS_PER_WORD, x);
19181 }
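
A hypothetical reader-side sketch of the lookup described in the comment above: given the saved pc value (declared as a pointer to unsigned long, on 32-bit ARM), check the marker word at pc - 12 and walk back to the start of the embedded name. This is an illustration of the layout, not code from this file.

#include <stddef.h>

/* Return the embedded function name, or NULL if none is present.  */
const char *
poked_name_from_pc (const unsigned long *pc)
{
  unsigned long marker = pc[-3];              /* word at pc - 12 */

  if ((marker & 0xff000000) != 0xff000000)
    return NULL;                              /* top byte not set: no name */

  unsigned long len = marker & 0x00ffffff;    /* t1 - t0: padded name length */
  return (const char *) (pc - 3) - len;       /* start of the .ascii string */
}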
19182
19183 /* Place some comments into the assembler stream
19184 describing the current function. */
19185 static void
19186 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19187 {
19188 unsigned long func_type;
19189
19190 /* ??? Do we want to print some of the below anyway? */
19191 if (TARGET_THUMB1)
19192 return;
19193
19194 /* Sanity check. */
19195 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19196
19197 func_type = arm_current_func_type ();
19198
19199 switch ((int) ARM_FUNC_TYPE (func_type))
19200 {
19201 default:
19202 case ARM_FT_NORMAL:
19203 break;
19204 case ARM_FT_INTERWORKED:
19205 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19206 break;
19207 case ARM_FT_ISR:
19208 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19209 break;
19210 case ARM_FT_FIQ:
19211 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19212 break;
19213 case ARM_FT_EXCEPTION:
19214 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19215 break;
19216 }
19217
19218 if (IS_NAKED (func_type))
19219 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19220
19221 if (IS_VOLATILE (func_type))
19222 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19223
19224 if (IS_NESTED (func_type))
19225 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19226 if (IS_STACKALIGN (func_type))
19227 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19228
19229 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19230 crtl->args.size,
19231 crtl->args.pretend_args_size, frame_size);
19232
19233 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19234 frame_pointer_needed,
19235 cfun->machine->uses_anonymous_args);
19236
19237 if (cfun->machine->lr_save_eliminated)
19238 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19239
19240 if (crtl->calls_eh_return)
19241 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19242
19243 }
19244
19245 static void
19246 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19247 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19248 {
19249 arm_stack_offsets *offsets;
19250
19251 if (TARGET_THUMB1)
19252 {
19253 int regno;
19254
19255 /* Emit any call-via-reg trampolines that are needed for v4t support
19256 of call_reg and call_value_reg type insns. */
19257 for (regno = 0; regno < LR_REGNUM; regno++)
19258 {
19259 rtx label = cfun->machine->call_via[regno];
19260
19261 if (label != NULL)
19262 {
19263 switch_to_section (function_section (current_function_decl));
19264 targetm.asm_out.internal_label (asm_out_file, "L",
19265 CODE_LABEL_NUMBER (label));
19266 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19267 }
19268 }
19269
19270 /* ??? Probably not safe to set this here, since it assumes that a
19271 function will be emitted as assembly immediately after we generate
19272 RTL for it. This does not happen for inline functions. */
19273 cfun->machine->return_used_this_function = 0;
19274 }
19275 else /* TARGET_32BIT */
19276 {
19277 /* We need to take into account any stack-frame rounding. */
19278 offsets = arm_get_frame_offsets ();
19279
19280 gcc_assert (!use_return_insn (FALSE, NULL)
19281 || (cfun->machine->return_used_this_function != 0)
19282 || offsets->saved_regs == offsets->outgoing_args
19283 || frame_pointer_needed);
19284
19285 /* Reset the ARM-specific per-function variables. */
19286 after_arm_reorg = 0;
19287 }
19288 }
19289
19290 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19291 STR and STRD. If an even number of registers is being pushed, one
19292 STRD pattern is created for each register pair. If an
19293 odd number of registers is pushed, emit an initial STR followed by
19294 as many STRD instructions as are needed. This works best when the
19295 stack is initially 64-bit aligned (the normal case), since it
19296 ensures that each STRD is also 64-bit aligned. */
19297 static void
19298 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19299 {
19300 int num_regs = 0;
19301 int i;
19302 int regno;
19303 rtx par = NULL_RTX;
19304 rtx dwarf = NULL_RTX;
19305 rtx tmp;
19306 bool first = true;
19307
19308 num_regs = bit_count (saved_regs_mask);
19309
19310 /* Must be at least one register to save, and can't save SP or PC. */
19311 gcc_assert (num_regs > 0 && num_regs <= 14);
19312 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19313 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19314
19315 /* Create sequence for DWARF info. All the frame-related data for
19316 debugging is held in this wrapper. */
19317 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19318
19319 /* Describe the stack adjustment. */
19320 tmp = gen_rtx_SET (VOIDmode,
19321 stack_pointer_rtx,
19322 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19323 RTX_FRAME_RELATED_P (tmp) = 1;
19324 XVECEXP (dwarf, 0, 0) = tmp;
19325
19326 /* Find the first register. */
19327 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19328 ;
19329
19330 i = 0;
19331
19332 /* If there's an odd number of registers to push, start off by
19333 pushing a single register. This ensures that subsequent strd
19334 operations are dword aligned (assuming that SP was originally
19335 64-bit aligned). */
19336 if ((num_regs & 1) != 0)
19337 {
19338 rtx reg, mem, insn;
19339
19340 reg = gen_rtx_REG (SImode, regno);
19341 if (num_regs == 1)
19342 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19343 stack_pointer_rtx));
19344 else
19345 mem = gen_frame_mem (Pmode,
19346 gen_rtx_PRE_MODIFY
19347 (Pmode, stack_pointer_rtx,
19348 plus_constant (Pmode, stack_pointer_rtx,
19349 -4 * num_regs)));
19350
19351 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19352 RTX_FRAME_RELATED_P (tmp) = 1;
19353 insn = emit_insn (tmp);
19354 RTX_FRAME_RELATED_P (insn) = 1;
19355 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19356 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19357 reg);
19358 RTX_FRAME_RELATED_P (tmp) = 1;
19359 i++;
19360 regno++;
19361 XVECEXP (dwarf, 0, i) = tmp;
19362 first = false;
19363 }
19364
19365 while (i < num_regs)
19366 if (saved_regs_mask & (1 << regno))
19367 {
19368 rtx reg1, reg2, mem1, mem2;
19369 rtx tmp0, tmp1, tmp2;
19370 int regno2;
19371
19372 /* Find the register to pair with this one. */
19373 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19374 regno2++)
19375 ;
19376
19377 reg1 = gen_rtx_REG (SImode, regno);
19378 reg2 = gen_rtx_REG (SImode, regno2);
19379
19380 if (first)
19381 {
19382 rtx insn;
19383
19384 first = false;
19385 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19386 stack_pointer_rtx,
19387 -4 * num_regs));
19388 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19389 stack_pointer_rtx,
19390 -4 * (num_regs - 1)));
19391 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19392 plus_constant (Pmode, stack_pointer_rtx,
19393 -4 * (num_regs)));
19394 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19395 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19396 RTX_FRAME_RELATED_P (tmp0) = 1;
19397 RTX_FRAME_RELATED_P (tmp1) = 1;
19398 RTX_FRAME_RELATED_P (tmp2) = 1;
19399 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19400 XVECEXP (par, 0, 0) = tmp0;
19401 XVECEXP (par, 0, 1) = tmp1;
19402 XVECEXP (par, 0, 2) = tmp2;
19403 insn = emit_insn (par);
19404 RTX_FRAME_RELATED_P (insn) = 1;
19405 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19406 }
19407 else
19408 {
19409 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19410 stack_pointer_rtx,
19411 4 * i));
19412 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19413 stack_pointer_rtx,
19414 4 * (i + 1)));
19415 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19416 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19417 RTX_FRAME_RELATED_P (tmp1) = 1;
19418 RTX_FRAME_RELATED_P (tmp2) = 1;
19419 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19420 XVECEXP (par, 0, 0) = tmp1;
19421 XVECEXP (par, 0, 1) = tmp2;
19422 emit_insn (par);
19423 }
19424
19425 /* Create unwind information. This is an approximation. */
19426 tmp1 = gen_rtx_SET (VOIDmode,
19427 gen_frame_mem (Pmode,
19428 plus_constant (Pmode,
19429 stack_pointer_rtx,
19430 4 * i)),
19431 reg1);
19432 tmp2 = gen_rtx_SET (VOIDmode,
19433 gen_frame_mem (Pmode,
19434 plus_constant (Pmode,
19435 stack_pointer_rtx,
19436 4 * (i + 1))),
19437 reg2);
19438
19439 RTX_FRAME_RELATED_P (tmp1) = 1;
19440 RTX_FRAME_RELATED_P (tmp2) = 1;
19441 XVECEXP (dwarf, 0, i + 1) = tmp1;
19442 XVECEXP (dwarf, 0, i + 2) = tmp2;
19443 i += 2;
19444 regno = regno2 + 1;
19445 }
19446 else
19447 regno++;
19448
19449 return;
19450 }
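
A standalone sketch (not part of this file) of the pairing decision made above, for a hypothetical mask of {r4, r5, r6, r7, r14}: the odd count forces one initial single-word store, and every remaining pair becomes an STRD whose two registers need not be consecutive in Thumb-2.

#include <stdio.h>

int
main (void)
{
  unsigned long mask = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 7) | (1 << 14);
  int num_regs = 0, regno = 0, i = 0;

  for (int r = 0; r <= 15; r++)
    if (mask & (1UL << r))
      num_regs++;

  while ((mask & (1UL << regno)) == 0)
    regno++;                              /* first saved register */

  if (num_regs & 1)
    {
      /* Odd count: one initial STR keeps SP 8-byte aligned for the STRDs.  */
      printf ("str  r%d\n", regno);
      regno++;
      i++;
    }

  while (i < num_regs)
    if (mask & (1UL << regno))
      {
        int regno2 = regno + 1;
        while ((mask & (1UL << regno2)) == 0)
          regno2++;                       /* partner register */
        printf ("strd r%d, r%d\n", regno, regno2);
        i += 2;
        regno = regno2 + 1;
      }
    else
      regno++;

  return 0;
}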
19451
19452 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19453 whenever possible, otherwise it emits single-word stores. The first store
19454 also allocates stack space for all saved registers, using writeback with
19455 pre-indexed addressing. All other stores use offset addressing. If no STRD
19456 can be emitted, this function emits a sequence of single-word stores,
19457 and not an STM as before, because single-word stores provide more
19458 scheduling freedom and can be turned into an STM by peephole optimizations. */
19459 static void
19460 arm_emit_strd_push (unsigned long saved_regs_mask)
19461 {
19462 int num_regs = 0;
19463 int i, j, dwarf_index = 0;
19464 int offset = 0;
19465 rtx dwarf = NULL_RTX;
19466 rtx insn = NULL_RTX;
19467 rtx tmp, mem;
19468
19469 /* TODO: More efficient code could be emitted by changing the
19470 layout, e.g., first push all pairs that can use STRD to keep the
19471 stack aligned, and then push all other registers. */
19472 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19473 if (saved_regs_mask & (1 << i))
19474 num_regs++;
19475
19476 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19477 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19478 gcc_assert (num_regs > 0);
19479
19480 /* Create sequence for DWARF info. */
19481 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19482
19483 /* For dwarf info, we generate explicit stack update. */
19484 tmp = gen_rtx_SET (VOIDmode,
19485 stack_pointer_rtx,
19486 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19487 RTX_FRAME_RELATED_P (tmp) = 1;
19488 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19489
19490 /* Save registers. */
19491 offset = - 4 * num_regs;
19492 j = 0;
19493 while (j <= LAST_ARM_REGNUM)
19494 if (saved_regs_mask & (1 << j))
19495 {
19496 if ((j % 2 == 0)
19497 && (saved_regs_mask & (1 << (j + 1))))
19498 {
19499 /* The current register and the next register form a register pair for
19500 which STRD can be generated. */
19501 if (offset < 0)
19502 {
19503 /* Allocate stack space for all saved registers. */
19504 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19505 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19506 mem = gen_frame_mem (DImode, tmp);
19507 offset = 0;
19508 }
19509 else if (offset > 0)
19510 mem = gen_frame_mem (DImode,
19511 plus_constant (Pmode,
19512 stack_pointer_rtx,
19513 offset));
19514 else
19515 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19516
19517 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19518 RTX_FRAME_RELATED_P (tmp) = 1;
19519 tmp = emit_insn (tmp);
19520
19521 /* Record the first store insn. */
19522 if (dwarf_index == 1)
19523 insn = tmp;
19524
19525 /* Generate dwarf info. */
19526 mem = gen_frame_mem (SImode,
19527 plus_constant (Pmode,
19528 stack_pointer_rtx,
19529 offset));
19530 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19531 RTX_FRAME_RELATED_P (tmp) = 1;
19532 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19533
19534 mem = gen_frame_mem (SImode,
19535 plus_constant (Pmode,
19536 stack_pointer_rtx,
19537 offset + 4));
19538 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19539 RTX_FRAME_RELATED_P (tmp) = 1;
19540 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19541
19542 offset += 8;
19543 j += 2;
19544 }
19545 else
19546 {
19547 /* Emit a single word store. */
19548 if (offset < 0)
19549 {
19550 /* Allocate stack space for all saved registers. */
19551 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19552 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19553 mem = gen_frame_mem (SImode, tmp);
19554 offset = 0;
19555 }
19556 else if (offset > 0)
19557 mem = gen_frame_mem (SImode,
19558 plus_constant (Pmode,
19559 stack_pointer_rtx,
19560 offset));
19561 else
19562 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19563
19564 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19565 RTX_FRAME_RELATED_P (tmp) = 1;
19566 tmp = emit_insn (tmp);
19567
19568 /* Record the first store insn. */
19569 if (dwarf_index == 1)
19570 insn = tmp;
19571
19572 /* Generate dwarf info. */
19573 mem = gen_frame_mem (SImode,
19574 plus_constant(Pmode,
19575 stack_pointer_rtx,
19576 offset));
19577 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19578 RTX_FRAME_RELATED_P (tmp) = 1;
19579 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19580
19581 offset += 4;
19582 j += 1;
19583 }
19584 }
19585 else
19586 j++;
19587
19588 /* Attach dwarf info to the first insn we generate. */
19589 gcc_assert (insn != NULL_RTX);
19590 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19591 RTX_FRAME_RELATED_P (insn) = 1;
19592 }
19593
19594 /* Generate and emit an insn that we will recognize as a push_multi.
19595 Unfortunately, since this insn does not reflect very well the actual
19596 semantics of the operation, we need to annotate the insn for the benefit
19597 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19598 MASK for registers that should be annotated for DWARF2 frame unwind
19599 information. */
19600 static rtx
19601 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19602 {
19603 int num_regs = 0;
19604 int num_dwarf_regs = 0;
19605 int i, j;
19606 rtx par;
19607 rtx dwarf;
19608 int dwarf_par_index;
19609 rtx tmp, reg;
19610
19611 /* We don't record the PC in the dwarf frame information. */
19612 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19613
19614 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19615 {
19616 if (mask & (1 << i))
19617 num_regs++;
19618 if (dwarf_regs_mask & (1 << i))
19619 num_dwarf_regs++;
19620 }
19621
19622 gcc_assert (num_regs && num_regs <= 16);
19623 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19624
19625 /* For the body of the insn we are going to generate an UNSPEC in
19626 parallel with several USEs. This allows the insn to be recognized
19627 by the push_multi pattern in the arm.md file.
19628
19629 The body of the insn looks something like this:
19630
19631 (parallel [
19632 (set (mem:BLK (pre_modify:SI (reg:SI sp)
19633 (const_int:SI <num>)))
19634 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
19635 (use (reg:SI XX))
19636 (use (reg:SI YY))
19637 ...
19638 ])
19639
19640 For the frame note however, we try to be more explicit and actually
19641 show each register being stored into the stack frame, plus a (single)
19642 decrement of the stack pointer. We do it this way in order to be
19643 friendly to the stack unwinding code, which only wants to see a single
19644 stack decrement per instruction. The RTL we generate for the note looks
19645 something like this:
19646
19647 (sequence [
19648 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
19649 (set (mem:SI (reg:SI sp)) (reg:SI r4))
19650 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
19651 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
19652 ...
19653 ])
19654
19655 FIXME: In an ideal world the PRE_MODIFY would not exist and
19656 instead we'd have a parallel expression detailing all
19657 the stores to the various memory addresses so that debug
19658 information is more up-to-date. Remember however while writing
19659 this to take care of the constraints with the push instruction.
19660
19661 Note also that this has to be taken care of for the VFP registers.
19662
19663 For more see PR43399. */
19664
19665 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
19666 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
19667 dwarf_par_index = 1;
19668
19669 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19670 {
19671 if (mask & (1 << i))
19672 {
19673 reg = gen_rtx_REG (SImode, i);
19674
19675 XVECEXP (par, 0, 0)
19676 = gen_rtx_SET (VOIDmode,
19677 gen_frame_mem
19678 (BLKmode,
19679 gen_rtx_PRE_MODIFY (Pmode,
19680 stack_pointer_rtx,
19681 plus_constant
19682 (Pmode, stack_pointer_rtx,
19683 -4 * num_regs))
19684 ),
19685 gen_rtx_UNSPEC (BLKmode,
19686 gen_rtvec (1, reg),
19687 UNSPEC_PUSH_MULT));
19688
19689 if (dwarf_regs_mask & (1 << i))
19690 {
19691 tmp = gen_rtx_SET (VOIDmode,
19692 gen_frame_mem (SImode, stack_pointer_rtx),
19693 reg);
19694 RTX_FRAME_RELATED_P (tmp) = 1;
19695 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19696 }
19697
19698 break;
19699 }
19700 }
19701
19702 for (j = 1, i++; j < num_regs; i++)
19703 {
19704 if (mask & (1 << i))
19705 {
19706 reg = gen_rtx_REG (SImode, i);
19707
19708 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
19709
19710 if (dwarf_regs_mask & (1 << i))
19711 {
19712 tmp
19713 = gen_rtx_SET (VOIDmode,
19714 gen_frame_mem
19715 (SImode,
19716 plus_constant (Pmode, stack_pointer_rtx,
19717 4 * j)),
19718 reg);
19719 RTX_FRAME_RELATED_P (tmp) = 1;
19720 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
19721 }
19722
19723 j++;
19724 }
19725 }
19726
19727 par = emit_insn (par);
19728
19729 tmp = gen_rtx_SET (VOIDmode,
19730 stack_pointer_rtx,
19731 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19732 RTX_FRAME_RELATED_P (tmp) = 1;
19733 XVECEXP (dwarf, 0, 0) = tmp;
19734
19735 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
19736
19737 return par;
19738 }
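
To make the note structure above concrete, here is a standalone sketch (not part of this file) that, for a hypothetical mask of {r4, r5, r6, lr, pc}, prints the single stack decrement and the per-register store offsets that the frame note describes; as the code above states, PC is additionally dropped from the DWARF mask even though it is pushed.

#include <stdio.h>

int
main (void)
{
  unsigned long mask = (1 << 4) | (1 << 5) | (1 << 6) | (1 << 14) | (1 << 15);
  int num_regs = 0, offset = 0;

  for (int r = 0; r <= 15; r++)
    if (mask & (1UL << r))
      num_regs++;

  /* One stack decrement for the whole push...  */
  printf ("sp = sp - %d\n", 4 * num_regs);

  /* ...then one store per register, lowest register at the lowest address,
     matching the SEQUENCE shown in the comment above.  */
  for (int r = 0; r <= 15; r++)
    if (mask & (1UL << r))
      {
        printf ("mem[sp + %d] = r%d\n", offset, r);
        offset += 4;
      }
  return 0;
}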
19739
19740 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
19741 SIZE is the offset to be adjusted.
19742 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
19743 static void
19744 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
19745 {
19746 rtx dwarf;
19747
19748 RTX_FRAME_RELATED_P (insn) = 1;
19749 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
19750 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
19751 }
19752
19753 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
19754 SAVED_REGS_MASK shows which registers need to be restored.
19755
19756 Unfortunately, since this insn does not reflect very well the actual
19757 semantics of the operation, we need to annotate the insn for the benefit
19758 of DWARF2 frame unwind information. */
19759 static void
19760 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
19761 {
19762 int num_regs = 0;
19763 int i, j;
19764 rtx par;
19765 rtx dwarf = NULL_RTX;
19766 rtx tmp, reg;
19767 bool return_in_pc;
19768 int offset_adj;
19769 int emit_update;
19770
19771 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19772 offset_adj = return_in_pc ? 1 : 0;
19773 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19774 if (saved_regs_mask & (1 << i))
19775 num_regs++;
19776
19777 gcc_assert (num_regs && num_regs <= 16);
19778
19779 /* If SP is in reglist, then we don't emit SP update insn. */
19780 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
19781
19782 /* The parallel needs to hold num_regs SETs
19783 and one SET for the stack update. */
19784 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
19785
19786 if (return_in_pc)
19787 {
19788 tmp = ret_rtx;
19789 XVECEXP (par, 0, 0) = tmp;
19790 }
19791
19792 if (emit_update)
19793 {
19794 /* Increment the stack pointer, based on there being
19795 num_regs 4-byte registers to restore. */
19796 tmp = gen_rtx_SET (VOIDmode,
19797 stack_pointer_rtx,
19798 plus_constant (Pmode,
19799 stack_pointer_rtx,
19800 4 * num_regs));
19801 RTX_FRAME_RELATED_P (tmp) = 1;
19802 XVECEXP (par, 0, offset_adj) = tmp;
19803 }
19804
19805 /* Now restore every reg, which may include PC. */
19806 for (j = 0, i = 0; j < num_regs; i++)
19807 if (saved_regs_mask & (1 << i))
19808 {
19809 reg = gen_rtx_REG (SImode, i);
19810 if ((num_regs == 1) && emit_update && !return_in_pc)
19811 {
19812 /* Emit single load with writeback. */
19813 tmp = gen_frame_mem (SImode,
19814 gen_rtx_POST_INC (Pmode,
19815 stack_pointer_rtx));
19816 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
19817 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19818 return;
19819 }
19820
19821 tmp = gen_rtx_SET (VOIDmode,
19822 reg,
19823 gen_frame_mem
19824 (SImode,
19825 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
19826 RTX_FRAME_RELATED_P (tmp) = 1;
19827 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
19828
19829 /* We need to maintain a sequence for DWARF info too. As dwarf info
19830 should not have PC, skip PC. */
19831 if (i != PC_REGNUM)
19832 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19833
19834 j++;
19835 }
19836
19837 if (return_in_pc)
19838 par = emit_jump_insn (par);
19839 else
19840 par = emit_insn (par);
19841
19842 REG_NOTES (par) = dwarf;
19843 if (!return_in_pc)
19844 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
19845 stack_pointer_rtx, stack_pointer_rtx);
19846 }
19847
19848 /* Generate and emit an insn pattern that we will recognize as a pop_multi
19849 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
19850
19851 Unfortunately, since this insn does not reflect very well the actual
19852 semantics of the operation, we need to annotate the insn for the benefit
19853 of DWARF2 frame unwind information. */
19854 static void
19855 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
19856 {
19857 int i, j;
19858 rtx par;
19859 rtx dwarf = NULL_RTX;
19860 rtx tmp, reg;
19861
19862 gcc_assert (num_regs && num_regs <= 32);
19863
19864 /* Workaround ARM10 VFPr1 bug. */
19865 if (num_regs == 2 && !arm_arch6)
19866 {
19867 if (first_reg == 15)
19868 first_reg--;
19869
19870 num_regs++;
19871 }
19872
19873 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
19874 there could be up to 32 D-registers to restore.
19875 If there are more than 16 D-registers, make two recursive calls,
19876 each of which emits one pop_multi instruction. */
19877 if (num_regs > 16)
19878 {
19879 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
19880 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
19881 return;
19882 }
19883
19884 /* The parallel needs to hold num_regs SETs
19885 and one SET for the stack update. */
19886 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
19887
19888 /* Increment the stack pointer, based on there being
19889 num_regs 8-byte registers to restore. */
19890 tmp = gen_rtx_SET (VOIDmode,
19891 base_reg,
19892 plus_constant (Pmode, base_reg, 8 * num_regs));
19893 RTX_FRAME_RELATED_P (tmp) = 1;
19894 XVECEXP (par, 0, 0) = tmp;
19895
19896 /* Now show every reg that will be restored, using a SET for each. */
19897 for (j = 0, i=first_reg; j < num_regs; i += 2)
19898 {
19899 reg = gen_rtx_REG (DFmode, i);
19900
19901 tmp = gen_rtx_SET (VOIDmode,
19902 reg,
19903 gen_frame_mem
19904 (DFmode,
19905 plus_constant (Pmode, base_reg, 8 * j)));
19906 RTX_FRAME_RELATED_P (tmp) = 1;
19907 XVECEXP (par, 0, j + 1) = tmp;
19908
19909 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19910
19911 j++;
19912 }
19913
19914 par = emit_insn (par);
19915 REG_NOTES (par) = dwarf;
19916
19917 /* Make sure the CFA does not remain based on IP_REGNUM, so that unwinding from FP works. */
19918 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
19919 {
19920 RTX_FRAME_RELATED_P (par) = 1;
19921 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
19922 }
19923 else
19924 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
19925 base_reg, base_reg);
19926 }
19927
19928 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
19929 even number of registers is being popped, multiple LDRD patterns are created for
19930 all register pairs. If an odd number of registers is popped, the last register is
19931 loaded using an LDR pattern. */
19932 static void
19933 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
19934 {
19935 int num_regs = 0;
19936 int i, j;
19937 rtx par = NULL_RTX;
19938 rtx dwarf = NULL_RTX;
19939 rtx tmp, reg, tmp1;
19940 bool return_in_pc;
19941
19942 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
19943 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19944 if (saved_regs_mask & (1 << i))
19945 num_regs++;
19946
19947 gcc_assert (num_regs && num_regs <= 16);
19948
19949 /* We cannot generate an LDRD for PC, so reduce the count if PC is
19950 to be popped. If num_regs was even, it now becomes odd
19951 and a pop with PC can be generated; if it was odd, it becomes
19952 even and an LDR with return can be generated for PC. */
19953 if (return_in_pc)
19954 num_regs--;
19955
19956 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19957
19958 /* Var j iterates over all the registers in
19959 saved_regs_mask. Var i gives the index of a saved register in the stack frame.
19960 A PARALLEL RTX holding a register pair is created here, so that a pattern for
19961 LDRD can be matched. As PC is always the last register to be popped, and
19962 we have already decremented num_regs if PC is in the mask, we don't have to worry
19963 about PC in this loop. */
19964 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
19965 if (saved_regs_mask & (1 << j))
19966 {
19967 /* Create RTX for memory load. */
19968 reg = gen_rtx_REG (SImode, j);
19969 tmp = gen_rtx_SET (SImode,
19970 reg,
19971 gen_frame_mem (SImode,
19972 plus_constant (Pmode,
19973 stack_pointer_rtx, 4 * i)));
19974 RTX_FRAME_RELATED_P (tmp) = 1;
19975
19976 if (i % 2 == 0)
19977 {
19978 /* When the saved-register index (i) is even, the RTX to be emitted has
19979 not been created yet, so create it first. The LDRD pattern we
19980 are generating is:
19981 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
19982 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
19983 where target registers need not be consecutive. */
19984 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19985 dwarf = NULL_RTX;
19986 }
19987
19988 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
19989 added as the 0th element; if i is odd, reg_i is added as the 1st element
19990 of the LDRD pattern shown above. */
19991 XVECEXP (par, 0, (i % 2)) = tmp;
19992 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
19993
19994 if ((i % 2) == 1)
19995 {
19996 /* When the saved-register index (i) is odd, RTXs for both registers
19997 of the LDRD pattern given above have been generated, and the
19998 pattern can be emitted now. */
19999 par = emit_insn (par);
20000 REG_NOTES (par) = dwarf;
20001 RTX_FRAME_RELATED_P (par) = 1;
20002 }
20003
20004 i++;
20005 }
20006
20007 /* If the number of saved registers is odd AND return_in_pc is false, OR
20008 the number of saved registers is even AND return_in_pc is true, the last register
20009 is popped using LDR. It can be PC as well. Hence, adjust the stack first and
20010 then generate an LDR with post-increment. */
20011
20012 /* Increment the stack pointer, based on there being
20013 num_regs 4-byte registers to restore. */
20014 tmp = gen_rtx_SET (VOIDmode,
20015 stack_pointer_rtx,
20016 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20017 RTX_FRAME_RELATED_P (tmp) = 1;
20018 tmp = emit_insn (tmp);
20019 if (!return_in_pc)
20020 {
20021 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20022 stack_pointer_rtx, stack_pointer_rtx);
20023 }
20024
20025 dwarf = NULL_RTX;
20026
20027 if (((num_regs % 2) == 1 && !return_in_pc)
20028 || ((num_regs % 2) == 0 && return_in_pc))
20029 {
20030 /* Scan for the single register to be popped. Skip until the saved
20031 register is found. */
20032 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20033
20034 /* Gen LDR with post increment here. */
20035 tmp1 = gen_rtx_MEM (SImode,
20036 gen_rtx_POST_INC (SImode,
20037 stack_pointer_rtx));
20038 set_mem_alias_set (tmp1, get_frame_alias_set ());
20039
20040 reg = gen_rtx_REG (SImode, j);
20041 tmp = gen_rtx_SET (SImode, reg, tmp1);
20042 RTX_FRAME_RELATED_P (tmp) = 1;
20043 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20044
20045 if (return_in_pc)
20046 {
20047 /* If return_in_pc, j must be PC_REGNUM. */
20048 gcc_assert (j == PC_REGNUM);
20049 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20050 XVECEXP (par, 0, 0) = ret_rtx;
20051 XVECEXP (par, 0, 1) = tmp;
20052 par = emit_jump_insn (par);
20053 }
20054 else
20055 {
20056 par = emit_insn (tmp);
20057 REG_NOTES (par) = dwarf;
20058 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20059 stack_pointer_rtx, stack_pointer_rtx);
20060 }
20061
20062 }
20063 else if ((num_regs % 2) == 1 && return_in_pc)
20064 {
20065 /* Two registers remain to be popped (the last saved register and PC), so
20066 generate the pattern pop_multiple_with_stack_update_and_return to pop into PC. */
20067 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20068 }
20069
20070 return;
20071 }
20072
20073 /* LDRD in ARM mode needs consecutive registers as operands. This function
20074 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20075 offset addressing and then generates one separate stack update. This provides
20076 more scheduling freedom, compared to writeback on every load. However,
20077 if the function returns using load into PC directly
20078 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20079 before the last load. TODO: Add a peephole optimization to recognize
20080 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20081 peephole optimization to merge the load at stack-offset zero
20082 with the stack update instruction using load with writeback
20083 in post-index addressing mode. */
20084 static void
20085 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20086 {
20087 int j = 0;
20088 int offset = 0;
20089 rtx par = NULL_RTX;
20090 rtx dwarf = NULL_RTX;
20091 rtx tmp, mem;
20092
20093 /* Restore saved registers. */
20094 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20095 j = 0;
20096 while (j <= LAST_ARM_REGNUM)
20097 if (saved_regs_mask & (1 << j))
20098 {
20099 if ((j % 2) == 0
20100 && (saved_regs_mask & (1 << (j + 1)))
20101 && (j + 1) != PC_REGNUM)
20102 {
20103 /* The current register and the next register form a register pair for which
20104 LDRD can be generated. PC is always the last register popped, and
20105 we handle it separately. */
20106 if (offset > 0)
20107 mem = gen_frame_mem (DImode,
20108 plus_constant (Pmode,
20109 stack_pointer_rtx,
20110 offset));
20111 else
20112 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20113
20114 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20115 tmp = emit_insn (tmp);
20116 RTX_FRAME_RELATED_P (tmp) = 1;
20117
20118 /* Generate dwarf info. */
20119
20120 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20121 gen_rtx_REG (SImode, j),
20122 NULL_RTX);
20123 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20124 gen_rtx_REG (SImode, j + 1),
20125 dwarf);
20126
20127 REG_NOTES (tmp) = dwarf;
20128
20129 offset += 8;
20130 j += 2;
20131 }
20132 else if (j != PC_REGNUM)
20133 {
20134 /* Emit a single word load. */
20135 if (offset > 0)
20136 mem = gen_frame_mem (SImode,
20137 plus_constant (Pmode,
20138 stack_pointer_rtx,
20139 offset));
20140 else
20141 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20142
20143 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20144 tmp = emit_insn (tmp);
20145 RTX_FRAME_RELATED_P (tmp) = 1;
20146
20147 /* Generate dwarf info. */
20148 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20149 gen_rtx_REG (SImode, j),
20150 NULL_RTX);
20151
20152 offset += 4;
20153 j += 1;
20154 }
20155 else /* j == PC_REGNUM */
20156 j++;
20157 }
20158 else
20159 j++;
20160
20161 /* Update the stack. */
20162 if (offset > 0)
20163 {
20164 tmp = gen_rtx_SET (Pmode,
20165 stack_pointer_rtx,
20166 plus_constant (Pmode,
20167 stack_pointer_rtx,
20168 offset));
20169 tmp = emit_insn (tmp);
20170 arm_add_cfa_adjust_cfa_note (tmp, offset,
20171 stack_pointer_rtx, stack_pointer_rtx);
20172 offset = 0;
20173 }
20174
20175 if (saved_regs_mask & (1 << PC_REGNUM))
20176 {
20177 /* Only PC is to be popped. */
20178 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20179 XVECEXP (par, 0, 0) = ret_rtx;
20180 tmp = gen_rtx_SET (SImode,
20181 gen_rtx_REG (SImode, PC_REGNUM),
20182 gen_frame_mem (SImode,
20183 gen_rtx_POST_INC (SImode,
20184 stack_pointer_rtx)));
20185 RTX_FRAME_RELATED_P (tmp) = 1;
20186 XVECEXP (par, 0, 1) = tmp;
20187 par = emit_jump_insn (par);
20188
20189 /* Generate dwarf info. */
20190 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20191 gen_rtx_REG (SImode, PC_REGNUM),
20192 NULL_RTX);
20193 REG_NOTES (par) = dwarf;
20194 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20195 stack_pointer_rtx, stack_pointer_rtx);
20196 }
20197 }
20198
20199 /* Calculate the size of the return value that is passed in registers. */
20200 static unsigned
20201 arm_size_return_regs (void)
20202 {
20203 enum machine_mode mode;
20204
20205 if (crtl->return_rtx != 0)
20206 mode = GET_MODE (crtl->return_rtx);
20207 else
20208 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20209
20210 return GET_MODE_SIZE (mode);
20211 }
20212
20213 /* Return true if the current function needs to save/restore LR. */
20214 static bool
20215 thumb_force_lr_save (void)
20216 {
20217 return !cfun->machine->lr_save_eliminated
20218 && (!leaf_function_p ()
20219 || thumb_far_jump_used_p ()
20220 || df_regs_ever_live_p (LR_REGNUM));
20221 }
20222
20223 /* Return true if CALL is an indirect tail call; in that
20224 case we do not know whether r3 will be available at
20225 the call site.  */
20226 static bool
20227 is_indirect_tailcall_p (rtx call)
20228 {
20229 rtx pat = PATTERN (call);
20230
20231 /* Indirect tail call. */
20232 pat = XVECEXP (pat, 0, 0);
20233 if (GET_CODE (pat) == SET)
20234 pat = SET_SRC (pat);
20235
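/* PAT is now the CALL rtx, (call (mem (address)) ...); the call is
   indirect when ADDRESS is held in a register rather than being a
   SYMBOL_REF.  */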
20236 pat = XEXP (XEXP (pat, 0), 0);
20237 return REG_P (pat);
20238 }
20239
20240 /* Return true if r3 is used by any of the tail call insns in the
20241 current function. */
20242 static bool
20243 any_sibcall_could_use_r3 (void)
20244 {
20245 edge_iterator ei;
20246 edge e;
20247
20248 if (!crtl->tail_call_emit)
20249 return false;
20250 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20251 if (e->flags & EDGE_SIBCALL)
20252 {
20253 rtx call = BB_END (e->src);
20254 if (!CALL_P (call))
20255 call = prev_nonnote_nondebug_insn (call);
20256 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20257 if (find_regno_fusage (call, USE, 3)
20258 || is_indirect_tailcall_p (call))
20259 return true;
20260 }
20261 return false;
20262 }
20263
20264
20265 /* Compute the distance from register FROM to register TO.
20266 These can be the arg pointer (26), the soft frame pointer (25),
20267 the stack pointer (13) or the hard frame pointer (11).
20268 In thumb mode r7 is used as the soft frame pointer, if needed.
20269 Typical stack layout looks like this:
20270
20271 old stack pointer -> | |
20272 ----
20273 | | \
20274 | | saved arguments for
20275 | | vararg functions
20276 | | /
20277 --
20278 hard FP & arg pointer -> | | \
20279 | | stack
20280 | | frame
20281 | | /
20282 --
20283 | | \
20284 | | call saved
20285 | | registers
20286 soft frame pointer -> | | /
20287 --
20288 | | \
20289 | | local
20290 | | variables
20291 locals base pointer -> | | /
20292 --
20293 | | \
20294 | | outgoing
20295 | | arguments
20296 current stack pointer -> | | /
20297 --
20298
20299 For a given function some or all of these stack components
20300 may not be needed, giving rise to the possibility of
20301 eliminating some of the registers.
20302
20303 The values returned by this function must reflect the behavior
20304 of arm_expand_prologue() and arm_compute_save_reg_mask().
20305
20306 The sign of the number returned reflects the direction of stack
20307 growth, so the values are positive for all eliminations except
20308 from the soft frame pointer to the hard frame pointer.
20309
20310 SFP may point just inside the local variables block to ensure correct
20311 alignment. */
20312
20313
20314 /* Calculate stack offsets. These are used to calculate register elimination
20315 offsets and in prologue/epilogue code. Also calculates which registers
20316 should be saved. */
20317
20318 static arm_stack_offsets *
20319 arm_get_frame_offsets (void)
20320 {
20321 struct arm_stack_offsets *offsets;
20322 unsigned long func_type;
20323 int leaf;
20324 int saved;
20325 int core_saved;
20326 HOST_WIDE_INT frame_size;
20327 int i;
20328
20329 offsets = &cfun->machine->stack_offsets;
20330
20331 /* We need to know if we are a leaf function. Unfortunately, it
20332 is possible to be called after start_sequence has been called,
20333 which causes get_insns to return the insns for the sequence,
20334 not the function, which will cause leaf_function_p to return
20335 the incorrect result.
20336
20337 However, we only need to know about leaf functions once reload has
20338 completed, and the frame size cannot be changed after that time, so
20339 we can safely use the cached value.  */
20340
20341 if (reload_completed)
20342 return offsets;
20343
20344 /* Initially this is the size of the local variables.  It will be translated
20345 into an offset once we have determined the size of the preceding data.  */
20346 frame_size = ROUND_UP_WORD (get_frame_size ());
20347
20348 leaf = leaf_function_p ();
20349
20350 /* Space for variadic functions. */
20351 offsets->saved_args = crtl->args.pretend_args_size;
20352
20353 /* In Thumb mode this is incorrect, but never used. */
20354 offsets->frame
20355 = (offsets->saved_args
20356 + arm_compute_static_chain_stack_bytes ()
20357 + (frame_pointer_needed ? 4 : 0));
20358
20359 if (TARGET_32BIT)
20360 {
20361 unsigned int regno;
20362
20363 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20364 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20365 saved = core_saved;
20366
20367 /* We know that SP will be doubleword aligned on entry, and we must
20368 preserve that condition at any subroutine call. We also require the
20369 soft frame pointer to be doubleword aligned. */
20370
20371 if (TARGET_REALLY_IWMMXT)
20372 {
20373 /* Check for the call-saved iWMMXt registers. */
20374 for (regno = FIRST_IWMMXT_REGNUM;
20375 regno <= LAST_IWMMXT_REGNUM;
20376 regno++)
20377 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20378 saved += 8;
20379 }
20380
20381 func_type = arm_current_func_type ();
20382 /* Space for saved VFP registers. */
20383 if (! IS_VOLATILE (func_type)
20384 && TARGET_HARD_FLOAT && TARGET_VFP)
20385 saved += arm_get_vfp_saved_size ();
20386 }
20387 else /* TARGET_THUMB1 */
20388 {
20389 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20390 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20391 saved = core_saved;
20392 if (TARGET_BACKTRACE)
20393 saved += 16;
20394 }
20395
20396 /* Saved registers include the stack frame. */
20397 offsets->saved_regs
20398 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20399 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20400
20401 /* A leaf function does not need any stack alignment if it has nothing
20402 on the stack. */
20403 if (leaf && frame_size == 0
20404 /* However if it calls alloca(), we have a dynamically allocated
20405 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20406 && ! cfun->calls_alloca)
20407 {
20408 offsets->outgoing_args = offsets->soft_frame;
20409 offsets->locals_base = offsets->soft_frame;
20410 return offsets;
20411 }
20412
20413 /* Ensure SFP has the correct alignment. */
20414 if (ARM_DOUBLEWORD_ALIGN
20415 && (offsets->soft_frame & 7))
20416 {
20417 offsets->soft_frame += 4;
20418 /* Try to align stack by pushing an extra reg. Don't bother doing this
20419 when there is a stack frame as the alignment will be rolled into
20420 the normal stack adjustment. */
20421 if (frame_size + crtl->outgoing_args_size == 0)
20422 {
20423 int reg = -1;
20424
20425 /* If it is safe to use r3, then do so. This sometimes
20426 generates better code on Thumb-2 by avoiding the need to
20427 use 32-bit push/pop instructions. */
20428 if (! any_sibcall_could_use_r3 ()
20429 && arm_size_return_regs () <= 12
20430 && (offsets->saved_regs_mask & (1 << 3)) == 0
20431 && (TARGET_THUMB2
20432 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20433 {
20434 reg = 3;
20435 }
20436 else
20437 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20438 {
20439 /* Avoid fixed registers; they may be changed at
20440 arbitrary times so it's unsafe to restore them
20441 during the epilogue. */
20442 if (!fixed_regs[i]
20443 && (offsets->saved_regs_mask & (1 << i)) == 0)
20444 {
20445 reg = i;
20446 break;
20447 }
20448 }
20449
20450 if (reg != -1)
20451 {
20452 offsets->saved_regs += 4;
20453 offsets->saved_regs_mask |= (1 << reg);
20454 }
20455 }
20456 }
20457
20458 offsets->locals_base = offsets->soft_frame + frame_size;
20459 offsets->outgoing_args = (offsets->locals_base
20460 + crtl->outgoing_args_size);
20461
20462 if (ARM_DOUBLEWORD_ALIGN)
20463 {
20464 /* Ensure SP remains doubleword aligned. */
20465 if (offsets->outgoing_args & 7)
20466 offsets->outgoing_args += 4;
20467 gcc_assert (!(offsets->outgoing_args & 7));
20468 }
20469
20470 return offsets;
20471 }
20472
20473
20474 /* Calculate the relative offsets for the different stack pointers. Positive
20475 offsets are in the direction of stack growth. */
20476
20477 HOST_WIDE_INT
20478 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20479 {
20480 arm_stack_offsets *offsets;
20481
20482 offsets = arm_get_frame_offsets ();
20483
20484 /* OK, now we have enough information to compute the distances.
20485 There must be an entry in these switch tables for each pair
20486 of registers in ELIMINABLE_REGS, even if some of the entries
20487 seem to be redundant or useless. */
20488 switch (from)
20489 {
20490 case ARG_POINTER_REGNUM:
20491 switch (to)
20492 {
20493 case THUMB_HARD_FRAME_POINTER_REGNUM:
20494 return 0;
20495
20496 case FRAME_POINTER_REGNUM:
20497 /* This is the reverse of the soft frame pointer
20498 to hard frame pointer elimination below. */
20499 return offsets->soft_frame - offsets->saved_args;
20500
20501 case ARM_HARD_FRAME_POINTER_REGNUM:
20502 /* This is only non-zero in the case where the static chain register
20503 is stored above the frame. */
20504 return offsets->frame - offsets->saved_args - 4;
20505
20506 case STACK_POINTER_REGNUM:
20507 /* If nothing has been pushed on the stack at all
20508 then this will return -4. This *is* correct! */
20509 return offsets->outgoing_args - (offsets->saved_args + 4);
20510
20511 default:
20512 gcc_unreachable ();
20513 }
20514 gcc_unreachable ();
20515
20516 case FRAME_POINTER_REGNUM:
20517 switch (to)
20518 {
20519 case THUMB_HARD_FRAME_POINTER_REGNUM:
20520 return 0;
20521
20522 case ARM_HARD_FRAME_POINTER_REGNUM:
20523 /* The hard frame pointer points to the top entry in the
20524 stack frame. The soft frame pointer to the bottom entry
20525 in the stack frame. If there is no stack frame at all,
20526 then they are identical. */
20527
20528 return offsets->frame - offsets->soft_frame;
20529
20530 case STACK_POINTER_REGNUM:
20531 return offsets->outgoing_args - offsets->soft_frame;
20532
20533 default:
20534 gcc_unreachable ();
20535 }
20536 gcc_unreachable ();
20537
20538 default:
20539 /* You cannot eliminate from the stack pointer.
20540 In theory you could eliminate from the hard frame
20541 pointer to the stack pointer, but this will never
20542 happen, since if a stack frame is not needed the
20543 hard frame pointer will never be used. */
20544 gcc_unreachable ();
20545 }
20546 }
20547
20548 /* Given FROM and TO register numbers, say whether this elimination is
20549 allowed. Frame pointer elimination is automatically handled.
20550
20551 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20552 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20553 pointer, we must eliminate FRAME_POINTER_REGNUM into
20554 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20555 ARG_POINTER_REGNUM. */
20556
20557 bool
20558 arm_can_eliminate (const int from, const int to)
20559 {
20560 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20561 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20562 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20563 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20564 true);
20565 }
20566
20567 /* Emit RTL to save coprocessor registers on function entry. Returns the
20568 number of bytes pushed. */
20569
20570 static int
20571 arm_save_coproc_regs(void)
20572 {
20573 int saved_size = 0;
20574 unsigned reg;
20575 unsigned start_reg;
20576 rtx insn;
20577
20578 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20579 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20580 {
20581 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20582 insn = gen_rtx_MEM (V2SImode, insn);
20583 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20584 RTX_FRAME_RELATED_P (insn) = 1;
20585 saved_size += 8;
20586 }
20587
20588 if (TARGET_HARD_FLOAT && TARGET_VFP)
20589 {
20590 start_reg = FIRST_VFP_REGNUM;
20591
20592 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20593 {
20594 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20595 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20596 {
20597 if (start_reg != reg)
20598 saved_size += vfp_emit_fstmd (start_reg,
20599 (reg - start_reg) / 2);
20600 start_reg = reg + 2;
20601 }
20602 }
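/* Illustrative example: if only d8-d11 need saving they form one
   contiguous run and are stored with a single FSTMD.  */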
20603 if (start_reg != reg)
20604 saved_size += vfp_emit_fstmd (start_reg,
20605 (reg - start_reg) / 2);
20606 }
20607 return saved_size;
20608 }
20609
20610
20611 /* Set the Thumb frame pointer from the stack pointer. */
20612
20613 static void
20614 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20615 {
20616 HOST_WIDE_INT amount;
20617 rtx insn, dwarf;
20618
20619 amount = offsets->outgoing_args - offsets->locals_base;
20620 if (amount < 1024)
20621 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20622 stack_pointer_rtx, GEN_INT (amount)));
20623 else
20624 {
20625 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20626 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20627 expects the first two operands to be the same. */
20628 if (TARGET_THUMB2)
20629 {
20630 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20631 stack_pointer_rtx,
20632 hard_frame_pointer_rtx));
20633 }
20634 else
20635 {
20636 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20637 hard_frame_pointer_rtx,
20638 stack_pointer_rtx));
20639 }
20640 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
20641 plus_constant (Pmode, stack_pointer_rtx, amount));
20642 RTX_FRAME_RELATED_P (dwarf) = 1;
20643 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20644 }
20645
20646 RTX_FRAME_RELATED_P (insn) = 1;
20647 }
20648
20649 /* Generate the prologue instructions for entry into an ARM or Thumb-2
20650 function. */
20651 void
20652 arm_expand_prologue (void)
20653 {
20654 rtx amount;
20655 rtx insn;
20656 rtx ip_rtx;
20657 unsigned long live_regs_mask;
20658 unsigned long func_type;
20659 int fp_offset = 0;
20660 int saved_pretend_args = 0;
20661 int saved_regs = 0;
20662 unsigned HOST_WIDE_INT args_to_push;
20663 arm_stack_offsets *offsets;
20664
20665 func_type = arm_current_func_type ();
20666
20667 /* Naked functions don't have prologues. */
20668 if (IS_NAKED (func_type))
20669 return;
20670
20671 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
20672 args_to_push = crtl->args.pretend_args_size;
20673
20674 /* Compute which registers we will have to save onto the stack.  */
20675 offsets = arm_get_frame_offsets ();
20676 live_regs_mask = offsets->saved_regs_mask;
20677
20678 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
20679
20680 if (IS_STACKALIGN (func_type))
20681 {
20682 rtx r0, r1;
20683
20684 /* Handle a word-aligned stack pointer. We generate the following:
20685
20686 mov r0, sp
20687 bic r1, r0, #7
20688 mov sp, r1
20689 <save and restore r0 in normal prologue/epilogue>
20690 mov sp, r0
20691 bx lr
20692
20693 The unwinder doesn't need to know about the stack realignment.
20694 Just tell it we saved SP in r0. */
20695 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
20696
20697 r0 = gen_rtx_REG (SImode, 0);
20698 r1 = gen_rtx_REG (SImode, 1);
20699
20700 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
20701 RTX_FRAME_RELATED_P (insn) = 1;
20702 add_reg_note (insn, REG_CFA_REGISTER, NULL);
20703
20704 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
20705
20706 /* ??? The CFA changes here, which may cause GDB to conclude that it
20707 has entered a different function. That said, the unwind info is
20708 correct, individually, before and after this instruction because
20709 we've described the save of SP, which will override the default
20710 handling of SP as restoring from the CFA. */
20711 emit_insn (gen_movsi (stack_pointer_rtx, r1));
20712 }
20713
20714 /* For APCS frames, if IP register is clobbered
20715 when creating frame, save that register in a special
20716 way. */
20717 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
20718 {
20719 if (IS_INTERRUPT (func_type))
20720 {
20721 /* Interrupt functions must not corrupt any registers.
20722 Creating a frame pointer however, corrupts the IP
20723 register, so we must push it first. */
20724 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
20725
20726 /* Do not set RTX_FRAME_RELATED_P on this insn.
20727 The dwarf stack unwinding code only wants to see one
20728 stack decrement per function, and this is not it. If
20729 this instruction is labeled as being part of the frame
20730 creation sequence then dwarf2out_frame_debug_expr will
20731 die when it encounters the assignment of IP to FP
20732 later on, since the use of SP here establishes SP as
20733 the CFA register and not IP.
20734
20735 Anyway this instruction is not really part of the stack
20736 frame creation although it is part of the prologue. */
20737 }
20738 else if (IS_NESTED (func_type))
20739 {
20740 /* The static chain register is the same as the IP register
20741 used as a scratch register during stack frame creation.
20742 To get around this, we need to find somewhere to store IP
20743 whilst the frame is being created. We try the following
20744 places in order:
20745
20746 1. The last argument register r3 if it is available.
20747 2. A slot on the stack above the frame if there are no
20748 arguments to push onto the stack.
20749 3. Register r3 again, after pushing the argument registers
20750 onto the stack, if this is a varargs function.
20751 4. The last slot on the stack created for the arguments to
20752 push, if this isn't a varargs function.
20753
20754 Note - we only need to tell the dwarf2 backend about the SP
20755 adjustment in the second variant; the static chain register
20756 doesn't need to be unwound, as it doesn't contain a value
20757 inherited from the caller. */
20758
20759 if (!arm_r3_live_at_start_p ())
20760 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20761 else if (args_to_push == 0)
20762 {
20763 rtx addr, dwarf;
20764
20765 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
20766 saved_regs += 4;
20767
20768 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20769 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20770 fp_offset = 4;
20771
20772 /* Just tell the dwarf backend that we adjusted SP. */
20773 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20774 plus_constant (Pmode, stack_pointer_rtx,
20775 -fp_offset));
20776 RTX_FRAME_RELATED_P (insn) = 1;
20777 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20778 }
20779 else
20780 {
20781 /* Store the args on the stack. */
20782 if (cfun->machine->uses_anonymous_args)
20783 {
20784 insn
20785 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
20786 (0xf0 >> (args_to_push / 4)) & 0xf);
20787 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
20788 saved_pretend_args = 1;
20789 }
20790 else
20791 {
20792 rtx addr, dwarf;
20793
20794 if (args_to_push == 4)
20795 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20796 else
20797 addr
20798 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
20799 plus_constant (Pmode,
20800 stack_pointer_rtx,
20801 -args_to_push));
20802
20803 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
20804
20805 /* Just tell the dwarf backend that we adjusted SP. */
20806 dwarf
20807 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20808 plus_constant (Pmode, stack_pointer_rtx,
20809 -args_to_push));
20810 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20811 }
20812
20813 RTX_FRAME_RELATED_P (insn) = 1;
20814 fp_offset = args_to_push;
20815 args_to_push = 0;
20816 }
20817 }
20818
20819 insn = emit_set_insn (ip_rtx,
20820 plus_constant (Pmode, stack_pointer_rtx,
20821 fp_offset));
20822 RTX_FRAME_RELATED_P (insn) = 1;
20823 }
20824
20825 if (args_to_push)
20826 {
20827 /* Push the argument registers, or reserve space for them. */
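/* Illustrative example: args_to_push == 8 (two anonymous argument
   registers) gives a mask of (0xf0 >> 2) & 0xf == 0xc, i.e. push
   {r2, r3}.  */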
20828 if (cfun->machine->uses_anonymous_args)
20829 insn = emit_multi_reg_push
20830 ((0xf0 >> (args_to_push / 4)) & 0xf,
20831 (0xf0 >> (args_to_push / 4)) & 0xf);
20832 else
20833 insn = emit_insn
20834 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20835 GEN_INT (- args_to_push)));
20836 RTX_FRAME_RELATED_P (insn) = 1;
20837 }
20838
20839 /* If this is an interrupt service routine, and the link register
20840 is going to be pushed, and we're not generating an extra
20841 push of IP (needed when a frame is needed and the frame layout is APCS),
20842 subtracting four from LR now will mean that the function return
20843 can be done with a single instruction. */
20844 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
20845 && (live_regs_mask & (1 << LR_REGNUM)) != 0
20846 && !(frame_pointer_needed && TARGET_APCS_FRAME)
20847 && TARGET_ARM)
20848 {
20849 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
20850
20851 emit_set_insn (lr, plus_constant (SImode, lr, -4));
20852 }
20853
20854 if (live_regs_mask)
20855 {
20856 unsigned long dwarf_regs_mask = live_regs_mask;
20857
20858 saved_regs += bit_count (live_regs_mask) * 4;
20859 if (optimize_size && !frame_pointer_needed
20860 && saved_regs == offsets->saved_regs - offsets->saved_args)
20861 {
20862 /* If no coprocessor registers are being pushed and we don't have
20863 to worry about a frame pointer then push extra registers to
20864 create the stack frame.  This is done in a way that does not
20865 alter the frame layout, so is independent of the epilogue. */
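/* Illustrative example: with an 8-byte frame and r0/r1 not live, the
   code below sets n to 2 and adds r0-r1 to LIVE_REGS_MASK, so the
   push itself creates the frame.  */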
20866 int n;
20867 int frame;
20868 n = 0;
20869 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
20870 n++;
20871 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
20872 if (frame && n * 4 >= frame)
20873 {
20874 n = frame / 4;
20875 live_regs_mask |= (1 << n) - 1;
20876 saved_regs += frame;
20877 }
20878 }
20879
20880 if (TARGET_LDRD
20881 && current_tune->prefer_ldrd_strd
20882 && !optimize_function_for_size_p (cfun))
20883 {
20884 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
20885 if (TARGET_THUMB2)
20886 thumb2_emit_strd_push (live_regs_mask);
20887 else if (TARGET_ARM
20888 && !TARGET_APCS_FRAME
20889 && !IS_INTERRUPT (func_type))
20890 arm_emit_strd_push (live_regs_mask);
20891 else
20892 {
20893 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
20894 RTX_FRAME_RELATED_P (insn) = 1;
20895 }
20896 }
20897 else
20898 {
20899 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
20900 RTX_FRAME_RELATED_P (insn) = 1;
20901 }
20902 }
20903
20904 if (! IS_VOLATILE (func_type))
20905 saved_regs += arm_save_coproc_regs ();
20906
20907 if (frame_pointer_needed && TARGET_ARM)
20908 {
20909 /* Create the new frame pointer. */
20910 if (TARGET_APCS_FRAME)
20911 {
20912 insn = GEN_INT (-(4 + args_to_push + fp_offset));
20913 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
20914 RTX_FRAME_RELATED_P (insn) = 1;
20915
20916 if (IS_NESTED (func_type))
20917 {
20918 /* Recover the static chain register. */
20919 if (!arm_r3_live_at_start_p () || saved_pretend_args)
20920 insn = gen_rtx_REG (SImode, 3);
20921 else
20922 {
20923 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
20924 insn = gen_frame_mem (SImode, insn);
20925 }
20926 emit_set_insn (ip_rtx, insn);
20927 /* Add a USE to stop propagate_one_insn() from barfing. */
20928 emit_insn (gen_force_register_use (ip_rtx));
20929 }
20930 }
20931 else
20932 {
20933 insn = GEN_INT (saved_regs - 4);
20934 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20935 stack_pointer_rtx, insn));
20936 RTX_FRAME_RELATED_P (insn) = 1;
20937 }
20938 }
20939
20940 if (flag_stack_usage_info)
20941 current_function_static_stack_size
20942 = offsets->outgoing_args - offsets->saved_args;
20943
20944 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
20945 {
20946 /* This add can produce multiple insns for a large constant, so we
20947 need to get tricky. */
20948 rtx last = get_last_insn ();
20949
20950 amount = GEN_INT (offsets->saved_args + saved_regs
20951 - offsets->outgoing_args);
20952
20953 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
20954 amount));
20955 do
20956 {
20957 last = last ? NEXT_INSN (last) : get_insns ();
20958 RTX_FRAME_RELATED_P (last) = 1;
20959 }
20960 while (last != insn);
20961
20962 /* If the frame pointer is needed, emit a special barrier that
20963 will prevent the scheduler from moving stores to the frame
20964 before the stack adjustment. */
20965 if (frame_pointer_needed)
20966 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
20967 hard_frame_pointer_rtx));
20968 }
20969
20970
20971 if (frame_pointer_needed && TARGET_THUMB2)
20972 thumb_set_frame_pointer (offsets);
20973
20974 if (flag_pic && arm_pic_register != INVALID_REGNUM)
20975 {
20976 unsigned long mask;
20977
20978 mask = live_regs_mask;
20979 mask &= THUMB2_WORK_REGS;
20980 if (!IS_NESTED (func_type))
20981 mask |= (1 << IP_REGNUM);
20982 arm_load_pic_register (mask);
20983 }
20984
20985 /* If we are profiling, make sure no instructions are scheduled before
20986 the call to mcount. Similarly if the user has requested no
20987 scheduling in the prolog. Similarly if we want non-call exceptions
20988 using the EABI unwinder, to prevent faulting instructions from being
20989 swapped with a stack adjustment. */
20990 if (crtl->profile || !TARGET_SCHED_PROLOG
20991 || (arm_except_unwind_info (&global_options) == UI_TARGET
20992 && cfun->can_throw_non_call_exceptions))
20993 emit_insn (gen_blockage ());
20994
20995 /* If the link register is being kept alive, with the return address in it,
20996 then make sure that it does not get reused by the ce2 pass. */
20997 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
20998 cfun->machine->lr_save_eliminated = 1;
20999 }
21000 \f
21001 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21002 static void
21003 arm_print_condition (FILE *stream)
21004 {
21005 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21006 {
21007 /* Branch conversion is not implemented for Thumb-2. */
21008 if (TARGET_THUMB)
21009 {
21010 output_operand_lossage ("predicated Thumb instruction");
21011 return;
21012 }
21013 if (current_insn_predicate != NULL)
21014 {
21015 output_operand_lossage
21016 ("predicated instruction in conditional sequence");
21017 return;
21018 }
21019
21020 fputs (arm_condition_codes[arm_current_cc], stream);
21021 }
21022 else if (current_insn_predicate)
21023 {
21024 enum arm_cond_code code;
21025
21026 if (TARGET_THUMB1)
21027 {
21028 output_operand_lossage ("predicated Thumb instruction");
21029 return;
21030 }
21031
21032 code = get_arm_condition_code (current_insn_predicate);
21033 fputs (arm_condition_codes[code], stream);
21034 }
21035 }
21036
21037
21038 /* If CODE is 'd', then the X is a condition operand and the instruction
21039 should only be executed if the condition is true.
21040 if CODE is 'D', then the X is a condition operand and the instruction
21041 should only be executed if the condition is false: however, if the mode
21042 of the comparison is CCFPEmode, then always execute the instruction -- we
21043 do this because in these circumstances !GE does not necessarily imply LT;
21044 in these cases the instruction pattern will take care to make sure that
21045 an instruction containing %d will follow, thereby undoing the effects of
21046 doing this instruction unconditionally.
21047 If CODE is 'N' then X is a floating point operand that must be negated
21048 before output.
21049 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21050 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21051 static void
21052 arm_print_operand (FILE *stream, rtx x, int code)
21053 {
21054 switch (code)
21055 {
21056 case '@':
21057 fputs (ASM_COMMENT_START, stream);
21058 return;
21059
21060 case '_':
21061 fputs (user_label_prefix, stream);
21062 return;
21063
21064 case '|':
21065 fputs (REGISTER_PREFIX, stream);
21066 return;
21067
21068 case '?':
21069 arm_print_condition (stream);
21070 return;
21071
21072 case '(':
21073 /* Nothing in unified syntax, otherwise the current condition code. */
21074 if (!TARGET_UNIFIED_ASM)
21075 arm_print_condition (stream);
21076 break;
21077
21078 case ')':
21079 /* The current condition code in unified syntax, otherwise nothing. */
21080 if (TARGET_UNIFIED_ASM)
21081 arm_print_condition (stream);
21082 break;
21083
21084 case '.':
21085 /* The current condition code for a condition code setting instruction.
21086 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21087 if (TARGET_UNIFIED_ASM)
21088 {
21089 fputc('s', stream);
21090 arm_print_condition (stream);
21091 }
21092 else
21093 {
21094 arm_print_condition (stream);
21095 fputc('s', stream);
21096 }
21097 return;
21098
21099 case '!':
21100 /* If the instruction is conditionally executed then print
21101 the current condition code, otherwise print 's'. */
21102 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21103 if (current_insn_predicate)
21104 arm_print_condition (stream);
21105 else
21106 fputc('s', stream);
21107 break;
21108
21109 /* %# is a "break" sequence. It doesn't output anything, but is used to
21110 separate e.g. operand numbers from following text, if that text consists
21111 of further digits which we don't want to be part of the operand
21112 number. */
21113 case '#':
21114 return;
21115
21116 case 'N':
21117 {
21118 REAL_VALUE_TYPE r;
21119 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21120 r = real_value_negate (&r);
21121 fprintf (stream, "%s", fp_const_from_val (&r));
21122 }
21123 return;
21124
21125 /* An integer or symbol address without a preceding # sign. */
21126 case 'c':
21127 switch (GET_CODE (x))
21128 {
21129 case CONST_INT:
21130 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21131 break;
21132
21133 case SYMBOL_REF:
21134 output_addr_const (stream, x);
21135 break;
21136
21137 case CONST:
21138 if (GET_CODE (XEXP (x, 0)) == PLUS
21139 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21140 {
21141 output_addr_const (stream, x);
21142 break;
21143 }
21144 /* Fall through. */
21145
21146 default:
21147 output_operand_lossage ("Unsupported operand for code '%c'", code);
21148 }
21149 return;
21150
21151 /* An integer that we want to print in HEX. */
21152 case 'x':
21153 switch (GET_CODE (x))
21154 {
21155 case CONST_INT:
21156 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21157 break;
21158
21159 default:
21160 output_operand_lossage ("Unsupported operand for code '%c'", code);
21161 }
21162 return;
21163
21164 case 'B':
21165 if (CONST_INT_P (x))
21166 {
21167 HOST_WIDE_INT val;
21168 val = ARM_SIGN_EXTEND (~INTVAL (x));
21169 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21170 }
21171 else
21172 {
21173 putc ('~', stream);
21174 output_addr_const (stream, x);
21175 }
21176 return;
21177
21178 case 'L':
21179 /* The low 16 bits of an immediate constant. */
21180 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21181 return;
21182
21183 case 'i':
21184 fprintf (stream, "%s", arithmetic_instr (x, 1));
21185 return;
21186
21187 case 'I':
21188 fprintf (stream, "%s", arithmetic_instr (x, 0));
21189 return;
21190
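/* Print the shift field of an operand, e.g. ", lsl #2" (illustrative
   output).  */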
21191 case 'S':
21192 {
21193 HOST_WIDE_INT val;
21194 const char *shift;
21195
21196 shift = shift_op (x, &val);
21197
21198 if (shift)
21199 {
21200 fprintf (stream, ", %s ", shift);
21201 if (val == -1)
21202 arm_print_operand (stream, XEXP (x, 1), 0);
21203 else
21204 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21205 }
21206 }
21207 return;
21208
21209 /* An explanation of the 'Q', 'R' and 'H' register operands:
21210
21211 In a pair of registers containing a DI or DF value the 'Q'
21212 operand returns the register number of the register containing
21213 the least significant part of the value. The 'R' operand returns
21214 the register number of the register containing the most
21215 significant part of the value.
21216
21217 The 'H' operand returns the higher of the two register numbers.
21218 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21219 same as the 'Q' operand, since the most significant part of the
21220 value is held in the lower number register. The reverse is true
21221 on systems where WORDS_BIG_ENDIAN is false.
21222
21223 The purpose of these operands is to distinguish between cases
21224 where the endian-ness of the values is important (for example
21225 when they are added together), and cases where the endian-ness
21226 is irrelevant, but the order of register operations is important.
21227 For example when loading a value from memory into a register
21228 pair, the endian-ness does not matter. Provided that the value
21229 from the lower memory address is put into the lower numbered
21230 register, and the value from the higher address is put into the
21231 higher numbered register, the load will work regardless of whether
21232 the value being loaded is big-wordian or little-wordian. The
21233 order of the two register loads can matter however, if the address
21234 of the memory location is actually held in one of the registers
21235 being overwritten by the load.
21236
21237 The 'Q' and 'R' constraints are also available for 64-bit
21238 constants. */
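/* Illustrative example: on a little-endian target a DImode value held
   in {r4, r5} prints r4 for 'Q' and r5 for both 'R' and 'H'.  */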
21239 case 'Q':
21240 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21241 {
21242 rtx part = gen_lowpart (SImode, x);
21243 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21244 return;
21245 }
21246
21247 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21248 {
21249 output_operand_lossage ("invalid operand for code '%c'", code);
21250 return;
21251 }
21252
21253 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21254 return;
21255
21256 case 'R':
21257 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21258 {
21259 enum machine_mode mode = GET_MODE (x);
21260 rtx part;
21261
21262 if (mode == VOIDmode)
21263 mode = DImode;
21264 part = gen_highpart_mode (SImode, mode, x);
21265 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21266 return;
21267 }
21268
21269 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21270 {
21271 output_operand_lossage ("invalid operand for code '%c'", code);
21272 return;
21273 }
21274
21275 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21276 return;
21277
21278 case 'H':
21279 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21280 {
21281 output_operand_lossage ("invalid operand for code '%c'", code);
21282 return;
21283 }
21284
21285 asm_fprintf (stream, "%r", REGNO (x) + 1);
21286 return;
21287
21288 case 'J':
21289 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21290 {
21291 output_operand_lossage ("invalid operand for code '%c'", code);
21292 return;
21293 }
21294
21295 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21296 return;
21297
21298 case 'K':
21299 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21300 {
21301 output_operand_lossage ("invalid operand for code '%c'", code);
21302 return;
21303 }
21304
21305 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21306 return;
21307
21308 case 'm':
21309 asm_fprintf (stream, "%r",
21310 REG_P (XEXP (x, 0))
21311 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21312 return;
21313
21314 case 'M':
21315 asm_fprintf (stream, "{%r-%r}",
21316 REGNO (x),
21317 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21318 return;
21319
21320 /* Like 'M', but writing doubleword vector registers, for use by Neon
21321 insns. */
21322 case 'h':
21323 {
21324 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21325 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21326 if (numregs == 1)
21327 asm_fprintf (stream, "{d%d}", regno);
21328 else
21329 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21330 }
21331 return;
21332
21333 case 'd':
21334 /* CONST_TRUE_RTX means always -- that's the default. */
21335 if (x == const_true_rtx)
21336 return;
21337
21338 if (!COMPARISON_P (x))
21339 {
21340 output_operand_lossage ("invalid operand for code '%c'", code);
21341 return;
21342 }
21343
21344 fputs (arm_condition_codes[get_arm_condition_code (x)],
21345 stream);
21346 return;
21347
21348 case 'D':
21349 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21350 want to do that. */
21351 if (x == const_true_rtx)
21352 {
21353 output_operand_lossage ("instruction never executed");
21354 return;
21355 }
21356 if (!COMPARISON_P (x))
21357 {
21358 output_operand_lossage ("invalid operand for code '%c'", code);
21359 return;
21360 }
21361
21362 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21363 (get_arm_condition_code (x))],
21364 stream);
21365 return;
21366
21367 case 's':
21368 case 'V':
21369 case 'W':
21370 case 'X':
21371 case 'Y':
21372 case 'Z':
21373 /* Former Maverick support, removed after GCC-4.7. */
21374 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21375 return;
21376
21377 case 'U':
21378 if (!REG_P (x)
21379 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21380 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21381 /* Bad value for wCG register number. */
21382 {
21383 output_operand_lossage ("invalid operand for code '%c'", code);
21384 return;
21385 }
21386
21387 else
21388 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21389 return;
21390
21391 /* Print an iWMMXt control register name. */
21392 case 'w':
21393 if (!CONST_INT_P (x)
21394 || INTVAL (x) < 0
21395 || INTVAL (x) >= 16)
21396 /* Bad value for wC register number. */
21397 {
21398 output_operand_lossage ("invalid operand for code '%c'", code);
21399 return;
21400 }
21401
21402 else
21403 {
21404 static const char * wc_reg_names [16] =
21405 {
21406 "wCID", "wCon", "wCSSF", "wCASF",
21407 "wC4", "wC5", "wC6", "wC7",
21408 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21409 "wC12", "wC13", "wC14", "wC15"
21410 };
21411
21412 fputs (wc_reg_names [INTVAL (x)], stream);
21413 }
21414 return;
21415
21416 /* Print the high single-precision register of a VFP double-precision
21417 register. */
21418 case 'p':
21419 {
21420 int mode = GET_MODE (x);
21421 int regno;
21422
21423 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21424 {
21425 output_operand_lossage ("invalid operand for code '%c'", code);
21426 return;
21427 }
21428
21429 regno = REGNO (x);
21430 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21431 {
21432 output_operand_lossage ("invalid operand for code '%c'", code);
21433 return;
21434 }
21435
21436 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21437 }
21438 return;
21439
21440 /* Print a VFP/Neon double precision or quad precision register name. */
21441 case 'P':
21442 case 'q':
21443 {
21444 int mode = GET_MODE (x);
21445 int is_quad = (code == 'q');
21446 int regno;
21447
21448 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21449 {
21450 output_operand_lossage ("invalid operand for code '%c'", code);
21451 return;
21452 }
21453
21454 if (!REG_P (x)
21455 || !IS_VFP_REGNUM (REGNO (x)))
21456 {
21457 output_operand_lossage ("invalid operand for code '%c'", code);
21458 return;
21459 }
21460
21461 regno = REGNO (x);
21462 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21463 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21464 {
21465 output_operand_lossage ("invalid operand for code '%c'", code);
21466 return;
21467 }
21468
21469 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21470 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21471 }
21472 return;
21473
21474 /* These two codes print the low/high doubleword register of a Neon quad
21475 register, respectively. For pair-structure types, can also print
21476 low/high quadword registers. */
21477 case 'e':
21478 case 'f':
21479 {
21480 int mode = GET_MODE (x);
21481 int regno;
21482
21483 if ((GET_MODE_SIZE (mode) != 16
21484 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21485 {
21486 output_operand_lossage ("invalid operand for code '%c'", code);
21487 return;
21488 }
21489
21490 regno = REGNO (x);
21491 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21492 {
21493 output_operand_lossage ("invalid operand for code '%c'", code);
21494 return;
21495 }
21496
21497 if (GET_MODE_SIZE (mode) == 16)
21498 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21499 + (code == 'f' ? 1 : 0));
21500 else
21501 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21502 + (code == 'f' ? 1 : 0));
21503 }
21504 return;
21505
21506 /* Print a VFPv3 floating-point constant, represented as an integer
21507 index. */
21508 case 'G':
21509 {
21510 int index = vfp3_const_double_index (x);
21511 gcc_assert (index != -1);
21512 fprintf (stream, "%d", index);
21513 }
21514 return;
21515
21516 /* Print bits representing opcode features for Neon.
21517
21518 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21519 and polynomials as unsigned.
21520
21521 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21522
21523 Bit 2 is 1 for rounding functions, 0 otherwise. */
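/* Illustrative example: an operand value of 1 prints 's' for 'T',
   'i' for 'F' and 's' for 't', while a value of 5 also prints 'r'
   for 'O'.  */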
21524
21525 /* Identify the type as 's', 'u', 'p' or 'f'. */
21526 case 'T':
21527 {
21528 HOST_WIDE_INT bits = INTVAL (x);
21529 fputc ("uspf"[bits & 3], stream);
21530 }
21531 return;
21532
21533 /* Likewise, but signed and unsigned integers are both 'i'. */
21534 case 'F':
21535 {
21536 HOST_WIDE_INT bits = INTVAL (x);
21537 fputc ("iipf"[bits & 3], stream);
21538 }
21539 return;
21540
21541 /* As for 'T', but emit 'u' instead of 'p'. */
21542 case 't':
21543 {
21544 HOST_WIDE_INT bits = INTVAL (x);
21545 fputc ("usuf"[bits & 3], stream);
21546 }
21547 return;
21548
21549 /* Bit 2: rounding (vs none). */
21550 case 'O':
21551 {
21552 HOST_WIDE_INT bits = INTVAL (x);
21553 fputs ((bits & 4) != 0 ? "r" : "", stream);
21554 }
21555 return;
21556
21557 /* Memory operand for vld1/vst1 instruction. */
21558 case 'A':
21559 {
21560 rtx addr;
21561 bool postinc = false;
21562 unsigned align, memsize, align_bits;
21563
21564 gcc_assert (MEM_P (x));
21565 addr = XEXP (x, 0);
21566 if (GET_CODE (addr) == POST_INC)
21567 {
21568 postinc = true;
21569 addr = XEXP (addr, 0);
21570 }
21571 asm_fprintf (stream, "[%r", REGNO (addr));
21572
21573 /* We know the alignment of this access, so we can emit a hint in the
21574 instruction (for some alignments) as an aid to the memory subsystem
21575 of the target. */
21576 align = MEM_ALIGN (x) >> 3;
21577 memsize = MEM_SIZE (x);
21578
21579 /* Only certain alignment specifiers are supported by the hardware. */
21580 if (memsize == 32 && (align % 32) == 0)
21581 align_bits = 256;
21582 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21583 align_bits = 128;
21584 else if (memsize >= 8 && (align % 8) == 0)
21585 align_bits = 64;
21586 else
21587 align_bits = 0;
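/* Illustrative example: a 16-byte access known to be 16-byte aligned
   gets align_bits == 128 and is printed as "[r0:128]".  */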
21588
21589 if (align_bits != 0)
21590 asm_fprintf (stream, ":%d", align_bits);
21591
21592 asm_fprintf (stream, "]");
21593
21594 if (postinc)
21595 fputs("!", stream);
21596 }
21597 return;
21598
21599 case 'C':
21600 {
21601 rtx addr;
21602
21603 gcc_assert (MEM_P (x));
21604 addr = XEXP (x, 0);
21605 gcc_assert (REG_P (addr));
21606 asm_fprintf (stream, "[%r]", REGNO (addr));
21607 }
21608 return;
21609
21610 /* Translate an S register number into a D register number and element index. */
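/* Illustrative example: s1 is printed as "d0[1]" and s2 as "d1[0]".  */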
21611 case 'y':
21612 {
21613 int mode = GET_MODE (x);
21614 int regno;
21615
21616 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
21617 {
21618 output_operand_lossage ("invalid operand for code '%c'", code);
21619 return;
21620 }
21621
21622 regno = REGNO (x);
21623 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21624 {
21625 output_operand_lossage ("invalid operand for code '%c'", code);
21626 return;
21627 }
21628
21629 regno = regno - FIRST_VFP_REGNUM;
21630 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
21631 }
21632 return;
21633
21634 case 'v':
21635 gcc_assert (CONST_DOUBLE_P (x));
21636 int result;
21637 result = vfp3_const_double_for_fract_bits (x);
21638 if (result == 0)
21639 result = vfp3_const_double_for_bits (x);
21640 fprintf (stream, "#%d", result);
21641 return;
21642
21643 /* Register specifier for vld1.16/vst1.16. Translate the S register
21644 number into a D register number and element index. */
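/* Illustrative example: s1 is printed as "d0[2]" (presumably a
   halfword element index).  */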
21645 case 'z':
21646 {
21647 int mode = GET_MODE (x);
21648 int regno;
21649
21650 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
21651 {
21652 output_operand_lossage ("invalid operand for code '%c'", code);
21653 return;
21654 }
21655
21656 regno = REGNO (x);
21657 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
21658 {
21659 output_operand_lossage ("invalid operand for code '%c'", code);
21660 return;
21661 }
21662
21663 regno = regno - FIRST_VFP_REGNUM;
21664 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
21665 }
21666 return;
21667
21668 default:
21669 if (x == 0)
21670 {
21671 output_operand_lossage ("missing operand");
21672 return;
21673 }
21674
21675 switch (GET_CODE (x))
21676 {
21677 case REG:
21678 asm_fprintf (stream, "%r", REGNO (x));
21679 break;
21680
21681 case MEM:
21682 output_memory_reference_mode = GET_MODE (x);
21683 output_address (XEXP (x, 0));
21684 break;
21685
21686 case CONST_DOUBLE:
21687 if (TARGET_NEON)
21688 {
21689 char fpstr[20];
21690 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
21691 sizeof (fpstr), 0, 1);
21692 fprintf (stream, "#%s", fpstr);
21693 }
21694 else
21695 fprintf (stream, "#%s", fp_immediate_constant (x));
21696 break;
21697
21698 default:
21699 gcc_assert (GET_CODE (x) != NEG);
21700 fputc ('#', stream);
21701 if (GET_CODE (x) == HIGH)
21702 {
21703 fputs (":lower16:", stream);
21704 x = XEXP (x, 0);
21705 }
21706
21707 output_addr_const (stream, x);
21708 break;
21709 }
21710 }
21711 }
21712 \f
21713 /* Target hook for printing a memory address. */
21714 static void
21715 arm_print_operand_address (FILE *stream, rtx x)
21716 {
21717 if (TARGET_32BIT)
21718 {
21719 int is_minus = GET_CODE (x) == MINUS;
21720
21721 if (REG_P (x))
21722 asm_fprintf (stream, "[%r]", REGNO (x));
21723 else if (GET_CODE (x) == PLUS || is_minus)
21724 {
21725 rtx base = XEXP (x, 0);
21726 rtx index = XEXP (x, 1);
21727 HOST_WIDE_INT offset = 0;
21728 if (!REG_P (base)
21729 || (REG_P (index) && REGNO (index) == SP_REGNUM))
21730 {
21731 /* Ensure that BASE is a register. */
21732 /* (one of them must be). */
21733 /* Also ensure the SP is not used as an index register.  */
21734 rtx temp = base;
21735 base = index;
21736 index = temp;
21737 }
21738 switch (GET_CODE (index))
21739 {
21740 case CONST_INT:
21741 offset = INTVAL (index);
21742 if (is_minus)
21743 offset = -offset;
21744 asm_fprintf (stream, "[%r, #%wd]",
21745 REGNO (base), offset);
21746 break;
21747
21748 case REG:
21749 asm_fprintf (stream, "[%r, %s%r]",
21750 REGNO (base), is_minus ? "-" : "",
21751 REGNO (index));
21752 break;
21753
21754 case MULT:
21755 case ASHIFTRT:
21756 case LSHIFTRT:
21757 case ASHIFT:
21758 case ROTATERT:
21759 {
21760 asm_fprintf (stream, "[%r, %s%r",
21761 REGNO (base), is_minus ? "-" : "",
21762 REGNO (XEXP (index, 0)));
21763 arm_print_operand (stream, index, 'S');
21764 fputs ("]", stream);
21765 break;
21766 }
21767
21768 default:
21769 gcc_unreachable ();
21770 }
21771 }
21772 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
21773 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
21774 {
21775 extern enum machine_mode output_memory_reference_mode;
21776
21777 gcc_assert (REG_P (XEXP (x, 0)));
21778
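/* Illustrative examples: an SImode PRE_DEC on r0 prints "[r0, #-4]!",
   while an SImode POST_INC prints "[r0], #4".  */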
21779 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
21780 asm_fprintf (stream, "[%r, #%s%d]!",
21781 REGNO (XEXP (x, 0)),
21782 GET_CODE (x) == PRE_DEC ? "-" : "",
21783 GET_MODE_SIZE (output_memory_reference_mode));
21784 else
21785 asm_fprintf (stream, "[%r], #%s%d",
21786 REGNO (XEXP (x, 0)),
21787 GET_CODE (x) == POST_DEC ? "-" : "",
21788 GET_MODE_SIZE (output_memory_reference_mode));
21789 }
21790 else if (GET_CODE (x) == PRE_MODIFY)
21791 {
21792 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
21793 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21794 asm_fprintf (stream, "#%wd]!",
21795 INTVAL (XEXP (XEXP (x, 1), 1)));
21796 else
21797 asm_fprintf (stream, "%r]!",
21798 REGNO (XEXP (XEXP (x, 1), 1)));
21799 }
21800 else if (GET_CODE (x) == POST_MODIFY)
21801 {
21802 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
21803 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
21804 asm_fprintf (stream, "#%wd",
21805 INTVAL (XEXP (XEXP (x, 1), 1)));
21806 else
21807 asm_fprintf (stream, "%r",
21808 REGNO (XEXP (XEXP (x, 1), 1)));
21809 }
21810 else output_addr_const (stream, x);
21811 }
21812 else
21813 {
21814 if (REG_P (x))
21815 asm_fprintf (stream, "[%r]", REGNO (x));
21816 else if (GET_CODE (x) == POST_INC)
21817 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
21818 else if (GET_CODE (x) == PLUS)
21819 {
21820 gcc_assert (REG_P (XEXP (x, 0)));
21821 if (CONST_INT_P (XEXP (x, 1)))
21822 asm_fprintf (stream, "[%r, #%wd]",
21823 REGNO (XEXP (x, 0)),
21824 INTVAL (XEXP (x, 1)));
21825 else
21826 asm_fprintf (stream, "[%r, %r]",
21827 REGNO (XEXP (x, 0)),
21828 REGNO (XEXP (x, 1)));
21829 }
21830 else
21831 output_addr_const (stream, x);
21832 }
21833 }
21834 \f
21835 /* Target hook for indicating whether a punctuation character for
21836 TARGET_PRINT_OPERAND is valid. */
21837 static bool
21838 arm_print_operand_punct_valid_p (unsigned char code)
21839 {
21840 return (code == '@' || code == '|' || code == '.'
21841 || code == '(' || code == ')' || code == '#'
21842 || (TARGET_32BIT && (code == '?'))
21843 || (TARGET_THUMB2 && (code == '!'))
21844 || (TARGET_THUMB && (code == '_')));
21845 }
21846 \f
21847 /* Target hook for assembling integer objects. The ARM version needs to
21848 handle word-sized values specially. */
21849 static bool
21850 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
21851 {
21852 enum machine_mode mode;
21853
21854 if (size == UNITS_PER_WORD && aligned_p)
21855 {
21856 fputs ("\t.word\t", asm_out_file);
21857 output_addr_const (asm_out_file, x);
21858
21859 /* Mark symbols as position independent. We only do this in the
21860 .text segment, not in the .data segment. */
21861 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
21862 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
21863 {
21864 /* See legitimize_pic_address for an explanation of the
21865 TARGET_VXWORKS_RTP check. */
21866 if (!arm_pic_data_is_text_relative
21867 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
21868 fputs ("(GOT)", asm_out_file);
21869 else
21870 fputs ("(GOTOFF)", asm_out_file);
21871 }
21872 fputc ('\n', asm_out_file);
21873 return true;
21874 }
21875
21876 mode = GET_MODE (x);
21877
21878 if (arm_vector_mode_supported_p (mode))
21879 {
21880 int i, units;
21881
21882 gcc_assert (GET_CODE (x) == CONST_VECTOR);
21883
21884 units = CONST_VECTOR_NUNITS (x);
21885 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
21886
21887 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21888 for (i = 0; i < units; i++)
21889 {
21890 rtx elt = CONST_VECTOR_ELT (x, i);
21891 assemble_integer
21892 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
21893 }
21894 else
21895 for (i = 0; i < units; i++)
21896 {
21897 rtx elt = CONST_VECTOR_ELT (x, i);
21898 REAL_VALUE_TYPE rval;
21899
21900 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
21901
21902 assemble_real
21903 (rval, GET_MODE_INNER (mode),
21904 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
21905 }
21906
21907 return true;
21908 }
21909
21910 return default_assemble_integer (x, size, aligned_p);
21911 }
21912
21913 static void
21914 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
21915 {
21916 section *s;
21917
21918 if (!TARGET_AAPCS_BASED)
21919 {
21920 (is_ctor ?
21921 default_named_section_asm_out_constructor
21922 : default_named_section_asm_out_destructor) (symbol, priority);
21923 return;
21924 }
21925
21926 /* Put these in the .init_array section, using a special relocation. */
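/* For a non-default priority the section name encodes it, e.g.
   priority 100 yields ".init_array.00100" (illustrative).  */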
21927 if (priority != DEFAULT_INIT_PRIORITY)
21928 {
21929 char buf[18];
21930 sprintf (buf, "%s.%.5u",
21931 is_ctor ? ".init_array" : ".fini_array",
21932 priority);
21933 s = get_section (buf, SECTION_WRITE, NULL_TREE);
21934 }
21935 else if (is_ctor)
21936 s = ctors_section;
21937 else
21938 s = dtors_section;
21939
21940 switch_to_section (s);
21941 assemble_align (POINTER_SIZE);
21942 fputs ("\t.word\t", asm_out_file);
21943 output_addr_const (asm_out_file, symbol);
21944 fputs ("(target1)\n", asm_out_file);
21945 }
21946
21947 /* Add a function to the list of static constructors. */
21948
21949 static void
21950 arm_elf_asm_constructor (rtx symbol, int priority)
21951 {
21952 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
21953 }
21954
21955 /* Add a function to the list of static destructors. */
21956
21957 static void
21958 arm_elf_asm_destructor (rtx symbol, int priority)
21959 {
21960 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
21961 }
21962 \f
21963 /* A finite state machine takes care of noticing whether or not instructions
21964 can be conditionally executed, and thus decrease execution time and code
21965 size by deleting branch instructions. The fsm is controlled by
21966 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
21967
21968 /* The states of the fsm controlling condition codes are:
21969 0: normal, do nothing special
21970 1: make ASM_OUTPUT_OPCODE not output this instruction
21971 2: make ASM_OUTPUT_OPCODE not output this instruction
21972 3: make instructions conditional
21973 4: make instructions conditional
21974
21975 State transitions (state->state by whom under condition):
21976 0 -> 1 final_prescan_insn if the `target' is a label
21977 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
21978 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
21979 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
21980 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
21981 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
21982 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
21983 (the target insn is arm_target_insn).
21984
21985 If the jump clobbers the conditions then we use states 2 and 4.
21986
21987 A similar thing can be done with conditional return insns.
21988
21989 XXX In case the `target' is an unconditional branch, this conditionalising
21990 of the instructions always reduces code size, but not always execution
21991 time. But then, I want to reduce the code size to somewhere near what
21992 /bin/cc produces. */
21993
21994 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
21995 instructions. When a COND_EXEC instruction is seen the subsequent
21996 instructions are scanned so that multiple conditional instructions can be
21997 combined into a single IT block. arm_condexec_count and arm_condexec_mask
21998 specify the length and true/false mask for the IT block. These will be
21999 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22000
22001 /* Returns the index of the ARM condition code string in
22002 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22003 COMPARISON should be an rtx like `(eq (...) (...))'. */
22004
22005 enum arm_cond_code
22006 maybe_get_arm_condition_code (rtx comparison)
22007 {
22008 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
22009 enum arm_cond_code code;
22010 enum rtx_code comp_code = GET_CODE (comparison);
22011
22012 if (GET_MODE_CLASS (mode) != MODE_CC)
22013 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22014 XEXP (comparison, 1));
22015
22016 switch (mode)
22017 {
22018 case CC_DNEmode: code = ARM_NE; goto dominance;
22019 case CC_DEQmode: code = ARM_EQ; goto dominance;
22020 case CC_DGEmode: code = ARM_GE; goto dominance;
22021 case CC_DGTmode: code = ARM_GT; goto dominance;
22022 case CC_DLEmode: code = ARM_LE; goto dominance;
22023 case CC_DLTmode: code = ARM_LT; goto dominance;
22024 case CC_DGEUmode: code = ARM_CS; goto dominance;
22025 case CC_DGTUmode: code = ARM_HI; goto dominance;
22026 case CC_DLEUmode: code = ARM_LS; goto dominance;
22027 case CC_DLTUmode: code = ARM_CC;
22028
22029 dominance:
22030 if (comp_code == EQ)
22031 return ARM_INVERSE_CONDITION_CODE (code);
22032 if (comp_code == NE)
22033 return code;
22034 return ARM_NV;
22035
22036 case CC_NOOVmode:
22037 switch (comp_code)
22038 {
22039 case NE: return ARM_NE;
22040 case EQ: return ARM_EQ;
22041 case GE: return ARM_PL;
22042 case LT: return ARM_MI;
22043 default: return ARM_NV;
22044 }
22045
22046 case CC_Zmode:
22047 switch (comp_code)
22048 {
22049 case NE: return ARM_NE;
22050 case EQ: return ARM_EQ;
22051 default: return ARM_NV;
22052 }
22053
22054 case CC_Nmode:
22055 switch (comp_code)
22056 {
22057 case NE: return ARM_MI;
22058 case EQ: return ARM_PL;
22059 default: return ARM_NV;
22060 }
22061
22062 case CCFPEmode:
22063 case CCFPmode:
22064 /* We can handle all cases except UNEQ and LTGT. */
22065 switch (comp_code)
22066 {
22067 case GE: return ARM_GE;
22068 case GT: return ARM_GT;
22069 case LE: return ARM_LS;
22070 case LT: return ARM_MI;
22071 case NE: return ARM_NE;
22072 case EQ: return ARM_EQ;
22073 case ORDERED: return ARM_VC;
22074 case UNORDERED: return ARM_VS;
22075 case UNLT: return ARM_LT;
22076 case UNLE: return ARM_LE;
22077 case UNGT: return ARM_HI;
22078 case UNGE: return ARM_PL;
22079 /* UNEQ and LTGT do not have a representation. */
22080 case UNEQ: /* Fall through. */
22081 case LTGT: /* Fall through. */
22082 default: return ARM_NV;
22083 }
22084
22085 case CC_SWPmode:
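      /* The operands of the comparison were swapped when this CC mode was
	 chosen, so each condition maps to its operand-swapped counterpart
	 (e.g. GT becomes LT), not to its inverse.  */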
22086 switch (comp_code)
22087 {
22088 case NE: return ARM_NE;
22089 case EQ: return ARM_EQ;
22090 case GE: return ARM_LE;
22091 case GT: return ARM_LT;
22092 case LE: return ARM_GE;
22093 case LT: return ARM_GT;
22094 case GEU: return ARM_LS;
22095 case GTU: return ARM_CC;
22096 case LEU: return ARM_CS;
22097 case LTU: return ARM_HI;
22098 default: return ARM_NV;
22099 }
22100
22101 case CC_Cmode:
22102 switch (comp_code)
22103 {
22104 case LTU: return ARM_CS;
22105 case GEU: return ARM_CC;
22106 default: return ARM_NV;
22107 }
22108
22109 case CC_CZmode:
22110 switch (comp_code)
22111 {
22112 case NE: return ARM_NE;
22113 case EQ: return ARM_EQ;
22114 case GEU: return ARM_CS;
22115 case GTU: return ARM_HI;
22116 case LEU: return ARM_LS;
22117 case LTU: return ARM_CC;
22118 default: return ARM_NV;
22119 }
22120
22121 case CC_NCVmode:
22122 switch (comp_code)
22123 {
22124 case GE: return ARM_GE;
22125 case LT: return ARM_LT;
22126 case GEU: return ARM_CS;
22127 case LTU: return ARM_CC;
22128 default: return ARM_NV;
22129 }
22130
22131 case CCmode:
22132 switch (comp_code)
22133 {
22134 case NE: return ARM_NE;
22135 case EQ: return ARM_EQ;
22136 case GE: return ARM_GE;
22137 case GT: return ARM_GT;
22138 case LE: return ARM_LE;
22139 case LT: return ARM_LT;
22140 case GEU: return ARM_CS;
22141 case GTU: return ARM_HI;
22142 case LEU: return ARM_LS;
22143 case LTU: return ARM_CC;
22144 default: return ARM_NV;
22145 }
22146
22147 default: gcc_unreachable ();
22148 }
22149 }
22150
22151 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22152 static enum arm_cond_code
22153 get_arm_condition_code (rtx comparison)
22154 {
22155 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22156 gcc_assert (code != ARM_NV);
22157 return code;
22158 }
22159
22160 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22161 instructions. */
22162 void
22163 thumb2_final_prescan_insn (rtx insn)
22164 {
22165 rtx first_insn = insn;
22166 rtx body = PATTERN (insn);
22167 rtx predicate;
22168 enum arm_cond_code code;
22169 int n;
22170 int mask;
22171 int max;
22172
22173 /* max_insns_skipped in the tuning was already taken into account in the
22174 cost model of the ifcvt pass when the COND_EXEC insns were generated.  At
22175 this stage just emit the IT blocks as they come; it does not make sense
22176 to split them.  */
22177 max = MAX_INSN_PER_IT_BLOCK;
22178
22179 /* Remove the previous insn from the count of insns to be output. */
22180 if (arm_condexec_count)
22181 arm_condexec_count--;
22182
22183 /* Nothing to do if we are already inside a conditional block. */
22184 if (arm_condexec_count)
22185 return;
22186
22187 if (GET_CODE (body) != COND_EXEC)
22188 return;
22189
22190 /* Conditional jumps are implemented directly. */
22191 if (JUMP_P (insn))
22192 return;
22193
22194 predicate = COND_EXEC_TEST (body);
22195 arm_current_cc = get_arm_condition_code (predicate);
22196
22197 n = get_attr_ce_count (insn);
22198 arm_condexec_count = 1;
22199 arm_condexec_mask = (1 << n) - 1;
22200 arm_condexec_masklen = n;
22201 /* See if subsequent instructions can be combined into the same block. */
22202 for (;;)
22203 {
22204 insn = next_nonnote_insn (insn);
22205
22206 /* Jumping into the middle of an IT block is illegal, so a label or
22207 barrier terminates the block. */
22208 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22209 break;
22210
22211 body = PATTERN (insn);
22212 /* USE and CLOBBER aren't really insns, so just skip them. */
22213 if (GET_CODE (body) == USE
22214 || GET_CODE (body) == CLOBBER)
22215 continue;
22216
22217 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22218 if (GET_CODE (body) != COND_EXEC)
22219 break;
22220 /* Maximum number of conditionally executed instructions in a block. */
22221 n = get_attr_ce_count (insn);
22222 if (arm_condexec_masklen + n > max)
22223 break;
22224
22225 predicate = COND_EXEC_TEST (body);
22226 code = get_arm_condition_code (predicate);
22227 mask = (1 << n) - 1;
22228 if (arm_current_cc == code)
22229 arm_condexec_mask |= (mask << arm_condexec_masklen);
22230 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22231 break;
22232
22233 arm_condexec_count++;
22234 arm_condexec_masklen += n;
22235
22236 /* A jump must be the last instruction in a conditional block. */
22237 if (JUMP_P (insn))
22238 break;
22239 }
22240 /* Restore recog_data (getting the attributes of other insns can
22241 destroy this array, but final.c assumes that it remains intact
22242 across this call). */
22243 extract_constrain_insn_cached (first_insn);
22244 }
22245
22246 void
22247 arm_final_prescan_insn (rtx insn)
22248 {
22249 /* BODY will hold the body of INSN. */
22250 rtx body = PATTERN (insn);
22251
22252 /* This will be 1 if trying to repeat the trick, and things need to be
22253 reversed if it appears to fail. */
22254 int reverse = 0;
22255
22256 /* If we start with a return insn, we only succeed if we find another one. */
22257 int seeking_return = 0;
22258 enum rtx_code return_code = UNKNOWN;
22259
22260 /* START_INSN will hold the insn from where we start looking. This is the
22261 first insn after the following code_label if REVERSE is true. */
22262 rtx start_insn = insn;
22263
22264 /* If in state 4, check if the target branch is reached, in order to
22265 change back to state 0. */
22266 if (arm_ccfsm_state == 4)
22267 {
22268 if (insn == arm_target_insn)
22269 {
22270 arm_target_insn = NULL;
22271 arm_ccfsm_state = 0;
22272 }
22273 return;
22274 }
22275
22276 /* If in state 3, it is possible to repeat the trick, if this insn is an
22277 unconditional branch to a label, and immediately following this branch
22278 is the previous target label which is only used once, and the label this
22279 branch jumps to is not too far off. */
22280 if (arm_ccfsm_state == 3)
22281 {
22282 if (simplejump_p (insn))
22283 {
22284 start_insn = next_nonnote_insn (start_insn);
22285 if (BARRIER_P (start_insn))
22286 {
22287 /* XXX Isn't this always a barrier? */
22288 start_insn = next_nonnote_insn (start_insn);
22289 }
22290 if (LABEL_P (start_insn)
22291 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22292 && LABEL_NUSES (start_insn) == 1)
22293 reverse = TRUE;
22294 else
22295 return;
22296 }
22297 else if (ANY_RETURN_P (body))
22298 {
22299 start_insn = next_nonnote_insn (start_insn);
22300 if (BARRIER_P (start_insn))
22301 start_insn = next_nonnote_insn (start_insn);
22302 if (LABEL_P (start_insn)
22303 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22304 && LABEL_NUSES (start_insn) == 1)
22305 {
22306 reverse = TRUE;
22307 seeking_return = 1;
22308 return_code = GET_CODE (body);
22309 }
22310 else
22311 return;
22312 }
22313 else
22314 return;
22315 }
22316
22317 gcc_assert (!arm_ccfsm_state || reverse);
22318 if (!JUMP_P (insn))
22319 return;
22320
22321 /* This jump might be paralleled with a clobber of the condition codes;
22322 the jump should always come first.  */
22323 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22324 body = XVECEXP (body, 0, 0);
22325
22326 if (reverse
22327 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22328 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22329 {
22330 int insns_skipped;
22331 int fail = FALSE, succeed = FALSE;
22332 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22333 int then_not_else = TRUE;
22334 rtx this_insn = start_insn, label = 0;
22335
22336 /* Register the insn jumped to. */
22337 if (reverse)
22338 {
22339 if (!seeking_return)
22340 label = XEXP (SET_SRC (body), 0);
22341 }
22342 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22343 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22344 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22345 {
22346 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22347 then_not_else = FALSE;
22348 }
22349 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22350 {
22351 seeking_return = 1;
22352 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22353 }
22354 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22355 {
22356 seeking_return = 1;
22357 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22358 then_not_else = FALSE;
22359 }
22360 else
22361 gcc_unreachable ();
22362
22363 /* See how many insns this branch skips, and what kind of insns. If all
22364 insns are okay, and the label or unconditional branch to the same
22365 label is not too far away, succeed. */
22366 for (insns_skipped = 0;
22367 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22368 {
22369 rtx scanbody;
22370
22371 this_insn = next_nonnote_insn (this_insn);
22372 if (!this_insn)
22373 break;
22374
22375 switch (GET_CODE (this_insn))
22376 {
22377 case CODE_LABEL:
22378 /* Succeed if it is the target label, otherwise fail since
22379 control falls in from somewhere else. */
22380 if (this_insn == label)
22381 {
22382 arm_ccfsm_state = 1;
22383 succeed = TRUE;
22384 }
22385 else
22386 fail = TRUE;
22387 break;
22388
22389 case BARRIER:
22390 /* Succeed if the following insn is the target label.
22391 Otherwise fail.
22392 If return insns are used then the last insn in a function
22393 will be a barrier. */
22394 this_insn = next_nonnote_insn (this_insn);
22395 if (this_insn && this_insn == label)
22396 {
22397 arm_ccfsm_state = 1;
22398 succeed = TRUE;
22399 }
22400 else
22401 fail = TRUE;
22402 break;
22403
22404 case CALL_INSN:
22405 /* The AAPCS says that conditional calls should not be
22406 used since they make interworking inefficient (the
22407 linker can't transform BL<cond> into BLX). That's
22408 only a problem if the machine has BLX. */
22409 if (arm_arch5)
22410 {
22411 fail = TRUE;
22412 break;
22413 }
22414
22415 /* Succeed if the following insn is the target label, or
22416 if the following two insns are a barrier and the
22417 target label. */
22418 this_insn = next_nonnote_insn (this_insn);
22419 if (this_insn && BARRIER_P (this_insn))
22420 this_insn = next_nonnote_insn (this_insn);
22421
22422 if (this_insn && this_insn == label
22423 && insns_skipped < max_insns_skipped)
22424 {
22425 arm_ccfsm_state = 1;
22426 succeed = TRUE;
22427 }
22428 else
22429 fail = TRUE;
22430 break;
22431
22432 case JUMP_INSN:
22433 /* If this is an unconditional branch to the same label, succeed.
22434 If it is to another label, do nothing. If it is conditional,
22435 fail. */
22436 /* XXX Probably, the tests for SET and the PC are
22437 unnecessary. */
22438
22439 scanbody = PATTERN (this_insn);
22440 if (GET_CODE (scanbody) == SET
22441 && GET_CODE (SET_DEST (scanbody)) == PC)
22442 {
22443 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22444 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22445 {
22446 arm_ccfsm_state = 2;
22447 succeed = TRUE;
22448 }
22449 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22450 fail = TRUE;
22451 }
22452 /* Fail if a conditional return is undesirable (e.g. on a
22453 StrongARM), but still allow this if optimizing for size. */
22454 else if (GET_CODE (scanbody) == return_code
22455 && !use_return_insn (TRUE, NULL)
22456 && !optimize_size)
22457 fail = TRUE;
22458 else if (GET_CODE (scanbody) == return_code)
22459 {
22460 arm_ccfsm_state = 2;
22461 succeed = TRUE;
22462 }
22463 else if (GET_CODE (scanbody) == PARALLEL)
22464 {
22465 switch (get_attr_conds (this_insn))
22466 {
22467 case CONDS_NOCOND:
22468 break;
22469 default:
22470 fail = TRUE;
22471 break;
22472 }
22473 }
22474 else
22475 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22476
22477 break;
22478
22479 case INSN:
22480 /* Instructions using or affecting the condition codes make it
22481 fail. */
22482 scanbody = PATTERN (this_insn);
22483 if (!(GET_CODE (scanbody) == SET
22484 || GET_CODE (scanbody) == PARALLEL)
22485 || get_attr_conds (this_insn) != CONDS_NOCOND)
22486 fail = TRUE;
22487 break;
22488
22489 default:
22490 break;
22491 }
22492 }
22493 if (succeed)
22494 {
22495 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22496 arm_target_label = CODE_LABEL_NUMBER (label);
22497 else
22498 {
22499 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22500
22501 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22502 {
22503 this_insn = next_nonnote_insn (this_insn);
22504 gcc_assert (!this_insn
22505 || (!BARRIER_P (this_insn)
22506 && !LABEL_P (this_insn)));
22507 }
22508 if (!this_insn)
22509 {
22510 /* Oh, dear!  We ran off the end; give up.  */
22511 extract_constrain_insn_cached (insn);
22512 arm_ccfsm_state = 0;
22513 arm_target_insn = NULL;
22514 return;
22515 }
22516 arm_target_insn = this_insn;
22517 }
22518
22519 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22520 what it was. */
22521 if (!reverse)
22522 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22523
22524 if (reverse || then_not_else)
22525 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22526 }
22527
22528 /* Restore recog_data (getting the attributes of other insns can
22529 destroy this array, but final.c assumes that it remains intact
22530 across this call).  */
22531 extract_constrain_insn_cached (insn);
22532 }
22533 }
22534
22535 /* Output IT instructions. */
22536 void
22537 thumb2_asm_output_opcode (FILE * stream)
22538 {
22539 char buff[5];
22540 int n;
22541
22542 if (arm_condexec_mask)
22543 {
22544 for (n = 0; n < arm_condexec_masklen; n++)
22545 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22546 buff[n] = 0;
22547 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22548 arm_condition_codes[arm_current_cc]);
22549 arm_condexec_mask = 0;
22550 }
22551 }
22552
22553 /* Returns true if REGNO is a valid register
22554 for holding a quantity of type MODE. */
22555 int
22556 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
22557 {
22558 if (GET_MODE_CLASS (mode) == MODE_CC)
22559 return (regno == CC_REGNUM
22560 || (TARGET_HARD_FLOAT && TARGET_VFP
22561 && regno == VFPCC_REGNUM));
22562
22563 if (TARGET_THUMB1)
22564 /* For the Thumb we only allow values bigger than SImode in
22565 registers 0 - 6, so that there is always a second low
22566 register available to hold the upper part of the value.
22567 We probably ought to ensure that the register is the
22568 start of an even numbered register pair. */
22569 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22570
22571 if (TARGET_HARD_FLOAT && TARGET_VFP
22572 && IS_VFP_REGNUM (regno))
22573 {
22574 if (mode == SFmode || mode == SImode)
22575 return VFP_REGNO_OK_FOR_SINGLE (regno);
22576
22577 if (mode == DFmode)
22578 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22579
22580 /* VFP registers can hold HFmode values, but there is no point in
22581 putting them there unless we have hardware conversion insns. */
22582 if (mode == HFmode)
22583 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22584
22585 if (TARGET_NEON)
22586 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22587 || (VALID_NEON_QREG_MODE (mode)
22588 && NEON_REGNO_OK_FOR_QUAD (regno))
22589 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22590 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22591 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22592 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22593 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22594
22595 return FALSE;
22596 }
22597
22598 if (TARGET_REALLY_IWMMXT)
22599 {
22600 if (IS_IWMMXT_GR_REGNUM (regno))
22601 return mode == SImode;
22602
22603 if (IS_IWMMXT_REGNUM (regno))
22604 return VALID_IWMMXT_REG_MODE (mode);
22605 }
22606
22607 /* We allow almost any value to be stored in the general registers.
22608 Restrict doubleword quantities to even register pairs so that we can
22609 use ldrd. Do not allow very large Neon structure opaque modes in
22610 general registers; they would use too many. */
22611 if (regno <= LAST_ARM_REGNUM)
22612 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
22613 && ARM_NUM_REGS (mode) <= 4;
22614
22615 if (regno == FRAME_POINTER_REGNUM
22616 || regno == ARG_POINTER_REGNUM)
22617 /* We only allow integers in the fake hard registers. */
22618 return GET_MODE_CLASS (mode) == MODE_INT;
22619
22620 return FALSE;
22621 }
22622
22623 /* Implement MODES_TIEABLE_P. */
22624
22625 bool
22626 arm_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22627 {
22628 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
22629 return true;
22630
22631 /* We specifically want to allow elements of "structure" modes to
22632 be tieable to the structure. This more general condition allows
22633 other rarer situations too. */
22634 if (TARGET_NEON
22635 && (VALID_NEON_DREG_MODE (mode1)
22636 || VALID_NEON_QREG_MODE (mode1)
22637 || VALID_NEON_STRUCT_MODE (mode1))
22638 && (VALID_NEON_DREG_MODE (mode2)
22639 || VALID_NEON_QREG_MODE (mode2)
22640 || VALID_NEON_STRUCT_MODE (mode2)))
22641 return true;
22642
22643 return false;
22644 }
22645
22646 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
22647 not used in ARM mode.  */
22648
22649 enum reg_class
22650 arm_regno_class (int regno)
22651 {
22652 if (TARGET_THUMB1)
22653 {
22654 if (regno == STACK_POINTER_REGNUM)
22655 return STACK_REG;
22656 if (regno == CC_REGNUM)
22657 return CC_REG;
22658 if (regno < 8)
22659 return LO_REGS;
22660 return HI_REGS;
22661 }
22662
22663 if (TARGET_THUMB2 && regno < 8)
22664 return LO_REGS;
22665
22666 if ( regno <= LAST_ARM_REGNUM
22667 || regno == FRAME_POINTER_REGNUM
22668 || regno == ARG_POINTER_REGNUM)
22669 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
22670
22671 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
22672 return TARGET_THUMB2 ? CC_REG : NO_REGS;
22673
22674 if (IS_VFP_REGNUM (regno))
22675 {
22676 if (regno <= D7_VFP_REGNUM)
22677 return VFP_D0_D7_REGS;
22678 else if (regno <= LAST_LO_VFP_REGNUM)
22679 return VFP_LO_REGS;
22680 else
22681 return VFP_HI_REGS;
22682 }
22683
22684 if (IS_IWMMXT_REGNUM (regno))
22685 return IWMMXT_REGS;
22686
22687 if (IS_IWMMXT_GR_REGNUM (regno))
22688 return IWMMXT_GR_REGS;
22689
22690 return NO_REGS;
22691 }
22692
22693 /* Handle a special case when computing the offset
22694 of an argument from the frame pointer. */
22695 int
22696 arm_debugger_arg_offset (int value, rtx addr)
22697 {
22698 rtx insn;
22699
22700 /* We are only interested in the case where dbxout_parms() failed to compute the offset.  */
22701 if (value != 0)
22702 return 0;
22703
22704 /* We can only cope with the case where the address is held in a register. */
22705 if (!REG_P (addr))
22706 return 0;
22707
22708 /* If we are using the frame pointer to point at the argument, then
22709 an offset of 0 is correct. */
22710 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
22711 return 0;
22712
22713 /* If we are using the stack pointer to point at the
22714 argument, then an offset of 0 is correct. */
22715 /* ??? Check this is consistent with thumb2 frame layout. */
22716 if ((TARGET_THUMB || !frame_pointer_needed)
22717 && REGNO (addr) == SP_REGNUM)
22718 return 0;
22719
22720 /* Oh dear. The argument is pointed to by a register rather
22721 than being held in a register, or being stored at a known
22722 offset from the frame pointer. Since GDB only understands
22723 those two kinds of argument we must translate the address
22724 held in the register into an offset from the frame pointer.
22725 We do this by searching through the insns for the function
22726 looking to see where this register gets its value. If the
22727 register is initialized from the frame pointer plus an offset
22728 then we are in luck and we can continue, otherwise we give up.
22729
22730 This code is exercised by producing debugging information
22731 for a function with arguments like this:
22732
22733 double func (double a, double b, int c, double d) {return d;}
22734
22735 Without this code the stab for parameter 'd' will be set to
22736 an offset of 0 from the frame pointer, rather than 8. */
22737
22738 /* The if() statement says:
22739
22740 If the insn is a normal instruction
22741 and if the insn is setting the value in a register
22742 and if the register being set is the register holding the address of the argument
22743 and if the address is computed by an addition
22744 that involves adding to a register
22745 which is the frame pointer
22746 a constant integer
22747
22748 then... */
22749
22750 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22751 {
22752 if ( NONJUMP_INSN_P (insn)
22753 && GET_CODE (PATTERN (insn)) == SET
22754 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
22755 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
22756 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
22757 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
22758 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
22759 )
22760 {
22761 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
22762
22763 break;
22764 }
22765 }
22766
22767 if (value == 0)
22768 {
22769 debug_rtx (addr);
22770 warning (0, "unable to compute real location of stacked parameter");
22771 value = 8; /* XXX magic hack */
22772 }
22773
22774 return value;
22775 }
22776 \f
22777 typedef enum {
22778 T_V8QI,
22779 T_V4HI,
22780 T_V4HF,
22781 T_V2SI,
22782 T_V2SF,
22783 T_DI,
22784 T_V16QI,
22785 T_V8HI,
22786 T_V4SI,
22787 T_V4SF,
22788 T_V2DI,
22789 T_TI,
22790 T_EI,
22791 T_OI,
22792 T_MAX /* Size of enum. Keep last. */
22793 } neon_builtin_type_mode;
22794
22795 #define TYPE_MODE_BIT(X) (1 << (X))
22796
22797 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
22798 | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
22799 | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
22800 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
22801 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
22802 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
22803
22804 #define v8qi_UP T_V8QI
22805 #define v4hi_UP T_V4HI
22806 #define v4hf_UP T_V4HF
22807 #define v2si_UP T_V2SI
22808 #define v2sf_UP T_V2SF
22809 #define di_UP T_DI
22810 #define v16qi_UP T_V16QI
22811 #define v8hi_UP T_V8HI
22812 #define v4si_UP T_V4SI
22813 #define v4sf_UP T_V4SF
22814 #define v2di_UP T_V2DI
22815 #define ti_UP T_TI
22816 #define ei_UP T_EI
22817 #define oi_UP T_OI
22818
22819 #define UP(X) X##_UP
22820
22821 typedef enum {
22822 NEON_BINOP,
22823 NEON_TERNOP,
22824 NEON_UNOP,
22825 NEON_GETLANE,
22826 NEON_SETLANE,
22827 NEON_CREATE,
22828 NEON_RINT,
22829 NEON_DUP,
22830 NEON_DUPLANE,
22831 NEON_COMBINE,
22832 NEON_SPLIT,
22833 NEON_LANEMUL,
22834 NEON_LANEMULL,
22835 NEON_LANEMULH,
22836 NEON_LANEMAC,
22837 NEON_SCALARMUL,
22838 NEON_SCALARMULL,
22839 NEON_SCALARMULH,
22840 NEON_SCALARMAC,
22841 NEON_CONVERT,
22842 NEON_FLOAT_WIDEN,
22843 NEON_FLOAT_NARROW,
22844 NEON_FIXCONV,
22845 NEON_SELECT,
22846 NEON_RESULTPAIR,
22847 NEON_REINTERP,
22848 NEON_VTBL,
22849 NEON_VTBX,
22850 NEON_LOAD1,
22851 NEON_LOAD1LANE,
22852 NEON_STORE1,
22853 NEON_STORE1LANE,
22854 NEON_LOADSTRUCT,
22855 NEON_LOADSTRUCTLANE,
22856 NEON_STORESTRUCT,
22857 NEON_STORESTRUCTLANE,
22858 NEON_LOGICBINOP,
22859 NEON_SHIFTINSERT,
22860 NEON_SHIFTIMM,
22861 NEON_SHIFTACC
22862 } neon_itype;
22863
22864 typedef struct {
22865 const char *name;
22866 const neon_itype itype;
22867 const neon_builtin_type_mode mode;
22868 const enum insn_code code;
22869 unsigned int fcode;
22870 } neon_builtin_datum;
22871
22872 #define CF(N,X) CODE_FOR_neon_##N##X
22873
22874 #define VAR1(T, N, A) \
22875 {#N, NEON_##T, UP (A), CF (N, A), 0}
22876 #define VAR2(T, N, A, B) \
22877 VAR1 (T, N, A), \
22878 {#N, NEON_##T, UP (B), CF (N, B), 0}
22879 #define VAR3(T, N, A, B, C) \
22880 VAR2 (T, N, A, B), \
22881 {#N, NEON_##T, UP (C), CF (N, C), 0}
22882 #define VAR4(T, N, A, B, C, D) \
22883 VAR3 (T, N, A, B, C), \
22884 {#N, NEON_##T, UP (D), CF (N, D), 0}
22885 #define VAR5(T, N, A, B, C, D, E) \
22886 VAR4 (T, N, A, B, C, D), \
22887 {#N, NEON_##T, UP (E), CF (N, E), 0}
22888 #define VAR6(T, N, A, B, C, D, E, F) \
22889 VAR5 (T, N, A, B, C, D, E), \
22890 {#N, NEON_##T, UP (F), CF (N, F), 0}
22891 #define VAR7(T, N, A, B, C, D, E, F, G) \
22892 VAR6 (T, N, A, B, C, D, E, F), \
22893 {#N, NEON_##T, UP (G), CF (N, G), 0}
22894 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22895 VAR7 (T, N, A, B, C, D, E, F, G), \
22896 {#N, NEON_##T, UP (H), CF (N, H), 0}
22897 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22898 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22899 {#N, NEON_##T, UP (I), CF (N, I), 0}
22900 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22901 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22902 {#N, NEON_##T, UP (J), CF (N, J), 0}
22903
22904 /* The NEON builtin data can be found in arm_neon_builtins.def.
22905 The mode entries in the following table correspond to the "key" type of the
22906 instruction variant, i.e. equivalent to that which would be specified after
22907 the assembler mnemonic, which usually refers to the last vector operand.
22908 (Signed/unsigned/polynomial types are not differentiated, though; they
22909 are all mapped onto the same mode for a given element size.)  The modes
22910 listed per instruction should be the same as those defined for that
22911 instruction's pattern in neon.md. */
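/* As a purely illustrative example of the VAR macros above ("vfoo" is a
   made-up name, not an entry from arm_neon_builtins.def):

     VAR2 (BINOP, vfoo, v2si, v4si)

   expands to the two table entries

     {"vfoo", NEON_BINOP, T_V2SI, CODE_FOR_neon_vfoov2si, 0},
     {"vfoo", NEON_BINOP, T_V4SI, CODE_FOR_neon_vfoov4si, 0}.  */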
22912
22913 static neon_builtin_datum neon_builtin_data[] =
22914 {
22915 #include "arm_neon_builtins.def"
22916 };
22917
22918 #undef CF
22919 #undef VAR1
22920 #undef VAR2
22921 #undef VAR3
22922 #undef VAR4
22923 #undef VAR5
22924 #undef VAR6
22925 #undef VAR7
22926 #undef VAR8
22927 #undef VAR9
22928 #undef VAR10
22929
22930 #define CF(N,X) ARM_BUILTIN_NEON_##N##X
22931 #define VAR1(T, N, A) \
22932 CF (N, A)
22933 #define VAR2(T, N, A, B) \
22934 VAR1 (T, N, A), \
22935 CF (N, B)
22936 #define VAR3(T, N, A, B, C) \
22937 VAR2 (T, N, A, B), \
22938 CF (N, C)
22939 #define VAR4(T, N, A, B, C, D) \
22940 VAR3 (T, N, A, B, C), \
22941 CF (N, D)
22942 #define VAR5(T, N, A, B, C, D, E) \
22943 VAR4 (T, N, A, B, C, D), \
22944 CF (N, E)
22945 #define VAR6(T, N, A, B, C, D, E, F) \
22946 VAR5 (T, N, A, B, C, D, E), \
22947 CF (N, F)
22948 #define VAR7(T, N, A, B, C, D, E, F, G) \
22949 VAR6 (T, N, A, B, C, D, E, F), \
22950 CF (N, G)
22951 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
22952 VAR7 (T, N, A, B, C, D, E, F, G), \
22953 CF (N, H)
22954 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
22955 VAR8 (T, N, A, B, C, D, E, F, G, H), \
22956 CF (N, I)
22957 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
22958 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
22959 CF (N, J)
22960 enum arm_builtins
22961 {
22962 ARM_BUILTIN_GETWCGR0,
22963 ARM_BUILTIN_GETWCGR1,
22964 ARM_BUILTIN_GETWCGR2,
22965 ARM_BUILTIN_GETWCGR3,
22966
22967 ARM_BUILTIN_SETWCGR0,
22968 ARM_BUILTIN_SETWCGR1,
22969 ARM_BUILTIN_SETWCGR2,
22970 ARM_BUILTIN_SETWCGR3,
22971
22972 ARM_BUILTIN_WZERO,
22973
22974 ARM_BUILTIN_WAVG2BR,
22975 ARM_BUILTIN_WAVG2HR,
22976 ARM_BUILTIN_WAVG2B,
22977 ARM_BUILTIN_WAVG2H,
22978
22979 ARM_BUILTIN_WACCB,
22980 ARM_BUILTIN_WACCH,
22981 ARM_BUILTIN_WACCW,
22982
22983 ARM_BUILTIN_WMACS,
22984 ARM_BUILTIN_WMACSZ,
22985 ARM_BUILTIN_WMACU,
22986 ARM_BUILTIN_WMACUZ,
22987
22988 ARM_BUILTIN_WSADB,
22989 ARM_BUILTIN_WSADBZ,
22990 ARM_BUILTIN_WSADH,
22991 ARM_BUILTIN_WSADHZ,
22992
22993 ARM_BUILTIN_WALIGNI,
22994 ARM_BUILTIN_WALIGNR0,
22995 ARM_BUILTIN_WALIGNR1,
22996 ARM_BUILTIN_WALIGNR2,
22997 ARM_BUILTIN_WALIGNR3,
22998
22999 ARM_BUILTIN_TMIA,
23000 ARM_BUILTIN_TMIAPH,
23001 ARM_BUILTIN_TMIABB,
23002 ARM_BUILTIN_TMIABT,
23003 ARM_BUILTIN_TMIATB,
23004 ARM_BUILTIN_TMIATT,
23005
23006 ARM_BUILTIN_TMOVMSKB,
23007 ARM_BUILTIN_TMOVMSKH,
23008 ARM_BUILTIN_TMOVMSKW,
23009
23010 ARM_BUILTIN_TBCSTB,
23011 ARM_BUILTIN_TBCSTH,
23012 ARM_BUILTIN_TBCSTW,
23013
23014 ARM_BUILTIN_WMADDS,
23015 ARM_BUILTIN_WMADDU,
23016
23017 ARM_BUILTIN_WPACKHSS,
23018 ARM_BUILTIN_WPACKWSS,
23019 ARM_BUILTIN_WPACKDSS,
23020 ARM_BUILTIN_WPACKHUS,
23021 ARM_BUILTIN_WPACKWUS,
23022 ARM_BUILTIN_WPACKDUS,
23023
23024 ARM_BUILTIN_WADDB,
23025 ARM_BUILTIN_WADDH,
23026 ARM_BUILTIN_WADDW,
23027 ARM_BUILTIN_WADDSSB,
23028 ARM_BUILTIN_WADDSSH,
23029 ARM_BUILTIN_WADDSSW,
23030 ARM_BUILTIN_WADDUSB,
23031 ARM_BUILTIN_WADDUSH,
23032 ARM_BUILTIN_WADDUSW,
23033 ARM_BUILTIN_WSUBB,
23034 ARM_BUILTIN_WSUBH,
23035 ARM_BUILTIN_WSUBW,
23036 ARM_BUILTIN_WSUBSSB,
23037 ARM_BUILTIN_WSUBSSH,
23038 ARM_BUILTIN_WSUBSSW,
23039 ARM_BUILTIN_WSUBUSB,
23040 ARM_BUILTIN_WSUBUSH,
23041 ARM_BUILTIN_WSUBUSW,
23042
23043 ARM_BUILTIN_WAND,
23044 ARM_BUILTIN_WANDN,
23045 ARM_BUILTIN_WOR,
23046 ARM_BUILTIN_WXOR,
23047
23048 ARM_BUILTIN_WCMPEQB,
23049 ARM_BUILTIN_WCMPEQH,
23050 ARM_BUILTIN_WCMPEQW,
23051 ARM_BUILTIN_WCMPGTUB,
23052 ARM_BUILTIN_WCMPGTUH,
23053 ARM_BUILTIN_WCMPGTUW,
23054 ARM_BUILTIN_WCMPGTSB,
23055 ARM_BUILTIN_WCMPGTSH,
23056 ARM_BUILTIN_WCMPGTSW,
23057
23058 ARM_BUILTIN_TEXTRMSB,
23059 ARM_BUILTIN_TEXTRMSH,
23060 ARM_BUILTIN_TEXTRMSW,
23061 ARM_BUILTIN_TEXTRMUB,
23062 ARM_BUILTIN_TEXTRMUH,
23063 ARM_BUILTIN_TEXTRMUW,
23064 ARM_BUILTIN_TINSRB,
23065 ARM_BUILTIN_TINSRH,
23066 ARM_BUILTIN_TINSRW,
23067
23068 ARM_BUILTIN_WMAXSW,
23069 ARM_BUILTIN_WMAXSH,
23070 ARM_BUILTIN_WMAXSB,
23071 ARM_BUILTIN_WMAXUW,
23072 ARM_BUILTIN_WMAXUH,
23073 ARM_BUILTIN_WMAXUB,
23074 ARM_BUILTIN_WMINSW,
23075 ARM_BUILTIN_WMINSH,
23076 ARM_BUILTIN_WMINSB,
23077 ARM_BUILTIN_WMINUW,
23078 ARM_BUILTIN_WMINUH,
23079 ARM_BUILTIN_WMINUB,
23080
23081 ARM_BUILTIN_WMULUM,
23082 ARM_BUILTIN_WMULSM,
23083 ARM_BUILTIN_WMULUL,
23084
23085 ARM_BUILTIN_PSADBH,
23086 ARM_BUILTIN_WSHUFH,
23087
23088 ARM_BUILTIN_WSLLH,
23089 ARM_BUILTIN_WSLLW,
23090 ARM_BUILTIN_WSLLD,
23091 ARM_BUILTIN_WSRAH,
23092 ARM_BUILTIN_WSRAW,
23093 ARM_BUILTIN_WSRAD,
23094 ARM_BUILTIN_WSRLH,
23095 ARM_BUILTIN_WSRLW,
23096 ARM_BUILTIN_WSRLD,
23097 ARM_BUILTIN_WRORH,
23098 ARM_BUILTIN_WRORW,
23099 ARM_BUILTIN_WRORD,
23100 ARM_BUILTIN_WSLLHI,
23101 ARM_BUILTIN_WSLLWI,
23102 ARM_BUILTIN_WSLLDI,
23103 ARM_BUILTIN_WSRAHI,
23104 ARM_BUILTIN_WSRAWI,
23105 ARM_BUILTIN_WSRADI,
23106 ARM_BUILTIN_WSRLHI,
23107 ARM_BUILTIN_WSRLWI,
23108 ARM_BUILTIN_WSRLDI,
23109 ARM_BUILTIN_WRORHI,
23110 ARM_BUILTIN_WRORWI,
23111 ARM_BUILTIN_WRORDI,
23112
23113 ARM_BUILTIN_WUNPCKIHB,
23114 ARM_BUILTIN_WUNPCKIHH,
23115 ARM_BUILTIN_WUNPCKIHW,
23116 ARM_BUILTIN_WUNPCKILB,
23117 ARM_BUILTIN_WUNPCKILH,
23118 ARM_BUILTIN_WUNPCKILW,
23119
23120 ARM_BUILTIN_WUNPCKEHSB,
23121 ARM_BUILTIN_WUNPCKEHSH,
23122 ARM_BUILTIN_WUNPCKEHSW,
23123 ARM_BUILTIN_WUNPCKEHUB,
23124 ARM_BUILTIN_WUNPCKEHUH,
23125 ARM_BUILTIN_WUNPCKEHUW,
23126 ARM_BUILTIN_WUNPCKELSB,
23127 ARM_BUILTIN_WUNPCKELSH,
23128 ARM_BUILTIN_WUNPCKELSW,
23129 ARM_BUILTIN_WUNPCKELUB,
23130 ARM_BUILTIN_WUNPCKELUH,
23131 ARM_BUILTIN_WUNPCKELUW,
23132
23133 ARM_BUILTIN_WABSB,
23134 ARM_BUILTIN_WABSH,
23135 ARM_BUILTIN_WABSW,
23136
23137 ARM_BUILTIN_WADDSUBHX,
23138 ARM_BUILTIN_WSUBADDHX,
23139
23140 ARM_BUILTIN_WABSDIFFB,
23141 ARM_BUILTIN_WABSDIFFH,
23142 ARM_BUILTIN_WABSDIFFW,
23143
23144 ARM_BUILTIN_WADDCH,
23145 ARM_BUILTIN_WADDCW,
23146
23147 ARM_BUILTIN_WAVG4,
23148 ARM_BUILTIN_WAVG4R,
23149
23150 ARM_BUILTIN_WMADDSX,
23151 ARM_BUILTIN_WMADDUX,
23152
23153 ARM_BUILTIN_WMADDSN,
23154 ARM_BUILTIN_WMADDUN,
23155
23156 ARM_BUILTIN_WMULWSM,
23157 ARM_BUILTIN_WMULWUM,
23158
23159 ARM_BUILTIN_WMULWSMR,
23160 ARM_BUILTIN_WMULWUMR,
23161
23162 ARM_BUILTIN_WMULWL,
23163
23164 ARM_BUILTIN_WMULSMR,
23165 ARM_BUILTIN_WMULUMR,
23166
23167 ARM_BUILTIN_WQMULM,
23168 ARM_BUILTIN_WQMULMR,
23169
23170 ARM_BUILTIN_WQMULWM,
23171 ARM_BUILTIN_WQMULWMR,
23172
23173 ARM_BUILTIN_WADDBHUSM,
23174 ARM_BUILTIN_WADDBHUSL,
23175
23176 ARM_BUILTIN_WQMIABB,
23177 ARM_BUILTIN_WQMIABT,
23178 ARM_BUILTIN_WQMIATB,
23179 ARM_BUILTIN_WQMIATT,
23180
23181 ARM_BUILTIN_WQMIABBN,
23182 ARM_BUILTIN_WQMIABTN,
23183 ARM_BUILTIN_WQMIATBN,
23184 ARM_BUILTIN_WQMIATTN,
23185
23186 ARM_BUILTIN_WMIABB,
23187 ARM_BUILTIN_WMIABT,
23188 ARM_BUILTIN_WMIATB,
23189 ARM_BUILTIN_WMIATT,
23190
23191 ARM_BUILTIN_WMIABBN,
23192 ARM_BUILTIN_WMIABTN,
23193 ARM_BUILTIN_WMIATBN,
23194 ARM_BUILTIN_WMIATTN,
23195
23196 ARM_BUILTIN_WMIAWBB,
23197 ARM_BUILTIN_WMIAWBT,
23198 ARM_BUILTIN_WMIAWTB,
23199 ARM_BUILTIN_WMIAWTT,
23200
23201 ARM_BUILTIN_WMIAWBBN,
23202 ARM_BUILTIN_WMIAWBTN,
23203 ARM_BUILTIN_WMIAWTBN,
23204 ARM_BUILTIN_WMIAWTTN,
23205
23206 ARM_BUILTIN_WMERGE,
23207
23208 ARM_BUILTIN_CRC32B,
23209 ARM_BUILTIN_CRC32H,
23210 ARM_BUILTIN_CRC32W,
23211 ARM_BUILTIN_CRC32CB,
23212 ARM_BUILTIN_CRC32CH,
23213 ARM_BUILTIN_CRC32CW,
23214
23215 #undef CRYPTO1
23216 #undef CRYPTO2
23217 #undef CRYPTO3
23218
23219 #define CRYPTO1(L, U, M1, M2) \
23220 ARM_BUILTIN_CRYPTO_##U,
23221 #define CRYPTO2(L, U, M1, M2, M3) \
23222 ARM_BUILTIN_CRYPTO_##U,
23223 #define CRYPTO3(L, U, M1, M2, M3, M4) \
23224 ARM_BUILTIN_CRYPTO_##U,
23225
23226 #include "crypto.def"
23227
23228 #undef CRYPTO1
23229 #undef CRYPTO2
23230 #undef CRYPTO3
23231
23232 #include "arm_neon_builtins.def"
23233
23234 ,ARM_BUILTIN_MAX
23235 };
23236
23237 #define ARM_BUILTIN_NEON_BASE (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data))
23238
23239 #undef CF
23240 #undef VAR1
23241 #undef VAR2
23242 #undef VAR3
23243 #undef VAR4
23244 #undef VAR5
23245 #undef VAR6
23246 #undef VAR7
23247 #undef VAR8
23248 #undef VAR9
23249 #undef VAR10
23250
23251 static GTY(()) tree arm_builtin_decls[ARM_BUILTIN_MAX];
23252
23253 #define NUM_DREG_TYPES 5
23254 #define NUM_QREG_TYPES 6
23255
23256 static void
23257 arm_init_neon_builtins (void)
23258 {
23259 unsigned int i, fcode;
23260 tree decl;
23261
23262 tree neon_intQI_type_node;
23263 tree neon_intHI_type_node;
23264 tree neon_floatHF_type_node;
23265 tree neon_polyQI_type_node;
23266 tree neon_polyHI_type_node;
23267 tree neon_intSI_type_node;
23268 tree neon_intDI_type_node;
23269 tree neon_intUTI_type_node;
23270 tree neon_float_type_node;
23271
23272 tree intQI_pointer_node;
23273 tree intHI_pointer_node;
23274 tree intSI_pointer_node;
23275 tree intDI_pointer_node;
23276 tree float_pointer_node;
23277
23278 tree const_intQI_node;
23279 tree const_intHI_node;
23280 tree const_intSI_node;
23281 tree const_intDI_node;
23282 tree const_float_node;
23283
23284 tree const_intQI_pointer_node;
23285 tree const_intHI_pointer_node;
23286 tree const_intSI_pointer_node;
23287 tree const_intDI_pointer_node;
23288 tree const_float_pointer_node;
23289
23290 tree V8QI_type_node;
23291 tree V4HI_type_node;
23292 tree V4HF_type_node;
23293 tree V2SI_type_node;
23294 tree V2SF_type_node;
23295 tree V16QI_type_node;
23296 tree V8HI_type_node;
23297 tree V4SI_type_node;
23298 tree V4SF_type_node;
23299 tree V2DI_type_node;
23300
23301 tree intUQI_type_node;
23302 tree intUHI_type_node;
23303 tree intUSI_type_node;
23304 tree intUDI_type_node;
23305
23306 tree intEI_type_node;
23307 tree intOI_type_node;
23308 tree intCI_type_node;
23309 tree intXI_type_node;
23310
23311 tree V8QI_pointer_node;
23312 tree V4HI_pointer_node;
23313 tree V2SI_pointer_node;
23314 tree V2SF_pointer_node;
23315 tree V16QI_pointer_node;
23316 tree V8HI_pointer_node;
23317 tree V4SI_pointer_node;
23318 tree V4SF_pointer_node;
23319 tree V2DI_pointer_node;
23320
23321 tree void_ftype_pv8qi_v8qi_v8qi;
23322 tree void_ftype_pv4hi_v4hi_v4hi;
23323 tree void_ftype_pv2si_v2si_v2si;
23324 tree void_ftype_pv2sf_v2sf_v2sf;
23325 tree void_ftype_pdi_di_di;
23326 tree void_ftype_pv16qi_v16qi_v16qi;
23327 tree void_ftype_pv8hi_v8hi_v8hi;
23328 tree void_ftype_pv4si_v4si_v4si;
23329 tree void_ftype_pv4sf_v4sf_v4sf;
23330 tree void_ftype_pv2di_v2di_v2di;
23331
23332 tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES];
23333 tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES];
23334 tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES];
23335
23336 /* Create distinguished type nodes for NEON vector element types,
23337 and pointers to values of such types, so we can detect them later. */
23338 neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23339 neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23340 neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
23341 neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
23342 neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
23343 neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
23344 neon_float_type_node = make_node (REAL_TYPE);
23345 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
23346 layout_type (neon_float_type_node);
23347 neon_floatHF_type_node = make_node (REAL_TYPE);
23348 TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
23349 layout_type (neon_floatHF_type_node);
23350
23351 /* Define typedefs which exactly correspond to the modes we are basing vector
23352 types on. If you change these names you'll need to change
23353 the table used by arm_mangle_type too. */
23354 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
23355 "__builtin_neon_qi");
23356 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
23357 "__builtin_neon_hi");
23358 (*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
23359 "__builtin_neon_hf");
23360 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
23361 "__builtin_neon_si");
23362 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
23363 "__builtin_neon_sf");
23364 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
23365 "__builtin_neon_di");
23366 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
23367 "__builtin_neon_poly8");
23368 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
23369 "__builtin_neon_poly16");
23370
23371 intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
23372 intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
23373 intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
23374 intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
23375 float_pointer_node = build_pointer_type (neon_float_type_node);
23376
23377 /* Next create constant-qualified versions of the above types. */
23378 const_intQI_node = build_qualified_type (neon_intQI_type_node,
23379 TYPE_QUAL_CONST);
23380 const_intHI_node = build_qualified_type (neon_intHI_type_node,
23381 TYPE_QUAL_CONST);
23382 const_intSI_node = build_qualified_type (neon_intSI_type_node,
23383 TYPE_QUAL_CONST);
23384 const_intDI_node = build_qualified_type (neon_intDI_type_node,
23385 TYPE_QUAL_CONST);
23386 const_float_node = build_qualified_type (neon_float_type_node,
23387 TYPE_QUAL_CONST);
23388
23389 const_intQI_pointer_node = build_pointer_type (const_intQI_node);
23390 const_intHI_pointer_node = build_pointer_type (const_intHI_node);
23391 const_intSI_pointer_node = build_pointer_type (const_intSI_node);
23392 const_intDI_pointer_node = build_pointer_type (const_intDI_node);
23393 const_float_pointer_node = build_pointer_type (const_float_node);
23394
23395 /* Now create vector types based on our NEON element types. */
23396 /* 64-bit vectors. */
23397 V8QI_type_node =
23398 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
23399 V4HI_type_node =
23400 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
23401 V4HF_type_node =
23402 build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
23403 V2SI_type_node =
23404 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
23405 V2SF_type_node =
23406 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
23407 /* 128-bit vectors. */
23408 V16QI_type_node =
23409 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
23410 V8HI_type_node =
23411 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
23412 V4SI_type_node =
23413 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
23414 V4SF_type_node =
23415 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
23416 V2DI_type_node =
23417 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
23418
23419 /* Unsigned integer types for various mode sizes. */
23420 intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
23421 intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
23422 intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
23423 intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
23424 neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode));
23425
23426
23427 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
23428 "__builtin_neon_uqi");
23429 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
23430 "__builtin_neon_uhi");
23431 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
23432 "__builtin_neon_usi");
23433 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23434 "__builtin_neon_udi");
23435 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
23436 "__builtin_neon_poly64");
23437 (*lang_hooks.types.register_builtin_type) (neon_intUTI_type_node,
23438 "__builtin_neon_poly128");
23439
23440 /* Opaque integer types for structures of vectors. */
23441 intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
23442 intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
23443 intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
23444 intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
23445
23446 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
23447 "__builtin_neon_ti");
23448 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
23449 "__builtin_neon_ei");
23450 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
23451 "__builtin_neon_oi");
23452 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
23453 "__builtin_neon_ci");
23454 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
23455 "__builtin_neon_xi");
23456
23457 /* Pointers to vector types. */
23458 V8QI_pointer_node = build_pointer_type (V8QI_type_node);
23459 V4HI_pointer_node = build_pointer_type (V4HI_type_node);
23460 V2SI_pointer_node = build_pointer_type (V2SI_type_node);
23461 V2SF_pointer_node = build_pointer_type (V2SF_type_node);
23462 V16QI_pointer_node = build_pointer_type (V16QI_type_node);
23463 V8HI_pointer_node = build_pointer_type (V8HI_type_node);
23464 V4SI_pointer_node = build_pointer_type (V4SI_type_node);
23465 V4SF_pointer_node = build_pointer_type (V4SF_type_node);
23466 V2DI_pointer_node = build_pointer_type (V2DI_type_node);
23467
23468 /* Operations which return results as pairs. */
23469 void_ftype_pv8qi_v8qi_v8qi =
23470 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
23471 V8QI_type_node, NULL);
23472 void_ftype_pv4hi_v4hi_v4hi =
23473 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
23474 V4HI_type_node, NULL);
23475 void_ftype_pv2si_v2si_v2si =
23476 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
23477 V2SI_type_node, NULL);
23478 void_ftype_pv2sf_v2sf_v2sf =
23479 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
23480 V2SF_type_node, NULL);
23481 void_ftype_pdi_di_di =
23482 build_function_type_list (void_type_node, intDI_pointer_node,
23483 neon_intDI_type_node, neon_intDI_type_node, NULL);
23484 void_ftype_pv16qi_v16qi_v16qi =
23485 build_function_type_list (void_type_node, V16QI_pointer_node,
23486 V16QI_type_node, V16QI_type_node, NULL);
23487 void_ftype_pv8hi_v8hi_v8hi =
23488 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
23489 V8HI_type_node, NULL);
23490 void_ftype_pv4si_v4si_v4si =
23491 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
23492 V4SI_type_node, NULL);
23493 void_ftype_pv4sf_v4sf_v4sf =
23494 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
23495 V4SF_type_node, NULL);
23496 void_ftype_pv2di_v2di_v2di =
23497 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
23498 V2DI_type_node, NULL);
23499
23500 if (TARGET_CRYPTO && TARGET_HARD_FLOAT)
23501 {
23502 tree V4USI_type_node =
23503 build_vector_type_for_mode (intUSI_type_node, V4SImode);
23504
23505 tree V16UQI_type_node =
23506 build_vector_type_for_mode (intUQI_type_node, V16QImode);
23507
23508 tree v16uqi_ftype_v16uqi
23509 = build_function_type_list (V16UQI_type_node, V16UQI_type_node, NULL_TREE);
23510
23511 tree v16uqi_ftype_v16uqi_v16uqi
23512 = build_function_type_list (V16UQI_type_node, V16UQI_type_node,
23513 V16UQI_type_node, NULL_TREE);
23514
23515 tree v4usi_ftype_v4usi
23516 = build_function_type_list (V4USI_type_node, V4USI_type_node, NULL_TREE);
23517
23518 tree v4usi_ftype_v4usi_v4usi
23519 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23520 V4USI_type_node, NULL_TREE);
23521
23522 tree v4usi_ftype_v4usi_v4usi_v4usi
23523 = build_function_type_list (V4USI_type_node, V4USI_type_node,
23524 V4USI_type_node, V4USI_type_node, NULL_TREE);
23525
23526 tree uti_ftype_udi_udi
23527 = build_function_type_list (neon_intUTI_type_node, intUDI_type_node,
23528 intUDI_type_node, NULL_TREE);
23529
23530 #undef CRYPTO1
23531 #undef CRYPTO2
23532 #undef CRYPTO3
23533 #undef C
23534 #undef N
23535 #undef CF
23536 #undef FT1
23537 #undef FT2
23538 #undef FT3
23539
23540 #define C(U) \
23541 ARM_BUILTIN_CRYPTO_##U
23542 #define N(L) \
23543 "__builtin_arm_crypto_"#L
23544 #define FT1(R, A) \
23545 R##_ftype_##A
23546 #define FT2(R, A1, A2) \
23547 R##_ftype_##A1##_##A2
23548 #define FT3(R, A1, A2, A3) \
23549 R##_ftype_##A1##_##A2##_##A3
23550 #define CRYPTO1(L, U, R, A) \
23551 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT1 (R, A), \
23552 C (U), BUILT_IN_MD, \
23553 NULL, NULL_TREE);
23554 #define CRYPTO2(L, U, R, A1, A2) \
23555 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT2 (R, A1, A2), \
23556 C (U), BUILT_IN_MD, \
23557 NULL, NULL_TREE);
23558
23559 #define CRYPTO3(L, U, R, A1, A2, A3) \
23560 arm_builtin_decls[C (U)] = add_builtin_function (N (L), FT3 (R, A1, A2, A3), \
23561 C (U), BUILT_IN_MD, \
23562 NULL, NULL_TREE);
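/* For illustration only ("foo"/FOO is a made-up name, not an entry from
   crypto.def):  CRYPTO1 (foo, FOO, v16uqi, v16uqi) would expand to

     arm_builtin_decls[ARM_BUILTIN_CRYPTO_FOO]
       = add_builtin_function ("__builtin_arm_crypto_foo",
			       v16uqi_ftype_v16uqi, ARM_BUILTIN_CRYPTO_FOO,
			       BUILT_IN_MD, NULL, NULL_TREE);  */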
23563 #include "crypto.def"
23564
23565 #undef CRYPTO1
23566 #undef CRYPTO2
23567 #undef CRYPTO3
23568 #undef C
23569 #undef N
23570 #undef FT1
23571 #undef FT2
23572 #undef FT3
23573 }
23574 dreg_types[0] = V8QI_type_node;
23575 dreg_types[1] = V4HI_type_node;
23576 dreg_types[2] = V2SI_type_node;
23577 dreg_types[3] = V2SF_type_node;
23578 dreg_types[4] = neon_intDI_type_node;
23579
23580 qreg_types[0] = V16QI_type_node;
23581 qreg_types[1] = V8HI_type_node;
23582 qreg_types[2] = V4SI_type_node;
23583 qreg_types[3] = V4SF_type_node;
23584 qreg_types[4] = V2DI_type_node;
23585 qreg_types[5] = neon_intUTI_type_node;
23586
23587 for (i = 0; i < NUM_QREG_TYPES; i++)
23588 {
23589 int j;
23590 for (j = 0; j < NUM_QREG_TYPES; j++)
23591 {
23592 if (i < NUM_DREG_TYPES && j < NUM_DREG_TYPES)
23593 reinterp_ftype_dreg[i][j]
23594 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
23595
23596 reinterp_ftype_qreg[i][j]
23597 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
23598 }
23599 }
23600
23601 for (i = 0, fcode = ARM_BUILTIN_NEON_BASE;
23602 i < ARRAY_SIZE (neon_builtin_data);
23603 i++, fcode++)
23604 {
23605 neon_builtin_datum *d = &neon_builtin_data[i];
23606
23607 const char* const modenames[] = {
23608 "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
23609 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
23610 "ti", "ei", "oi"
23611 };
23612 char namebuf[60];
23613 tree ftype = NULL;
23614 int is_load = 0, is_store = 0;
23615
23616 gcc_assert (ARRAY_SIZE (modenames) == T_MAX);
23617
23618 d->fcode = fcode;
23619
23620 switch (d->itype)
23621 {
23622 case NEON_LOAD1:
23623 case NEON_LOAD1LANE:
23624 case NEON_LOADSTRUCT:
23625 case NEON_LOADSTRUCTLANE:
23626 is_load = 1;
23627 /* Fall through. */
23628 case NEON_STORE1:
23629 case NEON_STORE1LANE:
23630 case NEON_STORESTRUCT:
23631 case NEON_STORESTRUCTLANE:
23632 if (!is_load)
23633 is_store = 1;
23634 /* Fall through. */
23635 case NEON_UNOP:
23636 case NEON_RINT:
23637 case NEON_BINOP:
23638 case NEON_LOGICBINOP:
23639 case NEON_SHIFTINSERT:
23640 case NEON_TERNOP:
23641 case NEON_GETLANE:
23642 case NEON_SETLANE:
23643 case NEON_CREATE:
23644 case NEON_DUP:
23645 case NEON_DUPLANE:
23646 case NEON_SHIFTIMM:
23647 case NEON_SHIFTACC:
23648 case NEON_COMBINE:
23649 case NEON_SPLIT:
23650 case NEON_CONVERT:
23651 case NEON_FIXCONV:
23652 case NEON_LANEMUL:
23653 case NEON_LANEMULL:
23654 case NEON_LANEMULH:
23655 case NEON_LANEMAC:
23656 case NEON_SCALARMUL:
23657 case NEON_SCALARMULL:
23658 case NEON_SCALARMULH:
23659 case NEON_SCALARMAC:
23660 case NEON_SELECT:
23661 case NEON_VTBL:
23662 case NEON_VTBX:
23663 {
23664 int k;
23665 tree return_type = void_type_node, args = void_list_node;
23666
23667 /* Build a function type directly from the insn_data for
23668 this builtin. The build_function_type() function takes
23669 care of removing duplicates for us. */
23670 for (k = insn_data[d->code].n_generator_args - 1; k >= 0; k--)
23671 {
23672 tree eltype;
23673
23674 if (is_load && k == 1)
23675 {
23676 /* Neon load patterns always have the memory
23677 operand in the operand 1 position. */
23678 gcc_assert (insn_data[d->code].operand[k].predicate
23679 == neon_struct_operand);
23680
23681 switch (d->mode)
23682 {
23683 case T_V8QI:
23684 case T_V16QI:
23685 eltype = const_intQI_pointer_node;
23686 break;
23687
23688 case T_V4HI:
23689 case T_V8HI:
23690 eltype = const_intHI_pointer_node;
23691 break;
23692
23693 case T_V2SI:
23694 case T_V4SI:
23695 eltype = const_intSI_pointer_node;
23696 break;
23697
23698 case T_V2SF:
23699 case T_V4SF:
23700 eltype = const_float_pointer_node;
23701 break;
23702
23703 case T_DI:
23704 case T_V2DI:
23705 eltype = const_intDI_pointer_node;
23706 break;
23707
23708 default: gcc_unreachable ();
23709 }
23710 }
23711 else if (is_store && k == 0)
23712 {
23713 /* Similarly, Neon store patterns use operand 0 as
23714 the memory location to store to. */
23715 gcc_assert (insn_data[d->code].operand[k].predicate
23716 == neon_struct_operand);
23717
23718 switch (d->mode)
23719 {
23720 case T_V8QI:
23721 case T_V16QI:
23722 eltype = intQI_pointer_node;
23723 break;
23724
23725 case T_V4HI:
23726 case T_V8HI:
23727 eltype = intHI_pointer_node;
23728 break;
23729
23730 case T_V2SI:
23731 case T_V4SI:
23732 eltype = intSI_pointer_node;
23733 break;
23734
23735 case T_V2SF:
23736 case T_V4SF:
23737 eltype = float_pointer_node;
23738 break;
23739
23740 case T_DI:
23741 case T_V2DI:
23742 eltype = intDI_pointer_node;
23743 break;
23744
23745 default: gcc_unreachable ();
23746 }
23747 }
23748 else
23749 {
23750 switch (insn_data[d->code].operand[k].mode)
23751 {
23752 case VOIDmode: eltype = void_type_node; break;
23753 /* Scalars. */
23754 case QImode: eltype = neon_intQI_type_node; break;
23755 case HImode: eltype = neon_intHI_type_node; break;
23756 case SImode: eltype = neon_intSI_type_node; break;
23757 case SFmode: eltype = neon_float_type_node; break;
23758 case DImode: eltype = neon_intDI_type_node; break;
23759 case TImode: eltype = intTI_type_node; break;
23760 case EImode: eltype = intEI_type_node; break;
23761 case OImode: eltype = intOI_type_node; break;
23762 case CImode: eltype = intCI_type_node; break;
23763 case XImode: eltype = intXI_type_node; break;
23764 /* 64-bit vectors. */
23765 case V8QImode: eltype = V8QI_type_node; break;
23766 case V4HImode: eltype = V4HI_type_node; break;
23767 case V2SImode: eltype = V2SI_type_node; break;
23768 case V2SFmode: eltype = V2SF_type_node; break;
23769 /* 128-bit vectors. */
23770 case V16QImode: eltype = V16QI_type_node; break;
23771 case V8HImode: eltype = V8HI_type_node; break;
23772 case V4SImode: eltype = V4SI_type_node; break;
23773 case V4SFmode: eltype = V4SF_type_node; break;
23774 case V2DImode: eltype = V2DI_type_node; break;
23775 default: gcc_unreachable ();
23776 }
23777 }
23778
23779 if (k == 0 && !is_store)
23780 return_type = eltype;
23781 else
23782 args = tree_cons (NULL_TREE, eltype, args);
23783 }
23784
23785 ftype = build_function_type (return_type, args);
23786 }
23787 break;
23788
23789 case NEON_RESULTPAIR:
23790 {
23791 switch (insn_data[d->code].operand[1].mode)
23792 {
23793 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
23794 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
23795 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
23796 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
23797 case DImode: ftype = void_ftype_pdi_di_di; break;
23798 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
23799 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
23800 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
23801 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
23802 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
23803 default: gcc_unreachable ();
23804 }
23805 }
23806 break;
23807
23808 case NEON_REINTERP:
23809 {
23810 /* We iterate over NUM_DREG_TYPES doubleword types,
23811 then NUM_QREG_TYPES quadword types.
23812 V4HF is not a type used in reinterpret, so we translate
23813 d->mode to the correct index in reinterp_ftype_dreg. */
23814 bool qreg_p
23815 = GET_MODE_SIZE (insn_data[d->code].operand[0].mode) > 8;
23816 int rhs = (d->mode - ((!qreg_p && (d->mode > T_V4HF)) ? 1 : 0))
23817 % NUM_QREG_TYPES;
23818 switch (insn_data[d->code].operand[0].mode)
23819 {
23820 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
23821 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
23822 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
23823 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
23824 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
23825 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
23826 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
23827 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
23828 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
23829 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
23830 case TImode: ftype = reinterp_ftype_qreg[5][rhs]; break;
23831 default: gcc_unreachable ();
23832 }
23833 }
23834 break;
23835 case NEON_FLOAT_WIDEN:
23836 {
23837 tree eltype = NULL_TREE;
23838 tree return_type = NULL_TREE;
23839
23840 switch (insn_data[d->code].operand[1].mode)
23841 {
23842 case V4HFmode:
23843 eltype = V4HF_type_node;
23844 return_type = V4SF_type_node;
23845 break;
23846 default: gcc_unreachable ();
23847 }
23848 ftype = build_function_type_list (return_type, eltype, NULL);
23849 break;
23850 }
23851 case NEON_FLOAT_NARROW:
23852 {
23853 tree eltype = NULL_TREE;
23854 tree return_type = NULL_TREE;
23855
23856 switch (insn_data[d->code].operand[1].mode)
23857 {
23858 case V4SFmode:
23859 eltype = V4SF_type_node;
23860 return_type = V4HF_type_node;
23861 break;
23862 default: gcc_unreachable ();
23863 }
23864 ftype = build_function_type_list (return_type, eltype, NULL);
23865 break;
23866 }
23867 default:
23868 gcc_unreachable ();
23869 }
23870
23871 gcc_assert (ftype != NULL);
23872
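      /* The builtin name is the entry's name with the mode name appended;
	 for instance a BINOP entry for "vadd" in V8QImode would be
	 registered as __builtin_neon_vaddv8qi (given here for
	 illustration).  */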
23873 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
23874
23875 decl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, NULL,
23876 NULL_TREE);
23877 arm_builtin_decls[fcode] = decl;
23878 }
23879 }
23880
23881 #undef NUM_DREG_TYPES
23882 #undef NUM_QREG_TYPES
23883
23884 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
23885 do \
23886 { \
23887 if ((MASK) & insn_flags) \
23888 { \
23889 tree bdecl; \
23890 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
23891 BUILT_IN_MD, NULL, NULL_TREE); \
23892 arm_builtin_decls[CODE] = bdecl; \
23893 } \
23894 } \
23895 while (0)
23896
23897 struct builtin_description
23898 {
23899 const unsigned int mask;
23900 const enum insn_code icode;
23901 const char * const name;
23902 const enum arm_builtins code;
23903 const enum rtx_code comparison;
23904 const unsigned int flag;
23905 };
23906
23907 static const struct builtin_description bdesc_2arg[] =
23908 {
23909 #define IWMMXT_BUILTIN(code, string, builtin) \
23910 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
23911 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23912
23913 #define IWMMXT2_BUILTIN(code, string, builtin) \
23914 { FL_IWMMXT2, CODE_FOR_##code, "__builtin_arm_" string, \
23915 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23916
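  /* For example, the first entry below,
       IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
     expands to
       { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
	 ARM_BUILTIN_WADDB, UNKNOWN, 0 },  */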
23917 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
23918 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
23919 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
23920 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
23921 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
23922 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
23923 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
23924 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
23925 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
23926 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
23927 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
23928 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
23929 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
23930 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
23931 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
23932 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
23933 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
23934 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
23935 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
23936 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
23937 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
23938 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
23939 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
23940 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
23941 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
23942 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
23943 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
23944 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
23945 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
23946 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
23947 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
23948 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
23949 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
23950 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
23951 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
23952 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
23953 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
23954 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
23955 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
23956 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
23957 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
23958 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
23959 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
23960 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
23961 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
23962 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
23963 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
23964 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
23965 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
23966 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
23967 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
23968 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
23969 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
23970 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
23971 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
23972 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
23973 IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
23974 IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
23975 IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
23976 IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
23977 IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
23978 IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
23979 IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
23980 IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
23981 IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
23982 IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
23983 IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
23984 IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
23985 IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
23986 IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
23987 IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
23988 IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
23989 IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
23990 IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
23991 IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
23992 IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
23993 IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
23994 IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
23995
23996 #define IWMMXT_BUILTIN2(code, builtin) \
23997 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
23998
23999 #define IWMMXT2_BUILTIN2(code, builtin) \
24000 { FL_IWMMXT2, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
24001
24002 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
24003 IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
24004 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
24005 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
24006 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
24007 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
24008 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
24009 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
24010 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
24011 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
24012
24013 #define CRC32_BUILTIN(L, U) \
24014 {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
24015 UNKNOWN, 0},
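/* For example, CRC32_BUILTIN (crc32b, CRC32B) maps __builtin_arm_crc32b
   onto the crc32b insn pattern and the ARM_BUILTIN_CRC32B code.  */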
24016 CRC32_BUILTIN (crc32b, CRC32B)
24017 CRC32_BUILTIN (crc32h, CRC32H)
24018 CRC32_BUILTIN (crc32w, CRC32W)
24019 CRC32_BUILTIN (crc32cb, CRC32CB)
24020 CRC32_BUILTIN (crc32ch, CRC32CH)
24021 CRC32_BUILTIN (crc32cw, CRC32CW)
24022 #undef CRC32_BUILTIN
24023
24024
24025 #define CRYPTO_BUILTIN(L, U) \
24026 {0, CODE_FOR_crypto_##L, "__builtin_arm_crypto_"#L, ARM_BUILTIN_CRYPTO_##U, \
24027 UNKNOWN, 0},
24028 #undef CRYPTO1
24029 #undef CRYPTO2
24030 #undef CRYPTO3
24031 #define CRYPTO2(L, U, R, A1, A2) CRYPTO_BUILTIN (L, U)
24032 #define CRYPTO1(L, U, R, A)
24033 #define CRYPTO3(L, U, R, A1, A2, A3)
24034 #include "crypto.def"
24035 #undef CRYPTO1
24036 #undef CRYPTO2
24037 #undef CRYPTO3
24038
24039 };
24040
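/* Built-in functions that take a single operand: the iWMMXt move-mask,
   accumulate, unpack, absolute-value and broadcast operations plus the
   one-argument crypto intrinsics from crypto.def.  */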
24041 static const struct builtin_description bdesc_1arg[] =
24042 {
24043 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
24044 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
24045 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
24046 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
24047 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
24048 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
24049 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
24050 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
24051 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
24052 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
24053 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
24054 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
24055 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
24056 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
24057 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
24058 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
24059 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
24060 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
24061 IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
24062 IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
24063 IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
24064 IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
24065 IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
24066 IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
24067
24068 #define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
24069 #define CRYPTO2(L, U, R, A1, A2)
24070 #define CRYPTO3(L, U, R, A1, A2, A3)
24071 #include "crypto.def"
24072 #undef CRYPTO1
24073 #undef CRYPTO2
24074 #undef CRYPTO3
24075 };
24076
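/* Built-in functions that take three operands: currently only the
   three-argument crypto intrinsics from crypto.def.  */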
24077 static const struct builtin_description bdesc_3arg[] =
24078 {
24079 #define CRYPTO3(L, U, R, A1, A2, A3) CRYPTO_BUILTIN (L, U)
24080 #define CRYPTO1(L, U, R, A)
24081 #define CRYPTO2(L, U, R, A1, A2)
24082 #include "crypto.def"
24083 #undef CRYPTO1
24084 #undef CRYPTO2
24085 #undef CRYPTO3
24086 };
24087 #undef CRYPTO_BUILTIN
24088
24089 /* Set up all the iWMMXt builtins. This is not called if
24090 TARGET_IWMMXT is zero. */
24091
24092 static void
24093 arm_init_iwmmxt_builtins (void)
24094 {
24095 const struct builtin_description * d;
24096 size_t i;
24097
24098 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
24099 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
24100 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
24101
24102 tree v8qi_ftype_v8qi_v8qi_int
24103 = build_function_type_list (V8QI_type_node,
24104 V8QI_type_node, V8QI_type_node,
24105 integer_type_node, NULL_TREE);
24106 tree v4hi_ftype_v4hi_int
24107 = build_function_type_list (V4HI_type_node,
24108 V4HI_type_node, integer_type_node, NULL_TREE);
24109 tree v2si_ftype_v2si_int
24110 = build_function_type_list (V2SI_type_node,
24111 V2SI_type_node, integer_type_node, NULL_TREE);
24112 tree v2si_ftype_di_di
24113 = build_function_type_list (V2SI_type_node,
24114 long_long_integer_type_node,
24115 long_long_integer_type_node,
24116 NULL_TREE);
24117 tree di_ftype_di_int
24118 = build_function_type_list (long_long_integer_type_node,
24119 long_long_integer_type_node,
24120 integer_type_node, NULL_TREE);
24121 tree di_ftype_di_int_int
24122 = build_function_type_list (long_long_integer_type_node,
24123 long_long_integer_type_node,
24124 integer_type_node,
24125 integer_type_node, NULL_TREE);
24126 tree int_ftype_v8qi
24127 = build_function_type_list (integer_type_node,
24128 V8QI_type_node, NULL_TREE);
24129 tree int_ftype_v4hi
24130 = build_function_type_list (integer_type_node,
24131 V4HI_type_node, NULL_TREE);
24132 tree int_ftype_v2si
24133 = build_function_type_list (integer_type_node,
24134 V2SI_type_node, NULL_TREE);
24135 tree int_ftype_v8qi_int
24136 = build_function_type_list (integer_type_node,
24137 V8QI_type_node, integer_type_node, NULL_TREE);
24138 tree int_ftype_v4hi_int
24139 = build_function_type_list (integer_type_node,
24140 V4HI_type_node, integer_type_node, NULL_TREE);
24141 tree int_ftype_v2si_int
24142 = build_function_type_list (integer_type_node,
24143 V2SI_type_node, integer_type_node, NULL_TREE);
24144 tree v8qi_ftype_v8qi_int_int
24145 = build_function_type_list (V8QI_type_node,
24146 V8QI_type_node, integer_type_node,
24147 integer_type_node, NULL_TREE);
24148 tree v4hi_ftype_v4hi_int_int
24149 = build_function_type_list (V4HI_type_node,
24150 V4HI_type_node, integer_type_node,
24151 integer_type_node, NULL_TREE);
24152 tree v2si_ftype_v2si_int_int
24153 = build_function_type_list (V2SI_type_node,
24154 V2SI_type_node, integer_type_node,
24155 integer_type_node, NULL_TREE);
24156 /* Miscellaneous. */
24157 tree v8qi_ftype_v4hi_v4hi
24158 = build_function_type_list (V8QI_type_node,
24159 V4HI_type_node, V4HI_type_node, NULL_TREE);
24160 tree v4hi_ftype_v2si_v2si
24161 = build_function_type_list (V4HI_type_node,
24162 V2SI_type_node, V2SI_type_node, NULL_TREE);
24163 tree v8qi_ftype_v4hi_v8qi
24164 = build_function_type_list (V8QI_type_node,
24165 V4HI_type_node, V8QI_type_node, NULL_TREE);
24166 tree v2si_ftype_v4hi_v4hi
24167 = build_function_type_list (V2SI_type_node,
24168 V4HI_type_node, V4HI_type_node, NULL_TREE);
24169 tree v2si_ftype_v8qi_v8qi
24170 = build_function_type_list (V2SI_type_node,
24171 V8QI_type_node, V8QI_type_node, NULL_TREE);
24172 tree v4hi_ftype_v4hi_di
24173 = build_function_type_list (V4HI_type_node,
24174 V4HI_type_node, long_long_integer_type_node,
24175 NULL_TREE);
24176 tree v2si_ftype_v2si_di
24177 = build_function_type_list (V2SI_type_node,
24178 V2SI_type_node, long_long_integer_type_node,
24179 NULL_TREE);
24180 tree di_ftype_void
24181 = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
24182 tree int_ftype_void
24183 = build_function_type_list (integer_type_node, NULL_TREE);
24184 tree di_ftype_v8qi
24185 = build_function_type_list (long_long_integer_type_node,
24186 V8QI_type_node, NULL_TREE);
24187 tree di_ftype_v4hi
24188 = build_function_type_list (long_long_integer_type_node,
24189 V4HI_type_node, NULL_TREE);
24190 tree di_ftype_v2si
24191 = build_function_type_list (long_long_integer_type_node,
24192 V2SI_type_node, NULL_TREE);
24193 tree v2si_ftype_v4hi
24194 = build_function_type_list (V2SI_type_node,
24195 V4HI_type_node, NULL_TREE);
24196 tree v4hi_ftype_v8qi
24197 = build_function_type_list (V4HI_type_node,
24198 V8QI_type_node, NULL_TREE);
24199 tree v8qi_ftype_v8qi
24200 = build_function_type_list (V8QI_type_node,
24201 V8QI_type_node, NULL_TREE);
24202 tree v4hi_ftype_v4hi
24203 = build_function_type_list (V4HI_type_node,
24204 V4HI_type_node, NULL_TREE);
24205 tree v2si_ftype_v2si
24206 = build_function_type_list (V2SI_type_node,
24207 V2SI_type_node, NULL_TREE);
24208
24209 tree di_ftype_di_v4hi_v4hi
24210 = build_function_type_list (long_long_unsigned_type_node,
24211 long_long_unsigned_type_node,
24212 V4HI_type_node, V4HI_type_node,
24213 NULL_TREE);
24214
24215 tree di_ftype_v4hi_v4hi
24216 = build_function_type_list (long_long_unsigned_type_node,
24217 V4HI_type_node, V4HI_type_node,
24218 NULL_TREE);
24219
24220 tree v2si_ftype_v2si_v4hi_v4hi
24221 = build_function_type_list (V2SI_type_node,
24222 V2SI_type_node, V4HI_type_node,
24223 V4HI_type_node, NULL_TREE);
24224
24225 tree v2si_ftype_v2si_v8qi_v8qi
24226 = build_function_type_list (V2SI_type_node,
24227 V2SI_type_node, V8QI_type_node,
24228 V8QI_type_node, NULL_TREE);
24229
24230 tree di_ftype_di_v2si_v2si
24231 = build_function_type_list (long_long_unsigned_type_node,
24232 long_long_unsigned_type_node,
24233 V2SI_type_node, V2SI_type_node,
24234 NULL_TREE);
24235
24236 tree di_ftype_di_di_int
24237 = build_function_type_list (long_long_unsigned_type_node,
24238 long_long_unsigned_type_node,
24239 long_long_unsigned_type_node,
24240 integer_type_node, NULL_TREE);
24241
24242 tree void_ftype_int
24243 = build_function_type_list (void_type_node,
24244 integer_type_node, NULL_TREE);
24245
24246 tree v8qi_ftype_char
24247 = build_function_type_list (V8QI_type_node,
24248 signed_char_type_node, NULL_TREE);
24249
24250 tree v4hi_ftype_short
24251 = build_function_type_list (V4HI_type_node,
24252 short_integer_type_node, NULL_TREE);
24253
24254 tree v2si_ftype_int
24255 = build_function_type_list (V2SI_type_node,
24256 integer_type_node, NULL_TREE);
24257
24258 /* Normal vector binops. */
24259 tree v8qi_ftype_v8qi_v8qi
24260 = build_function_type_list (V8QI_type_node,
24261 V8QI_type_node, V8QI_type_node, NULL_TREE);
24262 tree v4hi_ftype_v4hi_v4hi
24263 = build_function_type_list (V4HI_type_node,
24264 V4HI_type_node, V4HI_type_node, NULL_TREE);
24265 tree v2si_ftype_v2si_v2si
24266 = build_function_type_list (V2SI_type_node,
24267 V2SI_type_node, V2SI_type_node, NULL_TREE);
24268 tree di_ftype_di_di
24269 = build_function_type_list (long_long_unsigned_type_node,
24270 long_long_unsigned_type_node,
24271 long_long_unsigned_type_node,
24272 NULL_TREE);
24273
24274 /* Add all builtins that are more or less simple operations on two
24275 operands. */
24276 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
24277 {
24278 /* Use one of the operands; the target can have a different mode for
24279 mask-generating compares. */
24280 enum machine_mode mode;
24281 tree type;
24282
24283 if (d->name == 0 || !(d->mask == FL_IWMMXT || d->mask == FL_IWMMXT2))
24284 continue;
24285
24286 mode = insn_data[d->icode].operand[1].mode;
24287
24288 switch (mode)
24289 {
24290 case V8QImode:
24291 type = v8qi_ftype_v8qi_v8qi;
24292 break;
24293 case V4HImode:
24294 type = v4hi_ftype_v4hi_v4hi;
24295 break;
24296 case V2SImode:
24297 type = v2si_ftype_v2si_v2si;
24298 break;
24299 case DImode:
24300 type = di_ftype_di_di;
24301 break;
24302
24303 default:
24304 gcc_unreachable ();
24305 }
24306
24307 def_mbuiltin (d->mask, d->name, type, d->code);
24308 }
24309
24310 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
24311 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
24312 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
24313 ARM_BUILTIN_ ## CODE)
24314
24315 #define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
24316 def_mbuiltin (FL_IWMMXT2, "__builtin_arm_" NAME, (TYPE), \
24317 ARM_BUILTIN_ ## CODE)
24318
24319 iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
24320 iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
24321 iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
24322 iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
24323 iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
24324 iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
24325 iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
24326 iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
24327 iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
24328
24329 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
24330 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
24331 iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
24332 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
24333 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
24334 iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
24335
24336 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
24337 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
24338 iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
24339 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
24340 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
24341 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
24342
24343 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
24344 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
24345 iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
24346 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
24347 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
24348 iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
24349
24350 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
24351 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
24352 iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
24353 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
24354 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
24355 iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
24356
24357 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
24358
24359 iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
24360 iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
24361 iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
24362 iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
24363 iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
24364 iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
24365 iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
24366 iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
24367 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
24368 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
24369
24370 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
24371 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
24372 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
24373 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
24374 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
24375 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
24376 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
24377 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
24378 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
24379
24380 iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
24381 iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
24382 iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
24383
24384 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
24385 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
24386 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
24387
24388 iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
24389 iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
24390
24391 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
24392 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
24393 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
24394 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
24395 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
24396 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
24397
24398 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
24399 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
24400 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
24401 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
24402 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
24403 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
24404 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
24405 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
24406 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
24407 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
24408 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
24409 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
24410
24411 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
24412 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
24413 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
24414 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
24415
24416 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
24417 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
24418 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
24419 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
24420 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
24421 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
24422 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
24423
24424 iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
24425 iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
24426 iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
24427
24428 iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
24429 iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
24430 iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
24431 iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
24432
24433 iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
24434 iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
24435 iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
24436 iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
24437
24438 iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
24439 iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
24440 iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
24441 iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
24442
24443 iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
24444 iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
24445 iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
24446 iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
24447
24448 iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
24449 iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
24450 iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
24451 iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
24452
24453 iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
24454 iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
24455 iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
24456 iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
24457
24458 iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
24459
24460 iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
24461 iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
24462 iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
24463
24464 #undef iwmmx_mbuiltin
24465 #undef iwmmx2_mbuiltin
24466 }
24467
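/* Create the scalar half-precision floating-point type and register it with
   the front end under the name "__fp16".  */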
24468 static void
24469 arm_init_fp16_builtins (void)
24470 {
24471 tree fp16_type = make_node (REAL_TYPE);
24472 TYPE_PRECISION (fp16_type) = 16;
24473 layout_type (fp16_type);
24474 (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
24475 }
24476
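/* Set up the CRC32 builtins.  Each takes an unsigned int accumulator and an
   unsigned 8-, 16- or 32-bit data value and returns an unsigned int.  */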
24477 static void
24478 arm_init_crc32_builtins ()
24479 {
24480 tree si_ftype_si_qi
24481 = build_function_type_list (unsigned_intSI_type_node,
24482 unsigned_intSI_type_node,
24483 unsigned_intQI_type_node, NULL_TREE);
24484 tree si_ftype_si_hi
24485 = build_function_type_list (unsigned_intSI_type_node,
24486 unsigned_intSI_type_node,
24487 unsigned_intHI_type_node, NULL_TREE);
24488 tree si_ftype_si_si
24489 = build_function_type_list (unsigned_intSI_type_node,
24490 unsigned_intSI_type_node,
24491 unsigned_intSI_type_node, NULL_TREE);
24492
24493 arm_builtin_decls[ARM_BUILTIN_CRC32B]
24494 = add_builtin_function ("__builtin_arm_crc32b", si_ftype_si_qi,
24495 ARM_BUILTIN_CRC32B, BUILT_IN_MD, NULL, NULL_TREE);
24496 arm_builtin_decls[ARM_BUILTIN_CRC32H]
24497 = add_builtin_function ("__builtin_arm_crc32h", si_ftype_si_hi,
24498 ARM_BUILTIN_CRC32H, BUILT_IN_MD, NULL, NULL_TREE);
24499 arm_builtin_decls[ARM_BUILTIN_CRC32W]
24500 = add_builtin_function ("__builtin_arm_crc32w", si_ftype_si_si,
24501 ARM_BUILTIN_CRC32W, BUILT_IN_MD, NULL, NULL_TREE);
24502 arm_builtin_decls[ARM_BUILTIN_CRC32CB]
24503 = add_builtin_function ("__builtin_arm_crc32cb", si_ftype_si_qi,
24504 ARM_BUILTIN_CRC32CB, BUILT_IN_MD, NULL, NULL_TREE);
24505 arm_builtin_decls[ARM_BUILTIN_CRC32CH]
24506 = add_builtin_function ("__builtin_arm_crc32ch", si_ftype_si_hi,
24507 ARM_BUILTIN_CRC32CH, BUILT_IN_MD, NULL, NULL_TREE);
24508 arm_builtin_decls[ARM_BUILTIN_CRC32CW]
24509 = add_builtin_function ("__builtin_arm_crc32cw", si_ftype_si_si,
24510 ARM_BUILTIN_CRC32CW, BUILT_IN_MD, NULL, NULL_TREE);
24511 }
24512
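/* Set up all the target-specific builtins that the selected options make
   available: iWMMXt, Neon, __fp16 and CRC32.  */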
24513 static void
24514 arm_init_builtins (void)
24515 {
24516 if (TARGET_REALLY_IWMMXT)
24517 arm_init_iwmmxt_builtins ();
24518
24519 if (TARGET_NEON)
24520 arm_init_neon_builtins ();
24521
24522 if (arm_fp16_format)
24523 arm_init_fp16_builtins ();
24524
24525 if (TARGET_CRC32)
24526 arm_init_crc32_builtins ();
24527 }
24528
24529 /* Return the ARM builtin for CODE. */
24530
24531 static tree
24532 arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
24533 {
24534 if (code >= ARM_BUILTIN_MAX)
24535 return error_mark_node;
24536
24537 return arm_builtin_decls[code];
24538 }
24539
24540 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
24541
24542 static const char *
24543 arm_invalid_parameter_type (const_tree t)
24544 {
24545 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24546 return N_("function parameters cannot have __fp16 type");
24547 return NULL;
24548 }
24549
24550 /* Implement TARGET_INVALID_RETURN_TYPE. */
24551
24552 static const char *
24553 arm_invalid_return_type (const_tree t)
24554 {
24555 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24556 return N_("functions cannot return __fp16 type");
24557 return NULL;
24558 }
24559
24560 /* Implement TARGET_PROMOTED_TYPE. */
24561
24562 static tree
24563 arm_promoted_type (const_tree t)
24564 {
24565 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
24566 return float_type_node;
24567 return NULL_TREE;
24568 }
24569
24570 /* Implement TARGET_CONVERT_TO_TYPE.
24571 Specifically, this hook implements the peculiarity of the ARM
24572 half-precision floating-point C semantics that requires conversions between
24573 __fp16 and double to go through an intermediate conversion to float. */
24574
24575 static tree
24576 arm_convert_to_type (tree type, tree expr)
24577 {
24578 tree fromtype = TREE_TYPE (expr);
24579 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
24580 return NULL_TREE;
24581 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
24582 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
24583 return convert (type, convert (float_type_node, expr));
24584 return NULL_TREE;
24585 }
24586
24587 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24588 This simply adds HFmode as a supported mode; even though we don't
24589 implement arithmetic on this type directly, it's supported by
24590 optabs conversions, much the way the double-word arithmetic is
24591 special-cased in the default hook. */
24592
24593 static bool
24594 arm_scalar_mode_supported_p (enum machine_mode mode)
24595 {
24596 if (mode == HFmode)
24597 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24598 else if (ALL_FIXED_POINT_MODE_P (mode))
24599 return true;
24600 else
24601 return default_scalar_mode_supported_p (mode);
24602 }
24603
24604 /* Errors in the source file can cause expand_expr to return const0_rtx
24605 where we expect a vector. To avoid crashing, use one of the vector
24606 clear instructions. */
24607
24608 static rtx
24609 safe_vector_operand (rtx x, enum machine_mode mode)
24610 {
24611 if (x != const0_rtx)
24612 return x;
24613 x = gen_reg_rtx (mode);
24614
24615 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
24616 : gen_rtx_SUBREG (DImode, x, 0)));
24617 return x;
24618 }
24619
24620 /* Function to expand ternary builtins. */
24621 static rtx
24622 arm_expand_ternop_builtin (enum insn_code icode,
24623 tree exp, rtx target)
24624 {
24625 rtx pat;
24626 tree arg0 = CALL_EXPR_ARG (exp, 0);
24627 tree arg1 = CALL_EXPR_ARG (exp, 1);
24628 tree arg2 = CALL_EXPR_ARG (exp, 2);
24629
24630 rtx op0 = expand_normal (arg0);
24631 rtx op1 = expand_normal (arg1);
24632 rtx op2 = expand_normal (arg2);
24633 rtx op3 = NULL_RTX;
24634
24635 /* The sha1c, sha1p, sha1m crypto builtins require a different vec_select
24636 lane operand depending on endianness. */
24637 bool builtin_sha1cpm_p = false;
24638
24639 if (insn_data[icode].n_operands == 5)
24640 {
24641 gcc_assert (icode == CODE_FOR_crypto_sha1c
24642 || icode == CODE_FOR_crypto_sha1p
24643 || icode == CODE_FOR_crypto_sha1m);
24644 builtin_sha1cpm_p = true;
24645 }
24646 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24647 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24648 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24649 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
24650
24651
24652 if (VECTOR_MODE_P (mode0))
24653 op0 = safe_vector_operand (op0, mode0);
24654 if (VECTOR_MODE_P (mode1))
24655 op1 = safe_vector_operand (op1, mode1);
24656 if (VECTOR_MODE_P (mode2))
24657 op2 = safe_vector_operand (op2, mode2);
24658
24659 if (! target
24660 || GET_MODE (target) != tmode
24661 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24662 target = gen_reg_rtx (tmode);
24663
24664 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24665 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
24666 && (GET_MODE (op2) == mode2 || GET_MODE (op2) == VOIDmode));
24667
24668 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24669 op0 = copy_to_mode_reg (mode0, op0);
24670 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24671 op1 = copy_to_mode_reg (mode1, op1);
24672 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
24673 op2 = copy_to_mode_reg (mode2, op2);
24674 if (builtin_sha1cpm_p)
24675 op3 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24676
24677 if (builtin_sha1cpm_p)
24678 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
24679 else
24680 pat = GEN_FCN (icode) (target, op0, op1, op2);
24681 if (! pat)
24682 return 0;
24683 emit_insn (pat);
24684 return target;
24685 }
24686
24687 /* Subroutine of arm_expand_builtin to take care of binop insns. */
24688
24689 static rtx
24690 arm_expand_binop_builtin (enum insn_code icode,
24691 tree exp, rtx target)
24692 {
24693 rtx pat;
24694 tree arg0 = CALL_EXPR_ARG (exp, 0);
24695 tree arg1 = CALL_EXPR_ARG (exp, 1);
24696 rtx op0 = expand_normal (arg0);
24697 rtx op1 = expand_normal (arg1);
24698 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24699 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24700 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
24701
24702 if (VECTOR_MODE_P (mode0))
24703 op0 = safe_vector_operand (op0, mode0);
24704 if (VECTOR_MODE_P (mode1))
24705 op1 = safe_vector_operand (op1, mode1);
24706
24707 if (! target
24708 || GET_MODE (target) != tmode
24709 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24710 target = gen_reg_rtx (tmode);
24711
24712 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
24713 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
24714
24715 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24716 op0 = copy_to_mode_reg (mode0, op0);
24717 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
24718 op1 = copy_to_mode_reg (mode1, op1);
24719
24720 pat = GEN_FCN (icode) (target, op0, op1);
24721 if (! pat)
24722 return 0;
24723 emit_insn (pat);
24724 return target;
24725 }
24726
24727 /* Subroutine of arm_expand_builtin to take care of unop insns. */
24728
24729 static rtx
24730 arm_expand_unop_builtin (enum insn_code icode,
24731 tree exp, rtx target, int do_load)
24732 {
24733 rtx pat;
24734 tree arg0 = CALL_EXPR_ARG (exp, 0);
24735 rtx op0 = expand_normal (arg0);
24736 rtx op1 = NULL_RTX;
24737 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24738 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24739 bool builtin_sha1h_p = false;
24740
24741 if (insn_data[icode].n_operands == 3)
24742 {
24743 gcc_assert (icode == CODE_FOR_crypto_sha1h);
24744 builtin_sha1h_p = true;
24745 }
24746
24747 if (! target
24748 || GET_MODE (target) != tmode
24749 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24750 target = gen_reg_rtx (tmode);
24751 if (do_load)
24752 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
24753 else
24754 {
24755 if (VECTOR_MODE_P (mode0))
24756 op0 = safe_vector_operand (op0, mode0);
24757
24758 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24759 op0 = copy_to_mode_reg (mode0, op0);
24760 }
24761 if (builtin_sha1h_p)
24762 op1 = GEN_INT (TARGET_BIG_END ? 1 : 0);
24763
24764 if (builtin_sha1h_p)
24765 pat = GEN_FCN (icode) (target, op0, op1);
24766 else
24767 pat = GEN_FCN (icode) (target, op0);
24768 if (! pat)
24769 return 0;
24770 emit_insn (pat);
24771 return target;
24772 }
24773
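/* How arm_expand_neon_args should handle each argument of a Neon builtin:
   copy it into a register, require a constant, treat it as a memory
   reference, or mark the end of the argument list.  */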
24774 typedef enum {
24775 NEON_ARG_COPY_TO_REG,
24776 NEON_ARG_CONSTANT,
24777 NEON_ARG_MEMORY,
24778 NEON_ARG_STOP
24779 } builtin_arg;
24780
24781 #define NEON_MAX_BUILTIN_ARGS 5
24782
24783 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
24784 and return an expression for the accessed memory.
24785
24786 The intrinsic function operates on a block of registers that has
24787 mode REG_MODE. This block contains vectors of type TYPE_MODE. The
24788 function references the memory at EXP of type TYPE and in mode
24789 MEM_MODE; this mode may be BLKmode if no more suitable mode is
24790 available. */
24791
24792 static tree
24793 neon_dereference_pointer (tree exp, tree type, enum machine_mode mem_mode,
24794 enum machine_mode reg_mode,
24795 neon_builtin_type_mode type_mode)
24796 {
24797 HOST_WIDE_INT reg_size, vector_size, nvectors, nelems;
24798 tree elem_type, upper_bound, array_type;
24799
24800 /* Work out the size of the register block in bytes. */
24801 reg_size = GET_MODE_SIZE (reg_mode);
24802
24803 /* Work out the size of each vector in bytes. */
24804 gcc_assert (TYPE_MODE_BIT (type_mode) & (TB_DREG | TB_QREG));
24805 vector_size = (TYPE_MODE_BIT (type_mode) & TB_QREG ? 16 : 8);
24806
24807 /* Work out how many vectors there are. */
24808 gcc_assert (reg_size % vector_size == 0);
24809 nvectors = reg_size / vector_size;
24810
24811 /* Work out the type of each element. */
24812 gcc_assert (POINTER_TYPE_P (type));
24813 elem_type = TREE_TYPE (type);
24814
24815 /* Work out how many elements are being loaded or stored.
24816 MEM_MODE == REG_MODE implies a one-to-one mapping between register
24817 and memory elements; anything else implies a lane load or store. */
24818 if (mem_mode == reg_mode)
24819 nelems = vector_size * nvectors / int_size_in_bytes (elem_type);
24820 else
24821 nelems = nvectors;
24822
24823 /* Create a type that describes the full access. */
24824 upper_bound = build_int_cst (size_type_node, nelems - 1);
24825 array_type = build_array_type (elem_type, build_index_type (upper_bound));
24826
24827 /* Dereference EXP using that type. */
24828 return fold_build2 (MEM_REF, array_type, exp,
24829 build_int_cst (build_pointer_type (array_type), 0));
24830 }
24831
24832 /* Expand a Neon builtin. */
24833 static rtx
24834 arm_expand_neon_args (rtx target, int icode, int have_retval,
24835 neon_builtin_type_mode type_mode,
24836 tree exp, int fcode, ...)
24837 {
24838 va_list ap;
24839 rtx pat;
24840 tree arg[NEON_MAX_BUILTIN_ARGS];
24841 rtx op[NEON_MAX_BUILTIN_ARGS];
24842 tree arg_type;
24843 tree formals;
24844 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24845 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
24846 enum machine_mode other_mode;
24847 int argc = 0;
24848 int opno;
24849
24850 if (have_retval
24851 && (!target
24852 || GET_MODE (target) != tmode
24853 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
24854 target = gen_reg_rtx (tmode);
24855
24856 va_start (ap, fcode);
24857
24858 formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
24859
24860 for (;;)
24861 {
24862 builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
24863
24864 if (thisarg == NEON_ARG_STOP)
24865 break;
24866 else
24867 {
24868 opno = argc + have_retval;
24869 mode[argc] = insn_data[icode].operand[opno].mode;
24870 arg[argc] = CALL_EXPR_ARG (exp, argc);
24871 arg_type = TREE_VALUE (formals);
24872 if (thisarg == NEON_ARG_MEMORY)
24873 {
24874 other_mode = insn_data[icode].operand[1 - opno].mode;
24875 arg[argc] = neon_dereference_pointer (arg[argc], arg_type,
24876 mode[argc], other_mode,
24877 type_mode);
24878 }
24879
24880 /* Use EXPAND_MEMORY for NEON_ARG_MEMORY to ensure that a MEM_P
24881 is returned. */
24882 op[argc] = expand_expr (arg[argc], NULL_RTX, VOIDmode,
24883 (thisarg == NEON_ARG_MEMORY
24884 ? EXPAND_MEMORY : EXPAND_NORMAL));
24885
24886 switch (thisarg)
24887 {
24888 case NEON_ARG_COPY_TO_REG:
24889 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
24890 if (!(*insn_data[icode].operand[opno].predicate)
24891 (op[argc], mode[argc]))
24892 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
24893 break;
24894
24895 case NEON_ARG_CONSTANT:
24896 /* FIXME: This error message is somewhat unhelpful. */
24897 if (!(*insn_data[icode].operand[opno].predicate)
24898 (op[argc], mode[argc]))
24899 error ("argument must be a constant");
24900 break;
24901
24902 case NEON_ARG_MEMORY:
24903 /* Check if expand failed. */
24904 if (op[argc] == const0_rtx)
24905 return 0;
24906 gcc_assert (MEM_P (op[argc]));
24907 PUT_MODE (op[argc], mode[argc]);
24908 /* ??? arm_neon.h uses the same built-in functions for signed
24909 and unsigned accesses, casting where necessary. This isn't
24910 alias safe. */
24911 set_mem_alias_set (op[argc], 0);
24912 if (!(*insn_data[icode].operand[opno].predicate)
24913 (op[argc], mode[argc]))
24914 op[argc] = (replace_equiv_address
24915 (op[argc], force_reg (Pmode, XEXP (op[argc], 0))));
24916 break;
24917
24918 case NEON_ARG_STOP:
24919 gcc_unreachable ();
24920 }
24921
24922 argc++;
24923 formals = TREE_CHAIN (formals);
24924 }
24925 }
24926
24927 va_end (ap);
24928
24929 if (have_retval)
24930 switch (argc)
24931 {
24932 case 1:
24933 pat = GEN_FCN (icode) (target, op[0]);
24934 break;
24935
24936 case 2:
24937 pat = GEN_FCN (icode) (target, op[0], op[1]);
24938 break;
24939
24940 case 3:
24941 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
24942 break;
24943
24944 case 4:
24945 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
24946 break;
24947
24948 case 5:
24949 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
24950 break;
24951
24952 default:
24953 gcc_unreachable ();
24954 }
24955 else
24956 switch (argc)
24957 {
24958 case 1:
24959 pat = GEN_FCN (icode) (op[0]);
24960 break;
24961
24962 case 2:
24963 pat = GEN_FCN (icode) (op[0], op[1]);
24964 break;
24965
24966 case 3:
24967 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
24968 break;
24969
24970 case 4:
24971 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
24972 break;
24973
24974 case 5:
24975 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
24976 break;
24977
24978 default:
24979 gcc_unreachable ();
24980 }
24981
24982 if (!pat)
24983 return 0;
24984
24985 emit_insn (pat);
24986
24987 return target;
24988 }
24989
24990 /* Expand a Neon builtin. These are "special" because they don't have symbolic
24991 constants defined per-instruction or per instruction-variant. Instead, the
24992 required info is looked up in the table neon_builtin_data. */
24993 static rtx
24994 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
24995 {
24996 neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
24997 neon_itype itype = d->itype;
24998 enum insn_code icode = d->code;
24999 neon_builtin_type_mode type_mode = d->mode;
25000
25001 switch (itype)
25002 {
25003 case NEON_UNOP:
25004 case NEON_CONVERT:
25005 case NEON_DUPLANE:
25006 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25007 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25008
25009 case NEON_BINOP:
25010 case NEON_SETLANE:
25011 case NEON_SCALARMUL:
25012 case NEON_SCALARMULL:
25013 case NEON_SCALARMULH:
25014 case NEON_SHIFTINSERT:
25015 case NEON_LOGICBINOP:
25016 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25017 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25018 NEON_ARG_STOP);
25019
25020 case NEON_TERNOP:
25021 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25022 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25023 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25024
25025 case NEON_GETLANE:
25026 case NEON_FIXCONV:
25027 case NEON_SHIFTIMM:
25028 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25029 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
25030 NEON_ARG_STOP);
25031
25032 case NEON_CREATE:
25033 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25034 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25035
25036 case NEON_DUP:
25037 case NEON_RINT:
25038 case NEON_SPLIT:
25039 case NEON_FLOAT_WIDEN:
25040 case NEON_FLOAT_NARROW:
25041 case NEON_REINTERP:
25042 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25043 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25044
25045 case NEON_COMBINE:
25046 case NEON_VTBL:
25047 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25048 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25049
25050 case NEON_RESULTPAIR:
25051 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25052 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25053 NEON_ARG_STOP);
25054
25055 case NEON_LANEMUL:
25056 case NEON_LANEMULL:
25057 case NEON_LANEMULH:
25058 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25059 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25060 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25061
25062 case NEON_LANEMAC:
25063 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25064 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25065 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
25066
25067 case NEON_SHIFTACC:
25068 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25069 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25070 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25071
25072 case NEON_SCALARMAC:
25073 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25074 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25075 NEON_ARG_CONSTANT, NEON_ARG_STOP);
25076
25077 case NEON_SELECT:
25078 case NEON_VTBX:
25079 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25080 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
25081 NEON_ARG_STOP);
25082
25083 case NEON_LOAD1:
25084 case NEON_LOADSTRUCT:
25085 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25086 NEON_ARG_MEMORY, NEON_ARG_STOP);
25087
25088 case NEON_LOAD1LANE:
25089 case NEON_LOADSTRUCTLANE:
25090 return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
25091 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25092 NEON_ARG_STOP);
25093
25094 case NEON_STORE1:
25095 case NEON_STORESTRUCT:
25096 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25097 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
25098
25099 case NEON_STORE1LANE:
25100 case NEON_STORESTRUCTLANE:
25101 return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode,
25102 NEON_ARG_MEMORY, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
25103 NEON_ARG_STOP);
25104 }
25105
25106 gcc_unreachable ();
25107 }
25108
25109 /* Emit code to reinterpret one Neon type as another, without altering bits. */
25110 void
25111 neon_reinterpret (rtx dest, rtx src)
25112 {
25113 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
25114 }
25115
25116 /* Emit code to place a Neon pair result in memory locations (with equal
25117 registers). */
25118 void
25119 neon_emit_pair_result_insn (enum machine_mode mode,
25120 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
25121 rtx op1, rtx op2)
25122 {
25123 rtx mem = gen_rtx_MEM (mode, destaddr);
25124 rtx tmp1 = gen_reg_rtx (mode);
25125 rtx tmp2 = gen_reg_rtx (mode);
25126
25127 emit_insn (intfn (tmp1, op1, op2, tmp2));
25128
25129 emit_move_insn (mem, tmp1);
25130 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
25131 emit_move_insn (mem, tmp2);
25132 }
25133
25134 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
25135 not to early-clobber SRC registers in the process.
25136
25137 We assume that the operands described by SRC and DEST represent a
25138 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
25139 number of components into which the copy has been decomposed. */
25140 void
25141 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
25142 {
25143 unsigned int i;
25144
25145 if (!reg_overlap_mentioned_p (operands[0], operands[1])
25146 || REGNO (operands[0]) < REGNO (operands[1]))
25147 {
25148 for (i = 0; i < count; i++)
25149 {
25150 operands[2 * i] = dest[i];
25151 operands[2 * i + 1] = src[i];
25152 }
25153 }
25154 else
25155 {
25156 for (i = 0; i < count; i++)
25157 {
25158 operands[2 * i] = dest[count - i - 1];
25159 operands[2 * i + 1] = src[count - i - 1];
25160 }
25161 }
25162 }
25163
25164 /* Split operands into moves from op[1] + op[2] into op[0]. */
25165
25166 void
25167 neon_split_vcombine (rtx operands[3])
25168 {
25169 unsigned int dest = REGNO (operands[0]);
25170 unsigned int src1 = REGNO (operands[1]);
25171 unsigned int src2 = REGNO (operands[2]);
25172 enum machine_mode halfmode = GET_MODE (operands[1]);
25173 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
25174 rtx destlo, desthi;
25175
25176 if (src1 == dest && src2 == dest + halfregs)
25177 {
25178 /* No-op move. Can't split to nothing; emit something. */
25179 emit_note (NOTE_INSN_DELETED);
25180 return;
25181 }
25182
25183 /* Preserve register attributes for variable tracking. */
25184 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
25185 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
25186 GET_MODE_SIZE (halfmode));
25187
25188 /* Special case of reversed high/low parts. Use VSWP. */
25189 if (src2 == dest && src1 == dest + halfregs)
25190 {
25191 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
25192 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
25193 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
25194 return;
25195 }
25196
25197 if (!reg_overlap_mentioned_p (operands[2], destlo))
25198 {
25199 /* Try to avoid unnecessary moves if part of the result
25200 is in the right place already. */
25201 if (src1 != dest)
25202 emit_move_insn (destlo, operands[1]);
25203 if (src2 != dest + halfregs)
25204 emit_move_insn (desthi, operands[2]);
25205 }
25206 else
25207 {
25208 if (src2 != dest + halfregs)
25209 emit_move_insn (desthi, operands[2]);
25210 if (src1 != dest)
25211 emit_move_insn (destlo, operands[1]);
25212 }
25213 }
25214
25215 /* Expand an expression EXP that calls a built-in function,
25216 with result going to TARGET if that's convenient
25217 (and in mode MODE if that's convenient).
25218 SUBTARGET may be used as the target for computing one of EXP's operands.
25219 IGNORE is nonzero if the value is to be ignored. */
25220
25221 static rtx
25222 arm_expand_builtin (tree exp,
25223 rtx target,
25224 rtx subtarget ATTRIBUTE_UNUSED,
25225 enum machine_mode mode ATTRIBUTE_UNUSED,
25226 int ignore ATTRIBUTE_UNUSED)
25227 {
25228 const struct builtin_description * d;
25229 enum insn_code icode;
25230 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25231 tree arg0;
25232 tree arg1;
25233 tree arg2;
25234 rtx op0;
25235 rtx op1;
25236 rtx op2;
25237 rtx pat;
25238 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25239 size_t i;
25240 enum machine_mode tmode;
25241 enum machine_mode mode0;
25242 enum machine_mode mode1;
25243 enum machine_mode mode2;
25244 int opint;
25245 int selector;
25246 int mask;
25247 int imm;
25248
25249 if (fcode >= ARM_BUILTIN_NEON_BASE)
25250 return arm_expand_neon_builtin (fcode, exp, target);
25251
25252 switch (fcode)
25253 {
25254 case ARM_BUILTIN_TEXTRMSB:
25255 case ARM_BUILTIN_TEXTRMUB:
25256 case ARM_BUILTIN_TEXTRMSH:
25257 case ARM_BUILTIN_TEXTRMUH:
25258 case ARM_BUILTIN_TEXTRMSW:
25259 case ARM_BUILTIN_TEXTRMUW:
25260 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
25261 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
25262 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
25263 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
25264 : CODE_FOR_iwmmxt_textrmw);
25265
25266 arg0 = CALL_EXPR_ARG (exp, 0);
25267 arg1 = CALL_EXPR_ARG (exp, 1);
25268 op0 = expand_normal (arg0);
25269 op1 = expand_normal (arg1);
25270 tmode = insn_data[icode].operand[0].mode;
25271 mode0 = insn_data[icode].operand[1].mode;
25272 mode1 = insn_data[icode].operand[2].mode;
25273
25274 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25275 op0 = copy_to_mode_reg (mode0, op0);
25276 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25277 {
25278 /* @@@ better error message */
25279 error ("selector must be an immediate");
25280 return gen_reg_rtx (tmode);
25281 }
25282
25283 opint = INTVAL (op1);
25284 if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
25285 {
25286 if (opint > 7 || opint < 0)
25287 error ("the range of selector should be in 0 to 7");
25288 }
25289 else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
25290 {
25291 if (opint > 3 || opint < 0)
25292 error ("the range of selector should be in 0 to 3");
25293 }
25294 else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
25295 {
25296 if (opint > 1 || opint < 0)
25297 error ("the range of selector should be in 0 to 1");
25298 }
25299
25300 if (target == 0
25301 || GET_MODE (target) != tmode
25302 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25303 target = gen_reg_rtx (tmode);
25304 pat = GEN_FCN (icode) (target, op0, op1);
25305 if (! pat)
25306 return 0;
25307 emit_insn (pat);
25308 return target;
25309
25310 case ARM_BUILTIN_WALIGNI:
25311 /* If op2 is an immediate, use waligni, else use walignr. */
25312 arg0 = CALL_EXPR_ARG (exp, 0);
25313 arg1 = CALL_EXPR_ARG (exp, 1);
25314 arg2 = CALL_EXPR_ARG (exp, 2);
25315 op0 = expand_normal (arg0);
25316 op1 = expand_normal (arg1);
25317 op2 = expand_normal (arg2);
25318 if (CONST_INT_P (op2))
25319 {
25320 icode = CODE_FOR_iwmmxt_waligni;
25321 tmode = insn_data[icode].operand[0].mode;
25322 mode0 = insn_data[icode].operand[1].mode;
25323 mode1 = insn_data[icode].operand[2].mode;
25324 mode2 = insn_data[icode].operand[3].mode;
25325 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25326 op0 = copy_to_mode_reg (mode0, op0);
25327 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25328 op1 = copy_to_mode_reg (mode1, op1);
25329 gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
25330 selector = INTVAL (op2);
25331 if (selector > 7 || selector < 0)
25332 error ("the range of selector should be in 0 to 7");
25333 }
25334 else
25335 {
25336 icode = CODE_FOR_iwmmxt_walignr;
25337 tmode = insn_data[icode].operand[0].mode;
25338 mode0 = insn_data[icode].operand[1].mode;
25339 mode1 = insn_data[icode].operand[2].mode;
25340 mode2 = insn_data[icode].operand[3].mode;
25341 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
25342 op0 = copy_to_mode_reg (mode0, op0);
25343 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
25344 op1 = copy_to_mode_reg (mode1, op1);
25345 if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
25346 op2 = copy_to_mode_reg (mode2, op2);
25347 }
25348 if (target == 0
25349 || GET_MODE (target) != tmode
25350 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25351 target = gen_reg_rtx (tmode);
25352 pat = GEN_FCN (icode) (target, op0, op1, op2);
25353 if (!pat)
25354 return 0;
25355 emit_insn (pat);
25356 return target;
25357
25358 case ARM_BUILTIN_TINSRB:
25359 case ARM_BUILTIN_TINSRH:
25360 case ARM_BUILTIN_TINSRW:
25361 case ARM_BUILTIN_WMERGE:
25362 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
25363 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
25364 : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
25365 : CODE_FOR_iwmmxt_tinsrw);
25366 arg0 = CALL_EXPR_ARG (exp, 0);
25367 arg1 = CALL_EXPR_ARG (exp, 1);
25368 arg2 = CALL_EXPR_ARG (exp, 2);
25369 op0 = expand_normal (arg0);
25370 op1 = expand_normal (arg1);
25371 op2 = expand_normal (arg2);
25372 tmode = insn_data[icode].operand[0].mode;
25373 mode0 = insn_data[icode].operand[1].mode;
25374 mode1 = insn_data[icode].operand[2].mode;
25375 mode2 = insn_data[icode].operand[3].mode;
25376
25377 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25378 op0 = copy_to_mode_reg (mode0, op0);
25379 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25380 op1 = copy_to_mode_reg (mode1, op1);
25381 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25382 {
25383 error ("selector must be an immediate");
25384 return const0_rtx;
25385 }
25386 if (icode == CODE_FOR_iwmmxt_wmerge)
25387 {
25388 selector = INTVAL (op2);
25389 if (selector > 7 || selector < 0)
25390 error ("the range of selector should be in 0 to 7");
25391 }
25392 if ((icode == CODE_FOR_iwmmxt_tinsrb)
25393 || (icode == CODE_FOR_iwmmxt_tinsrh)
25394 || (icode == CODE_FOR_iwmmxt_tinsrw))
25395 {
25396 mask = 0x01;
25397 selector = INTVAL (op2);
25398 if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
25399 error ("the range of selector should be in 0 to 7");
25400 else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 || selector > 3))
25401 error ("the range of selector should be in 0 to 3");
25402 else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 || selector > 1))
25403 error ("the range of selector should be in 0 to 1");
25404 mask <<= selector;
25405 op2 = GEN_INT (mask);
25406 }
25407 if (target == 0
25408 || GET_MODE (target) != tmode
25409 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25410 target = gen_reg_rtx (tmode);
25411 pat = GEN_FCN (icode) (target, op0, op1, op2);
25412 if (! pat)
25413 return 0;
25414 emit_insn (pat);
25415 return target;
25416
25417 case ARM_BUILTIN_SETWCGR0:
25418 case ARM_BUILTIN_SETWCGR1:
25419 case ARM_BUILTIN_SETWCGR2:
25420 case ARM_BUILTIN_SETWCGR3:
25421 icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
25422 : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
25423 : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
25424 : CODE_FOR_iwmmxt_setwcgr3);
25425 arg0 = CALL_EXPR_ARG (exp, 0);
25426 op0 = expand_normal (arg0);
25427 mode0 = insn_data[icode].operand[0].mode;
25428 if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
25429 op0 = copy_to_mode_reg (mode0, op0);
25430 pat = GEN_FCN (icode) (op0);
25431 if (!pat)
25432 return 0;
25433 emit_insn (pat);
25434 return 0;
25435
25436 case ARM_BUILTIN_GETWCGR0:
25437 case ARM_BUILTIN_GETWCGR1:
25438 case ARM_BUILTIN_GETWCGR2:
25439 case ARM_BUILTIN_GETWCGR3:
25440 icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
25441 : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
25442 : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
25443 : CODE_FOR_iwmmxt_getwcgr3);
25444 tmode = insn_data[icode].operand[0].mode;
25445 if (target == 0
25446 || GET_MODE (target) != tmode
25447 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
25448 target = gen_reg_rtx (tmode);
25449 pat = GEN_FCN (icode) (target);
25450 if (!pat)
25451 return 0;
25452 emit_insn (pat);
25453 return target;
25454
25455 case ARM_BUILTIN_WSHUFH:
25456 icode = CODE_FOR_iwmmxt_wshufh;
25457 arg0 = CALL_EXPR_ARG (exp, 0);
25458 arg1 = CALL_EXPR_ARG (exp, 1);
25459 op0 = expand_normal (arg0);
25460 op1 = expand_normal (arg1);
25461 tmode = insn_data[icode].operand[0].mode;
25462 mode1 = insn_data[icode].operand[1].mode;
25463 mode2 = insn_data[icode].operand[2].mode;
25464
25465 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
25466 op0 = copy_to_mode_reg (mode1, op0);
25467 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
25468 {
25469 error ("mask must be an immediate");
25470 return const0_rtx;
25471 }
25472 selector = INTVAL (op1);
25473 if (selector < 0 || selector > 255)
25474 error ("the range of mask should be in 0 to 255");
25475 if (target == 0
25476 || GET_MODE (target) != tmode
25477 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25478 target = gen_reg_rtx (tmode);
25479 pat = GEN_FCN (icode) (target, op0, op1);
25480 if (! pat)
25481 return 0;
25482 emit_insn (pat);
25483 return target;
25484
25485 case ARM_BUILTIN_WMADDS:
25486 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
25487 case ARM_BUILTIN_WMADDSX:
25488 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
25489 case ARM_BUILTIN_WMADDSN:
25490 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
25491 case ARM_BUILTIN_WMADDU:
25492 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
25493 case ARM_BUILTIN_WMADDUX:
25494 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
25495 case ARM_BUILTIN_WMADDUN:
25496 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
25497 case ARM_BUILTIN_WSADBZ:
25498 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
25499 case ARM_BUILTIN_WSADHZ:
25500 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
25501
25502 /* Several three-argument builtins. */
25503 case ARM_BUILTIN_WMACS:
25504 case ARM_BUILTIN_WMACU:
25505 case ARM_BUILTIN_TMIA:
25506 case ARM_BUILTIN_TMIAPH:
25507 case ARM_BUILTIN_TMIATT:
25508 case ARM_BUILTIN_TMIATB:
25509 case ARM_BUILTIN_TMIABT:
25510 case ARM_BUILTIN_TMIABB:
25511 case ARM_BUILTIN_WQMIABB:
25512 case ARM_BUILTIN_WQMIABT:
25513 case ARM_BUILTIN_WQMIATB:
25514 case ARM_BUILTIN_WQMIATT:
25515 case ARM_BUILTIN_WQMIABBN:
25516 case ARM_BUILTIN_WQMIABTN:
25517 case ARM_BUILTIN_WQMIATBN:
25518 case ARM_BUILTIN_WQMIATTN:
25519 case ARM_BUILTIN_WMIABB:
25520 case ARM_BUILTIN_WMIABT:
25521 case ARM_BUILTIN_WMIATB:
25522 case ARM_BUILTIN_WMIATT:
25523 case ARM_BUILTIN_WMIABBN:
25524 case ARM_BUILTIN_WMIABTN:
25525 case ARM_BUILTIN_WMIATBN:
25526 case ARM_BUILTIN_WMIATTN:
25527 case ARM_BUILTIN_WMIAWBB:
25528 case ARM_BUILTIN_WMIAWBT:
25529 case ARM_BUILTIN_WMIAWTB:
25530 case ARM_BUILTIN_WMIAWTT:
25531 case ARM_BUILTIN_WMIAWBBN:
25532 case ARM_BUILTIN_WMIAWBTN:
25533 case ARM_BUILTIN_WMIAWTBN:
25534 case ARM_BUILTIN_WMIAWTTN:
25535 case ARM_BUILTIN_WSADB:
25536 case ARM_BUILTIN_WSADH:
25537 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
25538 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
25539 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
25540 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
25541 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
25542 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
25543 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
25544 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
25545 : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
25546 : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
25547 : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
25548 : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
25549 : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
25550 : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
25551 : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
25552 : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
25553 : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
25554 : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
25555 : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
25556 : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
25557 : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
25558 : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
25559 : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
25560 : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
25561 : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
25562 : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
25563 : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
25564 : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
25565 : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
25566 : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
25567 : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
25568 : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
25569 : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
25570 : CODE_FOR_iwmmxt_wsadh);
25571 arg0 = CALL_EXPR_ARG (exp, 0);
25572 arg1 = CALL_EXPR_ARG (exp, 1);
25573 arg2 = CALL_EXPR_ARG (exp, 2);
25574 op0 = expand_normal (arg0);
25575 op1 = expand_normal (arg1);
25576 op2 = expand_normal (arg2);
25577 tmode = insn_data[icode].operand[0].mode;
25578 mode0 = insn_data[icode].operand[1].mode;
25579 mode1 = insn_data[icode].operand[2].mode;
25580 mode2 = insn_data[icode].operand[3].mode;
25581
25582 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
25583 op0 = copy_to_mode_reg (mode0, op0);
25584 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
25585 op1 = copy_to_mode_reg (mode1, op1);
25586 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
25587 op2 = copy_to_mode_reg (mode2, op2);
25588 if (target == 0
25589 || GET_MODE (target) != tmode
25590 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
25591 target = gen_reg_rtx (tmode);
25592 pat = GEN_FCN (icode) (target, op0, op1, op2);
25593 if (! pat)
25594 return 0;
25595 emit_insn (pat);
25596 return target;
25597
25598 case ARM_BUILTIN_WZERO:
25599 target = gen_reg_rtx (DImode);
25600 emit_insn (gen_iwmmxt_clrdi (target));
25601 return target;
25602
25603 case ARM_BUILTIN_WSRLHI:
25604 case ARM_BUILTIN_WSRLWI:
25605 case ARM_BUILTIN_WSRLDI:
25606 case ARM_BUILTIN_WSLLHI:
25607 case ARM_BUILTIN_WSLLWI:
25608 case ARM_BUILTIN_WSLLDI:
25609 case ARM_BUILTIN_WSRAHI:
25610 case ARM_BUILTIN_WSRAWI:
25611 case ARM_BUILTIN_WSRADI:
25612 case ARM_BUILTIN_WRORHI:
25613 case ARM_BUILTIN_WRORWI:
25614 case ARM_BUILTIN_WRORDI:
25615 case ARM_BUILTIN_WSRLH:
25616 case ARM_BUILTIN_WSRLW:
25617 case ARM_BUILTIN_WSRLD:
25618 case ARM_BUILTIN_WSLLH:
25619 case ARM_BUILTIN_WSLLW:
25620 case ARM_BUILTIN_WSLLD:
25621 case ARM_BUILTIN_WSRAH:
25622 case ARM_BUILTIN_WSRAW:
25623 case ARM_BUILTIN_WSRAD:
25624 case ARM_BUILTIN_WRORH:
25625 case ARM_BUILTIN_WRORW:
25626 case ARM_BUILTIN_WRORD:
25627 icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
25628 : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
25629 : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
25630 : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
25631 : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
25632 : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
25633 : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
25634 : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
25635 : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
25636 : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
25637 : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
25638 : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
25639 : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
25640 : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
25641 : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
25642 : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
25643 : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
25644 : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
25645 : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
25646 : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
25647 : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
25648 : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
25649 : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
25650 : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
25651 : CODE_FOR_nothing);
25652 arg1 = CALL_EXPR_ARG (exp, 1);
25653 op1 = expand_normal (arg1);
25654 if (GET_MODE (op1) == VOIDmode)
25655 {
25656 imm = INTVAL (op1);
25657 if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORWI
25658 || fcode == ARM_BUILTIN_WRORH || fcode == ARM_BUILTIN_WRORW)
25659 && (imm < 0 || imm > 32))
25660 {
25661 if (fcode == ARM_BUILTIN_WRORHI)
25662 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi16 in code.");
25663 else if (fcode == ARM_BUILTIN_WRORWI)
25664 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_rori_pi32 in code.");
25665 else if (fcode == ARM_BUILTIN_WRORH)
25666 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi16 in code.");
25667 else
25668 error ("the range of count should be in 0 to 32. please check the intrinsic _mm_ror_pi32 in code.");
25669 }
25670 else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
25671 && (imm < 0 || imm > 64))
25672 {
25673 if (fcode == ARM_BUILTIN_WRORDI)
25674 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_rori_si64 in code.");
25675 else
25676 error ("the range of count should be in 0 to 64. please check the intrinsic _mm_ror_si64 in code.");
25677 }
25678 else if (imm < 0)
25679 {
25680 if (fcode == ARM_BUILTIN_WSRLHI)
25681 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi16 in code.");
25682 else if (fcode == ARM_BUILTIN_WSRLWI)
25683 error ("the count should be no less than 0. please check the intrinsic _mm_srli_pi32 in code.");
25684 else if (fcode == ARM_BUILTIN_WSRLDI)
25685 error ("the count should be no less than 0. please check the intrinsic _mm_srli_si64 in code.");
25686 else if (fcode == ARM_BUILTIN_WSLLHI)
25687 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi16 in code.");
25688 else if (fcode == ARM_BUILTIN_WSLLWI)
25689 error ("the count should be no less than 0. please check the intrinsic _mm_slli_pi32 in code.");
25690 else if (fcode == ARM_BUILTIN_WSLLDI)
25691 error ("the count should be no less than 0. please check the intrinsic _mm_slli_si64 in code.");
25692 else if (fcode == ARM_BUILTIN_WSRAHI)
25693 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi16 in code.");
25694 else if (fcode == ARM_BUILTIN_WSRAWI)
25695 error ("the count should be no less than 0. please check the intrinsic _mm_srai_pi32 in code.");
25696 else if (fcode == ARM_BUILTIN_WSRADI)
25697 error ("the count should be no less than 0. please check the intrinsic _mm_srai_si64 in code.");
25698 else if (fcode == ARM_BUILTIN_WSRLH)
25699 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi16 in code.");
25700 else if (fcode == ARM_BUILTIN_WSRLW)
25701 error ("the count should be no less than 0. please check the intrinsic _mm_srl_pi32 in code.");
25702 else if (fcode == ARM_BUILTIN_WSRLD)
25703 error ("the count should be no less than 0. please check the intrinsic _mm_srl_si64 in code.");
25704 else if (fcode == ARM_BUILTIN_WSLLH)
25705 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi16 in code.");
25706 else if (fcode == ARM_BUILTIN_WSLLW)
25707 error ("the count should be no less than 0. please check the intrinsic _mm_sll_pi32 in code.");
25708 else if (fcode == ARM_BUILTIN_WSLLD)
25709 error ("the count should be no less than 0. please check the intrinsic _mm_sll_si64 in code.");
25710 else if (fcode == ARM_BUILTIN_WSRAH)
25711 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi16 in code.");
25712 else if (fcode == ARM_BUILTIN_WSRAW)
25713 error ("the count should be no less than 0. please check the intrinsic _mm_sra_pi32 in code.");
25714 else
25715 error ("the count should be no less than 0. please check the intrinsic _mm_sra_si64 in code.");
25716 }
25717 }
25718 return arm_expand_binop_builtin (icode, exp, target);
25719
25720 default:
25721 break;
25722 }
25723
25724 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
25725 if (d->code == (const enum arm_builtins) fcode)
25726 return arm_expand_binop_builtin (d->icode, exp, target);
25727
25728 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
25729 if (d->code == (const enum arm_builtins) fcode)
25730 return arm_expand_unop_builtin (d->icode, exp, target, 0);
25731
25732 for (i = 0, d = bdesc_3arg; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
25733 if (d->code == (const enum arm_builtins) fcode)
25734 return arm_expand_ternop_builtin (d->icode, exp, target);
25735
25736 /* @@@ Should really do something sensible here. */
25737 return NULL_RTX;
25738 }
25739 \f
25740 /* Return the number (counting from 0) of
25741 the least significant set bit in MASK. */
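/* For example, number_of_first_bit_set (0x28) is 3, since bit 3 is the
   lowest set bit of 0x28 (binary 101000).  */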
25742
25743 inline static int
25744 number_of_first_bit_set (unsigned mask)
25745 {
25746 return ctz_hwi (mask);
25747 }
25748
25749 /* Like emit_multi_reg_push, but allowing for a different set of
25750 registers to be described as saved. MASK is the set of registers
25751 to be saved; REAL_REGS is the set of registers to be described as
25752 saved. If REAL_REGS is 0, only describe the stack adjustment. */
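/* For instance, the Thumb-1 prologue copies high registers into low
   registers before pushing them, so MASK may name the low registers that
   are actually stored while REAL_REGS names the high registers that the
   unwind information should describe as saved.  */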
25753
25754 static rtx
25755 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
25756 {
25757 unsigned long regno;
25758 rtx par[10], tmp, reg, insn;
25759 int i, j;
25760
25761 /* Build the parallel of the registers actually being stored. */
25762 for (i = 0; mask; ++i, mask &= mask - 1)
25763 {
25764 regno = ctz_hwi (mask);
25765 reg = gen_rtx_REG (SImode, regno);
25766
25767 if (i == 0)
25768 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
25769 else
25770 tmp = gen_rtx_USE (VOIDmode, reg);
25771
25772 par[i] = tmp;
25773 }
25774
25775 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25776 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
25777 tmp = gen_frame_mem (BLKmode, tmp);
25778 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
25779 par[0] = tmp;
25780
25781 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
25782 insn = emit_insn (tmp);
25783
25784 /* Always build the stack adjustment note for unwind info. */
25785 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
25786 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
25787 par[0] = tmp;
25788
25789 /* Build the parallel of the registers recorded as saved for unwind. */
25790 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
25791 {
25792 regno = ctz_hwi (real_regs);
25793 reg = gen_rtx_REG (SImode, regno);
25794
25795 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
25796 tmp = gen_frame_mem (SImode, tmp);
25797 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
25798 RTX_FRAME_RELATED_P (tmp) = 1;
25799 par[j + 1] = tmp;
25800 }
25801
25802 if (j == 0)
25803 tmp = par[0];
25804 else
25805 {
25806 RTX_FRAME_RELATED_P (par[0]) = 1;
25807 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
25808 }
25809
25810 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
25811
25812 return insn;
25813 }
25814
25815 /* Emit code to pop registers from the stack. F is the assembly file to
25816 write to. MASK is the set of registers to pop. */
25817 static void
25818 thumb_pop (FILE *f, unsigned long mask)
25819 {
25820 int regno;
25821 int lo_mask = mask & 0xFF;
25822 int pushed_words = 0;
25823
25824 gcc_assert (mask);
25825
25826 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
25827 {
25828 /* Special case. Do not generate a POP PC statement here, do it in
25829 thumb_exit() */
25830 thumb_exit (f, -1);
25831 return;
25832 }
25833
25834 fprintf (f, "\tpop\t{");
25835
25836 /* Look at the low registers first. */
25837 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
25838 {
25839 if (lo_mask & 1)
25840 {
25841 asm_fprintf (f, "%r", regno);
25842
25843 if ((lo_mask & ~1) != 0)
25844 fprintf (f, ", ");
25845
25846 pushed_words++;
25847 }
25848 }
25849
25850 if (mask & (1 << PC_REGNUM))
25851 {
25852 /* Catch popping the PC. */
25853 if (TARGET_INTERWORK || TARGET_BACKTRACE
25854 || crtl->calls_eh_return)
25855 {
25856 /* The PC is never popped directly; instead
25857 it is popped into r3 and then BX is used. */
25858 fprintf (f, "}\n");
25859
25860 thumb_exit (f, -1);
25861
25862 return;
25863 }
25864 else
25865 {
25866 if (mask & 0xFF)
25867 fprintf (f, ", ");
25868
25869 asm_fprintf (f, "%r", PC_REGNUM);
25870 }
25871 }
25872
25873 fprintf (f, "}\n");
25874 }
25875
25876 /* Generate code to return from a thumb function.
25877 If 'reg_containing_return_addr' is -1, then the return address is
25878 actually on the stack, at the stack pointer. */
25879 static void
25880 thumb_exit (FILE *f, int reg_containing_return_addr)
25881 {
25882 unsigned regs_available_for_popping;
25883 unsigned regs_to_pop;
25884 int pops_needed;
25885 unsigned available;
25886 unsigned required;
25887 int mode;
25888 int size;
25889 int restore_a4 = FALSE;
25890
25891 /* Compute the registers we need to pop. */
25892 regs_to_pop = 0;
25893 pops_needed = 0;
25894
25895 if (reg_containing_return_addr == -1)
25896 {
25897 regs_to_pop |= 1 << LR_REGNUM;
25898 ++pops_needed;
25899 }
25900
25901 if (TARGET_BACKTRACE)
25902 {
25903 /* Restore the (ARM) frame pointer and stack pointer. */
25904 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
25905 pops_needed += 2;
25906 }
25907
25908 /* If there is nothing to pop then just emit the BX instruction and
25909 return. */
25910 if (pops_needed == 0)
25911 {
25912 if (crtl->calls_eh_return)
25913 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
25914
25915 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
25916 return;
25917 }
25918 /* Otherwise, if we are not supporting interworking, we have not created
25919 a backtrace structure, and the function was not entered in ARM mode, then
25920 just pop the return address straight into the PC. */
25921 else if (!TARGET_INTERWORK
25922 && !TARGET_BACKTRACE
25923 && !is_called_in_ARM_mode (current_function_decl)
25924 && !crtl->calls_eh_return)
25925 {
25926 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
25927 return;
25928 }
25929
25930 /* Find out how many of the (return) argument registers we can corrupt. */
25931 regs_available_for_popping = 0;
25932
25933 /* If returning via __builtin_eh_return, the bottom three registers
25934 all contain information needed for the return. */
25935 if (crtl->calls_eh_return)
25936 size = 12;
25937 else
25938 {
25939 /* Deduce the registers used from the function's return value.
25940 This is more reliable than examining
25941 df_regs_ever_live_p () because that will be set if the register is
25942 ever used in the function, not just if the register is used
25943 to hold a return value. */
25944
25945 if (crtl->return_rtx != 0)
25946 mode = GET_MODE (crtl->return_rtx);
25947 else
25948 mode = DECL_MODE (DECL_RESULT (current_function_decl));
25949
25950 size = GET_MODE_SIZE (mode);
25951
25952 if (size == 0)
25953 {
25954 /* In a void function we can use any argument register.
25955 In a function that returns a structure on the stack
25956 we can use the second and third argument registers. */
25957 if (mode == VOIDmode)
25958 regs_available_for_popping =
25959 (1 << ARG_REGISTER (1))
25960 | (1 << ARG_REGISTER (2))
25961 | (1 << ARG_REGISTER (3));
25962 else
25963 regs_available_for_popping =
25964 (1 << ARG_REGISTER (2))
25965 | (1 << ARG_REGISTER (3));
25966 }
25967 else if (size <= 4)
25968 regs_available_for_popping =
25969 (1 << ARG_REGISTER (2))
25970 | (1 << ARG_REGISTER (3));
25971 else if (size <= 8)
25972 regs_available_for_popping =
25973 (1 << ARG_REGISTER (3));
25974 }
25975
25976 /* Match registers to be popped with registers into which we pop them. */
25977 for (available = regs_available_for_popping,
25978 required = regs_to_pop;
25979 required != 0 && available != 0;
25980 available &= ~(available & - available),
25981 required &= ~(required & - required))
25982 -- pops_needed;
25983
25984 /* If we have any popping registers left over, remove them. */
25985 if (available > 0)
25986 regs_available_for_popping &= ~available;
25987
25988 /* Otherwise if we need another popping register we can use
25989 the fourth argument register. */
25990 else if (pops_needed)
25991 {
25992 /* If we have not found any free argument registers and
25993 reg a4 contains the return address, we must move it. */
25994 if (regs_available_for_popping == 0
25995 && reg_containing_return_addr == LAST_ARG_REGNUM)
25996 {
25997 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
25998 reg_containing_return_addr = LR_REGNUM;
25999 }
26000 else if (size > 12)
26001 {
26002 /* Register a4 is being used to hold part of the return value,
26003 but we have dire need of a free, low register. */
26004 restore_a4 = TRUE;
26005
26006 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
26007 }
26008
26009 if (reg_containing_return_addr != LAST_ARG_REGNUM)
26010 {
26011 /* The fourth argument register is available. */
26012 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
26013
26014 --pops_needed;
26015 }
26016 }
26017
26018 /* Pop as many registers as we can. */
26019 thumb_pop (f, regs_available_for_popping);
26020
26021 /* Process the registers we popped. */
26022 if (reg_containing_return_addr == -1)
26023 {
26024 /* The return address was popped into the lowest numbered register. */
26025 regs_to_pop &= ~(1 << LR_REGNUM);
26026
26027 reg_containing_return_addr =
26028 number_of_first_bit_set (regs_available_for_popping);
26029
26030 /* Remove this register from the mask of available registers, so that
26031 the return address will not be corrupted by further pops. */
26032 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
26033 }
26034
26035 /* If we popped other registers then handle them here. */
26036 if (regs_available_for_popping)
26037 {
26038 int frame_pointer;
26039
26040 /* Work out which register currently contains the frame pointer. */
26041 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
26042
26043 /* Move it into the correct place. */
26044 asm_fprintf (f, "\tmov\t%r, %r\n",
26045 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
26046
26047 /* (Temporarily) remove it from the mask of popped registers. */
26048 regs_available_for_popping &= ~(1 << frame_pointer);
26049 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
26050
26051 if (regs_available_for_popping)
26052 {
26053 int stack_pointer;
26054
26055 /* We popped the stack pointer as well,
26056 find the register that contains it. */
26057 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
26058
26059 /* Move it into the stack register. */
26060 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
26061
26062 /* At this point we have popped all necessary registers, so
26063 do not worry about restoring regs_available_for_popping
26064 to its correct value:
26065
26066 assert (pops_needed == 0)
26067 assert (regs_available_for_popping == (1 << frame_pointer))
26068 assert (regs_to_pop == (1 << STACK_POINTER)) */
26069 }
26070 else
26071 {
26072 /* Since we have just moved the popped value into the frame
26073 pointer, the popping register is available for reuse, and
26074 we know that we still have the stack pointer left to pop. */
26075 regs_available_for_popping |= (1 << frame_pointer);
26076 }
26077 }
26078
26079 /* If we still have registers left on the stack, but we no longer have
26080 any registers into which we can pop them, then we must move the return
26081 address into the link register and make available the register that
26082 contained it. */
26083 if (regs_available_for_popping == 0 && pops_needed > 0)
26084 {
26085 regs_available_for_popping |= 1 << reg_containing_return_addr;
26086
26087 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
26088 reg_containing_return_addr);
26089
26090 reg_containing_return_addr = LR_REGNUM;
26091 }
26092
26093 /* If we have registers left on the stack then pop some more.
26094 We know that at most we will want to pop FP and SP. */
26095 if (pops_needed > 0)
26096 {
26097 int popped_into;
26098 int move_to;
26099
26100 thumb_pop (f, regs_available_for_popping);
26101
26102 /* We have popped either FP or SP.
26103 Move whichever one it is into the correct register. */
26104 popped_into = number_of_first_bit_set (regs_available_for_popping);
26105 move_to = number_of_first_bit_set (regs_to_pop);
26106
26107 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
26108
26109 regs_to_pop &= ~(1 << move_to);
26110
26111 --pops_needed;
26112 }
26113
26114 /* If we still have not popped everything then we must have only
26115 had one register available to us and we are now popping the SP. */
26116 if (pops_needed > 0)
26117 {
26118 int popped_into;
26119
26120 thumb_pop (f, regs_available_for_popping);
26121
26122 popped_into = number_of_first_bit_set (regs_available_for_popping);
26123
26124 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
26125 /*
26126 assert (regs_to_pop == (1 << STACK_POINTER))
26127 assert (pops_needed == 1)
26128 */
26129 }
26130
26131 /* If necessary restore the a4 register. */
26132 if (restore_a4)
26133 {
26134 if (reg_containing_return_addr != LR_REGNUM)
26135 {
26136 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
26137 reg_containing_return_addr = LR_REGNUM;
26138 }
26139
26140 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
26141 }
26142
26143 if (crtl->calls_eh_return)
26144 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
26145
26146 /* Return to caller. */
26147 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
26148 }
26149 \f
26150 /* Scan INSN just before assembler is output for it.
26151 For Thumb-1, we track the status of the condition codes; this
26152 information is used in the cbranchsi4_insn pattern. */
26153 void
26154 thumb1_final_prescan_insn (rtx insn)
26155 {
26156 if (flag_print_asm_name)
26157 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
26158 INSN_ADDRESSES (INSN_UID (insn)));
26159 /* Don't overwrite the previous setter when we get to a cbranch. */
26160 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
26161 {
26162 enum attr_conds conds;
26163
26164 if (cfun->machine->thumb1_cc_insn)
26165 {
26166 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
26167 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
26168 CC_STATUS_INIT;
26169 }
26170 conds = get_attr_conds (insn);
26171 if (conds == CONDS_SET)
26172 {
26173 rtx set = single_set (insn);
26174 cfun->machine->thumb1_cc_insn = insn;
26175 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
26176 cfun->machine->thumb1_cc_op1 = const0_rtx;
26177 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
26178 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
26179 {
26180 rtx src1 = XEXP (SET_SRC (set), 1);
26181 if (src1 == const0_rtx)
26182 cfun->machine->thumb1_cc_mode = CCmode;
26183 }
26184 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
26185 {
26186 /* Record the src register operand instead of dest because
26187 cprop_hardreg pass propagates src. */
26188 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
26189 }
26190 }
26191 else if (conds != CONDS_NOCOND)
26192 cfun->machine->thumb1_cc_insn = NULL_RTX;
26193 }
26194
26195 /* Check if an unexpected far jump is used. */
26196 if (cfun->machine->lr_save_eliminated
26197 && get_attr_far_jump (insn) == FAR_JUMP_YES)
26198 internal_error ("Unexpected thumb1 far jump");
26199 }
26200
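/* Return nonzero if all the set bits of VAL (truncated to 32 bits) fit
   within an 8-bit window shifted left by 0 to 24 bits, e.g. 0x3fc00 fits
   the window 0xff << 10; presumably such constants can then be synthesized
   with a move followed by a shift.  */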
26201 int
26202 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
26203 {
26204 unsigned HOST_WIDE_INT mask = 0xff;
26205 int i;
26206
26207 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
26208 if (val == 0) /* XXX */
26209 return 0;
26210
26211 for (i = 0; i < 25; i++)
26212 if ((val & (mask << i)) == val)
26213 return 1;
26214
26215 return 0;
26216 }
26217
26218 /* Returns nonzero if the current function contains,
26219 or might contain a far jump. */
26220 static int
26221 thumb_far_jump_used_p (void)
26222 {
26223 rtx insn;
26224 bool far_jump = false;
26225 unsigned int func_size = 0;
26226
26227 /* This test is only important for leaf functions. */
26228 /* assert (!leaf_function_p ()); */
26229
26230 /* If we have already decided that far jumps may be used,
26231 do not bother checking again, and always return true even if
26232 it turns out that they are not being used. Once we have made
26233 the decision that far jumps are present (and that hence the link
26234 register will be pushed onto the stack) we cannot go back on it. */
26235 if (cfun->machine->far_jump_used)
26236 return 1;
26237
26238 /* If this function is not being called from the prologue/epilogue
26239 generation code then it must be being called from the
26240 INITIAL_ELIMINATION_OFFSET macro. */
26241 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
26242 {
26243 /* In this case we know that we are being asked about the elimination
26244 of the arg pointer register. If that register is not being used,
26245 then there are no arguments on the stack, and we do not have to
26246 worry that a far jump might force the prologue to push the link
26247 register, changing the stack offsets. In this case we can just
26248 return false, since the presence of far jumps in the function will
26249 not affect stack offsets.
26250
26251 If the arg pointer is live (or if it was live, but has now been
26252 eliminated and so set to dead) then we do have to test to see if
26253 the function might contain a far jump. This test can lead to some
26254 false negatives, since before reload is completed the length of
26255 branch instructions is not known, so gcc defaults to returning their
26256 longest length, which in turn sets the far jump attribute to true.
26257
26258 A false negative will not result in bad code being generated, but it
26259 will result in a needless push and pop of the link register. We
26260 hope that this does not occur too often.
26261
26262 If we need doubleword stack alignment this could affect the other
26263 elimination offsets so we can't risk getting it wrong. */
26264 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
26265 cfun->machine->arg_pointer_live = 1;
26266 else if (!cfun->machine->arg_pointer_live)
26267 return 0;
26268 }
26269
26270 /* We should not change far_jump_used during or after reload, as there is
26271 no chance to change stack frame layout. */
26272 if (reload_in_progress || reload_completed)
26273 return 0;
26274
26275 /* Check to see if the function contains a branch
26276 insn with the far jump attribute set. */
26277 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
26278 {
26279 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
26280 {
26281 far_jump = true;
26282 }
26283 func_size += get_attr_length (insn);
26284 }
26285
26286 /* The far_jump attribute will always be true for thumb1 before the
26287 branch-shortening pass, so checking the far_jump attribute before
26288 that pass is not very useful.
26289
26290 The following heuristic tries to estimate more accurately whether a far
26291 jump will finally be used. The heuristic is very conservative, as there
26292 is no chance to roll back a decision not to use a far jump.
26293
26294 The Thumb1 long branch offset range is -2048 to 2046. In the worst case
26295 each 2-byte insn is associated with a 4-byte constant pool entry, so
26296 using function size 2048/3 as the threshold is conservative enough. */
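/* For example, a 700-byte function gives 700 * 3 = 2100 >= 2048, so the
   far jump (and hence the LR save) is kept; a 600-byte function gives
   1800 < 2048 and the far jump is assumed not to be needed.  */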
26297 if (far_jump)
26298 {
26299 if ((func_size * 3) >= 2048)
26300 {
26301 /* Record the fact that we have decided that
26302 the function does use far jumps. */
26303 cfun->machine->far_jump_used = 1;
26304 return 1;
26305 }
26306 }
26307
26308 return 0;
26309 }
26310
26311 /* Return nonzero if FUNC must be entered in ARM mode. */
26312 int
26313 is_called_in_ARM_mode (tree func)
26314 {
26315 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
26316
26317 /* Ignore the problem about functions whose address is taken. */
26318 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
26319 return TRUE;
26320
26321 #ifdef ARM_PE
26322 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
26323 #else
26324 return FALSE;
26325 #endif
26326 }
26327
26328 /* Given the stack offsets and register mask in OFFSETS, decide how
26329 many additional registers to push instead of subtracting a constant
26330 from SP. For epilogues the principle is the same except we use pop.
26331 FOR_PROLOGUE indicates which we're generating. */
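/* For example, when not optimizing for size only a frame of exactly 512
   bytes qualifies: a 512-byte adjustment does not fit a single Thumb-1
   immediate, but pushing one extra register brings it down to 508, which
   does, so this function returns 1 (given a free low register).  */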
26332 static int
26333 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
26334 {
26335 HOST_WIDE_INT amount;
26336 unsigned long live_regs_mask = offsets->saved_regs_mask;
26337 /* Extract a mask of the ones we can give to the Thumb's push/pop
26338 instruction. */
26339 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
26340 /* Then count how many other high registers will need to be pushed. */
26341 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26342 int n_free, reg_base, size;
26343
26344 if (!for_prologue && frame_pointer_needed)
26345 amount = offsets->locals_base - offsets->saved_regs;
26346 else
26347 amount = offsets->outgoing_args - offsets->saved_regs;
26348
26349 /* If the stack frame size is 512 exactly, we can save one load
26350 instruction, which should make this a win even when optimizing
26351 for speed. */
26352 if (!optimize_size && amount != 512)
26353 return 0;
26354
26355 /* Can't do this if there are high registers to push. */
26356 if (high_regs_pushed != 0)
26357 return 0;
26358
26359 /* Shouldn't do it in the prologue if no registers would normally
26360 be pushed at all. In the epilogue, also allow it if we'll have
26361 a pop insn for the PC. */
26362 if (l_mask == 0
26363 && (for_prologue
26364 || TARGET_BACKTRACE
26365 || (live_regs_mask & 1 << LR_REGNUM) == 0
26366 || TARGET_INTERWORK
26367 || crtl->args.pretend_args_size != 0))
26368 return 0;
26369
26370 /* Don't do this if thumb_expand_prologue wants to emit instructions
26371 between the push and the stack frame allocation. */
26372 if (for_prologue
26373 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
26374 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
26375 return 0;
26376
26377 reg_base = 0;
26378 n_free = 0;
26379 if (!for_prologue)
26380 {
26381 size = arm_size_return_regs ();
26382 reg_base = ARM_NUM_INTS (size);
26383 live_regs_mask >>= reg_base;
26384 }
26385
26386 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
26387 && (for_prologue || call_used_regs[reg_base + n_free]))
26388 {
26389 live_regs_mask >>= 1;
26390 n_free++;
26391 }
26392
26393 if (n_free == 0)
26394 return 0;
26395 gcc_assert (amount / 4 * 4 == amount);
26396
26397 if (amount >= 512 && (amount - n_free * 4) < 512)
26398 return (amount - 508) / 4;
26399 if (amount <= n_free * 4)
26400 return amount / 4;
26401 return 0;
26402 }
26403
26404 /* The bits which aren't usefully expanded as rtl. */
26405 const char *
26406 thumb1_unexpanded_epilogue (void)
26407 {
26408 arm_stack_offsets *offsets;
26409 int regno;
26410 unsigned long live_regs_mask = 0;
26411 int high_regs_pushed = 0;
26412 int extra_pop;
26413 int had_to_push_lr;
26414 int size;
26415
26416 if (cfun->machine->return_used_this_function != 0)
26417 return "";
26418
26419 if (IS_NAKED (arm_current_func_type ()))
26420 return "";
26421
26422 offsets = arm_get_frame_offsets ();
26423 live_regs_mask = offsets->saved_regs_mask;
26424 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26425
26426 /* Deduce the registers used from the function's return value.
26427 This is more reliable than examining df_regs_ever_live_p () because that
26428 will be set if the register is ever used in the function, not just if
26429 the register is used to hold a return value. */
26430 size = arm_size_return_regs ();
26431
26432 extra_pop = thumb1_extra_regs_pushed (offsets, false);
26433 if (extra_pop > 0)
26434 {
26435 unsigned long extra_mask = (1 << extra_pop) - 1;
26436 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
26437 }
26438
26439 /* The prolog may have pushed some high registers to use as
26440 work registers. e.g. the testsuite file:
26441 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
26442 compiles to produce:
26443 push {r4, r5, r6, r7, lr}
26444 mov r7, r9
26445 mov r6, r8
26446 push {r6, r7}
26447 as part of the prolog. We have to undo that pushing here. */
26448
26449 if (high_regs_pushed)
26450 {
26451 unsigned long mask = live_regs_mask & 0xff;
26452 int next_hi_reg;
26453
26454 /* The available low registers depend on the size of the value we are
26455 returning. */
26456 if (size <= 12)
26457 mask |= 1 << 3;
26458 if (size <= 8)
26459 mask |= 1 << 2;
26460
26461 if (mask == 0)
26462 /* Oh dear! We have no low registers into which we can pop
26463 high registers! */
26464 internal_error
26465 ("no low registers available for popping high registers");
26466
26467 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
26468 if (live_regs_mask & (1 << next_hi_reg))
26469 break;
26470
26471 while (high_regs_pushed)
26472 {
26473 /* Find lo register(s) into which the high register(s) can
26474 be popped. */
26475 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26476 {
26477 if (mask & (1 << regno))
26478 high_regs_pushed--;
26479 if (high_regs_pushed == 0)
26480 break;
26481 }
26482
26483 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
26484
26485 /* Pop the values into the low register(s). */
26486 thumb_pop (asm_out_file, mask);
26487
26488 /* Move the value(s) into the high registers. */
26489 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
26490 {
26491 if (mask & (1 << regno))
26492 {
26493 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
26494 regno);
26495
26496 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
26497 if (live_regs_mask & (1 << next_hi_reg))
26498 break;
26499 }
26500 }
26501 }
26502 live_regs_mask &= ~0x0f00;
26503 }
26504
26505 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
26506 live_regs_mask &= 0xff;
26507
26508 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
26509 {
26510 /* Pop the return address into the PC. */
26511 if (had_to_push_lr)
26512 live_regs_mask |= 1 << PC_REGNUM;
26513
26514 /* Either no argument registers were pushed or a backtrace
26515 structure was created which includes an adjusted stack
26516 pointer, so just pop everything. */
26517 if (live_regs_mask)
26518 thumb_pop (asm_out_file, live_regs_mask);
26519
26520 /* We have either just popped the return address into the
26521 PC or it was kept in LR for the entire function.
26522 Note that thumb_pop has already called thumb_exit if the
26523 PC was in the list. */
26524 if (!had_to_push_lr)
26525 thumb_exit (asm_out_file, LR_REGNUM);
26526 }
26527 else
26528 {
26529 /* Pop everything but the return address. */
26530 if (live_regs_mask)
26531 thumb_pop (asm_out_file, live_regs_mask);
26532
26533 if (had_to_push_lr)
26534 {
26535 if (size > 12)
26536 {
26537 /* We have no free low regs, so save one. */
26538 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
26539 LAST_ARG_REGNUM);
26540 }
26541
26542 /* Get the return address into a temporary register. */
26543 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
26544
26545 if (size > 12)
26546 {
26547 /* Move the return address to lr. */
26548 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
26549 LAST_ARG_REGNUM);
26550 /* Restore the low register. */
26551 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
26552 IP_REGNUM);
26553 regno = LR_REGNUM;
26554 }
26555 else
26556 regno = LAST_ARG_REGNUM;
26557 }
26558 else
26559 regno = LR_REGNUM;
26560
26561 /* Remove the argument registers that were pushed onto the stack. */
26562 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
26563 SP_REGNUM, SP_REGNUM,
26564 crtl->args.pretend_args_size);
26565
26566 thumb_exit (asm_out_file, regno);
26567 }
26568
26569 return "";
26570 }
26571
26572 /* Functions to save and restore machine-specific function data. */
26573 static struct machine_function *
26574 arm_init_machine_status (void)
26575 {
26576 struct machine_function *machine;
26577 machine = ggc_alloc_cleared_machine_function ();
26578
26579 #if ARM_FT_UNKNOWN != 0
26580 machine->func_type = ARM_FT_UNKNOWN;
26581 #endif
26582 return machine;
26583 }
26584
26585 /* Return an RTX indicating where the return address to the
26586 calling function can be found. */
26587 rtx
26588 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
26589 {
26590 if (count != 0)
26591 return NULL_RTX;
26592
26593 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
26594 }
26595
26596 /* Do anything needed before RTL is emitted for each function. */
26597 void
26598 arm_init_expanders (void)
26599 {
26600 /* Arrange to initialize and mark the machine per-function status. */
26601 init_machine_status = arm_init_machine_status;
26602
26603 /* This is to stop the combine pass optimizing away the alignment
26604 adjustment of va_arg. */
26605 /* ??? It is claimed that this should not be necessary. */
26606 if (cfun)
26607 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
26608 }
26609
26610
26611 /* Like arm_compute_initial_elimination_offset. Simpler because there
26612 isn't an ABI specified frame pointer for Thumb. Instead, we set it
26613 to point at the base of the local variables after static stack
26614 space for a function has been allocated. */
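/* For example, eliminating ARG_POINTER_REGNUM to STACK_POINTER_REGNUM
   yields offsets->outgoing_args - offsets->saved_args, i.e. the full size
   of the frame as seen from the incoming arguments.  */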
26615
26616 HOST_WIDE_INT
26617 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
26618 {
26619 arm_stack_offsets *offsets;
26620
26621 offsets = arm_get_frame_offsets ();
26622
26623 switch (from)
26624 {
26625 case ARG_POINTER_REGNUM:
26626 switch (to)
26627 {
26628 case STACK_POINTER_REGNUM:
26629 return offsets->outgoing_args - offsets->saved_args;
26630
26631 case FRAME_POINTER_REGNUM:
26632 return offsets->soft_frame - offsets->saved_args;
26633
26634 case ARM_HARD_FRAME_POINTER_REGNUM:
26635 return offsets->saved_regs - offsets->saved_args;
26636
26637 case THUMB_HARD_FRAME_POINTER_REGNUM:
26638 return offsets->locals_base - offsets->saved_args;
26639
26640 default:
26641 gcc_unreachable ();
26642 }
26643 break;
26644
26645 case FRAME_POINTER_REGNUM:
26646 switch (to)
26647 {
26648 case STACK_POINTER_REGNUM:
26649 return offsets->outgoing_args - offsets->soft_frame;
26650
26651 case ARM_HARD_FRAME_POINTER_REGNUM:
26652 return offsets->saved_regs - offsets->soft_frame;
26653
26654 case THUMB_HARD_FRAME_POINTER_REGNUM:
26655 return offsets->locals_base - offsets->soft_frame;
26656
26657 default:
26658 gcc_unreachable ();
26659 }
26660 break;
26661
26662 default:
26663 gcc_unreachable ();
26664 }
26665 }
26666
26667 /* Generate the function's prologue. */
26668
26669 void
26670 thumb1_expand_prologue (void)
26671 {
26672 rtx insn;
26673
26674 HOST_WIDE_INT amount;
26675 arm_stack_offsets *offsets;
26676 unsigned long func_type;
26677 int regno;
26678 unsigned long live_regs_mask;
26679 unsigned long l_mask;
26680 unsigned high_regs_pushed = 0;
26681
26682 func_type = arm_current_func_type ();
26683
26684 /* Naked functions don't have prologues. */
26685 if (IS_NAKED (func_type))
26686 return;
26687
26688 if (IS_INTERRUPT (func_type))
26689 {
26690 error ("interrupt Service Routines cannot be coded in Thumb mode");
26691 return;
26692 }
26693
26694 if (is_called_in_ARM_mode (current_function_decl))
26695 emit_insn (gen_prologue_thumb1_interwork ());
26696
26697 offsets = arm_get_frame_offsets ();
26698 live_regs_mask = offsets->saved_regs_mask;
26699
26700 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
26701 l_mask = live_regs_mask & 0x40ff;
26702 /* Then count how many other high registers will need to be pushed. */
26703 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
26704
26705 if (crtl->args.pretend_args_size)
26706 {
26707 rtx x = GEN_INT (-crtl->args.pretend_args_size);
26708
26709 if (cfun->machine->uses_anonymous_args)
26710 {
26711 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
26712 unsigned long mask;
26713
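/* Build a mask of the pretend-argument registers just below
   LAST_ARG_REGNUM + 1; e.g. two pretend-argument words give a mask
   covering r2 and r3.  */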
26714 mask = 1ul << (LAST_ARG_REGNUM + 1);
26715 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
26716
26717 insn = thumb1_emit_multi_reg_push (mask, 0);
26718 }
26719 else
26720 {
26721 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26722 stack_pointer_rtx, x));
26723 }
26724 RTX_FRAME_RELATED_P (insn) = 1;
26725 }
26726
26727 if (TARGET_BACKTRACE)
26728 {
26729 HOST_WIDE_INT offset = 0;
26730 unsigned work_register;
26731 rtx work_reg, x, arm_hfp_rtx;
26732
26733 /* We have been asked to create a stack backtrace structure.
26734 The code looks like this:
26735
26736 0 .align 2
26737 0 func:
26738 0 sub SP, #16 Reserve space for 4 registers.
26739 2 push {R7} Push low registers.
26740 4 add R7, SP, #20 Get the stack pointer before the push.
26741 6 str R7, [SP, #8] Store the stack pointer
26742 (before reserving the space).
26743 8 mov R7, PC Get hold of the start of this code + 12.
26744 10 str R7, [SP, #16] Store it.
26745 12 mov R7, FP Get hold of the current frame pointer.
26746 14 str R7, [SP, #4] Store it.
26747 16 mov R7, LR Get hold of the current return address.
26748 18 str R7, [SP, #12] Store it.
26749 20 add R7, SP, #16 Point at the start of the
26750 backtrace structure.
26751 22 mov FP, R7 Put this value into the frame pointer. */
26752
26753 work_register = thumb_find_work_register (live_regs_mask);
26754 work_reg = gen_rtx_REG (SImode, work_register);
26755 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
26756
26757 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26758 stack_pointer_rtx, GEN_INT (-16)));
26759 RTX_FRAME_RELATED_P (insn) = 1;
26760
26761 if (l_mask)
26762 {
26763 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
26764 RTX_FRAME_RELATED_P (insn) = 1;
26765
26766 offset = bit_count (l_mask) * UNITS_PER_WORD;
26767 }
26768
26769 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
26770 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26771
26772 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
26773 x = gen_frame_mem (SImode, x);
26774 emit_move_insn (x, work_reg);
26775
26776 /* Make sure that the instruction fetching the PC is in the right place
26777 to calculate "start of backtrace creation code + 12". */
26778 /* ??? The stores using the common WORK_REG ought to be enough to
26779 prevent the scheduler from doing anything weird. Failing that
26780 we could always move all of the following into an UNSPEC_VOLATILE. */
26781 if (l_mask)
26782 {
26783 x = gen_rtx_REG (SImode, PC_REGNUM);
26784 emit_move_insn (work_reg, x);
26785
26786 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26787 x = gen_frame_mem (SImode, x);
26788 emit_move_insn (x, work_reg);
26789
26790 emit_move_insn (work_reg, arm_hfp_rtx);
26791
26792 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26793 x = gen_frame_mem (SImode, x);
26794 emit_move_insn (x, work_reg);
26795 }
26796 else
26797 {
26798 emit_move_insn (work_reg, arm_hfp_rtx);
26799
26800 x = plus_constant (Pmode, stack_pointer_rtx, offset);
26801 x = gen_frame_mem (SImode, x);
26802 emit_move_insn (x, work_reg);
26803
26804 x = gen_rtx_REG (SImode, PC_REGNUM);
26805 emit_move_insn (work_reg, x);
26806
26807 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
26808 x = gen_frame_mem (SImode, x);
26809 emit_move_insn (x, work_reg);
26810 }
26811
26812 x = gen_rtx_REG (SImode, LR_REGNUM);
26813 emit_move_insn (work_reg, x);
26814
26815 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
26816 x = gen_frame_mem (SImode, x);
26817 emit_move_insn (x, work_reg);
26818
26819 x = GEN_INT (offset + 12);
26820 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
26821
26822 emit_move_insn (arm_hfp_rtx, work_reg);
26823 }
26824 /* Optimization: If we are not pushing any low registers but we are going
26825 to push some high registers then delay our first push. This will just
26826 be a push of LR and we can combine it with the push of the first high
26827 register. */
26828 else if ((l_mask & 0xff) != 0
26829 || (high_regs_pushed == 0 && l_mask))
26830 {
26831 unsigned long mask = l_mask;
26832 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
26833 insn = thumb1_emit_multi_reg_push (mask, mask);
26834 RTX_FRAME_RELATED_P (insn) = 1;
26835 }
26836
26837 if (high_regs_pushed)
26838 {
26839 unsigned pushable_regs;
26840 unsigned next_hi_reg;
26841 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
26842 : crtl->args.info.nregs;
26843 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
26844
26845 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
26846 if (live_regs_mask & (1 << next_hi_reg))
26847 break;
26848
26849 /* Here we need to mask out registers used for passing arguments,
26850 even if they could otherwise be pushed. This avoids using them to stash
26851 the high registers, since such a stash could clobber still-live argument values. */
26852 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
26853
26854 if (pushable_regs == 0)
26855 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
26856
26857 while (high_regs_pushed > 0)
26858 {
26859 unsigned long real_regs_mask = 0;
26860
26861 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
26862 {
26863 if (pushable_regs & (1 << regno))
26864 {
26865 emit_move_insn (gen_rtx_REG (SImode, regno),
26866 gen_rtx_REG (SImode, next_hi_reg));
26867
26868 high_regs_pushed --;
26869 real_regs_mask |= (1 << next_hi_reg);
26870
26871 if (high_regs_pushed)
26872 {
26873 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
26874 next_hi_reg --)
26875 if (live_regs_mask & (1 << next_hi_reg))
26876 break;
26877 }
26878 else
26879 {
26880 pushable_regs &= ~((1 << regno) - 1);
26881 break;
26882 }
26883 }
26884 }
26885
26886 /* If we had to find a work register and we have not yet
26887 saved the LR then add it to the list of regs to push. */
26888 if (l_mask == (1 << LR_REGNUM))
26889 {
26890 pushable_regs |= l_mask;
26891 real_regs_mask |= l_mask;
26892 l_mask = 0;
26893 }
26894
26895 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
26896 RTX_FRAME_RELATED_P (insn) = 1;
26897 }
26898 }
26899
26900 /* Load the pic register before setting the frame pointer,
26901 so we can use r7 as a temporary work register. */
26902 if (flag_pic && arm_pic_register != INVALID_REGNUM)
26903 arm_load_pic_register (live_regs_mask);
26904
26905 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
26906 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
26907 stack_pointer_rtx);
26908
26909 if (flag_stack_usage_info)
26910 current_function_static_stack_size
26911 = offsets->outgoing_args - offsets->saved_args;
26912
26913 amount = offsets->outgoing_args - offsets->saved_regs;
26914 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
26915 if (amount)
26916 {
26917 if (amount < 512)
26918 {
26919 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
26920 GEN_INT (- amount)));
26921 RTX_FRAME_RELATED_P (insn) = 1;
26922 }
26923 else
26924 {
26925 rtx reg, dwarf;
26926
26927 /* The stack decrement is too big for an immediate value in a single
26928 insn. In theory we could issue multiple subtracts, but after
26929 three of them it becomes more space efficient to place the full
26930 value in the constant pool and load into a register. (Also the
26931 ARM debugger really likes to see only one stack decrement per
26932 function). So instead we look for a scratch register into which
26933 we can load the decrement, and then we subtract this from the
26934 stack pointer. Unfortunately on the thumb the only available
26935 scratch registers are the argument registers, and we cannot use
26936 these as they may hold arguments to the function. Instead we
26937 attempt to locate a call preserved register which is used by this
26938 function. If we can find one, then we know that it will have
26939 been pushed at the start of the prologue and so we can corrupt
26940 it now. */
26941 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
26942 if (live_regs_mask & (1 << regno))
26943 break;
26944
26945 gcc_assert (regno <= LAST_LO_REGNUM);
26946
26947 reg = gen_rtx_REG (SImode, regno);
26948
26949 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
26950
26951 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
26952 stack_pointer_rtx, reg));
26953
26954 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26955 plus_constant (Pmode, stack_pointer_rtx,
26956 -amount));
26957 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
26958 RTX_FRAME_RELATED_P (insn) = 1;
26959 }
26960 }
26961
26962 if (frame_pointer_needed)
26963 thumb_set_frame_pointer (offsets);
26964
26965 /* If we are profiling, make sure no instructions are scheduled before
26966 the call to mcount. Similarly if the user has requested no
26967 scheduling in the prolog. Similarly if we want non-call exceptions
26968 using the EABI unwinder, to prevent faulting instructions from being
26969 swapped with a stack adjustment. */
26970 if (crtl->profile || !TARGET_SCHED_PROLOG
26971 || (arm_except_unwind_info (&global_options) == UI_TARGET
26972 && cfun->can_throw_non_call_exceptions))
26973 emit_insn (gen_blockage ());
26974
26975 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
26976 if (live_regs_mask & 0xff)
26977 cfun->machine->lr_save_eliminated = 0;
26978 }
26979
26980 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
26981 single POP instruction can be generated. LR should be replaced by PC. All
26982 the checks required are already done by USE_RETURN_INSN (). Hence,
26983 all we really need to check here is whether a single register or
26984 multiple registers are to be popped. */
26985 void
26986 thumb2_expand_return (bool simple_return)
26987 {
26988 int i, num_regs;
26989 unsigned long saved_regs_mask;
26990 arm_stack_offsets *offsets;
26991
26992 offsets = arm_get_frame_offsets ();
26993 saved_regs_mask = offsets->saved_regs_mask;
26994
26995 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
26996 if (saved_regs_mask & (1 << i))
26997 num_regs++;
26998
26999 if (!simple_return && saved_regs_mask)
27000 {
27001 if (num_regs == 1)
27002 {
27003 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27004 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
27005 rtx addr = gen_rtx_MEM (SImode,
27006 gen_rtx_POST_INC (SImode,
27007 stack_pointer_rtx));
27008 set_mem_alias_set (addr, get_frame_alias_set ());
27009 XVECEXP (par, 0, 0) = ret_rtx;
27010 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
27011 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
27012 emit_jump_insn (par);
27013 }
27014 else
27015 {
27016 saved_regs_mask &= ~ (1 << LR_REGNUM);
27017 saved_regs_mask |= (1 << PC_REGNUM);
27018 arm_emit_multi_reg_pop (saved_regs_mask);
27019 }
27020 }
27021 else
27022 {
27023 emit_jump_insn (simple_return_rtx);
27024 }
27025 }
27026
27027 void
27028 thumb1_expand_epilogue (void)
27029 {
27030 HOST_WIDE_INT amount;
27031 arm_stack_offsets *offsets;
27032 int regno;
27033
27034 /* Naked functions don't have epilogues. */
27035 if (IS_NAKED (arm_current_func_type ()))
27036 return;
27037
27038 offsets = arm_get_frame_offsets ();
27039 amount = offsets->outgoing_args - offsets->saved_regs;
27040
27041 if (frame_pointer_needed)
27042 {
27043 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
27044 amount = offsets->locals_base - offsets->saved_regs;
27045 }
27046 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
27047
27048 gcc_assert (amount >= 0);
27049 if (amount)
27050 {
27051 emit_insn (gen_blockage ());
27052
27053 if (amount < 512)
27054 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
27055 GEN_INT (amount)));
27056 else
27057 {
27058 /* r3 is always free in the epilogue. */
27059 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
27060
27061 emit_insn (gen_movsi (reg, GEN_INT (amount)));
27062 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
27063 }
27064 }
27065
27066 /* Emit a USE (stack_pointer_rtx), so that
27067 the stack adjustment will not be deleted. */
27068 emit_insn (gen_force_register_use (stack_pointer_rtx));
27069
27070 if (crtl->profile || !TARGET_SCHED_PROLOG)
27071 emit_insn (gen_blockage ());
27072
27073 /* Emit a clobber for each insn that will be restored in the epilogue,
27074 so that flow2 will get register lifetimes correct. */
27075 for (regno = 0; regno < 13; regno++)
27076 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
27077 emit_clobber (gen_rtx_REG (SImode, regno));
27078
27079 if (! df_regs_ever_live_p (LR_REGNUM))
27080 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
27081 }
27082
27083 /* Epilogue code for APCS frame. */
27084 static void
27085 arm_expand_epilogue_apcs_frame (bool really_return)
27086 {
27087 unsigned long func_type;
27088 unsigned long saved_regs_mask;
27089 int num_regs = 0;
27090 int i;
27091 int floats_from_frame = 0;
27092 arm_stack_offsets *offsets;
27093
27094 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
27095 func_type = arm_current_func_type ();
27096
27097 /* Get frame offsets for ARM. */
27098 offsets = arm_get_frame_offsets ();
27099 saved_regs_mask = offsets->saved_regs_mask;
27100
27101 /* Find the offset of the floating-point save area in the frame. */
27102 floats_from_frame
27103 = (offsets->saved_args
27104 + arm_compute_static_chain_stack_bytes ()
27105 - offsets->frame);
27106
27107 /* Compute how many core registers are saved and how far away the floats are.  */
27108 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27109 if (saved_regs_mask & (1 << i))
27110 {
27111 num_regs++;
27112 floats_from_frame += 4;
27113 }
27114
27115 if (TARGET_HARD_FLOAT && TARGET_VFP)
27116 {
27117 int start_reg;
27118 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
27119
27120 /* The offset is from IP_REGNUM. */
27121 int saved_size = arm_get_vfp_saved_size ();
27122 if (saved_size > 0)
27123 {
27124 rtx insn;
27125 floats_from_frame += saved_size;
27126 insn = emit_insn (gen_addsi3 (ip_rtx,
27127 hard_frame_pointer_rtx,
27128 GEN_INT (-floats_from_frame)));
27129 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
27130 ip_rtx, hard_frame_pointer_rtx);
27131 }
27132
27133 /* Generate VFP register multi-pop. */
27134 start_reg = FIRST_VFP_REGNUM;
27135
27136 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
27137 /* Look for a case where a reg does not need restoring. */
27138 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27139 && (!df_regs_ever_live_p (i + 1)
27140 || call_used_regs[i + 1]))
27141 {
27142 if (start_reg != i)
27143 arm_emit_vfp_multi_reg_pop (start_reg,
27144 (i - start_reg) / 2,
27145 gen_rtx_REG (SImode,
27146 IP_REGNUM));
27147 start_reg = i + 2;
27148 }
27149
27150 /* Restore the remaining regs that we have discovered (or possibly
27151 even all of them, if the conditional in the for loop never
27152 fired). */
27153 if (start_reg != i)
27154 arm_emit_vfp_multi_reg_pop (start_reg,
27155 (i - start_reg) / 2,
27156 gen_rtx_REG (SImode, IP_REGNUM));
27157 }
27158
27159 if (TARGET_IWMMXT)
27160 {
27161 /* The frame pointer is guaranteed not to be double-word aligned, since
27162 it is set to the double-word-aligned old_stack_pointer minus 4.  */
27163 rtx insn;
27164 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
27165
27166 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
27167 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27168 {
27169 rtx addr = gen_frame_mem (V2SImode,
27170 plus_constant (Pmode, hard_frame_pointer_rtx,
27171 - lrm_count * 4));
27172 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27173 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27174 gen_rtx_REG (V2SImode, i),
27175 NULL_RTX);
27176 lrm_count += 2;
27177 }
27178 }
27179
27180 /* saved_regs_mask should contain IP, which holds the old stack pointer
27181 saved at the time of activation creation.  Since SP and IP are adjacent
27182 registers, we can restore that value directly into SP.  */
27183 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
27184 saved_regs_mask &= ~(1 << IP_REGNUM);
27185 saved_regs_mask |= (1 << SP_REGNUM);
27186
27187 /* There are two registers left in saved_regs_mask - LR and PC. We
27188 only need to restore LR (the return address), but to
27189 save time we can load it directly into PC, unless we need a
27190 special function exit sequence, or we are not really returning. */
27191 if (really_return
27192 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
27193 && !crtl->calls_eh_return)
27194 /* Delete LR from the register mask, so that LR on
27195 the stack is loaded into the PC in the register mask. */
27196 saved_regs_mask &= ~(1 << LR_REGNUM);
27197 else
27198 saved_regs_mask &= ~(1 << PC_REGNUM);
27199
27200 num_regs = bit_count (saved_regs_mask);
27201 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
27202 {
27203 rtx insn;
27204 emit_insn (gen_blockage ());
27205 /* Unwind the stack to just below the saved registers. */
27206 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27207 hard_frame_pointer_rtx,
27208 GEN_INT (- 4 * num_regs)));
27209
27210 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
27211 stack_pointer_rtx, hard_frame_pointer_rtx);
27212 }
27213
27214 arm_emit_multi_reg_pop (saved_regs_mask);
27215
27216 if (IS_INTERRUPT (func_type))
27217 {
27218 /* Interrupt handlers will have pushed the
27219 IP onto the stack, so restore it now. */
27220 rtx insn;
27221 rtx addr = gen_rtx_MEM (SImode,
27222 gen_rtx_POST_INC (SImode,
27223 stack_pointer_rtx));
27224 set_mem_alias_set (addr, get_frame_alias_set ());
27225 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
27226 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27227 gen_rtx_REG (SImode, IP_REGNUM),
27228 NULL_RTX);
27229 }
27230
27231 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
27232 return;
27233
27234 if (crtl->calls_eh_return)
27235 emit_insn (gen_addsi3 (stack_pointer_rtx,
27236 stack_pointer_rtx,
27237 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27238
27239 if (IS_STACKALIGN (func_type))
27240 /* Restore the original stack pointer. Before prologue, the stack was
27241 realigned and the original stack pointer saved in r0. For details,
27242 see comment in arm_expand_prologue. */
27243 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27244
27245 emit_jump_insn (simple_return_rtx);
27246 }
27247
27248 /* Generate RTL to represent ARM epilogue. Really_return is true if the
27249 function is not a sibcall. */
27250 void
27251 arm_expand_epilogue (bool really_return)
27252 {
27253 unsigned long func_type;
27254 unsigned long saved_regs_mask;
27255 int num_regs = 0;
27256 int i;
27257 int amount;
27258 arm_stack_offsets *offsets;
27259
27260 func_type = arm_current_func_type ();
27261
27262 /* Naked functions don't have epilogues.  Hence, generate a return pattern
27263 and let output_return_instruction take care of any instruction emission.  */
27264 if (IS_NAKED (func_type)
27265 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
27266 {
27267 if (really_return)
27268 emit_jump_insn (simple_return_rtx);
27269 return;
27270 }
27271
27272 /* If we are throwing an exception, then we really must be doing a
27273 return, so we can't tail-call. */
27274 gcc_assert (!crtl->calls_eh_return || really_return);
27275
27276 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
27277 {
27278 arm_expand_epilogue_apcs_frame (really_return);
27279 return;
27280 }
27281
27282 /* Get frame offsets for ARM. */
27283 offsets = arm_get_frame_offsets ();
27284 saved_regs_mask = offsets->saved_regs_mask;
27285 num_regs = bit_count (saved_regs_mask);
27286
27287 if (frame_pointer_needed)
27288 {
27289 rtx insn;
27290 /* Restore stack pointer if necessary. */
27291 if (TARGET_ARM)
27292 {
27293 /* In ARM mode, frame pointer points to first saved register.
27294 Restore stack pointer to last saved register. */
27295 amount = offsets->frame - offsets->saved_regs;
27296
27297 /* Force out any pending memory operations that reference stacked data
27298 before stack de-allocation occurs. */
27299 emit_insn (gen_blockage ());
27300 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
27301 hard_frame_pointer_rtx,
27302 GEN_INT (amount)));
27303 arm_add_cfa_adjust_cfa_note (insn, amount,
27304 stack_pointer_rtx,
27305 hard_frame_pointer_rtx);
27306
27307 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27308 deleted. */
27309 emit_insn (gen_force_register_use (stack_pointer_rtx));
27310 }
27311 else
27312 {
27313 /* In Thumb-2 mode, the frame pointer points to the last saved
27314 register. */
27315 amount = offsets->locals_base - offsets->saved_regs;
27316 if (amount)
27317 {
27318 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
27319 hard_frame_pointer_rtx,
27320 GEN_INT (amount)));
27321 arm_add_cfa_adjust_cfa_note (insn, amount,
27322 hard_frame_pointer_rtx,
27323 hard_frame_pointer_rtx);
27324 }
27325
27326 /* Force out any pending memory operations that reference stacked data
27327 before stack de-allocation occurs. */
27328 emit_insn (gen_blockage ());
27329 insn = emit_insn (gen_movsi (stack_pointer_rtx,
27330 hard_frame_pointer_rtx));
27331 arm_add_cfa_adjust_cfa_note (insn, 0,
27332 stack_pointer_rtx,
27333 hard_frame_pointer_rtx);
27334 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
27335 deleted. */
27336 emit_insn (gen_force_register_use (stack_pointer_rtx));
27337 }
27338 }
27339 else
27340 {
27341 /* Pop off outgoing args and local frame to adjust stack pointer to
27342 last saved register. */
27343 amount = offsets->outgoing_args - offsets->saved_regs;
27344 if (amount)
27345 {
27346 rtx tmp;
27347 /* Force out any pending memory operations that reference stacked data
27348 before stack de-allocation occurs. */
27349 emit_insn (gen_blockage ());
27350 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27351 stack_pointer_rtx,
27352 GEN_INT (amount)));
27353 arm_add_cfa_adjust_cfa_note (tmp, amount,
27354 stack_pointer_rtx, stack_pointer_rtx);
27355 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
27356 not deleted. */
27357 emit_insn (gen_force_register_use (stack_pointer_rtx));
27358 }
27359 }
27360
27361 if (TARGET_HARD_FLOAT && TARGET_VFP)
27362 {
27363 /* Generate VFP register multi-pop. */
27364 int end_reg = LAST_VFP_REGNUM + 1;
27365
27366 /* Scan the registers in reverse order. We need to match
27367 any groupings made in the prologue and generate matching
27368 vldm operations. The need to match groups is because,
27369 unlike pop, vldm can only do consecutive regs. */
27370 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
27371 /* Look for a case where a reg does not need restoring. */
27372 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
27373 && (!df_regs_ever_live_p (i + 1)
27374 || call_used_regs[i + 1]))
27375 {
27376 /* Restore the regs discovered so far (from reg+2 to
27377 end_reg). */
27378 if (end_reg > i + 2)
27379 arm_emit_vfp_multi_reg_pop (i + 2,
27380 (end_reg - (i + 2)) / 2,
27381 stack_pointer_rtx);
27382 end_reg = i;
27383 }
27384
27385 /* Restore the remaining regs that we have discovered (or possibly
27386 even all of them, if the conditional in the for loop never
27387 fired). */
27388 if (end_reg > i + 2)
27389 arm_emit_vfp_multi_reg_pop (i + 2,
27390 (end_reg - (i + 2)) / 2,
27391 stack_pointer_rtx);
27392 }
27393
27394 if (TARGET_IWMMXT)
27395 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
27396 if (df_regs_ever_live_p (i) && !call_used_regs[i])
27397 {
27398 rtx insn;
27399 rtx addr = gen_rtx_MEM (V2SImode,
27400 gen_rtx_POST_INC (SImode,
27401 stack_pointer_rtx));
27402 set_mem_alias_set (addr, get_frame_alias_set ());
27403 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
27404 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27405 gen_rtx_REG (V2SImode, i),
27406 NULL_RTX);
27407 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27408 stack_pointer_rtx, stack_pointer_rtx);
27409 }
27410
27411 if (saved_regs_mask)
27412 {
27413 rtx insn;
27414 bool return_in_pc = false;
27415
27416 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
27417 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
27418 && !IS_STACKALIGN (func_type)
27419 && really_return
27420 && crtl->args.pretend_args_size == 0
27421 && saved_regs_mask & (1 << LR_REGNUM)
27422 && !crtl->calls_eh_return)
27423 {
27424 saved_regs_mask &= ~(1 << LR_REGNUM);
27425 saved_regs_mask |= (1 << PC_REGNUM);
27426 return_in_pc = true;
27427 }
27428
27429 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
27430 {
27431 for (i = 0; i <= LAST_ARM_REGNUM; i++)
27432 if (saved_regs_mask & (1 << i))
27433 {
27434 rtx addr = gen_rtx_MEM (SImode,
27435 gen_rtx_POST_INC (SImode,
27436 stack_pointer_rtx));
27437 set_mem_alias_set (addr, get_frame_alias_set ());
27438
27439 if (i == PC_REGNUM)
27440 {
27441 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
27442 XVECEXP (insn, 0, 0) = ret_rtx;
27443 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
27444 gen_rtx_REG (SImode, i),
27445 addr);
27446 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
27447 insn = emit_jump_insn (insn);
27448 }
27449 else
27450 {
27451 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
27452 addr));
27453 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
27454 gen_rtx_REG (SImode, i),
27455 NULL_RTX);
27456 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
27457 stack_pointer_rtx,
27458 stack_pointer_rtx);
27459 }
27460 }
27461 }
27462 else
27463 {
27464 if (TARGET_LDRD
27465 && current_tune->prefer_ldrd_strd
27466 && !optimize_function_for_size_p (cfun))
27467 {
27468 if (TARGET_THUMB2)
27469 thumb2_emit_ldrd_pop (saved_regs_mask);
27470 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
27471 arm_emit_ldrd_pop (saved_regs_mask);
27472 else
27473 arm_emit_multi_reg_pop (saved_regs_mask);
27474 }
27475 else
27476 arm_emit_multi_reg_pop (saved_regs_mask);
27477 }
27478
27479 if (return_in_pc == true)
27480 return;
27481 }
27482
27483 if (crtl->args.pretend_args_size)
27484 {
27485 int i, j;
27486 rtx dwarf = NULL_RTX;
27487 rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
27488 stack_pointer_rtx,
27489 GEN_INT (crtl->args.pretend_args_size)));
27490
27491 RTX_FRAME_RELATED_P (tmp) = 1;
27492
27493 if (cfun->machine->uses_anonymous_args)
27494 {
27495 /* Restore pretend args.  Refer to arm_expand_prologue for how the
27496 pretend args are saved on the stack.  */
27497 int num_regs = crtl->args.pretend_args_size / 4;
27498 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
27499 for (j = 0, i = 0; j < num_regs; i++)
27500 if (saved_regs_mask & (1 << i))
27501 {
27502 rtx reg = gen_rtx_REG (SImode, i);
27503 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
27504 j++;
27505 }
27506 REG_NOTES (tmp) = dwarf;
27507 }
27508 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
27509 stack_pointer_rtx, stack_pointer_rtx);
27510 }
27511
27512 if (!really_return)
27513 return;
27514
27515 if (crtl->calls_eh_return)
27516 emit_insn (gen_addsi3 (stack_pointer_rtx,
27517 stack_pointer_rtx,
27518 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
27519
27520 if (IS_STACKALIGN (func_type))
27521 /* Restore the original stack pointer. Before prologue, the stack was
27522 realigned and the original stack pointer saved in r0. For details,
27523 see comment in arm_expand_prologue. */
27524 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
27525
27526 emit_jump_insn (simple_return_rtx);
27527 }
27528
27529 /* Implementation of insn prologue_thumb1_interwork. This is the first
27530 "instruction" of a function called in ARM mode. Swap to thumb mode. */
27531
27532 const char *
27533 thumb1_output_interwork (void)
27534 {
27535 const char * name;
27536 FILE *f = asm_out_file;
27537
27538 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
27539 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
27540 == SYMBOL_REF);
27541 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27542
27543 /* Generate code sequence to switch us into Thumb mode. */
27544 /* The .code 32 directive has already been emitted by
27545 ASM_DECLARE_FUNCTION_NAME. */
27546 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
27547 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
27548
27549 /* Generate a label, so that the debugger will notice the
27550 change in instruction sets. This label is also used by
27551 the assembler to bypass the ARM code when this function
27552 is called from a Thumb encoded function elsewhere in the
27553 same file. Hence the definition of STUB_NAME here must
27554 agree with the definition in gas/config/tc-arm.c. */
27555
27556 #define STUB_NAME ".real_start_of"
27557
27558 fprintf (f, "\t.code\t16\n");
27559 #ifdef ARM_PE
27560 if (arm_dllexport_name_p (name))
27561 name = arm_strip_name_encoding (name);
27562 #endif
27563 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
27564 fprintf (f, "\t.thumb_func\n");
27565 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
27566
27567 return "";
27568 }
27569
27570 /* Handle the case of a double word load into a low register from
27571 a computed memory address. The computed address may involve a
27572 register which is overwritten by the load. */
27573 const char *
27574 thumb_load_double_from_address (rtx *operands)
27575 {
27576 rtx addr;
27577 rtx base;
27578 rtx offset;
27579 rtx arg1;
27580 rtx arg2;
27581
27582 gcc_assert (REG_P (operands[0]));
27583 gcc_assert (MEM_P (operands[1]));
27584
27585 /* Get the memory address. */
27586 addr = XEXP (operands[1], 0);
27587
27588 /* Work out how the memory address is computed. */
27589 switch (GET_CODE (addr))
27590 {
27591 case REG:
27592 operands[2] = adjust_address (operands[1], SImode, 4);
27593
27594 if (REGNO (operands[0]) == REGNO (addr))
27595 {
27596 output_asm_insn ("ldr\t%H0, %2", operands);
27597 output_asm_insn ("ldr\t%0, %1", operands);
27598 }
27599 else
27600 {
27601 output_asm_insn ("ldr\t%0, %1", operands);
27602 output_asm_insn ("ldr\t%H0, %2", operands);
27603 }
27604 break;
27605
27606 case CONST:
27607 /* Compute <address> + 4 for the high order load. */
27608 operands[2] = adjust_address (operands[1], SImode, 4);
27609
27610 output_asm_insn ("ldr\t%0, %1", operands);
27611 output_asm_insn ("ldr\t%H0, %2", operands);
27612 break;
27613
27614 case PLUS:
27615 arg1 = XEXP (addr, 0);
27616 arg2 = XEXP (addr, 1);
27617
27618 if (CONSTANT_P (arg1))
27619 base = arg2, offset = arg1;
27620 else
27621 base = arg1, offset = arg2;
27622
27623 gcc_assert (REG_P (base));
27624
27625 /* Catch the case of <address> = <reg> + <reg> */
27626 if (REG_P (offset))
27627 {
27628 int reg_offset = REGNO (offset);
27629 int reg_base = REGNO (base);
27630 int reg_dest = REGNO (operands[0]);
27631
27632 /* Add the base and offset registers together into the
27633 higher destination register. */
27634 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
27635 reg_dest + 1, reg_base, reg_offset);
27636
27637 /* Load the lower destination register from the address in
27638 the higher destination register. */
27639 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
27640 reg_dest, reg_dest + 1);
27641
27642 /* Load the higher destination register from its own address
27643 plus 4. */
27644 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
27645 reg_dest + 1, reg_dest + 1);
27646 }
27647 else
27648 {
27649 /* Compute <address> + 4 for the high order load. */
27650 operands[2] = adjust_address (operands[1], SImode, 4);
27651
27652 /* If the computed address is held in the low order register
27653 then load the high order register first, otherwise always
27654 load the low order register first. */
27655 if (REGNO (operands[0]) == REGNO (base))
27656 {
27657 output_asm_insn ("ldr\t%H0, %2", operands);
27658 output_asm_insn ("ldr\t%0, %1", operands);
27659 }
27660 else
27661 {
27662 output_asm_insn ("ldr\t%0, %1", operands);
27663 output_asm_insn ("ldr\t%H0, %2", operands);
27664 }
27665 }
27666 break;
27667
27668 case LABEL_REF:
27669 /* With no registers to worry about we can just load the value
27670 directly. */
27671 operands[2] = adjust_address (operands[1], SImode, 4);
27672
27673 output_asm_insn ("ldr\t%H0, %2", operands);
27674 output_asm_insn ("ldr\t%0, %1", operands);
27675 break;
27676
27677 default:
27678 gcc_unreachable ();
27679 }
27680
27681 return "";
27682 }
27683
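/* Output a block copy of N (2 or 3) words using ldmia/stmia.  Operand 1 is
   the source pointer, operand 0 the destination pointer, and operands 4-6
   are scratch registers, which are first sorted into ascending order so
   that the register lists are valid.  */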
27684 const char *
27685 thumb_output_move_mem_multiple (int n, rtx *operands)
27686 {
27687 rtx tmp;
27688
27689 switch (n)
27690 {
27691 case 2:
27692 if (REGNO (operands[4]) > REGNO (operands[5]))
27693 {
27694 tmp = operands[4];
27695 operands[4] = operands[5];
27696 operands[5] = tmp;
27697 }
27698 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
27699 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
27700 break;
27701
27702 case 3:
27703 if (REGNO (operands[4]) > REGNO (operands[5]))
27704 {
27705 tmp = operands[4];
27706 operands[4] = operands[5];
27707 operands[5] = tmp;
27708 }
27709 if (REGNO (operands[5]) > REGNO (operands[6]))
27710 {
27711 tmp = operands[5];
27712 operands[5] = operands[6];
27713 operands[6] = tmp;
27714 }
27715 if (REGNO (operands[4]) > REGNO (operands[5]))
27716 {
27717 tmp = operands[4];
27718 operands[4] = operands[5];
27719 operands[5] = tmp;
27720 }
27721
27722 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
27723 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
27724 break;
27725
27726 default:
27727 gcc_unreachable ();
27728 }
27729
27730 return "";
27731 }
27732
27733 /* Output a call-via instruction for thumb state. */
27734 const char *
27735 thumb_call_via_reg (rtx reg)
27736 {
27737 int regno = REGNO (reg);
27738 rtx *labelp;
27739
27740 gcc_assert (regno < LR_REGNUM);
27741
27742 /* If we are in the normal text section we can use a single instance
27743 per compilation unit. If we are doing function sections, then we need
27744 an entry per section, since we can't rely on reachability. */
27745 if (in_section == text_section)
27746 {
27747 thumb_call_reg_needed = 1;
27748
27749 if (thumb_call_via_label[regno] == NULL)
27750 thumb_call_via_label[regno] = gen_label_rtx ();
27751 labelp = thumb_call_via_label + regno;
27752 }
27753 else
27754 {
27755 if (cfun->machine->call_via[regno] == NULL)
27756 cfun->machine->call_via[regno] = gen_label_rtx ();
27757 labelp = cfun->machine->call_via + regno;
27758 }
27759
27760 output_asm_insn ("bl\t%a0", labelp);
27761 return "";
27762 }
27763
27764 /* Routines for generating rtl. */
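/* Expand a movmemqi pattern: copy INTVAL (operands[2]) bytes from the memory
   referenced by operands[1] to the memory referenced by operands[0], using
   the movmem12b/movmem8b patterns for the bulk of the copy and word,
   half-word and byte moves for the tail.  */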
27765 void
27766 thumb_expand_movmemqi (rtx *operands)
27767 {
27768 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
27769 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
27770 HOST_WIDE_INT len = INTVAL (operands[2]);
27771 HOST_WIDE_INT offset = 0;
27772
27773 while (len >= 12)
27774 {
27775 emit_insn (gen_movmem12b (out, in, out, in));
27776 len -= 12;
27777 }
27778
27779 if (len >= 8)
27780 {
27781 emit_insn (gen_movmem8b (out, in, out, in));
27782 len -= 8;
27783 }
27784
27785 if (len >= 4)
27786 {
27787 rtx reg = gen_reg_rtx (SImode);
27788 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
27789 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
27790 len -= 4;
27791 offset += 4;
27792 }
27793
27794 if (len >= 2)
27795 {
27796 rtx reg = gen_reg_rtx (HImode);
27797 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
27798 plus_constant (Pmode, in,
27799 offset))));
27800 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
27801 offset)),
27802 reg));
27803 len -= 2;
27804 offset += 2;
27805 }
27806
27807 if (len)
27808 {
27809 rtx reg = gen_reg_rtx (QImode);
27810 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
27811 plus_constant (Pmode, in,
27812 offset))));
27813 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
27814 offset)),
27815 reg));
27816 }
27817 }
27818
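/* Handle storing a half-word to memory during reload.  */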
27819 void
27820 thumb_reload_out_hi (rtx *operands)
27821 {
27822 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
27823 }
27824
27825 /* Handle reading a half-word from memory during reload. */
27826 void
27827 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
27828 {
27829 gcc_unreachable ();
27830 }
27831
27832 /* Return the length of a function name prefix
27833 that starts with the character 'c'. */
27834 static int
27835 arm_get_strip_length (int c)
27836 {
27837 switch (c)
27838 {
27839 ARM_NAME_ENCODING_LENGTHS
27840 default: return 0;
27841 }
27842 }
27843
27844 /* Return a pointer to a function's name with any
27845 and all prefix encodings stripped from it. */
27846 const char *
27847 arm_strip_name_encoding (const char *name)
27848 {
27849 int skip;
27850
27851 while ((skip = arm_get_strip_length (* name)))
27852 name += skip;
27853
27854 return name;
27855 }
27856
27857 /* If there is a '*' anywhere in the name's prefix, then
27858 emit the stripped name verbatim, otherwise prepend an
27859 underscore if leading underscores are being used. */
27860 void
27861 arm_asm_output_labelref (FILE *stream, const char *name)
27862 {
27863 int skip;
27864 int verbatim = 0;
27865
27866 while ((skip = arm_get_strip_length (* name)))
27867 {
27868 verbatim |= (*name == '*');
27869 name += skip;
27870 }
27871
27872 if (verbatim)
27873 fputs (name, stream);
27874 else
27875 asm_fprintf (stream, "%U%s", name);
27876 }
27877
27878 /* This function is used to emit an EABI tag and its associated value.
27879 We emit the numerical value of the tag in case the assembler does not
27880 support textual tags (e.g. gas prior to 2.20).  If requested we include
27881 the tag name in a comment so that anyone reading the assembler output
27882 will know which tag is being set.
27883
27884 This function is not static because arm-c.c needs it too. */
27885
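/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1) emits
	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal
   where the trailing comment appears only when flag_verbose_asm or
   flag_debug_asm is set, '@' being the usual ARM ASM_COMMENT_START.  */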
27886 void
27887 arm_emit_eabi_attribute (const char *name, int num, int val)
27888 {
27889 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
27890 if (flag_verbose_asm || flag_debug_asm)
27891 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
27892 asm_fprintf (asm_out_file, "\n");
27893 }
27894
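/* Output assembly directives at the start of the file: unified-syntax
   selection, the .arch/.cpu and .fpu directives, and the EABI build
   attributes describing the floating-point model, alignment requirements,
   enum size and optimization goals.  */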
27895 static void
27896 arm_file_start (void)
27897 {
27898 int val;
27899
27900 if (TARGET_UNIFIED_ASM)
27901 asm_fprintf (asm_out_file, "\t.syntax unified\n");
27902
27903 if (TARGET_BPABI)
27904 {
27905 const char *fpu_name;
27906 if (arm_selected_arch)
27907 {
27908 /* armv7ve doesn't support any extensions. */
27909 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
27910 {
27911 /* Keep backward compatibility for assemblers
27912 which don't support armv7ve. */
27913 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
27914 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
27915 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
27916 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
27917 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
27918 }
27919 else
27920 {
27921 const char* pos = strchr (arm_selected_arch->name, '+');
27922 if (pos)
27923 {
27924 char buf[15];
27925 gcc_assert (strlen (arm_selected_arch->name)
27926 <= sizeof (buf) / sizeof (*pos));
27927 strncpy (buf, arm_selected_arch->name,
27928 (pos - arm_selected_arch->name) * sizeof (*pos));
27929 buf[pos - arm_selected_arch->name] = '\0';
27930 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
27931 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
27932 }
27933 else
27934 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
27935 }
27936 }
27937 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
27938 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
27939 else
27940 {
27941 const char* truncated_name
27942 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
27943 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
27944 }
27945
27946 if (TARGET_SOFT_FLOAT)
27947 {
27948 fpu_name = "softvfp";
27949 }
27950 else
27951 {
27952 fpu_name = arm_fpu_desc->name;
27953 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
27954 {
27955 if (TARGET_HARD_FLOAT)
27956 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
27957 if (TARGET_HARD_FLOAT_ABI)
27958 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
27959 }
27960 }
27961 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
27962
27963 /* Some of these attributes only apply when the corresponding features
27964 are used. However we don't have any easy way of figuring this out.
27965 Conservatively record the setting that would have been used. */
27966
27967 if (flag_rounding_math)
27968 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
27969
27970 if (!flag_unsafe_math_optimizations)
27971 {
27972 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
27973 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
27974 }
27975 if (flag_signaling_nans)
27976 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
27977
27978 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
27979 flag_finite_math_only ? 1 : 3);
27980
27981 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
27982 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
27983 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
27984 flag_short_enums ? 1 : 2);
27985
27986 /* Tag_ABI_optimization_goals. */
27987 if (optimize_size)
27988 val = 4;
27989 else if (optimize >= 2)
27990 val = 2;
27991 else if (optimize)
27992 val = 1;
27993 else
27994 val = 6;
27995 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
27996
27997 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
27998 unaligned_access);
27999
28000 if (arm_fp16_format)
28001 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
28002 (int) arm_fp16_format);
28003
28004 if (arm_lang_output_object_attributes_hook)
28005 arm_lang_output_object_attributes_hook();
28006 }
28007
28008 default_file_start ();
28009 }
28010
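/* Output code at the end of the file: the .note.GNU-stack marker when
   needed, and the per-register "bx rN" stubs referenced by
   thumb_call_via_reg for calls made from the text section.  */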
28011 static void
28012 arm_file_end (void)
28013 {
28014 int regno;
28015
28016 if (NEED_INDICATE_EXEC_STACK)
28017 /* Add .note.GNU-stack. */
28018 file_end_indicate_exec_stack ();
28019
28020 if (! thumb_call_reg_needed)
28021 return;
28022
28023 switch_to_section (text_section);
28024 asm_fprintf (asm_out_file, "\t.code 16\n");
28025 ASM_OUTPUT_ALIGN (asm_out_file, 1);
28026
28027 for (regno = 0; regno < LR_REGNUM; regno++)
28028 {
28029 rtx label = thumb_call_via_label[regno];
28030
28031 if (label != 0)
28032 {
28033 targetm.asm_out.internal_label (asm_out_file, "L",
28034 CODE_LABEL_NUMBER (label));
28035 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
28036 }
28037 }
28038 }
28039
28040 #ifndef ARM_PE
28041 /* Symbols in the text segment can be accessed without indirecting via the
28042 constant pool; it may take an extra binary operation, but this is still
28043 faster than indirecting via memory. Don't do this when not optimizing,
28044 since we won't be calculating all of the offsets necessary to do this
28045 simplification. */
28046
28047 static void
28048 arm_encode_section_info (tree decl, rtx rtl, int first)
28049 {
28050 if (optimize > 0 && TREE_CONSTANT (decl))
28051 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
28052
28053 default_encode_section_info (decl, rtl, first);
28054 }
28055 #endif /* !ARM_PE */
28056
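/* Output an internal label.  If the conditional-execution state machine is
   waiting for this label, reset it first so that no further instructions
   are conditionalized across it.  */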
28057 static void
28058 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
28059 {
28060 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
28061 && !strcmp (prefix, "L"))
28062 {
28063 arm_ccfsm_state = 0;
28064 arm_target_insn = NULL;
28065 }
28066 default_internal_label (stream, prefix, labelno);
28067 }
28068
28069 /* Output code to add DELTA to the first argument, and then jump
28070 to FUNCTION. Used for C++ multiple inheritance. */
28071 static void
28072 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
28073 HOST_WIDE_INT delta,
28074 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
28075 tree function)
28076 {
28077 static int thunk_label = 0;
28078 char label[256];
28079 char labelpc[256];
28080 int mi_delta = delta;
28081 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
28082 int shift = 0;
28083 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
28084 ? 1 : 0);
28085 if (mi_delta < 0)
28086 mi_delta = - mi_delta;
28087
28088 final_start_function (emit_barrier (), file, 1);
28089
28090 if (TARGET_THUMB1)
28091 {
28092 int labelno = thunk_label++;
28093 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
28094 /* Thunks are entered in ARM mode when available.  */
28095 if (TARGET_THUMB1_ONLY)
28096 {
28097 /* push r3 so we can use it as a temporary. */
28098 /* TODO: Omit this save if r3 is not used. */
28099 fputs ("\tpush {r3}\n", file);
28100 fputs ("\tldr\tr3, ", file);
28101 }
28102 else
28103 {
28104 fputs ("\tldr\tr12, ", file);
28105 }
28106 assemble_name (file, label);
28107 fputc ('\n', file);
28108 if (flag_pic)
28109 {
28110 /* If we are generating PIC, the ldr instruction below loads
28111 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
28112 the address of the add + 8, so we have:
28113
28114 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
28115 = target + 1.
28116
28117 Note that we have "+ 1" because some versions of GNU ld
28118 don't set the low bit of the result for R_ARM_REL32
28119 relocations against thumb function symbols.
28120 On ARMv6M this is +4, not +8. */
28121 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
28122 assemble_name (file, labelpc);
28123 fputs (":\n", file);
28124 if (TARGET_THUMB1_ONLY)
28125 {
28126 /* This is 2 insns after the start of the thunk, so we know it
28127 is 4-byte aligned. */
28128 fputs ("\tadd\tr3, pc, r3\n", file);
28129 fputs ("\tmov r12, r3\n", file);
28130 }
28131 else
28132 fputs ("\tadd\tr12, pc, r12\n", file);
28133 }
28134 else if (TARGET_THUMB1_ONLY)
28135 fputs ("\tmov r12, r3\n", file);
28136 }
28137 if (TARGET_THUMB1_ONLY)
28138 {
28139 if (mi_delta > 255)
28140 {
28141 fputs ("\tldr\tr3, ", file);
28142 assemble_name (file, label);
28143 fputs ("+4\n", file);
28144 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
28145 mi_op, this_regno, this_regno);
28146 }
28147 else if (mi_delta != 0)
28148 {
28149 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28150 mi_op, this_regno, this_regno,
28151 mi_delta);
28152 }
28153 }
28154 else
28155 {
28156 /* TODO: Use movw/movt for large constants when available. */
28157 while (mi_delta != 0)
28158 {
28159 if ((mi_delta & (3 << shift)) == 0)
28160 shift += 2;
28161 else
28162 {
28163 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
28164 mi_op, this_regno, this_regno,
28165 mi_delta & (0xff << shift));
28166 mi_delta &= ~(0xff << shift);
28167 shift += 8;
28168 }
28169 }
28170 }
28171 if (TARGET_THUMB1)
28172 {
28173 if (TARGET_THUMB1_ONLY)
28174 fputs ("\tpop\t{r3}\n", file);
28175
28176 fprintf (file, "\tbx\tr12\n");
28177 ASM_OUTPUT_ALIGN (file, 2);
28178 assemble_name (file, label);
28179 fputs (":\n", file);
28180 if (flag_pic)
28181 {
28182 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
28183 rtx tem = XEXP (DECL_RTL (function), 0);
28184 tem = plus_constant (GET_MODE (tem), tem, -7);
28185 tem = gen_rtx_MINUS (GET_MODE (tem),
28186 tem,
28187 gen_rtx_SYMBOL_REF (Pmode,
28188 ggc_strdup (labelpc)));
28189 assemble_integer (tem, 4, BITS_PER_WORD, 1);
28190 }
28191 else
28192 /* Output ".word .LTHUNKn". */
28193 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
28194
28195 if (TARGET_THUMB1_ONLY && mi_delta > 255)
28196 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
28197 }
28198 else
28199 {
28200 fputs ("\tb\t", file);
28201 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
28202 if (NEED_PLT_RELOC)
28203 fputs ("(PLT)", file);
28204 fputc ('\n', file);
28205 }
28206
28207 final_end_function ();
28208 }
28209
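/* Output CONST_VECTOR X to FILE as a single hexadecimal constant, printing
   the elements from the highest-numbered down to the lowest.  Returns 1.  */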
28210 int
28211 arm_emit_vector_const (FILE *file, rtx x)
28212 {
28213 int i;
28214 const char * pattern;
28215
28216 gcc_assert (GET_CODE (x) == CONST_VECTOR);
28217
28218 switch (GET_MODE (x))
28219 {
28220 case V2SImode: pattern = "%08x"; break;
28221 case V4HImode: pattern = "%04x"; break;
28222 case V8QImode: pattern = "%02x"; break;
28223 default: gcc_unreachable ();
28224 }
28225
28226 fprintf (file, "0x");
28227 for (i = CONST_VECTOR_NUNITS (x); i--;)
28228 {
28229 rtx element;
28230
28231 element = CONST_VECTOR_ELT (x, i);
28232 fprintf (file, pattern, INTVAL (element));
28233 }
28234
28235 return 1;
28236 }
28237
28238 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
28239 HFmode constant pool entries are actually loaded with ldr. */
28240 void
28241 arm_emit_fp16_const (rtx c)
28242 {
28243 REAL_VALUE_TYPE r;
28244 long bits;
28245
28246 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
28247 bits = real_to_target (NULL, &r, HFmode);
28248 if (WORDS_BIG_ENDIAN)
28249 assemble_zeros (2);
28250 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
28251 if (!WORDS_BIG_ENDIAN)
28252 assemble_zeros (2);
28253 }
28254
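/* Output assembly to load the iWMMXt GR register operands[0] from the memory
   operand operands[1].  If the address offset is in range a single wldrw is
   used; otherwise the base register is pushed, reused as a scratch to load
   the value, transferred to the GR register with tmcr, and then restored.  */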
28255 const char *
28256 arm_output_load_gr (rtx *operands)
28257 {
28258 rtx reg;
28259 rtx offset;
28260 rtx wcgr;
28261 rtx sum;
28262
28263 if (!MEM_P (operands [1])
28264 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
28265 || !REG_P (reg = XEXP (sum, 0))
28266 || !CONST_INT_P (offset = XEXP (sum, 1))
28267 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
28268 return "wldrw%?\t%0, %1";
28269
28270 /* Fix up an out-of-range load of a GR register. */
28271 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
28272 wcgr = operands[0];
28273 operands[0] = reg;
28274 output_asm_insn ("ldr%?\t%0, %1", operands);
28275
28276 operands[0] = wcgr;
28277 operands[1] = reg;
28278 output_asm_insn ("tmcr%?\t%0, %1", operands);
28279 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
28280
28281 return "";
28282 }
28283
28284 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
28285
28286 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
28287 named arg and all anonymous args onto the stack.
28288 XXX I know the prologue shouldn't be pushing registers, but it is faster
28289 that way. */
28290
28291 static void
28292 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
28293 enum machine_mode mode,
28294 tree type,
28295 int *pretend_size,
28296 int second_time ATTRIBUTE_UNUSED)
28297 {
28298 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
28299 int nregs;
28300
28301 cfun->machine->uses_anonymous_args = 1;
28302 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
28303 {
28304 nregs = pcum->aapcs_ncrn;
28305 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
28306 nregs++;
28307 }
28308 else
28309 nregs = pcum->nregs;
28310
28311 if (nregs < NUM_ARG_REGS)
28312 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
28313 }
28314
28315 /* We can't rely on the caller doing the proper promotion when
28316 using APCS or ATPCS. */
28317
28318 static bool
28319 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
28320 {
28321 return !TARGET_AAPCS_BASED;
28322 }
28323
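/* Implement TARGET_PROMOTE_FUNCTION_MODE: promote integer values narrower
   than a word to SImode.  */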
28324 static enum machine_mode
28325 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
28326 enum machine_mode mode,
28327 int *punsignedp ATTRIBUTE_UNUSED,
28328 const_tree fntype ATTRIBUTE_UNUSED,
28329 int for_return ATTRIBUTE_UNUSED)
28330 {
28331 if (GET_MODE_CLASS (mode) == MODE_INT
28332 && GET_MODE_SIZE (mode) < 4)
28333 return SImode;
28334
28335 return mode;
28336 }
28337
28338 /* AAPCS based ABIs use short enums by default. */
28339
28340 static bool
28341 arm_default_short_enums (void)
28342 {
28343 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
28344 }
28345
28346
28347 /* AAPCS requires that anonymous bitfields affect structure alignment. */
28348
28349 static bool
28350 arm_align_anon_bitfield (void)
28351 {
28352 return TARGET_AAPCS_BASED;
28353 }
28354
28355
28356 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
28357
28358 static tree
28359 arm_cxx_guard_type (void)
28360 {
28361 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
28362 }
28363
28364
28365 /* The EABI says test the least significant bit of a guard variable. */
28366
28367 static bool
28368 arm_cxx_guard_mask_bit (void)
28369 {
28370 return TARGET_AAPCS_BASED;
28371 }
28372
28373
28374 /* The EABI specifies that all array cookies are 8 bytes long. */
28375
28376 static tree
28377 arm_get_cookie_size (tree type)
28378 {
28379 tree size;
28380
28381 if (!TARGET_AAPCS_BASED)
28382 return default_cxx_get_cookie_size (type);
28383
28384 size = build_int_cst (sizetype, 8);
28385 return size;
28386 }
28387
28388
28389 /* The EABI says that array cookies should also contain the element size. */
28390
28391 static bool
28392 arm_cookie_has_size (void)
28393 {
28394 return TARGET_AAPCS_BASED;
28395 }
28396
28397
28398 /* The EABI says constructors and destructors should return a pointer to
28399 the object constructed/destroyed. */
28400
28401 static bool
28402 arm_cxx_cdtor_returns_this (void)
28403 {
28404 return TARGET_AAPCS_BASED;
28405 }
28406
28407 /* The EABI says that an inline function may never be the key
28408 method. */
28409
28410 static bool
28411 arm_cxx_key_method_may_be_inline (void)
28412 {
28413 return !TARGET_AAPCS_BASED;
28414 }
28415
28416 static void
28417 arm_cxx_determine_class_data_visibility (tree decl)
28418 {
28419 if (!TARGET_AAPCS_BASED
28420 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
28421 return;
28422
28423 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
28424 is exported. However, on systems without dynamic vague linkage,
28425 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
28426 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
28427 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
28428 else
28429 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
28430 DECL_VISIBILITY_SPECIFIED (decl) = 1;
28431 }
28432
28433 static bool
28434 arm_cxx_class_data_always_comdat (void)
28435 {
28436 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
28437 vague linkage if the class has no key function. */
28438 return !TARGET_AAPCS_BASED;
28439 }
28440
28441
28442 /* The EABI says __aeabi_atexit should be used to register static
28443 destructors. */
28444
28445 static bool
28446 arm_cxx_use_aeabi_atexit (void)
28447 {
28448 return TARGET_AAPCS_BASED;
28449 }
28450
28451
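/* Set the return address of the current function to SOURCE.  If LR was not
   saved, move SOURCE into LR directly; otherwise store it into the stack
   slot where LR was saved, using SCRATCH to form the address if the offset
   is out of range.  */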
28452 void
28453 arm_set_return_address (rtx source, rtx scratch)
28454 {
28455 arm_stack_offsets *offsets;
28456 HOST_WIDE_INT delta;
28457 rtx addr;
28458 unsigned long saved_regs;
28459
28460 offsets = arm_get_frame_offsets ();
28461 saved_regs = offsets->saved_regs_mask;
28462
28463 if ((saved_regs & (1 << LR_REGNUM)) == 0)
28464 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28465 else
28466 {
28467 if (frame_pointer_needed)
28468 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
28469 else
28470 {
28471 /* LR will be the first saved register. */
28472 delta = offsets->outgoing_args - (offsets->frame + 4);
28473
28474
28475 if (delta >= 4096)
28476 {
28477 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
28478 GEN_INT (delta & ~4095)));
28479 addr = scratch;
28480 delta &= 4095;
28481 }
28482 else
28483 addr = stack_pointer_rtx;
28484
28485 addr = plus_constant (Pmode, addr, delta);
28486 }
28487 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28488 }
28489 }
28490
28491
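/* Thumb counterpart of arm_set_return_address: store SOURCE into the saved
   LR slot (addressed from the frame pointer or stack pointer, using SCRATCH
   for large offsets), or into LR itself if LR was not saved.  */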
28492 void
28493 thumb_set_return_address (rtx source, rtx scratch)
28494 {
28495 arm_stack_offsets *offsets;
28496 HOST_WIDE_INT delta;
28497 HOST_WIDE_INT limit;
28498 int reg;
28499 rtx addr;
28500 unsigned long mask;
28501
28502 emit_use (source);
28503
28504 offsets = arm_get_frame_offsets ();
28505 mask = offsets->saved_regs_mask;
28506 if (mask & (1 << LR_REGNUM))
28507 {
28508 limit = 1024;
28509 /* Find the saved regs. */
28510 if (frame_pointer_needed)
28511 {
28512 delta = offsets->soft_frame - offsets->saved_args;
28513 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
28514 if (TARGET_THUMB1)
28515 limit = 128;
28516 }
28517 else
28518 {
28519 delta = offsets->outgoing_args - offsets->saved_args;
28520 reg = SP_REGNUM;
28521 }
28522 /* Allow for the stack frame. */
28523 if (TARGET_THUMB1 && TARGET_BACKTRACE)
28524 delta -= 16;
28525 /* The link register is always the first saved register. */
28526 delta -= 4;
28527
28528 /* Construct the address. */
28529 addr = gen_rtx_REG (SImode, reg);
28530 if (delta > limit)
28531 {
28532 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
28533 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
28534 addr = scratch;
28535 }
28536 else
28537 addr = plus_constant (Pmode, addr, delta);
28538
28539 emit_move_insn (gen_frame_mem (Pmode, addr), source);
28540 }
28541 else
28542 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
28543 }
28544
28545 /* Implements target hook vector_mode_supported_p. */
28546 bool
28547 arm_vector_mode_supported_p (enum machine_mode mode)
28548 {
28549 /* Neon also supports V2SImode, etc. listed in the clause below. */
28550 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
28551 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
28552 return true;
28553
28554 if ((TARGET_NEON || TARGET_IWMMXT)
28555 && ((mode == V2SImode)
28556 || (mode == V4HImode)
28557 || (mode == V8QImode)))
28558 return true;
28559
28560 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
28561 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
28562 || mode == V2HAmode))
28563 return true;
28564
28565 return false;
28566 }
28567
28568 /* Implements target hook array_mode_supported_p. */
28569
28570 static bool
28571 arm_array_mode_supported_p (enum machine_mode mode,
28572 unsigned HOST_WIDE_INT nelems)
28573 {
28574 if (TARGET_NEON
28575 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
28576 && (nelems >= 2 && nelems <= 4))
28577 return true;
28578
28579 return false;
28580 }
28581
28582 /* Use the option -mvectorize-with-neon-double to override the use of quadword
28583 registers when autovectorizing for Neon, at least until multiple vector
28584 widths are supported properly by the middle-end. */
28585
28586 static enum machine_mode
28587 arm_preferred_simd_mode (enum machine_mode mode)
28588 {
28589 if (TARGET_NEON)
28590 switch (mode)
28591 {
28592 case SFmode:
28593 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
28594 case SImode:
28595 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
28596 case HImode:
28597 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
28598 case QImode:
28599 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
28600 case DImode:
28601 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28602 return V2DImode;
28603 break;
28604
28605 default:;
28606 }
28607
28608 if (TARGET_REALLY_IWMMXT)
28609 switch (mode)
28610 {
28611 case SImode:
28612 return V2SImode;
28613 case HImode:
28614 return V4HImode;
28615 case QImode:
28616 return V8QImode;
28617
28618 default:;
28619 }
28620
28621 return word_mode;
28622 }
28623
28624 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
28625
28626 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
28627 using r0-r4 for function arguments, r7 for the stack frame and not have
28628 enough left over to do doubleword arithmetic. For Thumb-2 all the
28629 potentially problematic instructions accept high registers so this is not
28630 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
28631 that require many low registers. */
28632 static bool
28633 arm_class_likely_spilled_p (reg_class_t rclass)
28634 {
28635 if ((TARGET_THUMB1 && rclass == LO_REGS)
28636 || rclass == CC_REG)
28637 return true;
28638
28639 return false;
28640 }
28641
28642 /* Implements target hook small_register_classes_for_mode_p. */
28643 bool
28644 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
28645 {
28646 return TARGET_THUMB1;
28647 }
28648
28649 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
28650 ARM insns and therefore guarantee that the shift count is modulo 256.
28651 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
28652 guarantee no particular behavior for out-of-range counts. */
28653
28654 static unsigned HOST_WIDE_INT
28655 arm_shift_truncation_mask (enum machine_mode mode)
28656 {
28657 return mode == SImode ? 255 : 0;
28658 }
28659
28660
28661 /* Map internal gcc register numbers to DWARF2 register numbers. */
28662
28663 unsigned int
28664 arm_dbx_register_number (unsigned int regno)
28665 {
28666 if (regno < 16)
28667 return regno;
28668
28669 if (IS_VFP_REGNUM (regno))
28670 {
28671 /* See comment in arm_dwarf_register_span. */
28672 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28673 return 64 + regno - FIRST_VFP_REGNUM;
28674 else
28675 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
28676 }
28677
28678 if (IS_IWMMXT_GR_REGNUM (regno))
28679 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
28680
28681 if (IS_IWMMXT_REGNUM (regno))
28682 return 112 + regno - FIRST_IWMMXT_REGNUM;
28683
28684 gcc_unreachable ();
28685 }
28686
28687 /* Dwarf models VFPv3 registers as 32 64-bit registers.
28688 GCC models them as 64 32-bit registers, so we need to describe this to
28689 the DWARF generation code. Other registers can use the default. */
28690 static rtx
28691 arm_dwarf_register_span (rtx rtl)
28692 {
28693 enum machine_mode mode;
28694 unsigned regno;
28695 rtx parts[8];
28696 int nregs;
28697 int i;
28698
28699 regno = REGNO (rtl);
28700 if (!IS_VFP_REGNUM (regno))
28701 return NULL_RTX;
28702
28703 /* XXX FIXME: The EABI defines two VFP register ranges:
28704 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
28705 256-287: D0-D31
28706 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
28707 corresponding D register. Until GDB supports this, we shall use the
28708 legacy encodings. We also use these encodings for D0-D15 for
28709 compatibility with older debuggers. */
28710 mode = GET_MODE (rtl);
28711 if (GET_MODE_SIZE (mode) < 8)
28712 return NULL_RTX;
28713
28714 if (VFP_REGNO_OK_FOR_SINGLE (regno))
28715 {
28716 nregs = GET_MODE_SIZE (mode) / 4;
28717 for (i = 0; i < nregs; i += 2)
28718 if (TARGET_BIG_END)
28719 {
28720 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
28721 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
28722 }
28723 else
28724 {
28725 parts[i] = gen_rtx_REG (SImode, regno + i);
28726 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
28727 }
28728 }
28729 else
28730 {
28731 nregs = GET_MODE_SIZE (mode) / 8;
28732 for (i = 0; i < nregs; i++)
28733 parts[i] = gen_rtx_REG (DImode, regno + i);
28734 }
28735
28736 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
28737 }
28738
28739 #if ARM_UNWIND_INFO
28740 /* Emit unwind directives for a store-multiple instruction or stack pointer
28741 push during alignment.
28742 These should only ever be generated by the function prologue code, so
28743 expect them to have a particular form.
28744 The store-multiple instruction sometimes pushes pc as the last register,
28745 although it should not be tracked into unwind information, or for -Os
28746 sometimes pushes some dummy registers before the first register that needs
28747 to be tracked in unwind information; such dummy registers are there just
28748 to avoid separate stack adjustment, and will not be restored in the
28749 epilogue. */
28750
28751 static void
28752 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
28753 {
28754 int i;
28755 HOST_WIDE_INT offset;
28756 HOST_WIDE_INT nregs;
28757 int reg_size;
28758 unsigned reg;
28759 unsigned lastreg;
28760 unsigned padfirst = 0, padlast = 0;
28761 rtx e;
28762
28763 e = XVECEXP (p, 0, 0);
28764 gcc_assert (GET_CODE (e) == SET);
28765
28766 /* First insn will adjust the stack pointer. */
28767 gcc_assert (GET_CODE (e) == SET
28768 && REG_P (SET_DEST (e))
28769 && REGNO (SET_DEST (e)) == SP_REGNUM
28770 && GET_CODE (SET_SRC (e)) == PLUS);
28771
28772 offset = -INTVAL (XEXP (SET_SRC (e), 1));
28773 nregs = XVECLEN (p, 0) - 1;
28774 gcc_assert (nregs);
28775
28776 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
28777 if (reg < 16)
28778 {
28779 /* For -Os dummy registers can be pushed at the beginning to
28780 avoid separate stack pointer adjustment. */
28781 e = XVECEXP (p, 0, 1);
28782 e = XEXP (SET_DEST (e), 0);
28783 if (GET_CODE (e) == PLUS)
28784 padfirst = INTVAL (XEXP (e, 1));
28785 gcc_assert (padfirst == 0 || optimize_size);
28786 /* The function prologue may also push pc, but not annotate it as it is
28787 never restored. We turn this into a stack pointer adjustment. */
28788 e = XVECEXP (p, 0, nregs);
28789 e = XEXP (SET_DEST (e), 0);
28790 if (GET_CODE (e) == PLUS)
28791 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
28792 else
28793 padlast = offset - 4;
28794 gcc_assert (padlast == 0 || padlast == 4);
28795 if (padlast == 4)
28796 fprintf (asm_out_file, "\t.pad #4\n");
28797 reg_size = 4;
28798 fprintf (asm_out_file, "\t.save {");
28799 }
28800 else if (IS_VFP_REGNUM (reg))
28801 {
28802 reg_size = 8;
28803 fprintf (asm_out_file, "\t.vsave {");
28804 }
28805 else
28806 /* Unknown register type. */
28807 gcc_unreachable ();
28808
28809 /* If the stack increment doesn't match the size of the saved registers,
28810 something has gone horribly wrong. */
28811 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
28812
28813 offset = padfirst;
28814 lastreg = 0;
28815 /* The remaining insns will describe the stores. */
28816 for (i = 1; i <= nregs; i++)
28817 {
28818 /* Expect (set (mem <addr>) (reg)).
28819 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
28820 e = XVECEXP (p, 0, i);
28821 gcc_assert (GET_CODE (e) == SET
28822 && MEM_P (SET_DEST (e))
28823 && REG_P (SET_SRC (e)));
28824
28825 reg = REGNO (SET_SRC (e));
28826 gcc_assert (reg >= lastreg);
28827
28828 if (i != 1)
28829 fprintf (asm_out_file, ", ");
28830 /* We can't use %r for vfp because we need to use the
28831 double precision register names. */
28832 if (IS_VFP_REGNUM (reg))
28833 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
28834 else
28835 asm_fprintf (asm_out_file, "%r", reg);
28836
28837 #ifdef ENABLE_CHECKING
28838 /* Check that the addresses are consecutive. */
28839 e = XEXP (SET_DEST (e), 0);
28840 if (GET_CODE (e) == PLUS)
28841 gcc_assert (REG_P (XEXP (e, 0))
28842 && REGNO (XEXP (e, 0)) == SP_REGNUM
28843 && CONST_INT_P (XEXP (e, 1))
28844 && offset == INTVAL (XEXP (e, 1)));
28845 else
28846 gcc_assert (i == 1
28847 && REG_P (e)
28848 && REGNO (e) == SP_REGNUM);
28849 offset += reg_size;
28850 #endif
28851 }
28852 fprintf (asm_out_file, "}\n");
28853 if (padfirst)
28854 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
28855 }
28856
28857 /* Emit unwind directives for a SET. */
28858
28859 static void
28860 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
28861 {
28862 rtx e0;
28863 rtx e1;
28864 unsigned reg;
28865
28866 e0 = XEXP (p, 0);
28867 e1 = XEXP (p, 1);
28868 switch (GET_CODE (e0))
28869 {
28870 case MEM:
28871 /* Pushing a single register. */
28872 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
28873 || !REG_P (XEXP (XEXP (e0, 0), 0))
28874 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
28875 abort ();
28876
28877 asm_fprintf (asm_out_file, "\t.save ");
28878 if (IS_VFP_REGNUM (REGNO (e1)))
28879 asm_fprintf(asm_out_file, "{d%d}\n",
28880 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
28881 else
28882 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
28883 break;
28884
28885 case REG:
28886 if (REGNO (e0) == SP_REGNUM)
28887 {
28888 /* A stack increment. */
28889 if (GET_CODE (e1) != PLUS
28890 || !REG_P (XEXP (e1, 0))
28891 || REGNO (XEXP (e1, 0)) != SP_REGNUM
28892 || !CONST_INT_P (XEXP (e1, 1)))
28893 abort ();
28894
28895 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
28896 -INTVAL (XEXP (e1, 1)));
28897 }
28898 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
28899 {
28900 HOST_WIDE_INT offset;
28901
28902 if (GET_CODE (e1) == PLUS)
28903 {
28904 if (!REG_P (XEXP (e1, 0))
28905 || !CONST_INT_P (XEXP (e1, 1)))
28906 abort ();
28907 reg = REGNO (XEXP (e1, 0));
28908 offset = INTVAL (XEXP (e1, 1));
28909 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
28910 HARD_FRAME_POINTER_REGNUM, reg,
28911 offset);
28912 }
28913 else if (REG_P (e1))
28914 {
28915 reg = REGNO (e1);
28916 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
28917 HARD_FRAME_POINTER_REGNUM, reg);
28918 }
28919 else
28920 abort ();
28921 }
28922 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
28923 {
28924 /* Move from sp to reg. */
28925 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
28926 }
28927 else if (GET_CODE (e1) == PLUS
28928 && REG_P (XEXP (e1, 0))
28929 && REGNO (XEXP (e1, 0)) == SP_REGNUM
28930 && CONST_INT_P (XEXP (e1, 1)))
28931 {
28932 /* Set reg to offset from sp. */
28933 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
28934 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
28935 }
28936 else
28937 abort ();
28938 break;
28939
28940 default:
28941 abort ();
28942 }
28943 }
28944
28945
28946 /* Emit unwind directives for the given insn. */
28947
28948 static void
28949 arm_unwind_emit (FILE * asm_out_file, rtx insn)
28950 {
28951 rtx note, pat;
28952 bool handled_one = false;
28953
28954 if (arm_except_unwind_info (&global_options) != UI_TARGET)
28955 return;
28956
28957 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
28958 && (TREE_NOTHROW (current_function_decl)
28959 || crtl->all_throwers_are_sibcalls))
28960 return;
28961
28962 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
28963 return;
28964
28965 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
28966 {
28967 switch (REG_NOTE_KIND (note))
28968 {
28969 case REG_FRAME_RELATED_EXPR:
28970 pat = XEXP (note, 0);
28971 goto found;
28972
28973 case REG_CFA_REGISTER:
28974 pat = XEXP (note, 0);
28975 if (pat == NULL)
28976 {
28977 pat = PATTERN (insn);
28978 if (GET_CODE (pat) == PARALLEL)
28979 pat = XVECEXP (pat, 0, 0);
28980 }
28981
28982 /* Only emitted for IS_STACKALIGN re-alignment. */
28983 {
28984 rtx dest, src;
28985 unsigned reg;
28986
28987 src = SET_SRC (pat);
28988 dest = SET_DEST (pat);
28989
28990 gcc_assert (src == stack_pointer_rtx);
28991 reg = REGNO (dest);
28992 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
28993 reg + 0x90, reg);
28994 }
28995 handled_one = true;
28996 break;
28997
28998 /* The INSN is generated in the epilogue. It is marked RTX_FRAME_RELATED_P
28999 to get correct DWARF information for shrink-wrapping. We should not
29000 emit unwind information for it because these notes are used either for
29001 pretend arguments or to adjust sp and restore registers from the
29002 stack. */
29003 case REG_CFA_DEF_CFA:
29004 case REG_CFA_ADJUST_CFA:
29005 case REG_CFA_RESTORE:
29006 return;
29007
29008 case REG_CFA_EXPRESSION:
29009 case REG_CFA_OFFSET:
29010 /* ??? Only handling here what we actually emit. */
29011 gcc_unreachable ();
29012
29013 default:
29014 break;
29015 }
29016 }
29017 if (handled_one)
29018 return;
29019 pat = PATTERN (insn);
29020 found:
29021
29022 switch (GET_CODE (pat))
29023 {
29024 case SET:
29025 arm_unwind_emit_set (asm_out_file, pat);
29026 break;
29027
29028 case SEQUENCE:
29029 /* Store multiple. */
29030 arm_unwind_emit_sequence (asm_out_file, pat);
29031 break;
29032
29033 default:
29034 abort();
29035 }
29036 }
29037
29038
29039 /* Output a reference from a function exception table to the type_info
29040 object X. The EABI specifies that the symbol should be relocated by
29041 an R_ARM_TARGET2 relocation. */
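/* For example, for a catch clause matching a class Foo, the entry emitted
   here would typically look like

       .word   _ZTI3Foo(TARGET2)

   where _ZTI3Foo is the mangled name of Foo's type_info object and the
   (TARGET2) suffix requests an R_ARM_TARGET2 relocation from the
   assembler.  */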
29042
29043 static bool
29044 arm_output_ttype (rtx x)
29045 {
29046 fputs ("\t.word\t", asm_out_file);
29047 output_addr_const (asm_out_file, x);
29048 /* Use special relocations for symbol references. */
29049 if (!CONST_INT_P (x))
29050 fputs ("(TARGET2)", asm_out_file);
29051 fputc ('\n', asm_out_file);
29052
29053 return TRUE;
29054 }
29055
29056 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
29057
29058 static void
29059 arm_asm_emit_except_personality (rtx personality)
29060 {
29061 fputs ("\t.personality\t", asm_out_file);
29062 output_addr_const (asm_out_file, personality);
29063 fputc ('\n', asm_out_file);
29064 }
29065
29066 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
29067
29068 static void
29069 arm_asm_init_sections (void)
29070 {
29071 exception_section = get_unnamed_section (0, output_section_asm_op,
29072 "\t.handlerdata");
29073 }
29074 #endif /* ARM_UNWIND_INFO */
29075
29076 /* Output unwind directives for the start/end of a function. */
29077
29078 void
29079 arm_output_fn_unwind (FILE * f, bool prologue)
29080 {
29081 if (arm_except_unwind_info (&global_options) != UI_TARGET)
29082 return;
29083
29084 if (prologue)
29085 fputs ("\t.fnstart\n", f);
29086 else
29087 {
29088 /* If this function will never be unwound, then mark it as such.
29089 The same condition is used in arm_unwind_emit to suppress
29090 the frame annotations. */
29091 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
29092 && (TREE_NOTHROW (current_function_decl)
29093 || crtl->all_throwers_are_sibcalls))
29094 fputs("\t.cantunwind\n", f);
29095
29096 fputs ("\t.fnend\n", f);
29097 }
29098 }
29099
29100 static bool
29101 arm_emit_tls_decoration (FILE *fp, rtx x)
29102 {
29103 enum tls_reloc reloc;
29104 rtx val;
29105
29106 val = XVECEXP (x, 0, 0);
29107 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
29108
29109 output_addr_const (fp, val);
29110
29111 switch (reloc)
29112 {
29113 case TLS_GD32:
29114 fputs ("(tlsgd)", fp);
29115 break;
29116 case TLS_LDM32:
29117 fputs ("(tlsldm)", fp);
29118 break;
29119 case TLS_LDO32:
29120 fputs ("(tlsldo)", fp);
29121 break;
29122 case TLS_IE32:
29123 fputs ("(gottpoff)", fp);
29124 break;
29125 case TLS_LE32:
29126 fputs ("(tpoff)", fp);
29127 break;
29128 case TLS_DESCSEQ:
29129 fputs ("(tlsdesc)", fp);
29130 break;
29131 default:
29132 gcc_unreachable ();
29133 }
29134
29135 switch (reloc)
29136 {
29137 case TLS_GD32:
29138 case TLS_LDM32:
29139 case TLS_IE32:
29140 case TLS_DESCSEQ:
29141 fputs (" + (. - ", fp);
29142 output_addr_const (fp, XVECEXP (x, 0, 2));
29143 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
29144 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
29145 output_addr_const (fp, XVECEXP (x, 0, 3));
29146 fputc (')', fp);
29147 break;
29148 default:
29149 break;
29150 }
29151
29152 return TRUE;
29153 }
29154
29155 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
29156
29157 static void
29158 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
29159 {
29160 gcc_assert (size == 4);
29161 fputs ("\t.word\t", file);
29162 output_addr_const (file, x);
29163 fputs ("(tlsldo)", file);
29164 }
29165
29166 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
29167
29168 static bool
29169 arm_output_addr_const_extra (FILE *fp, rtx x)
29170 {
29171 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
29172 return arm_emit_tls_decoration (fp, x);
29173 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
29174 {
29175 char label[256];
29176 int labelno = INTVAL (XVECEXP (x, 0, 0));
29177
29178 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
29179 assemble_name_raw (fp, label);
29180
29181 return TRUE;
29182 }
29183 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
29184 {
29185 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
29186 if (GOT_PCREL)
29187 fputs ("+.", fp);
29188 fputs ("-(", fp);
29189 output_addr_const (fp, XVECEXP (x, 0, 0));
29190 fputc (')', fp);
29191 return TRUE;
29192 }
29193 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
29194 {
29195 output_addr_const (fp, XVECEXP (x, 0, 0));
29196 if (GOT_PCREL)
29197 fputs ("+.", fp);
29198 fputs ("-(", fp);
29199 output_addr_const (fp, XVECEXP (x, 0, 1));
29200 fputc (')', fp);
29201 return TRUE;
29202 }
29203 else if (GET_CODE (x) == CONST_VECTOR)
29204 return arm_emit_vector_const (fp, x);
29205
29206 return FALSE;
29207 }
29208
29209 /* Output assembly for a shift instruction.
29210 SET_FLAGS determines how the instruction modifies the condition codes.
29211 0 - Do not set condition codes.
29212 1 - Set condition codes.
29213 2 - Use smallest instruction. */
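/* A sketch of typical output (assuming unified syntax and a left shift
   whose amount lives in operand 2): with SET_FLAGS == 0 the emitted
   instruction is along the lines of "lsl r0, r1, r2", while
   SET_FLAGS == 1 produces the flag-setting form "lsls r0, r1, r2".  */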
29214 const char *
29215 arm_output_shift(rtx * operands, int set_flags)
29216 {
29217 char pattern[100];
29218 static const char flag_chars[3] = {'?', '.', '!'};
29219 const char *shift;
29220 HOST_WIDE_INT val;
29221 char c;
29222
29223 c = flag_chars[set_flags];
29224 if (TARGET_UNIFIED_ASM)
29225 {
29226 shift = shift_op(operands[3], &val);
29227 if (shift)
29228 {
29229 if (val != -1)
29230 operands[2] = GEN_INT(val);
29231 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
29232 }
29233 else
29234 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
29235 }
29236 else
29237 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
29238 output_asm_insn (pattern, operands);
29239 return "";
29240 }
29241
29242 /* Output assembly for a WMMX immediate shift instruction. */
29243 const char *
29244 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
29245 {
29246 int shift = INTVAL (operands[2]);
29247 char templ[50];
29248 enum machine_mode opmode = GET_MODE (operands[0]);
29249
29250 gcc_assert (shift >= 0);
29251
29252 /* If the shift value in the register versions is > 63 (for D qualifier),
29253 31 (for W qualifier) or 15 (for H qualifier), handle it specially. */
29254 if (((opmode == V4HImode) && (shift > 15))
29255 || ((opmode == V2SImode) && (shift > 31))
29256 || ((opmode == DImode) && (shift > 63)))
29257 {
29258 if (wror_or_wsra)
29259 {
29260 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29261 output_asm_insn (templ, operands);
29262 if (opmode == DImode)
29263 {
29264 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
29265 output_asm_insn (templ, operands);
29266 }
29267 }
29268 else
29269 {
29270 /* The destination register will contain all zeros. */
29271 sprintf (templ, "wzero\t%%0");
29272 output_asm_insn (templ, operands);
29273 }
29274 return "";
29275 }
29276
29277 if ((opmode == DImode) && (shift > 32))
29278 {
29279 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
29280 output_asm_insn (templ, operands);
29281 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
29282 output_asm_insn (templ, operands);
29283 }
29284 else
29285 {
29286 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
29287 output_asm_insn (templ, operands);
29288 }
29289 return "";
29290 }
29291
29292 /* Output assembly for a WMMX tinsr instruction. */
29293 const char *
29294 arm_output_iwmmxt_tinsr (rtx *operands)
29295 {
29296 int mask = INTVAL (operands[3]);
29297 int i;
29298 char templ[50];
29299 int units = mode_nunits[GET_MODE (operands[0])];
29300 gcc_assert ((mask & (mask - 1)) == 0);
29301 for (i = 0; i < units; ++i)
29302 {
29303 if ((mask & 0x01) == 1)
29304 {
29305 break;
29306 }
29307 mask >>= 1;
29308 }
29309 gcc_assert (i < units);
29310 {
29311 switch (GET_MODE (operands[0]))
29312 {
29313 case V8QImode:
29314 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
29315 break;
29316 case V4HImode:
29317 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
29318 break;
29319 case V2SImode:
29320 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
29321 break;
29322 default:
29323 gcc_unreachable ();
29324 break;
29325 }
29326 output_asm_insn (templ, operands);
29327 }
29328 return "";
29329 }
29330
29331 /* Output a Thumb-1 casesi dispatch sequence. */
29332 const char *
29333 thumb1_output_casesi (rtx *operands)
29334 {
29335 rtx diff_vec = PATTERN (NEXT_INSN (operands[0]));
29336
29337 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29338
29339 switch (GET_MODE(diff_vec))
29340 {
29341 case QImode:
29342 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29343 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
29344 case HImode:
29345 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
29346 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
29347 case SImode:
29348 return "bl\t%___gnu_thumb1_case_si";
29349 default:
29350 gcc_unreachable ();
29351 }
29352 }
29353
29354 /* Output a Thumb-2 casesi instruction. */
29355 const char *
29356 thumb2_output_casesi (rtx *operands)
29357 {
29358 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
29359
29360 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
29361
29362 output_asm_insn ("cmp\t%0, %1", operands);
29363 output_asm_insn ("bhi\t%l3", operands);
29364 switch (GET_MODE(diff_vec))
29365 {
29366 case QImode:
29367 return "tbb\t[%|pc, %0]";
29368 case HImode:
29369 return "tbh\t[%|pc, %0, lsl #1]";
29370 case SImode:
29371 if (flag_pic)
29372 {
29373 output_asm_insn ("adr\t%4, %l2", operands);
29374 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
29375 output_asm_insn ("add\t%4, %4, %5", operands);
29376 return "bx\t%4";
29377 }
29378 else
29379 {
29380 output_asm_insn ("adr\t%4, %l2", operands);
29381 return "ldr\t%|pc, [%4, %0, lsl #2]";
29382 }
29383 default:
29384 gcc_unreachable ();
29385 }
29386 }
29387
29388 /* Most ARM cores are single issue, but some newer ones can dual issue.
29389 The scheduler descriptions rely on this being correct. */
29390 static int
29391 arm_issue_rate (void)
29392 {
29393 switch (arm_tune)
29394 {
29395 case cortexa15:
29396 case cortexa57:
29397 return 3;
29398
29399 case cortexr4:
29400 case cortexr4f:
29401 case cortexr5:
29402 case genericv7a:
29403 case cortexa5:
29404 case cortexa7:
29405 case cortexa8:
29406 case cortexa9:
29407 case cortexa12:
29408 case cortexa53:
29409 case fa726te:
29410 case marvell_pj4:
29411 return 2;
29412
29413 default:
29414 return 1;
29415 }
29416 }
29417
29418 /* A table and a function to perform ARM-specific name mangling for
29419 NEON vector types in order to conform to the AAPCS (see "Procedure
29420 Call Standard for the ARM Architecture", Appendix A). To qualify
29421 for emission with the mangled names defined in that document, a
29422 vector type must not only be of the correct mode but also be
29423 composed of NEON vector element types (e.g. __builtin_neon_qi). */
29424 typedef struct
29425 {
29426 enum machine_mode mode;
29427 const char *element_type_name;
29428 const char *aapcs_name;
29429 } arm_mangle_map_entry;
29430
29431 static arm_mangle_map_entry arm_mangle_map[] = {
29432 /* 64-bit containerized types. */
29433 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
29434 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
29435 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
29436 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
29437 { V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
29438 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
29439 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
29440 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
29441 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
29442 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
29443
29444 /* 128-bit containerized types. */
29445 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
29446 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
29447 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
29448 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
29449 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
29450 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
29451 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
29452 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
29453 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
29454 { VOIDmode, NULL, NULL }
29455 };
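/* As a worked example of the table above: the NEON type int8x8_t is a
   V8QImode vector of __builtin_neon_qi elements, so a parameter of that
   type is mangled as 15__simd64_int8_t; a function such as
   "void f (int8x8_t)" would typically come out as _Z1f15__simd64_int8_t.  */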
29456
29457 const char *
29458 arm_mangle_type (const_tree type)
29459 {
29460 arm_mangle_map_entry *pos = arm_mangle_map;
29461
29462 /* The ARM ABI documents (10th October 2008) say that "__va_list"
29463 has to be mangled as if it is in the "std" namespace. */
29464 if (TARGET_AAPCS_BASED
29465 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
29466 return "St9__va_list";
29467
29468 /* Half-precision float. */
29469 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
29470 return "Dh";
29471
29472 if (TREE_CODE (type) != VECTOR_TYPE)
29473 return NULL;
29474
29475 /* Check the mode of the vector type, and the name of the vector
29476 element type, against the table. */
29477 while (pos->mode != VOIDmode)
29478 {
29479 tree elt_type = TREE_TYPE (type);
29480
29481 if (pos->mode == TYPE_MODE (type)
29482 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
29483 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
29484 pos->element_type_name))
29485 return pos->aapcs_name;
29486
29487 pos++;
29488 }
29489
29490 /* Use the default mangling for unrecognized (possibly user-defined)
29491 vector types. */
29492 return NULL;
29493 }
29494
29495 /* Order of allocation of core registers for Thumb: this allocation is
29496 written over the corresponding initial entries of the array
29497 initialized with REG_ALLOC_ORDER. We allocate all low registers
29498 first. Saving and restoring a low register is usually cheaper than
29499 using a call-clobbered high register. */
29500
29501 static const int thumb_core_reg_alloc_order[] =
29502 {
29503 3, 2, 1, 0, 4, 5, 6, 7,
29504 14, 12, 8, 9, 10, 11
29505 };
29506
29507 /* Adjust register allocation order when compiling for Thumb. */
29508
29509 void
29510 arm_order_regs_for_local_alloc (void)
29511 {
29512 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
29513 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
29514 if (TARGET_THUMB)
29515 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
29516 sizeof (thumb_core_reg_alloc_order));
29517 }
29518
29519 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
29520
29521 bool
29522 arm_frame_pointer_required (void)
29523 {
29524 return (cfun->has_nonlocal_label
29525 || SUBTARGET_FRAME_POINTER_REQUIRED
29526 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
29527 }
29528
29529 /* Thumb-1 is the only variant that lacks conditional execution, so
29530 return true unless the target is Thumb-1. */
29531 static bool
29532 arm_have_conditional_execution (void)
29533 {
29534 return !TARGET_THUMB1;
29535 }
29536
29537 tree
29538 arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
29539 {
29540 enum machine_mode in_mode, out_mode;
29541 int in_n, out_n;
29542
29543 if (TREE_CODE (type_out) != VECTOR_TYPE
29544 || TREE_CODE (type_in) != VECTOR_TYPE
29545 || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations))
29546 return NULL_TREE;
29547
29548 out_mode = TYPE_MODE (TREE_TYPE (type_out));
29549 out_n = TYPE_VECTOR_SUBPARTS (type_out);
29550 in_mode = TYPE_MODE (TREE_TYPE (type_in));
29551 in_n = TYPE_VECTOR_SUBPARTS (type_in);
29552
29553 /* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the
29554 decl of the vectorized builtin for the appropriate vector mode.
29555 NULL_TREE is returned if no such builtin is available. */
29556 #undef ARM_CHECK_BUILTIN_MODE
29557 #define ARM_CHECK_BUILTIN_MODE(C) \
29558 (out_mode == SFmode && out_n == C \
29559 && in_mode == SFmode && in_n == C)
29560
29561 #undef ARM_FIND_VRINT_VARIANT
29562 #define ARM_FIND_VRINT_VARIANT(N) \
29563 (ARM_CHECK_BUILTIN_MODE (2) \
29564 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \
29565 : (ARM_CHECK_BUILTIN_MODE (4) \
29566 ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \
29567 : NULL_TREE))
29568
29569 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
29570 {
29571 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
29572 switch (fn)
29573 {
29574 case BUILT_IN_FLOORF:
29575 return ARM_FIND_VRINT_VARIANT (vrintm);
29576 case BUILT_IN_CEILF:
29577 return ARM_FIND_VRINT_VARIANT (vrintp);
29578 case BUILT_IN_TRUNCF:
29579 return ARM_FIND_VRINT_VARIANT (vrintz);
29580 case BUILT_IN_ROUNDF:
29581 return ARM_FIND_VRINT_VARIANT (vrinta);
29582 default:
29583 return NULL_TREE;
29584 }
29585 }
29586 return NULL_TREE;
29587 }
29588 #undef ARM_CHECK_BUILTIN_MODE
29589 #undef ARM_FIND_VRINT_VARIANT
29590
29591 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
29592 static HOST_WIDE_INT
29593 arm_vector_alignment (const_tree type)
29594 {
29595 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
29596
29597 if (TARGET_AAPCS_BASED)
29598 align = MIN (align, 64);
29599
29600 return align;
29601 }
29602
29603 static unsigned int
29604 arm_autovectorize_vector_sizes (void)
29605 {
29606 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
29607 }
29608
29609 static bool
29610 arm_vector_alignment_reachable (const_tree type, bool is_packed)
29611 {
29612 /* Vectors which aren't in packed structures will not be less aligned than
29613 the natural alignment of their element type, so this is safe. */
29614 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29615 return !is_packed;
29616
29617 return default_builtin_vector_alignment_reachable (type, is_packed);
29618 }
29619
29620 static bool
29621 arm_builtin_support_vector_misalignment (enum machine_mode mode,
29622 const_tree type, int misalignment,
29623 bool is_packed)
29624 {
29625 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
29626 {
29627 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
29628
29629 if (is_packed)
29630 return align == 1;
29631
29632 /* If the misalignment is unknown, we should be able to handle the access
29633 so long as it is not to a member of a packed data structure. */
29634 if (misalignment == -1)
29635 return true;
29636
29637 /* Return true if the misalignment is a multiple of the natural alignment
29638 of the vector's element type. This is probably always going to be
29639 true in practice, since we've already established that this isn't a
29640 packed access. */
29641 return ((misalignment % align) == 0);
29642 }
29643
29644 return default_builtin_support_vector_misalignment (mode, type, misalignment,
29645 is_packed);
29646 }
29647
29648 static void
29649 arm_conditional_register_usage (void)
29650 {
29651 int regno;
29652
29653 if (TARGET_THUMB1 && optimize_size)
29654 {
29655 /* When optimizing for size on Thumb-1, it's better not
29656 to use the HI regs, because of the overhead of
29657 stacking them. */
29658 for (regno = FIRST_HI_REGNUM;
29659 regno <= LAST_HI_REGNUM; ++regno)
29660 fixed_regs[regno] = call_used_regs[regno] = 1;
29661 }
29662
29663 /* The link register can be clobbered by any branch insn,
29664 but we have no way to track that at present, so mark
29665 it as unavailable. */
29666 if (TARGET_THUMB1)
29667 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
29668
29669 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
29670 {
29671 /* VFPv3 registers are disabled when earlier VFP
29672 versions are selected due to the definition of
29673 LAST_VFP_REGNUM. */
29674 for (regno = FIRST_VFP_REGNUM;
29675 regno <= LAST_VFP_REGNUM; ++ regno)
29676 {
29677 fixed_regs[regno] = 0;
29678 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
29679 || regno >= FIRST_VFP_REGNUM + 32;
29680 }
29681 }
29682
29683 if (TARGET_REALLY_IWMMXT)
29684 {
29685 regno = FIRST_IWMMXT_GR_REGNUM;
29686 /* The 2002/10/09 revision of the XScale ABI has wCG0
29687 and wCG1 as call-preserved registers. The 2002/11/21
29688 revision changed this so that all wCG registers are
29689 scratch registers. */
29690 for (regno = FIRST_IWMMXT_GR_REGNUM;
29691 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
29692 fixed_regs[regno] = 0;
29693 /* The XScale ABI has wR0 - wR9 as scratch registers,
29694 the rest as call-preserved registers. */
29695 for (regno = FIRST_IWMMXT_REGNUM;
29696 regno <= LAST_IWMMXT_REGNUM; ++ regno)
29697 {
29698 fixed_regs[regno] = 0;
29699 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
29700 }
29701 }
29702
29703 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
29704 {
29705 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29706 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
29707 }
29708 else if (TARGET_APCS_STACK)
29709 {
29710 fixed_regs[10] = 1;
29711 call_used_regs[10] = 1;
29712 }
29713 /* -mcaller-super-interworking reserves r11 for calls to
29714 _interwork_r11_call_via_rN(). Making the register global
29715 is an easy way of ensuring that it remains valid for all
29716 calls. */
29717 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
29718 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
29719 {
29720 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29721 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29722 if (TARGET_CALLER_INTERWORKING)
29723 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
29724 }
29725 SUBTARGET_CONDITIONAL_REGISTER_USAGE
29726 }
29727
29728 static reg_class_t
29729 arm_preferred_rename_class (reg_class_t rclass)
29730 {
29731 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
29732 using GENERAL_REGS. During the register rename pass, preferring LO_REGS
29733 can therefore reduce code size. */
29734 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
29735 return LO_REGS;
29736 else
29737 return NO_REGS;
29738 }
29739
29740 /* Compute the attribute "length" of insn "*push_multi".
29741 So this function MUST be kept in sync with that insn pattern. */
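/* Roughly speaking, for Thumb-2: a push touching only the low registers
   and LR, e.g. "push {r4-r7, lr}", fits the 16-bit encoding and has
   length 2, whereas pushing any other high register, e.g.
   "push {r4, r8, lr}", forces the 32-bit encoding and length 4.  */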
29742 int
29743 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
29744 {
29745 int i, regno, hi_reg;
29746 int num_saves = XVECLEN (parallel_op, 0);
29747
29748 /* ARM mode. */
29749 if (TARGET_ARM)
29750 return 4;
29751 /* Thumb1 mode. */
29752 if (TARGET_THUMB1)
29753 return 2;
29754
29755 /* Thumb2 mode. */
29756 regno = REGNO (first_op);
29757 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29758 for (i = 1; i < num_saves && !hi_reg; i++)
29759 {
29760 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
29761 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
29762 }
29763
29764 if (!hi_reg)
29765 return 2;
29766 return 4;
29767 }
29768
29769 /* Compute the number of instructions emitted by output_move_double. */
29770 int
29771 arm_count_output_move_double_insns (rtx *operands)
29772 {
29773 int count;
29774 rtx ops[2];
29775 /* output_move_double may modify the operands array, so call it
29776 here on a copy of the array. */
29777 ops[0] = operands[0];
29778 ops[1] = operands[1];
29779 output_move_double (ops, false, &count);
29780 return count;
29781 }
29782
29783 int
29784 vfp3_const_double_for_fract_bits (rtx operand)
29785 {
29786 REAL_VALUE_TYPE r0;
29787
29788 if (!CONST_DOUBLE_P (operand))
29789 return 0;
29790
29791 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29792 if (exact_real_inverse (DFmode, &r0))
29793 {
29794 if (exact_real_truncate (DFmode, &r0))
29795 {
29796 HOST_WIDE_INT value = real_to_integer (&r0);
29797 value = value & 0xffffffff;
29798 if ((value != 0) && ( (value & (value - 1)) == 0))
29799 return int_log2 (value);
29800 }
29801 }
29802 return 0;
29803 }
29804
29805 int
29806 vfp3_const_double_for_bits (rtx operand)
29807 {
29808 REAL_VALUE_TYPE r0;
29809
29810 if (!CONST_DOUBLE_P (operand))
29811 return 0;
29812
29813 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
29814 if (exact_real_truncate (DFmode, &r0))
29815 {
29816 HOST_WIDE_INT value = real_to_integer (&r0);
29817 value = value & 0xffffffff;
29818 if ((value != 0) && ( (value & (value - 1)) == 0))
29819 return int_log2 (value);
29820 }
29821
29822 return 0;
29823 }
29824 \f
29825 /* Emit a memory barrier around an atomic sequence according to MODEL. */
29826
29827 static void
29828 arm_pre_atomic_barrier (enum memmodel model)
29829 {
29830 if (need_atomic_barrier_p (model, true))
29831 emit_insn (gen_memory_barrier ());
29832 }
29833
29834 static void
29835 arm_post_atomic_barrier (enum memmodel model)
29836 {
29837 if (need_atomic_barrier_p (model, false))
29838 emit_insn (gen_memory_barrier ());
29839 }
29840
29841 /* Emit the load-exclusive and store-exclusive instructions.
29842 Use acquire and release versions if necessary. */
29843
29844 static void
29845 arm_emit_load_exclusive (enum machine_mode mode, rtx rval, rtx mem, bool acq)
29846 {
29847 rtx (*gen) (rtx, rtx);
29848
29849 if (acq)
29850 {
29851 switch (mode)
29852 {
29853 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
29854 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
29855 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
29856 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
29857 default:
29858 gcc_unreachable ();
29859 }
29860 }
29861 else
29862 {
29863 switch (mode)
29864 {
29865 case QImode: gen = gen_arm_load_exclusiveqi; break;
29866 case HImode: gen = gen_arm_load_exclusivehi; break;
29867 case SImode: gen = gen_arm_load_exclusivesi; break;
29868 case DImode: gen = gen_arm_load_exclusivedi; break;
29869 default:
29870 gcc_unreachable ();
29871 }
29872 }
29873
29874 emit_insn (gen (rval, mem));
29875 }
29876
29877 static void
29878 arm_emit_store_exclusive (enum machine_mode mode, rtx bval, rtx rval,
29879 rtx mem, bool rel)
29880 {
29881 rtx (*gen) (rtx, rtx, rtx);
29882
29883 if (rel)
29884 {
29885 switch (mode)
29886 {
29887 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
29888 case HImode: gen = gen_arm_store_release_exclusivehi; break;
29889 case SImode: gen = gen_arm_store_release_exclusivesi; break;
29890 case DImode: gen = gen_arm_store_release_exclusivedi; break;
29891 default:
29892 gcc_unreachable ();
29893 }
29894 }
29895 else
29896 {
29897 switch (mode)
29898 {
29899 case QImode: gen = gen_arm_store_exclusiveqi; break;
29900 case HImode: gen = gen_arm_store_exclusivehi; break;
29901 case SImode: gen = gen_arm_store_exclusivesi; break;
29902 case DImode: gen = gen_arm_store_exclusivedi; break;
29903 default:
29904 gcc_unreachable ();
29905 }
29906 }
29907
29908 emit_insn (gen (bval, rval, mem));
29909 }
29910
29911 /* Emit the jump pattern INSN and mark it as unlikely to be taken. */
29912
29913 static void
29914 emit_unlikely_jump (rtx insn)
29915 {
29916 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
29917
29918 insn = emit_jump_insn (insn);
29919 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
29920 }
29921
29922 /* Expand a compare and swap pattern. */
29923
29924 void
29925 arm_expand_compare_and_swap (rtx operands[])
29926 {
29927 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
29928 enum machine_mode mode;
29929 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
29930
29931 bval = operands[0];
29932 rval = operands[1];
29933 mem = operands[2];
29934 oldval = operands[3];
29935 newval = operands[4];
29936 is_weak = operands[5];
29937 mod_s = operands[6];
29938 mod_f = operands[7];
29939 mode = GET_MODE (mem);
29940
29941 /* Normally the succ memory model must be stronger than fail, but in the
29942 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
29943 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
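/* In other words, for a call such as
   __atomic_compare_exchange_n (..., __ATOMIC_RELEASE, __ATOMIC_ACQUIRE)
   the success ordering is promoted to ACQ_REL below, provided the target
   has the acquire/release instructions (a sketch of the intent only).  */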
29944
29945 if (TARGET_HAVE_LDACQ
29946 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
29947 && INTVAL (mod_s) == MEMMODEL_RELEASE)
29948 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
29949
29950 switch (mode)
29951 {
29952 case QImode:
29953 case HImode:
29954 /* For narrow modes, we're going to perform the comparison in SImode,
29955 so do the zero-extension now. */
29956 rval = gen_reg_rtx (SImode);
29957 oldval = convert_modes (SImode, mode, oldval, true);
29958 /* FALLTHRU */
29959
29960 case SImode:
29961 /* Force the value into a register if needed. We waited until after
29962 the zero-extension above to do this properly. */
29963 if (!arm_add_operand (oldval, SImode))
29964 oldval = force_reg (SImode, oldval);
29965 break;
29966
29967 case DImode:
29968 if (!cmpdi_operand (oldval, mode))
29969 oldval = force_reg (mode, oldval);
29970 break;
29971
29972 default:
29973 gcc_unreachable ();
29974 }
29975
29976 switch (mode)
29977 {
29978 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
29979 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
29980 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
29981 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
29982 default:
29983 gcc_unreachable ();
29984 }
29985
29986 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
29987
29988 if (mode == QImode || mode == HImode)
29989 emit_move_insn (operands[1], gen_lowpart (mode, rval));
29990
29991 /* In all cases, we arrange for success to be signaled by Z set.
29992 This arrangement allows for the boolean result to be used directly
29993 in a subsequent branch, post optimization. */
29994 x = gen_rtx_REG (CCmode, CC_REGNUM);
29995 x = gen_rtx_EQ (SImode, x, const0_rtx);
29996 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
29997 }
29998
29999 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
30000 another memory store between the load-exclusive and store-exclusive can
30001 reset the monitor from Exclusive to Open state. This means we must wait
30002 until after reload to split the pattern, lest we get a register spill in
30003 the middle of the atomic sequence. */
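/* For reference, the split below boils down to a loop of roughly this
   shape for a strong SImode compare-and-swap (barriers and the
   acquire/release instruction variants omitted for brevity):

     1: ldrex   rval, [mem]
        cmp     rval, oldval
        bne     2f
        strex   scratch, newval, [mem]
        cmp     scratch, #0
        bne     1b
     2:                                                                 */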
30004
30005 void
30006 arm_split_compare_and_swap (rtx operands[])
30007 {
30008 rtx rval, mem, oldval, newval, scratch;
30009 enum machine_mode mode;
30010 enum memmodel mod_s, mod_f;
30011 bool is_weak;
30012 rtx label1, label2, x, cond;
30013
30014 rval = operands[0];
30015 mem = operands[1];
30016 oldval = operands[2];
30017 newval = operands[3];
30018 is_weak = (operands[4] != const0_rtx);
30019 mod_s = (enum memmodel) INTVAL (operands[5]);
30020 mod_f = (enum memmodel) INTVAL (operands[6]);
30021 scratch = operands[7];
30022 mode = GET_MODE (mem);
30023
30024 bool use_acquire = TARGET_HAVE_LDACQ
30025 && !(mod_s == MEMMODEL_RELAXED
30026 || mod_s == MEMMODEL_CONSUME
30027 || mod_s == MEMMODEL_RELEASE);
30028
30029 bool use_release = TARGET_HAVE_LDACQ
30030 && !(mod_s == MEMMODEL_RELAXED
30031 || mod_s == MEMMODEL_CONSUME
30032 || mod_s == MEMMODEL_ACQUIRE);
30033
30034 /* Checks whether a barrier is needed and emits one accordingly. */
30035 if (!(use_acquire || use_release))
30036 arm_pre_atomic_barrier (mod_s);
30037
30038 label1 = NULL_RTX;
30039 if (!is_weak)
30040 {
30041 label1 = gen_label_rtx ();
30042 emit_label (label1);
30043 }
30044 label2 = gen_label_rtx ();
30045
30046 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
30047
30048 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
30049 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30050 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30051 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
30052 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30053
30054 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
30055
30056 /* Weak or strong, we want EQ to be true for success, so that we
30057 match the flags that we got from the compare above. */
30058 cond = gen_rtx_REG (CCmode, CC_REGNUM);
30059 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
30060 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
30061
30062 if (!is_weak)
30063 {
30064 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30065 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30066 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
30067 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
30068 }
30069
30070 if (mod_f != MEMMODEL_RELAXED)
30071 emit_label (label2);
30072
30073 /* Checks whether a barrier is needed and emits one accordingly. */
30074 if (!(use_acquire || use_release))
30075 arm_post_atomic_barrier (mod_s);
30076
30077 if (mod_f == MEMMODEL_RELAXED)
30078 emit_label (label2);
30079 }
30080
30081 void
30082 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
30083 rtx value, rtx model_rtx, rtx cond)
30084 {
30085 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
30086 enum machine_mode mode = GET_MODE (mem);
30087 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
30088 rtx label, x;
30089
30090 bool use_acquire = TARGET_HAVE_LDACQ
30091 && !(model == MEMMODEL_RELAXED
30092 || model == MEMMODEL_CONSUME
30093 || model == MEMMODEL_RELEASE);
30094
30095 bool use_release = TARGET_HAVE_LDACQ
30096 && !(model == MEMMODEL_RELAXED
30097 || model == MEMMODEL_CONSUME
30098 || model == MEMMODEL_ACQUIRE);
30099
30100 /* Checks whether a barrier is needed and emits one accordingly. */
30101 if (!(use_acquire || use_release))
30102 arm_pre_atomic_barrier (model);
30103
30104 label = gen_label_rtx ();
30105 emit_label (label);
30106
30107 if (new_out)
30108 new_out = gen_lowpart (wmode, new_out);
30109 if (old_out)
30110 old_out = gen_lowpart (wmode, old_out);
30111 else
30112 old_out = new_out;
30113 value = simplify_gen_subreg (wmode, value, mode, 0);
30114
30115 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
30116
30117 switch (code)
30118 {
30119 case SET:
30120 new_out = value;
30121 break;
30122
30123 case NOT:
30124 x = gen_rtx_AND (wmode, old_out, value);
30125 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30126 x = gen_rtx_NOT (wmode, new_out);
30127 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30128 break;
30129
30130 case MINUS:
30131 if (CONST_INT_P (value))
30132 {
30133 value = GEN_INT (-INTVAL (value));
30134 code = PLUS;
30135 }
30136 /* FALLTHRU */
30137
30138 case PLUS:
30139 if (mode == DImode)
30140 {
30141 /* DImode plus/minus need to clobber flags. */
30142 /* The adddi3 and subdi3 patterns are incorrectly written so that
30143 they require matching operands, even when we could easily support
30144 three operands. Thankfully, this can be fixed up post-splitting,
30145 as the individual add+adc patterns do accept three operands and
30146 post-reload cprop can make these moves go away. */
30147 emit_move_insn (new_out, old_out);
30148 if (code == PLUS)
30149 x = gen_adddi3 (new_out, new_out, value);
30150 else
30151 x = gen_subdi3 (new_out, new_out, value);
30152 emit_insn (x);
30153 break;
30154 }
30155 /* FALLTHRU */
30156
30157 default:
30158 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
30159 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
30160 break;
30161 }
30162
30163 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
30164 use_release);
30165
30166 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
30167 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
30168
30169 /* Checks whether a barrier is needed and emits one accordingly. */
30170 if (!(use_acquire || use_release))
30171 arm_post_atomic_barrier (model);
30172 }
30173 \f
30174 #define MAX_VECT_LEN 16
30175
30176 struct expand_vec_perm_d
30177 {
30178 rtx target, op0, op1;
30179 unsigned char perm[MAX_VECT_LEN];
30180 enum machine_mode vmode;
30181 unsigned char nelt;
30182 bool one_vector_p;
30183 bool testing_p;
30184 };
30185
30186 /* Generate a variable permutation. */
30187
30188 static void
30189 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
30190 {
30191 enum machine_mode vmode = GET_MODE (target);
30192 bool one_vector_p = rtx_equal_p (op0, op1);
30193
30194 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
30195 gcc_checking_assert (GET_MODE (op0) == vmode);
30196 gcc_checking_assert (GET_MODE (op1) == vmode);
30197 gcc_checking_assert (GET_MODE (sel) == vmode);
30198 gcc_checking_assert (TARGET_NEON);
30199
30200 if (one_vector_p)
30201 {
30202 if (vmode == V8QImode)
30203 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
30204 else
30205 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
30206 }
30207 else
30208 {
30209 rtx pair;
30210
30211 if (vmode == V8QImode)
30212 {
30213 pair = gen_reg_rtx (V16QImode);
30214 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
30215 pair = gen_lowpart (TImode, pair);
30216 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
30217 }
30218 else
30219 {
30220 pair = gen_reg_rtx (OImode);
30221 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
30222 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
30223 }
30224 }
30225 }
30226
30227 void
30228 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
30229 {
30230 enum machine_mode vmode = GET_MODE (target);
30231 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
30232 bool one_vector_p = rtx_equal_p (op0, op1);
30233 rtx rmask[MAX_VECT_LEN], mask;
30234
30235 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30236 numbering of elements for big-endian, we must reverse the order. */
30237 gcc_checking_assert (!BYTES_BIG_ENDIAN);
30238
30239 /* The VTBL instruction does not use a modulo index, so we must take care
30240 of that ourselves. */
30241 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
30242 for (i = 0; i < nelt; ++i)
30243 rmask[i] = mask;
30244 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
30245 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
30246
30247 arm_expand_vec_perm_1 (target, op0, op1, sel);
30248 }
30249
30250 /* Generate or test for an insn that supports a constant permutation. */
30251
30252 /* Recognize patterns for the VUZP insns. */
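/* For example, with two V8QImode operands the even-element selector
   {0, 2, 4, 6, 8, 10, 12, 14} (odd == 0) and the odd-element selector
   {1, 3, 5, 7, 9, 11, 13, 15} (odd == 1) are both matched here and
   mapped onto a single VUZP instruction.  */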
30253
30254 static bool
30255 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
30256 {
30257 unsigned int i, odd, mask, nelt = d->nelt;
30258 rtx out0, out1, in0, in1, x;
30259 rtx (*gen)(rtx, rtx, rtx, rtx);
30260
30261 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30262 return false;
30263
30264 /* Note that these are little-endian tests. Adjust for big-endian later. */
30265 if (d->perm[0] == 0)
30266 odd = 0;
30267 else if (d->perm[0] == 1)
30268 odd = 1;
30269 else
30270 return false;
30271 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30272
30273 for (i = 0; i < nelt; i++)
30274 {
30275 unsigned elt = (i * 2 + odd) & mask;
30276 if (d->perm[i] != elt)
30277 return false;
30278 }
30279
30280 /* Success! */
30281 if (d->testing_p)
30282 return true;
30283
30284 switch (d->vmode)
30285 {
30286 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
30287 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
30288 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
30289 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
30290 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
30291 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
30292 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
30293 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
30294 default:
30295 gcc_unreachable ();
30296 }
30297
30298 in0 = d->op0;
30299 in1 = d->op1;
30300 if (BYTES_BIG_ENDIAN)
30301 {
30302 x = in0, in0 = in1, in1 = x;
30303 odd = !odd;
30304 }
30305
30306 out0 = d->target;
30307 out1 = gen_reg_rtx (d->vmode);
30308 if (odd)
30309 x = out0, out0 = out1, out1 = x;
30310
30311 emit_insn (gen (out0, in0, in1, out1));
30312 return true;
30313 }
30314
30315 /* Recognize patterns for the VZIP insns. */
30316
30317 static bool
30318 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
30319 {
30320 unsigned int i, high, mask, nelt = d->nelt;
30321 rtx out0, out1, in0, in1, x;
30322 rtx (*gen)(rtx, rtx, rtx, rtx);
30323
30324 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30325 return false;
30326
30327 /* Note that these are little-endian tests. Adjust for big-endian later. */
30328 high = nelt / 2;
30329 if (d->perm[0] == high)
30330 ;
30331 else if (d->perm[0] == 0)
30332 high = 0;
30333 else
30334 return false;
30335 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30336
30337 for (i = 0; i < nelt / 2; i++)
30338 {
30339 unsigned elt = (i + high) & mask;
30340 if (d->perm[i * 2] != elt)
30341 return false;
30342 elt = (elt + nelt) & mask;
30343 if (d->perm[i * 2 + 1] != elt)
30344 return false;
30345 }
30346
30347 /* Success! */
30348 if (d->testing_p)
30349 return true;
30350
30351 switch (d->vmode)
30352 {
30353 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
30354 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
30355 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
30356 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
30357 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
30358 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
30359 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
30360 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
30361 default:
30362 gcc_unreachable ();
30363 }
30364
30365 in0 = d->op0;
30366 in1 = d->op1;
30367 if (BYTES_BIG_ENDIAN)
30368 {
30369 x = in0, in0 = in1, in1 = x;
30370 high = !high;
30371 }
30372
30373 out0 = d->target;
30374 out1 = gen_reg_rtx (d->vmode);
30375 if (high)
30376 x = out0, out0 = out1, out1 = x;
30377
30378 emit_insn (gen (out0, in0, in1, out1));
30379 return true;
30380 }
30381
30382 /* Recognize patterns for the VREV insns. */
30383
30384 static bool
30385 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
30386 {
30387 unsigned int i, j, diff, nelt = d->nelt;
30388 rtx (*gen)(rtx, rtx, rtx);
30389
30390 if (!d->one_vector_p)
30391 return false;
30392
30393 diff = d->perm[0];
30394 switch (diff)
30395 {
30396 case 7:
30397 switch (d->vmode)
30398 {
30399 case V16QImode: gen = gen_neon_vrev64v16qi; break;
30400 case V8QImode: gen = gen_neon_vrev64v8qi; break;
30401 default:
30402 return false;
30403 }
30404 break;
30405 case 3:
30406 switch (d->vmode)
30407 {
30408 case V16QImode: gen = gen_neon_vrev32v16qi; break;
30409 case V8QImode: gen = gen_neon_vrev32v8qi; break;
30410 case V8HImode: gen = gen_neon_vrev64v8hi; break;
30411 case V4HImode: gen = gen_neon_vrev64v4hi; break;
30412 default:
30413 return false;
30414 }
30415 break;
30416 case 1:
30417 switch (d->vmode)
30418 {
30419 case V16QImode: gen = gen_neon_vrev16v16qi; break;
30420 case V8QImode: gen = gen_neon_vrev16v8qi; break;
30421 case V8HImode: gen = gen_neon_vrev32v8hi; break;
30422 case V4HImode: gen = gen_neon_vrev32v4hi; break;
30423 case V4SImode: gen = gen_neon_vrev64v4si; break;
30424 case V2SImode: gen = gen_neon_vrev64v2si; break;
30425 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
30426 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
30427 default:
30428 return false;
30429 }
30430 break;
30431 default:
30432 return false;
30433 }
30434
30435 for (i = 0; i < nelt ; i += diff + 1)
30436 for (j = 0; j <= diff; j += 1)
30437 {
30438 /* This is guaranteed to be true as the value of diff
30439 is 7, 3 or 1 and we should have enough elements in the
30440 queue to generate this. Getting a vector mask whose
30441 diff is anything other than these values implies that
30442 something is wrong by the time we get here. */
30443 gcc_assert (i + j < nelt);
30444 if (d->perm[i + j] != i + diff - j)
30445 return false;
30446 }
30447
30448 /* Success! */
30449 if (d->testing_p)
30450 return true;
30451
30452 /* ??? The third operand is an artifact of the builtin infrastructure
30453 and is ignored by the actual instruction. */
30454 emit_insn (gen (d->target, d->op0, const0_rtx));
30455 return true;
30456 }
30457
30458 /* Recognize patterns for the VTRN insns. */
30459
30460 static bool
30461 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
30462 {
30463 unsigned int i, odd, mask, nelt = d->nelt;
30464 rtx out0, out1, in0, in1, x;
30465 rtx (*gen)(rtx, rtx, rtx, rtx);
30466
30467 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
30468 return false;
30469
30470 /* Note that these are little-endian tests. Adjust for big-endian later. */
30471 if (d->perm[0] == 0)
30472 odd = 0;
30473 else if (d->perm[0] == 1)
30474 odd = 1;
30475 else
30476 return false;
30477 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
30478
30479 for (i = 0; i < nelt; i += 2)
30480 {
30481 if (d->perm[i] != i + odd)
30482 return false;
30483 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
30484 return false;
30485 }
30486
30487 /* Success! */
30488 if (d->testing_p)
30489 return true;
30490
30491 switch (d->vmode)
30492 {
30493 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
30494 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
30495 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
30496 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
30497 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
30498 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
30499 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
30500 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
30501 default:
30502 gcc_unreachable ();
30503 }
30504
30505 in0 = d->op0;
30506 in1 = d->op1;
30507 if (BYTES_BIG_ENDIAN)
30508 {
30509 x = in0, in0 = in1, in1 = x;
30510 odd = !odd;
30511 }
30512
30513 out0 = d->target;
30514 out1 = gen_reg_rtx (d->vmode);
30515 if (odd)
30516 x = out0, out0 = out1, out1 = x;
30517
30518 emit_insn (gen (out0, in0, in1, out1));
30519 return true;
30520 }
30521
30522 /* Recognize patterns for the VEXT insns. */
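/* For example, with two V4SImode operands a selector of {1, 2, 3, 4}
   describes a contiguous window starting at element 1 of the
   concatenated inputs, so it is matched here with location == 1 and
   emitted as a single VEXT.  */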
30523
30524 static bool
30525 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
30526 {
30527 unsigned int i, nelt = d->nelt;
30528 rtx (*gen) (rtx, rtx, rtx, rtx);
30529 rtx offset;
30530
30531 unsigned int location;
30532
30533 unsigned int next = d->perm[0] + 1;
30534
30535 /* TODO: Handle GCC's numbering of elements for big-endian. */
30536 if (BYTES_BIG_ENDIAN)
30537 return false;
30538
30539 /* Check if the extracted indexes are increasing by one. */
30540 for (i = 1; i < nelt; next++, i++)
30541 {
30542 /* If we hit the most significant element of the 2nd vector in
30543 the previous iteration, no need to test further. */
30544 if (next == 2 * nelt)
30545 return false;
30546
30547 /* If we are operating on only one vector, it could be a
30548 rotation. If there are only two elements of size < 64, let
30549 arm_evpc_neon_vrev catch it. */
30550 if (d->one_vector_p && (next == nelt))
30551 {
30552 if ((nelt == 2) && (d->vmode != V2DImode))
30553 return false;
30554 else
30555 next = 0;
30556 }
30557
30558 if (d->perm[i] != next)
30559 return false;
30560 }
30561
30562 location = d->perm[0];
30563
30564 switch (d->vmode)
30565 {
30566 case V16QImode: gen = gen_neon_vextv16qi; break;
30567 case V8QImode: gen = gen_neon_vextv8qi; break;
30568 case V4HImode: gen = gen_neon_vextv4hi; break;
30569 case V8HImode: gen = gen_neon_vextv8hi; break;
30570 case V2SImode: gen = gen_neon_vextv2si; break;
30571 case V4SImode: gen = gen_neon_vextv4si; break;
30572 case V2SFmode: gen = gen_neon_vextv2sf; break;
30573 case V4SFmode: gen = gen_neon_vextv4sf; break;
30574 case V2DImode: gen = gen_neon_vextv2di; break;
30575 default:
30576 return false;
30577 }
30578
30579 /* Success! */
30580 if (d->testing_p)
30581 return true;
30582
30583 offset = GEN_INT (location);
30584 emit_insn (gen (d->target, d->op0, d->op1, offset));
30585 return true;
30586 }
30587
30588 /* The NEON VTBL instruction is a fully variable permutation that's even
30589 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
30590 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
30591 can do slightly better by expanding this as a constant where we don't
30592 have to apply a mask. */
30593
30594 static bool
30595 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
30596 {
30597 rtx rperm[MAX_VECT_LEN], sel;
30598 enum machine_mode vmode = d->vmode;
30599 unsigned int i, nelt = d->nelt;
30600
30601 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
30602 numbering of elements for big-endian, we must reverse the order. */
30603 if (BYTES_BIG_ENDIAN)
30604 return false;
30605
30606 if (d->testing_p)
30607 return true;
30608
30609 /* Generic code will try constant permutation twice: once with the
30610 original mode and again with the elements lowered to QImode.
30611 So wait and don't do the selector expansion ourselves. */
30612 if (vmode != V8QImode && vmode != V16QImode)
30613 return false;
30614
30615 for (i = 0; i < nelt; ++i)
30616 rperm[i] = GEN_INT (d->perm[i]);
30617 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
30618 sel = force_reg (vmode, sel);
30619
30620 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
30621 return true;
30622 }
30623
30624 static bool
30625 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
30626 {
30627 /* Check if the input mask matches vext before reordering the
30628 operands. */
30629 if (TARGET_NEON)
30630 if (arm_evpc_neon_vext (d))
30631 return true;
30632
30633 /* The pattern matching functions above are written to look for a small
30634 number to begin the sequence (0, 1, N/2). If we begin with an index
30635 from the second operand, we can swap the operands. */
30636 if (d->perm[0] >= d->nelt)
30637 {
30638 unsigned i, nelt = d->nelt;
30639 rtx x;
30640
30641 for (i = 0; i < nelt; ++i)
30642 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
30643
30644 x = d->op0;
30645 d->op0 = d->op1;
30646 d->op1 = x;
30647 }
30648
30649 if (TARGET_NEON)
30650 {
30651 if (arm_evpc_neon_vuzp (d))
30652 return true;
30653 if (arm_evpc_neon_vzip (d))
30654 return true;
30655 if (arm_evpc_neon_vrev (d))
30656 return true;
30657 if (arm_evpc_neon_vtrn (d))
30658 return true;
30659 return arm_evpc_neon_vtbl (d);
30660 }
30661 return false;
30662 }
30663
30664 /* Expand a vec_perm_const pattern. */
30665
30666 bool
30667 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
30668 {
30669 struct expand_vec_perm_d d;
30670 int i, nelt, which;
30671
30672 d.target = target;
30673 d.op0 = op0;
30674 d.op1 = op1;
30675
30676 d.vmode = GET_MODE (target);
30677 gcc_assert (VECTOR_MODE_P (d.vmode));
30678 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30679 d.testing_p = false;
30680
30681 for (i = which = 0; i < nelt; ++i)
30682 {
30683 rtx e = XVECEXP (sel, 0, i);
30684 int ei = INTVAL (e) & (2 * nelt - 1);
30685 which |= (ei < nelt ? 1 : 2);
30686 d.perm[i] = ei;
30687 }
30688
30689 switch (which)
30690 {
30691 default:
30692 gcc_unreachable();
30693
30694 case 3:
30695 d.one_vector_p = false;
30696 if (!rtx_equal_p (op0, op1))
30697 break;
30698
30699 /* The elements of PERM do not suggest that only the first operand
30700 is used, but both operands are identical. Allow easier matching
30701 of the permutation by folding the permutation into the single
30702 input vector. */
30703 /* FALLTHRU */
30704 case 2:
30705 for (i = 0; i < nelt; ++i)
30706 d.perm[i] &= nelt - 1;
30707 d.op0 = op1;
30708 d.one_vector_p = true;
30709 break;
30710
30711 case 1:
30712 d.op1 = op0;
30713 d.one_vector_p = true;
30714 break;
30715 }
30716
30717 return arm_expand_vec_perm_const_1 (&d);
30718 }
30719
30720 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
30721
30722 static bool
30723 arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
30724 const unsigned char *sel)
30725 {
30726 struct expand_vec_perm_d d;
30727 unsigned int i, nelt, which;
30728 bool ret;
30729
30730 d.vmode = vmode;
30731 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
30732 d.testing_p = true;
30733 memcpy (d.perm, sel, nelt);
30734
30735 /* Categorize the set of elements in the selector. */
30736 for (i = which = 0; i < nelt; ++i)
30737 {
30738 unsigned char e = d.perm[i];
30739 gcc_assert (e < 2 * nelt);
30740 which |= (e < nelt ? 1 : 2);
30741 }
30742
30743 /* For all elements from second vector, fold the elements to first. */
30744 if (which == 2)
30745 for (i = 0; i < nelt; ++i)
30746 d.perm[i] -= nelt;
30747
30748 /* Check whether the mask can be applied to the vector type. */
30749 d.one_vector_p = (which != 3);
30750
30751 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
30752 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
30753 if (!d.one_vector_p)
30754 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
30755
30756 start_sequence ();
30757 ret = arm_expand_vec_perm_const_1 (&d);
30758 end_sequence ();
30759
30760 return ret;
30761 }
30762
30763 bool
30764 arm_autoinc_modes_ok_p (enum machine_mode mode, enum arm_auto_incmodes code)
30765 {
30766 /* On soft-float targets all auto increment forms are ok provided
30767 we have ldrd or the access fits in a single word. */
30768 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
30769 return true;
30770
30771 switch (code)
30772 {
30773 /* Post increment is supported for all instruction forms;
30774 pre decrement is supported for all but the vector forms. */
30775 case ARM_POST_INC:
30776 case ARM_PRE_DEC:
30777 if (VECTOR_MODE_P (mode))
30778 {
30779 if (code != ARM_PRE_DEC)
30780 return true;
30781 else
30782 return false;
30783 }
30784
30785 return true;
30786
30787 case ARM_POST_DEC:
30788 case ARM_PRE_INC:
30789 /* Without LDRD, when the mode size is greater than the
30790 word size there is no point in auto-incrementing
30791 because ldm and stm will not have these forms. */
30792 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
30793 return false;
30794
30795 /* Vector and floating point modes do not support
30796 these auto increment forms. */
30797 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
30798 return false;
30799
30800 return true;
30801
30802 default:
30803 return false;
30804
30805 }
30806
30807 return false;
30808 }
30809
30810 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
30811 on ARM, since we know that shifts by negative amounts are no-ops.
30812 Additionally, the default expansion code is not available or suitable
30813 for post-reload insn splits (this can occur when the register allocator
30814 chooses not to do a shift in NEON).
30815
30816 This function is used in both initial expand and post-reload splits, and
30817 handles all kinds of 64-bit shifts.
30818
30819 Input requirements:
30820 - It is safe for the input and output to be the same register, but
30821 early-clobber rules apply for the shift amount and scratch registers.
30822 - Shift by register requires both scratch registers. In all other cases
30823 the scratch registers may be NULL.
30824 - Ashiftrt by a register also clobbers the CC register. */
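/* As a concrete illustration of the constant case handled below: a
   64-bit left shift by 40 reduces to moving the low input word, shifted
   left by 8, into the high output word and zeroing the low output word
   (roughly "lsl out_hi, in_lo, #8; mov out_lo, #0").  */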
void
arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
			       rtx amount, rtx scratch1, rtx scratch2)
{
  rtx out_high = gen_highpart (SImode, out);
  rtx out_low = gen_lowpart (SImode, out);
  rtx in_high = gen_highpart (SImode, in);
  rtx in_low = gen_lowpart (SImode, in);

  /* Terminology:
	in = the register pair containing the input value.
	out = the destination register pair.
	up = the high- or low-part of each pair.
	down = the opposite part to "up".
     In a shift, we can consider bits to shift from "up"-stream to
     "down"-stream, so in a left-shift "up" is the low-part and "down"
     is the high-part of each register pair.  */

  rtx out_up   = code == ASHIFT ? out_low : out_high;
  rtx out_down = code == ASHIFT ? out_high : out_low;
  rtx in_up   = code == ASHIFT ? in_low : in_high;
  rtx in_down = code == ASHIFT ? in_high : in_low;

  gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
  gcc_assert (out
	      && (REG_P (out) || GET_CODE (out) == SUBREG)
	      && GET_MODE (out) == DImode);
  gcc_assert (in
	      && (REG_P (in) || GET_CODE (in) == SUBREG)
	      && GET_MODE (in) == DImode);
  gcc_assert (amount
	      && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
		   && GET_MODE (amount) == SImode)
		  || CONST_INT_P (amount)));
  gcc_assert (scratch1 == NULL
	      || (GET_CODE (scratch1) == SCRATCH)
	      || (GET_MODE (scratch1) == SImode
		  && REG_P (scratch1)));
  gcc_assert (scratch2 == NULL
	      || (GET_CODE (scratch2) == SCRATCH)
	      || (GET_MODE (scratch2) == SImode
		  && REG_P (scratch2)));
  gcc_assert (!REG_P (out) || !REG_P (amount)
	      || !HARD_REGISTER_P (out)
	      || (REGNO (out) != REGNO (amount)
		  && REGNO (out) + 1 != REGNO (amount)));

  /* Macros to make following code more readable.  */
  #define SUB_32(DEST,SRC) \
	    gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
  #define RSB_32(DEST,SRC) \
	    gen_subsi3 ((DEST), GEN_INT (32), (SRC))
  #define SUB_S_32(DEST,SRC) \
	    gen_addsi3_compare0 ((DEST), (SRC), \
				 GEN_INT (-32))
  #define SET(DEST,SRC) \
	    gen_rtx_SET (SImode, (DEST), (SRC))
  #define SHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
  #define LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
			    SImode, (SRC), (AMOUNT))
  #define REV_LSHIFT(CODE,SRC,AMOUNT) \
	    gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
			    SImode, (SRC), (AMOUNT))
  #define ORR(A,B) \
	    gen_rtx_IOR (SImode, (A), (B))
  #define BRANCH(COND,LABEL) \
	    gen_arm_cond_branch ((LABEL), \
				 gen_rtx_ ## COND (CCmode, cc_reg, \
						   const0_rtx), \
				 cc_reg)

  /* Shifts by register and shifts by constant are handled separately.  */
  if (CONST_INT_P (amount))
    {
      /* We have a shift-by-constant.  */

      /* First, handle out-of-range shift amounts.
	 In both cases we try to match the result that an ARM instruction
	 in a shift-by-register would give.  This helps reduce execution
	 differences between optimization levels, but it won't stop other
	 parts of the compiler doing different things.  This is "undefined"
	 behaviour, in any case.  */
      if (INTVAL (amount) <= 0)
	emit_insn (gen_movdi (out, in));
      else if (INTVAL (amount) >= 64)
	{
	  if (code == ASHIFTRT)
	    {
	      rtx const31_rtx = GEN_INT (31);
	      emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
	      emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
	    }
	  else
	    emit_insn (gen_movdi (out, const0_rtx));
	}
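      /* Thus (descriptive note) an arithmetic right shift by 64 or more
	 leaves both result words equal to in_up >> 31, i.e. copies of the
	 sign bit, while any other out-of-range shift yields zero.  */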

      /* Now handle valid shifts.  */
      else if (INTVAL (amount) < 32)
	{
	  /* Shifts by a constant less than 32.  */
	  rtx reverse_amount = GEN_INT (32 - INTVAL (amount));

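	  /* Illustrative example: a logical right shift by 8 uses
	     reverse_amount == 24 and emits
	       out_down = (in_down >> 8) | (in_up << 24);
	       out_up = in_up >> 8;  */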
	  emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
	  emit_insn (SET (out_down,
			  ORR (REV_LSHIFT (code, in_up, reverse_amount),
			       out_down)));
	  emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
	}
      else
	{
	  /* Shifts by a constant greater than 31.  */
	  rtx adj_amount = GEN_INT (INTVAL (amount) - 32);

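	  /* Illustrative example: an arithmetic right shift by 40 uses
	     adj_amount == 8 and emits
	       out_down = in_up >> 8;
	       out_up = in_up >> 31;   (the sign word)  */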
	  emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
	  if (code == ASHIFTRT)
	    emit_insn (gen_ashrsi3 (out_up, in_up,
				    GEN_INT (31)));
	  else
	    emit_insn (SET (out_up, const0_rtx));
	}
    }
  else
    {
      /* We have a shift-by-register.  */
      rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);

      /* This alternative requires the scratch registers.  */
      gcc_assert (scratch1 && REG_P (scratch1));
      gcc_assert (scratch2 && REG_P (scratch2));

      /* We will need the values "amount-32" and "32-amount" later.
	 Swapping them around now allows the later code to be more general.  */
      switch (code)
	{
	case ASHIFT:
	  emit_insn (SUB_32 (scratch1, amount));
	  emit_insn (RSB_32 (scratch2, amount));
	  break;
	case ASHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  /* Also set CC from amount - 32, so that LT below means
	     amount < 32.  */
	  emit_insn (SUB_S_32 (scratch2, amount));
	  break;
	case LSHIFTRT:
	  emit_insn (RSB_32 (scratch1, amount));
	  emit_insn (SUB_32 (scratch2, amount));
	  break;
	default:
	  gcc_unreachable ();
	}
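      /* Descriptive note: after this switch, for ASHIFT scratch1 holds
	 amount - 32 and scratch2 holds 32 - amount; for ASHIFTRT and
	 LSHIFTRT, scratch1 holds 32 - amount and scratch2 holds
	 amount - 32.  */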

      /* Emit code like this:

	 arithmetic-left:
	 out_down = in_down << amount;
	 out_down = (in_up << (amount - 32)) | out_down;
	 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
	 out_up = in_up << amount;

	 arithmetic-right:
	 out_down = in_down >> amount;
	 out_down = (in_up << (32 - amount)) | out_down;
	 if (amount >= 32)
	   out_down = ((signed)in_up >> (amount - 32)) | out_down;
	 out_up = in_up >> amount;

	 logical-right:
	 out_down = in_down >> amount;
	 out_down = (in_up << (32 - amount)) | out_down;
	 if (amount >= 32)
	   out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
	 out_up = in_up >> amount;

	 The ARM and Thumb2 variants are the same but implemented slightly
	 differently.  If this were only called during expand we could just
	 use the Thumb2 case and let combine do the right thing, but this
	 can also be called from post-reload splitters.  */

      emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));

      if (!TARGET_THUMB2)
	{
	  /* Emit code for ARM mode.  */
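	  /* Illustrative ARM sequence for a logical right shift by register
	     (register names are examples only; the scratch setup shown here
	     is emitted by the switch above):
		 rsb   scratch1, amount, #32
		 sub   scratch2, amount, #32
		 lsr   out_lo, in_lo, amount
		 orr   out_lo, out_lo, in_hi, lsl scratch1
		 orr   out_lo, out_lo, in_hi, lsr scratch2
		 lsr   out_hi, in_hi, amount  */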
	  emit_insn (SET (out_down,
			  ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
	  if (code == ASHIFTRT)
	    {
	      rtx done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
					     out_down)));
	      emit_label (done_label);
	    }
	  else
	    emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
					   out_down)));
	}
      else
	{
	  /* Emit code for Thumb2 mode.
	     Thumb2 can't do shift and or in one insn.  */
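	  /* Illustrative Thumb-2 fragment for a right shift (register names
	     are examples only): the shifted operand cannot be folded into
	     the ORR, so each combining step becomes a shift into a scratch
	     followed by an orr:
		 lsl   scratch1, in_hi, scratch1
		 orr   out_lo, out_lo, scratch1  */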
	  emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
	  emit_insn (gen_iorsi3 (out_down, out_down, scratch1));

	  if (code == ASHIFTRT)
	    {
	      rtx done_label = gen_label_rtx ();
	      emit_jump_insn (BRANCH (LT, done_label));
	      emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
	      emit_insn (SET (out_down, ORR (out_down, scratch2)));
	      emit_label (done_label);
	    }
	  else
	    {
	      emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
	      emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
	    }
	}

      emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
    }

  #undef SUB_32
  #undef RSB_32
  #undef SUB_S_32
  #undef SET
  #undef SHIFT
  #undef LSHIFT
  #undef REV_LSHIFT
  #undef ORR
  #undef BRANCH
}

/* Returns true if *COMPARISON is a comparison operation we can handle,
   and forces its operands into a form that is valid for it.  */
bool
arm_validize_comparison (rtx *comparison, rtx *op1, rtx *op2)
{
  enum rtx_code code = GET_CODE (*comparison);
  int code_int;
  enum machine_mode mode = (GET_MODE (*op1) == VOIDmode)
    ? GET_MODE (*op2) : GET_MODE (*op1);

  gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);

  if (code == UNEQ || code == LTGT)
    return false;
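  /* Descriptive note (added for clarity): UNEQ (unordered or equal) and
     LTGT (ordered and not equal) are rejected here, as neither corresponds
     to a single ARM condition code.  */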

  code_int = (int) code;
  arm_canonicalize_comparison (&code_int, op1, op2, 0);
  PUT_CODE (*comparison, (enum rtx_code) code_int);

  switch (mode)
    {
    case SImode:
      if (!arm_add_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_add_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case DImode:
      if (!cmpdi_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!cmpdi_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;

    case SFmode:
    case DFmode:
      if (!arm_float_compare_operand (*op1, mode))
	*op1 = force_reg (mode, *op1);
      if (!arm_float_compare_operand (*op2, mode))
	*op2 = force_reg (mode, *op2);
      return true;
    default:
      break;
    }

  return false;
}

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
arm_asan_shadow_offset (void)
{
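  /* Descriptive note: AddressSanitizer forms shadow addresses as
     (address >> 3) + offset, so returning 1 << 29 places the ARM shadow
     region at 0x20000000.  */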
  return (unsigned HOST_WIDE_INT) 1 << 29;
}

#include "gt-arm.h"